1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49
/* Default stack-limit check value; -1 presumably means "no limit" unless
   the target configuration overrides it — confirm against target headers.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Maps QI/HI/SI/DImode to slots 0-3; any other mode (e.g. TImode)
   falls through to slot 4.  Matches the 5-entry cost arrays below.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
61
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};
106
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};
151
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};
195
static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};
239
static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};
283
static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};
327
static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};
371
static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};
415
static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};
459
/* Active cost table; overwritten at option-processing time to match the
   selected -mtune CPU.  Defaults to the Pentium table.  */
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  Each m_* constant is a
   one-bit mask keyed by the PROCESSOR_* enum; the x86_* tuning flags
   below OR these together to say which CPUs want each optimization.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
/* Athlon and K8 share most tuning decisions.  */
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.
   NOTE(review): the value is 0 (no CPU set), which seems at odds with this
   Athlon remark — confirm whether the comment belongs to the flag above.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
527
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from the target headers (i386.h).  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
537
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  Order must match the
   hard register numbering, so entries may not be rearranged.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers xmm0-xmm7 */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers mm0-mm7 */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers (64-bit only) */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers xmm8-xmm15 (64-bit only) */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
563
/* The "default" register map used in 32bit mode.  Maps GCC hard register
   numbers to debugger (DBX/stabs) register numbers; -1 marks registers
   with no debugger equivalent.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
576
/* GCC hard register numbers of the six integer argument registers in the
   x86-64 calling convention, in argument order: RDI, RSI, RDX, RCX, R8, R9.  */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* GCC hard register numbers of the integer value-return registers.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
587
/* The "default" register map used in 64bit mode.  GCC hard register
   numbers to debugger register numbers; -1 marks registers with no
   debugger equivalent.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8,9,10,11,12,13,14,15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
599
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
644 numbers.
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
653 */
/* DWARF register numbering for SVR4 targets; see the long discussion
   above for how these numbers were chosen.  -1 marks registers with no
   DWARF equivalent.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
664
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* Number of reusable stack-local scratch slots per function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.
   A linked list of per-function stack slots, keyed by (mode, n).  */

struct stack_local_entry GTY(())
{
  unsigned short mode;			/* machine mode of the slot */
  unsigned short n;			/* slot index within that mode */
  rtx rtl;				/* the MEM rtx for the slot */
  struct stack_local_entry *next;
};
684
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]   \
		 )
   [va_arg registers]  (
		  > to_allocate	      <- FRAME_POINTER
   [frame]	 (
		 )
   [padding2]   /
  */
struct ix86_frame
{
  int nregs;				/* number of saved hard registers */
  int padding1;				/* alignment padding after saved regs */
  int va_arg_size;			/* size of the va_arg register save area */
  HOST_WIDE_INT frame;			/* size of local variables */
  int padding2;				/* alignment padding after locals */
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;		/* total bytes the prologue allocates */
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
724
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
784
/* Forward declarations of static helpers defined later in this file.  */
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
837
838 struct ix86_address
839 {
840 rtx base, index, disp;
841 HOST_WIDE_INT scale;
842 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 };
844
845 static int ix86_decompose_address (rtx, struct ix86_address *);
846 static int ix86_address_cost (rtx);
847 static bool ix86_cannot_force_const_mem (rtx);
848 static rtx ix86_delegitimize_address (rtx);
849
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi (const struct builtin_description *,
852 tree, rtx);
853 static rtx ix86_expand_sse_compare (const struct builtin_description *,
854 tree, rtx);
855 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
857 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
858 static rtx ix86_expand_store_builtin (enum insn_code, tree);
859 static rtx safe_vector_operand (rtx, enum machine_mode);
860 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
861 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
862 enum rtx_code *, enum rtx_code *);
863 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
864 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
865 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
866 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
867 static int ix86_fp_comparison_cost (enum rtx_code code);
868 static unsigned int ix86_select_alt_pic_regnum (void);
869 static int ix86_save_reg (unsigned int, int);
870 static void ix86_compute_frame_layout (struct ix86_frame *);
871 static int ix86_comp_type_attributes (tree, tree);
872 static int ix86_function_regparm (tree, tree);
873 const struct attribute_spec ix86_attribute_table[];
874 static bool ix86_function_ok_for_sibcall (tree, tree);
875 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
876 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
877 static int ix86_value_regno (enum machine_mode);
878 static bool contains_128bit_aligned_vector_p (tree);
879 static bool ix86_ms_bitfield_layout_p (tree);
880 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
881 static int extended_reg_mentioned_1 (rtx *, void *);
882 static bool ix86_rtx_costs (rtx, int, int, int *);
883 static int min_insn_size (rtx);
884 static void k8_avoid_jump_misspredicts (void);
885
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor (rtx, int);
888 #endif
889
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
/* Debug names for the classes above; indexed by enum x86_64_reg_class.
   Note that X86_64_MEMORY_CLASS shares the name "no" with
   X86_64_NO_CLASS.  */
static const char * const x86_64_reg_class_name[] =
{"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* Maximum number of eightbyte classes a single argument can occupy;
   used to size the class arrays passed to classify_argument.  */
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  Filled lazily;
   ext_80387_constants_init records whether it has been initialized.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
927
/* Initialize the GCC target structure.  Each #undef/#define pair below
   overrides a default target hook with the i386-specific implementation
   declared earlier in this file.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* x86 assemblers take no grouping parentheses in expressions.  */
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* x86 has no alignment constraints on data, so the unaligned emitters
   are just the aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

/* The one and only target hook vector for this backend.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1033
1034 /* Sometimes certain combinations of command options do not make
1035 sense on a particular target machine. You can define a macro
1036 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1037 defined, is executed once just after all the command options have
1038 been parsed.
1039
1040 Don't use this macro to turn on various extra optimizations for
1041 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1042
1043 void
override_options(void)1044 override_options (void)
1045 {
1046 int i;
1047 /* Comes from final.c -- no real reason to change it. */
1048 #define MAX_CODE_ALIGN 16
1049
1050 static struct ptt
1051 {
1052 const struct processor_costs *cost; /* Processor costs */
1053 const int target_enable; /* Target flags to enable. */
1054 const int target_disable; /* Target flags to disable. */
1055 const int align_loop; /* Default alignments. */
1056 const int align_loop_max_skip;
1057 const int align_jump;
1058 const int align_jump_max_skip;
1059 const int align_func;
1060 }
1061 const processor_target_table[PROCESSOR_max] =
1062 {
1063 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1064 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1065 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1066 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1067 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1068 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1069 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1070 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1071 };
1072
1073 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1074 static struct pta
1075 {
1076 const char *const name; /* processor name or nickname. */
1077 const enum processor_type processor;
1078 const enum pta_flags
1079 {
1080 PTA_SSE = 1,
1081 PTA_SSE2 = 2,
1082 PTA_SSE3 = 4,
1083 PTA_MMX = 8,
1084 PTA_PREFETCH_SSE = 16,
1085 PTA_3DNOW = 32,
1086 PTA_3DNOW_A = 64,
1087 PTA_64BIT = 128
1088 } flags;
1089 }
1090 const processor_alias_table[] =
1091 {
1092 {"i386", PROCESSOR_I386, 0},
1093 {"i486", PROCESSOR_I486, 0},
1094 {"i586", PROCESSOR_PENTIUM, 0},
1095 {"pentium", PROCESSOR_PENTIUM, 0},
1096 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1097 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1098 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1099 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1100 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1101 {"i686", PROCESSOR_PENTIUMPRO, 0},
1102 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1103 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1104 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1105 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1106 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1107 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1108 | PTA_MMX | PTA_PREFETCH_SSE},
1109 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1110 | PTA_MMX | PTA_PREFETCH_SSE},
1111 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1112 | PTA_MMX | PTA_PREFETCH_SSE},
1113 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1114 | PTA_MMX | PTA_PREFETCH_SSE},
1115 {"k6", PROCESSOR_K6, PTA_MMX},
1116 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1117 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1119 | PTA_3DNOW_A},
1120 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1121 | PTA_3DNOW | PTA_3DNOW_A},
1122 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1123 | PTA_3DNOW_A | PTA_SSE},
1124 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 | PTA_3DNOW_A | PTA_SSE},
1126 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 | PTA_3DNOW_A | PTA_SSE},
1128 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1129 | PTA_SSE | PTA_SSE2 },
1130 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1134 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1135 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1137 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1138 };
1139
1140 int const pta_size = ARRAY_SIZE (processor_alias_table);
1141
1142 /* Set the default values for switches whose default depends on TARGET_64BIT
1143 in case they weren't overwritten by command line options. */
1144 if (TARGET_64BIT)
1145 {
1146 if (flag_omit_frame_pointer == 2)
1147 flag_omit_frame_pointer = 1;
1148 if (flag_asynchronous_unwind_tables == 2)
1149 flag_asynchronous_unwind_tables = 1;
1150 if (flag_pcc_struct_return == 2)
1151 flag_pcc_struct_return = 0;
1152 }
1153 else
1154 {
1155 if (flag_omit_frame_pointer == 2)
1156 flag_omit_frame_pointer = 0;
1157 if (flag_asynchronous_unwind_tables == 2)
1158 flag_asynchronous_unwind_tables = 0;
1159 if (flag_pcc_struct_return == 2)
1160 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1161 }
1162
1163 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1164 SUBTARGET_OVERRIDE_OPTIONS;
1165 #endif
1166
1167 if (!ix86_tune_string && ix86_arch_string)
1168 ix86_tune_string = ix86_arch_string;
1169 if (!ix86_tune_string)
1170 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1171 if (!ix86_arch_string)
1172 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1173
1174 if (ix86_cmodel_string != 0)
1175 {
1176 if (!strcmp (ix86_cmodel_string, "small"))
1177 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1178 else if (flag_pic)
1179 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1180 else if (!strcmp (ix86_cmodel_string, "32"))
1181 ix86_cmodel = CM_32;
1182 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1183 ix86_cmodel = CM_KERNEL;
1184 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1185 ix86_cmodel = CM_MEDIUM;
1186 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1187 ix86_cmodel = CM_LARGE;
1188 else
1189 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1190 }
1191 else
1192 {
1193 ix86_cmodel = CM_32;
1194 if (TARGET_64BIT)
1195 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1196 }
1197 if (ix86_asm_string != 0)
1198 {
1199 if (!strcmp (ix86_asm_string, "intel"))
1200 ix86_asm_dialect = ASM_INTEL;
1201 else if (!strcmp (ix86_asm_string, "att"))
1202 ix86_asm_dialect = ASM_ATT;
1203 else
1204 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1205 }
1206 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1207 error ("code model `%s' not supported in the %s bit mode",
1208 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1209 if (ix86_cmodel == CM_LARGE)
1210 sorry ("code model `large' not supported yet");
1211 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1212 sorry ("%i-bit mode not compiled in",
1213 (target_flags & MASK_64BIT) ? 64 : 32);
1214
1215 for (i = 0; i < pta_size; i++)
1216 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1217 {
1218 ix86_arch = processor_alias_table[i].processor;
1219 /* Default cpu tuning to the architecture. */
1220 ix86_tune = ix86_arch;
1221 if (processor_alias_table[i].flags & PTA_MMX
1222 && !(target_flags_explicit & MASK_MMX))
1223 target_flags |= MASK_MMX;
1224 if (processor_alias_table[i].flags & PTA_3DNOW
1225 && !(target_flags_explicit & MASK_3DNOW))
1226 target_flags |= MASK_3DNOW;
1227 if (processor_alias_table[i].flags & PTA_3DNOW_A
1228 && !(target_flags_explicit & MASK_3DNOW_A))
1229 target_flags |= MASK_3DNOW_A;
1230 if (processor_alias_table[i].flags & PTA_SSE
1231 && !(target_flags_explicit & MASK_SSE))
1232 target_flags |= MASK_SSE;
1233 if (processor_alias_table[i].flags & PTA_SSE2
1234 && !(target_flags_explicit & MASK_SSE2))
1235 target_flags |= MASK_SSE2;
1236 if (processor_alias_table[i].flags & PTA_SSE3
1237 && !(target_flags_explicit & MASK_SSE3))
1238 target_flags |= MASK_SSE3;
1239 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1240 x86_prefetch_sse = true;
1241 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1242 error ("CPU you selected does not support x86-64 instruction set");
1243 break;
1244 }
1245
1246 if (i == pta_size)
1247 error ("bad value (%s) for -march= switch", ix86_arch_string);
1248
1249 for (i = 0; i < pta_size; i++)
1250 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1251 {
1252 ix86_tune = processor_alias_table[i].processor;
1253 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1254 error ("CPU you selected does not support x86-64 instruction set");
1255
1256 /* Intel CPUs have always interpreted SSE prefetch instructions as
1257 NOPs; so, we can enable SSE prefetch instructions even when
1258 -mtune (rather than -march) points us to a processor that has them.
1259 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1260 higher processors. */
1261 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1262 x86_prefetch_sse = true;
1263 break;
1264 }
1265 if (i == pta_size)
1266 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1267
1268 if (optimize_size)
1269 ix86_cost = &size_cost;
1270 else
1271 ix86_cost = processor_target_table[ix86_tune].cost;
1272 target_flags |= processor_target_table[ix86_tune].target_enable;
1273 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1274
1275 /* Arrange to set up i386_stack_locals for all functions. */
1276 init_machine_status = ix86_init_machine_status;
1277
1278 /* Validate -mregparm= value. */
1279 if (ix86_regparm_string)
1280 {
1281 i = atoi (ix86_regparm_string);
1282 if (i < 0 || i > REGPARM_MAX)
1283 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1284 else
1285 ix86_regparm = i;
1286 }
1287 else
1288 if (TARGET_64BIT)
1289 ix86_regparm = REGPARM_MAX;
1290
1291 /* If the user has provided any of the -malign-* options,
1292 warn and use that value only if -falign-* is not set.
1293 Remove this code in GCC 3.2 or later. */
1294 if (ix86_align_loops_string)
1295 {
1296 warning ("-malign-loops is obsolete, use -falign-loops");
1297 if (align_loops == 0)
1298 {
1299 i = atoi (ix86_align_loops_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1302 else
1303 align_loops = 1 << i;
1304 }
1305 }
1306
1307 if (ix86_align_jumps_string)
1308 {
1309 warning ("-malign-jumps is obsolete, use -falign-jumps");
1310 if (align_jumps == 0)
1311 {
1312 i = atoi (ix86_align_jumps_string);
1313 if (i < 0 || i > MAX_CODE_ALIGN)
1314 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1315 else
1316 align_jumps = 1 << i;
1317 }
1318 }
1319
1320 if (ix86_align_funcs_string)
1321 {
1322 warning ("-malign-functions is obsolete, use -falign-functions");
1323 if (align_functions == 0)
1324 {
1325 i = atoi (ix86_align_funcs_string);
1326 if (i < 0 || i > MAX_CODE_ALIGN)
1327 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1328 else
1329 align_functions = 1 << i;
1330 }
1331 }
1332
1333 /* Default align_* from the processor table. */
1334 if (align_loops == 0)
1335 {
1336 align_loops = processor_target_table[ix86_tune].align_loop;
1337 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1338 }
1339 if (align_jumps == 0)
1340 {
1341 align_jumps = processor_target_table[ix86_tune].align_jump;
1342 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1343 }
1344 if (align_functions == 0)
1345 {
1346 align_functions = processor_target_table[ix86_tune].align_func;
1347 }
1348
1349 /* Validate -mpreferred-stack-boundary= value, or provide default.
1350 The default of 128 bits is for Pentium III's SSE __m128, but we
1351 don't want additional code to keep the stack aligned when
1352 optimizing for code size. */
1353 ix86_preferred_stack_boundary = (optimize_size
1354 ? TARGET_64BIT ? 128 : 32
1355 : 128);
1356 if (ix86_preferred_stack_boundary_string)
1357 {
1358 i = atoi (ix86_preferred_stack_boundary_string);
1359 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1360 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1361 TARGET_64BIT ? 4 : 2);
1362 else
1363 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1364 }
1365
1366 /* Validate -mbranch-cost= value, or provide default. */
1367 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1368 if (ix86_branch_cost_string)
1369 {
1370 i = atoi (ix86_branch_cost_string);
1371 if (i < 0 || i > 5)
1372 error ("-mbranch-cost=%d is not between 0 and 5", i);
1373 else
1374 ix86_branch_cost = i;
1375 }
1376
1377 if (ix86_tls_dialect_string)
1378 {
1379 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1380 ix86_tls_dialect = TLS_DIALECT_GNU;
1381 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1382 ix86_tls_dialect = TLS_DIALECT_SUN;
1383 else
1384 error ("bad value (%s) for -mtls-dialect= switch",
1385 ix86_tls_dialect_string);
1386 }
1387
1388 /* Keep nonleaf frame pointers. */
1389 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1390 flag_omit_frame_pointer = 1;
1391
1392 /* If we're doing fast math, we don't care about comparison order
1393 wrt NaNs. This lets us use a shorter comparison sequence. */
1394 if (flag_unsafe_math_optimizations)
1395 target_flags &= ~MASK_IEEE_FP;
1396
1397 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1398 since the insns won't need emulation. */
1399 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1400 target_flags &= ~MASK_NO_FANCY_MATH_387;
1401
1402 /* Turn on SSE2 builtins for -msse3. */
1403 if (TARGET_SSE3)
1404 target_flags |= MASK_SSE2;
1405
1406 /* Turn on SSE builtins for -msse2. */
1407 if (TARGET_SSE2)
1408 target_flags |= MASK_SSE;
1409
1410 if (TARGET_64BIT)
1411 {
1412 if (TARGET_ALIGN_DOUBLE)
1413 error ("-malign-double makes no sense in the 64bit mode");
1414 if (TARGET_RTD)
1415 error ("-mrtd calling convention not supported in the 64bit mode");
1416 /* Enable by default the SSE and MMX builtins. */
1417 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1418 ix86_fpmath = FPMATH_SSE;
1419 }
1420 else
1421 {
1422 ix86_fpmath = FPMATH_387;
1423 /* i386 ABI does not specify red zone. It still makes sense to use it
1424 when programmer takes care to stack from being destroyed. */
1425 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1426 target_flags |= MASK_NO_RED_ZONE;
1427 }
1428
1429 if (ix86_fpmath_string != 0)
1430 {
1431 if (! strcmp (ix86_fpmath_string, "387"))
1432 ix86_fpmath = FPMATH_387;
1433 else if (! strcmp (ix86_fpmath_string, "sse"))
1434 {
1435 if (!TARGET_SSE)
1436 {
1437 warning ("SSE instruction set disabled, using 387 arithmetics");
1438 ix86_fpmath = FPMATH_387;
1439 }
1440 else
1441 ix86_fpmath = FPMATH_SSE;
1442 }
1443 else if (! strcmp (ix86_fpmath_string, "387,sse")
1444 || ! strcmp (ix86_fpmath_string, "sse,387"))
1445 {
1446 if (!TARGET_SSE)
1447 {
1448 warning ("SSE instruction set disabled, using 387 arithmetics");
1449 ix86_fpmath = FPMATH_387;
1450 }
1451 else if (!TARGET_80387)
1452 {
1453 warning ("387 instruction set disabled, using SSE arithmetics");
1454 ix86_fpmath = FPMATH_SSE;
1455 }
1456 else
1457 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1458 }
1459 else
1460 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1461 }
1462
1463 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1464 on by -msse. */
1465 if (TARGET_SSE)
1466 {
1467 target_flags |= MASK_MMX;
1468 x86_prefetch_sse = true;
1469 }
1470
1471 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1472 if (TARGET_3DNOW)
1473 {
1474 target_flags |= MASK_MMX;
1475 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1476 extensions it adds. */
1477 if (x86_3dnow_a & (1 << ix86_arch))
1478 target_flags |= MASK_3DNOW_A;
1479 }
1480 if ((x86_accumulate_outgoing_args & TUNEMASK)
1481 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1482 && !optimize_size)
1483 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1484
1485 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1486 {
1487 char *p;
1488 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1489 p = strchr (internal_label_prefix, 'X');
1490 internal_label_prefix_len = p - internal_label_prefix;
1491 *p = '\0';
1492 }
1493 }
1494
1495 void
optimization_options(int level,int size ATTRIBUTE_UNUSED)1496 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1497 {
1498 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1499 make the problem with not enough registers even worse. */
1500 #ifdef INSN_SCHEDULING
1501 if (level > 1)
1502 flag_schedule_insns = 0;
1503 #endif
1504
1505 /* The default values of these switches depend on the TARGET_64BIT
1506 that is not known at this moment. Mark these values with 2 and
1507 let user the to override these. In case there is no command line option
1508 specifying them, we will set the defaults in override_options. */
1509 if (optimize >= 1)
1510 flag_omit_frame_pointer = 2;
1511 flag_pcc_struct_return = 2;
1512 flag_asynchronous_unwind_tables = 2;
1513 }
1514
/* Table of valid machine attributes.  Terminated by the all-NULL
   sentinel entry; handlers validate placement and argument counts.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* DLL import/export attributes; only available on targets that
     define TARGET_DLLIMPORT_DECL_ATTRIBUTES (handlers live in winnt.c).  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* ms_struct/gcc_struct request MS- or GCC-compatible record layout
     for the annotated type (see ix86_ms_bitfield_layout_p).  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  /* Sentinel.  */
  { NULL,        0, 0, false, false, false, NULL }
};
1539
1540 /* Decide whether we can make a sibling call to a function. DECL is the
1541 declaration of the function being targeted by the call and EXP is the
1542 CALL_EXPR representing the call. */
1543
1544 static bool
ix86_function_ok_for_sibcall(tree decl,tree exp)1545 ix86_function_ok_for_sibcall (tree decl, tree exp)
1546 {
1547 /* If we are generating position-independent code, we cannot sibcall
1548 optimize any indirect call, or a direct call to a global function,
1549 as the PLT requires %ebx be live. */
1550 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1551 return false;
1552
1553 /* If we are returning floats on the 80387 register stack, we cannot
1554 make a sibcall from a function that doesn't return a float to a
1555 function that does or, conversely, from a function that does return
1556 a float to a function that doesn't; the necessary stack adjustment
1557 would not be executed. */
1558 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1559 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1560 return false;
1561
1562 /* If this call is indirect, we'll need to be able to use a call-clobbered
1563 register for the address of the target function. Make sure that all
1564 such registers are not used for passing parameters. */
1565 if (!decl && !TARGET_64BIT)
1566 {
1567 tree type;
1568
1569 /* We're looking at the CALL_EXPR, we need the type of the function. */
1570 type = TREE_OPERAND (exp, 0); /* pointer expression */
1571 type = TREE_TYPE (type); /* pointer type */
1572 type = TREE_TYPE (type); /* function type */
1573
1574 if (ix86_function_regparm (type, NULL) >= 3)
1575 {
1576 /* ??? Need to count the actual number of registers to be used,
1577 not the possible number of registers. Fix later. */
1578 return false;
1579 }
1580 }
1581
1582 /* Otherwise okay. That also includes certain types of indirect calls. */
1583 return true;
1584 }
1585
1586 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1587 arguments as in struct attribute_spec.handler. */
1588 static tree
ix86_handle_cdecl_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)1589 ix86_handle_cdecl_attribute (tree *node, tree name,
1590 tree args ATTRIBUTE_UNUSED,
1591 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1592 {
1593 if (TREE_CODE (*node) != FUNCTION_TYPE
1594 && TREE_CODE (*node) != METHOD_TYPE
1595 && TREE_CODE (*node) != FIELD_DECL
1596 && TREE_CODE (*node) != TYPE_DECL)
1597 {
1598 warning ("`%s' attribute only applies to functions",
1599 IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1601 }
1602 else
1603 {
1604 if (is_attribute_p ("fastcall", name))
1605 {
1606 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1607 {
1608 error ("fastcall and stdcall attributes are not compatible");
1609 }
1610 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1611 {
1612 error ("fastcall and regparm attributes are not compatible");
1613 }
1614 }
1615 else if (is_attribute_p ("stdcall", name))
1616 {
1617 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1618 {
1619 error ("fastcall and stdcall attributes are not compatible");
1620 }
1621 }
1622 }
1623
1624 if (TARGET_64BIT)
1625 {
1626 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1627 *no_add_attrs = true;
1628 }
1629
1630 return NULL_TREE;
1631 }
1632
1633 /* Handle a "regparm" attribute;
1634 arguments as in struct attribute_spec.handler. */
1635 static tree
ix86_handle_regparm_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)1636 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1637 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1638 {
1639 if (TREE_CODE (*node) != FUNCTION_TYPE
1640 && TREE_CODE (*node) != METHOD_TYPE
1641 && TREE_CODE (*node) != FIELD_DECL
1642 && TREE_CODE (*node) != TYPE_DECL)
1643 {
1644 warning ("`%s' attribute only applies to functions",
1645 IDENTIFIER_POINTER (name));
1646 *no_add_attrs = true;
1647 }
1648 else
1649 {
1650 tree cst;
1651
1652 cst = TREE_VALUE (args);
1653 if (TREE_CODE (cst) != INTEGER_CST)
1654 {
1655 warning ("`%s' attribute requires an integer constant argument",
1656 IDENTIFIER_POINTER (name));
1657 *no_add_attrs = true;
1658 }
1659 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1660 {
1661 warning ("argument to `%s' attribute larger than %d",
1662 IDENTIFIER_POINTER (name), REGPARM_MAX);
1663 *no_add_attrs = true;
1664 }
1665
1666 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1667 {
1668 error ("fastcall and regparm attributes are not compatible");
1669 }
1670 }
1671
1672 return NULL_TREE;
1673 }
1674
1675 /* Return 0 if the attributes for two types are incompatible, 1 if they
1676 are compatible, and 2 if they are nearly compatible (which causes a
1677 warning to be generated). */
1678
1679 static int
ix86_comp_type_attributes(tree type1,tree type2)1680 ix86_comp_type_attributes (tree type1, tree type2)
1681 {
1682 /* Check for mismatch of non-default calling convention. */
1683 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1684
1685 if (TREE_CODE (type1) != FUNCTION_TYPE)
1686 return 1;
1687
1688 /* Check for mismatched fastcall types */
1689 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1690 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1691 return 0;
1692
1693 /* Check for mismatched return types (cdecl vs stdcall). */
1694 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1695 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1696 return 0;
1697 if (ix86_function_regparm (type1, NULL)
1698 != ix86_function_regparm (type2, NULL))
1699 return 0;
1700 return 1;
1701 }
1702
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */
1706
1707 static int
ix86_function_regparm(tree type,tree decl)1708 ix86_function_regparm (tree type, tree decl)
1709 {
1710 tree attr;
1711 int regparm = ix86_regparm;
1712 bool user_convention = false;
1713
1714 if (!TARGET_64BIT)
1715 {
1716 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1717 if (attr)
1718 {
1719 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1720 user_convention = true;
1721 }
1722
1723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1724 {
1725 regparm = 2;
1726 user_convention = true;
1727 }
1728
1729 /* Use register calling convention for local functions when possible. */
1730 if (!TARGET_64BIT && !user_convention && decl
1731 && flag_unit_at_a_time && !profile_flag)
1732 {
1733 struct cgraph_local_info *i = cgraph_local_info (decl);
1734 if (i && i->local)
1735 {
1736 /* We can't use regparm(3) for nested functions as these use
1737 static chain pointer in third argument. */
1738 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1739 regparm = 2;
1740 else
1741 regparm = 3;
1742 }
1743 }
1744 }
1745 return regparm;
1746 }
1747
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Hard register 0 is EAX; the entry block's live-at-end set holds the
     registers live on entry to the function body.  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
1763
1764 /* Value is the number of bytes of arguments automatically
1765 popped when returning from a subroutine call.
1766 FUNDECL is the declaration node of the function (as a tree),
1767 FUNTYPE is the data type of the function (as a tree),
1768 or for a library call it is an identifier node for the subroutine name.
1769 SIZE is the number of bytes of arguments passed on the stack.
1770
1771 On the 80386, the RTD insn may be used to pop them if the number
1772 of args is fixed, but if the number is variable then the caller
1773 must pop them all. RTD can't be used for library calls now
1774 because the library is compiled with the Unix compiler.
1775 Use of RTD is a selectable option, since it is incompatible with
1776 standard Unix calling sequences. If the option is not selected,
1777 the caller must always pop the args.
1778
1779 The attribute stdcall is equivalent to RTD on a per module basis. */
1780
1781 int
ix86_return_pops_args(tree fundecl,tree funtype,int size)1782 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1783 {
1784 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1785
1786 /* Cdecl functions override -mrtd, and never pop the stack. */
1787 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1788
1789 /* Stdcall and fastcall functions will pop the stack if not
1790 variable args. */
1791 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1792 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1793 rtd = 1;
1794
1795 if (rtd
1796 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1797 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1798 == void_type_node)))
1799 return size;
1800 }
1801
1802 /* Lose any fake structure return argument if it is passed on the stack. */
1803 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1804 && !TARGET_64BIT)
1805 {
1806 int nregs = ix86_function_regparm (funtype, fundecl);
1807
1808 if (!nregs)
1809 return GET_MODE_SIZE (Pmode);
1810 }
1811
1812 return 0;
1813 }
1814
1815 /* Argument support functions. */
1816
1817 /* Return true when register may be used to pass function parameters. */
1818 bool
ix86_function_arg_regno_p(int regno)1819 ix86_function_arg_regno_p (int regno)
1820 {
1821 int i;
1822 if (!TARGET_64BIT)
1823 return (regno < REGPARM_MAX
1824 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1825 if (SSE_REGNO_P (regno) && TARGET_SSE)
1826 return true;
1827 /* RAX is used as hidden argument to va_arg functions. */
1828 if (!regno)
1829 return true;
1830 for (i = 0; i < REGPARM_MAX; i++)
1831 if (regno == x86_64_int_parameter_registers[i])
1832 return true;
1833 return false;
1834 }
1835
1836 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1837 for a call to a function whose data type is FNTYPE.
1838 For a library call, FNTYPE is 0. */
1839
void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  /* Never written to; used only to zero *CUM via structure assignment.  */
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;	/* Libcall: use the -mregparm default.  */
  cum->sse_nregs = SSE_REGPARM_MAX;
  cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }


  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A last parameter that is not void_type_node means the
	     prototype ends in an ellipsis (variadic).  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  /* 32-bit varargs: pass everything on the stack and
		     suppress the vector-ABI warnings.  */
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Unprototyped functions and unnamed libcalls may also take variable
     arguments.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1922
1923 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1924 of this code is to classify each 8bytes of incoming argument by the register
1925 class and assign registers accordingly. */
1926
1927 /* Return the union class of CLASS1 and CLASS2.
1928 See the x86-64 PS ABI for details. */
1929
1930 static enum x86_64_reg_class
merge_classes(enum x86_64_reg_class class1,enum x86_64_reg_class class2)1931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1932 {
1933 /* Rule #1: If both classes are equal, this is the resulting class. */
1934 if (class1 == class2)
1935 return class1;
1936
1937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1938 the other class. */
1939 if (class1 == X86_64_NO_CLASS)
1940 return class2;
1941 if (class2 == X86_64_NO_CLASS)
1942 return class1;
1943
1944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1945 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1946 return X86_64_MEMORY_CLASS;
1947
1948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1949 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1950 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1951 return X86_64_INTEGERSI_CLASS;
1952 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1953 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1954 return X86_64_INTEGER_CLASS;
1955
1956 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1957 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1958 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1959 return X86_64_MEMORY_CLASS;
1960
1961 /* Rule #6: Otherwise class SSE is used. */
1962 return X86_64_SSE_CLASS;
1963 }
1964
1965 /* Classify the argument of type TYPE and mode MODE.
1966 CLASSES will be filled by the register class used to pass each word
1967 of the operand. The number of words is returned. In case the parameter
1968 should be passed in memory, 0 is returned. As a special case for zero
1969 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1970
1971 BIT_OFFSET is used internally for handling records and specifies offset
1972 of the offset in bits modulo 256 to avoid overflow cases.
1973
1974 See the x86-64 PS ABI for details.
1975 */
1976
1977 static int
classify_argument(enum machine_mode mode,tree type,enum x86_64_reg_class classes[MAX_CLASSES],int bit_offset)1978 classify_argument (enum machine_mode mode, tree type,
1979 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1980 {
1981 HOST_WIDE_INT bytes =
1982 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1983 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1984
1985 /* Variable sized entities are always passed/returned in memory. */
1986 if (bytes < 0)
1987 return 0;
1988
1989 if (mode != VOIDmode
1990 && MUST_PASS_IN_STACK (mode, type))
1991 return 0;
1992
1993 if (type && AGGREGATE_TYPE_P (type))
1994 {
1995 int i;
1996 tree field;
1997 enum x86_64_reg_class subclasses[MAX_CLASSES];
1998
1999 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2000 if (bytes > 16)
2001 return 0;
2002
2003 for (i = 0; i < words; i++)
2004 classes[i] = X86_64_NO_CLASS;
2005
2006 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2007 signalize memory class, so handle it as special case. */
2008 if (!words)
2009 {
2010 classes[0] = X86_64_NO_CLASS;
2011 return 1;
2012 }
2013
2014 /* Classify each field of record and merge classes. */
2015 if (TREE_CODE (type) == RECORD_TYPE)
2016 {
2017 /* For classes first merge in the field of the subclasses. */
2018 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2019 {
2020 tree bases = TYPE_BINFO_BASETYPES (type);
2021 int n_bases = TREE_VEC_LENGTH (bases);
2022 int i;
2023
2024 for (i = 0; i < n_bases; ++i)
2025 {
2026 tree binfo = TREE_VEC_ELT (bases, i);
2027 int num;
2028 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2029 tree type = BINFO_TYPE (binfo);
2030
2031 num = classify_argument (TYPE_MODE (type),
2032 type, subclasses,
2033 (offset + bit_offset) % 256);
2034 if (!num)
2035 return 0;
2036 for (i = 0; i < num; i++)
2037 {
2038 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2039 classes[i + pos] =
2040 merge_classes (subclasses[i], classes[i + pos]);
2041 }
2042 }
2043 }
2044 /* And now merge the fields of structure. */
2045 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2046 {
2047 if (TREE_CODE (field) == FIELD_DECL)
2048 {
2049 int num;
2050
2051 /* Bitfields are always classified as integer. Handle them
2052 early, since later code would consider them to be
2053 misaligned integers. */
2054 if (DECL_BIT_FIELD (field))
2055 {
2056 for (i = int_bit_position (field) / 8 / 8;
2057 i < (int_bit_position (field)
2058 + tree_low_cst (DECL_SIZE (field), 0)
2059 + 63) / 8 / 8; i++)
2060 classes[i] =
2061 merge_classes (X86_64_INTEGER_CLASS,
2062 classes[i]);
2063 }
2064 else
2065 {
2066 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2067 TREE_TYPE (field), subclasses,
2068 (int_bit_position (field)
2069 + bit_offset) % 256);
2070 if (!num)
2071 return 0;
2072 for (i = 0; i < num; i++)
2073 {
2074 int pos =
2075 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2076 classes[i + pos] =
2077 merge_classes (subclasses[i], classes[i + pos]);
2078 }
2079 }
2080 }
2081 }
2082 }
2083 /* Arrays are handled as small records. */
2084 else if (TREE_CODE (type) == ARRAY_TYPE)
2085 {
2086 int num;
2087 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2088 TREE_TYPE (type), subclasses, bit_offset);
2089 if (!num)
2090 return 0;
2091
2092 /* The partial classes are now full classes. */
2093 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2094 subclasses[0] = X86_64_SSE_CLASS;
2095 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2096 subclasses[0] = X86_64_INTEGER_CLASS;
2097
2098 for (i = 0; i < words; i++)
2099 classes[i] = subclasses[i % num];
2100 }
2101 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2102 else if (TREE_CODE (type) == UNION_TYPE
2103 || TREE_CODE (type) == QUAL_UNION_TYPE)
2104 {
2105 /* For classes first merge in the field of the subclasses. */
2106 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2107 {
2108 tree bases = TYPE_BINFO_BASETYPES (type);
2109 int n_bases = TREE_VEC_LENGTH (bases);
2110 int i;
2111
2112 for (i = 0; i < n_bases; ++i)
2113 {
2114 tree binfo = TREE_VEC_ELT (bases, i);
2115 int num;
2116 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2117 tree type = BINFO_TYPE (binfo);
2118
2119 num = classify_argument (TYPE_MODE (type),
2120 type, subclasses,
2121 (offset + (bit_offset % 64)) % 256);
2122 if (!num)
2123 return 0;
2124 for (i = 0; i < num; i++)
2125 {
2126 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2127 classes[i + pos] =
2128 merge_classes (subclasses[i], classes[i + pos]);
2129 }
2130 }
2131 }
2132 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2133 {
2134 if (TREE_CODE (field) == FIELD_DECL)
2135 {
2136 int num;
2137 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2138 TREE_TYPE (field), subclasses,
2139 bit_offset);
2140 if (!num)
2141 return 0;
2142 for (i = 0; i < num; i++)
2143 classes[i] = merge_classes (subclasses[i], classes[i]);
2144 }
2145 }
2146 }
2147 else if (TREE_CODE (type) == SET_TYPE)
2148 {
2149 if (bytes <= 4)
2150 {
2151 classes[0] = X86_64_INTEGERSI_CLASS;
2152 return 1;
2153 }
2154 else if (bytes <= 8)
2155 {
2156 classes[0] = X86_64_INTEGER_CLASS;
2157 return 1;
2158 }
2159 else if (bytes <= 12)
2160 {
2161 classes[0] = X86_64_INTEGER_CLASS;
2162 classes[1] = X86_64_INTEGERSI_CLASS;
2163 return 2;
2164 }
2165 else
2166 {
2167 classes[0] = X86_64_INTEGER_CLASS;
2168 classes[1] = X86_64_INTEGER_CLASS;
2169 return 2;
2170 }
2171 }
2172 else
2173 abort ();
2174
2175 /* Final merger cleanup. */
2176 for (i = 0; i < words; i++)
2177 {
2178 /* If one class is MEMORY, everything should be passed in
2179 memory. */
2180 if (classes[i] == X86_64_MEMORY_CLASS)
2181 return 0;
2182
2183 /* The X86_64_SSEUP_CLASS should be always preceded by
2184 X86_64_SSE_CLASS. */
2185 if (classes[i] == X86_64_SSEUP_CLASS
2186 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2187 classes[i] = X86_64_SSE_CLASS;
2188
2189 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2190 if (classes[i] == X86_64_X87UP_CLASS
2191 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2192 classes[i] = X86_64_SSE_CLASS;
2193 }
2194 return words;
2195 }
2196
2197 /* Compute alignment needed. We align all types to natural boundaries with
2198 exception of XFmode that is aligned to 64bits. */
2199 if (mode != VOIDmode && mode != BLKmode)
2200 {
2201 int mode_alignment = GET_MODE_BITSIZE (mode);
2202
2203 if (mode == XFmode)
2204 mode_alignment = 128;
2205 else if (mode == XCmode)
2206 mode_alignment = 256;
2207 if (COMPLEX_MODE_P (mode))
2208 mode_alignment /= 2;
2209 /* Misaligned fields are always returned in memory. */
2210 if (bit_offset % mode_alignment)
2211 return 0;
2212 }
2213
2214 /* Classification of atomic types. */
2215 switch (mode)
2216 {
2217 case DImode:
2218 case SImode:
2219 case HImode:
2220 case QImode:
2221 case CSImode:
2222 case CHImode:
2223 case CQImode:
2224 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2225 classes[0] = X86_64_INTEGERSI_CLASS;
2226 else
2227 classes[0] = X86_64_INTEGER_CLASS;
2228 return 1;
2229 case CDImode:
2230 case TImode:
2231 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2232 return 2;
2233 case CTImode:
2234 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2235 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2236 return 4;
2237 case SFmode:
2238 if (!(bit_offset % 64))
2239 classes[0] = X86_64_SSESF_CLASS;
2240 else
2241 classes[0] = X86_64_SSE_CLASS;
2242 return 1;
2243 case DFmode:
2244 classes[0] = X86_64_SSEDF_CLASS;
2245 return 1;
2246 case XFmode:
2247 classes[0] = X86_64_X87_CLASS;
2248 classes[1] = X86_64_X87UP_CLASS;
2249 return 2;
2250 case TFmode:
2251 case TCmode:
2252 return 0;
2253 case XCmode:
2254 classes[0] = X86_64_X87_CLASS;
2255 classes[1] = X86_64_X87UP_CLASS;
2256 classes[2] = X86_64_X87_CLASS;
2257 classes[3] = X86_64_X87UP_CLASS;
2258 return 4;
2259 case DCmode:
2260 classes[0] = X86_64_SSEDF_CLASS;
2261 classes[1] = X86_64_SSEDF_CLASS;
2262 return 2;
2263 case SCmode:
2264 classes[0] = X86_64_SSE_CLASS;
2265 return 1;
2266 case V4SFmode:
2267 case V4SImode:
2268 case V16QImode:
2269 case V8HImode:
2270 case V2DFmode:
2271 case V2DImode:
2272 classes[0] = X86_64_SSE_CLASS;
2273 classes[1] = X86_64_SSEUP_CLASS;
2274 return 2;
2275 case V2SFmode:
2276 case V2SImode:
2277 case V4HImode:
2278 case V8QImode:
2279 return 0;
2280 case BLKmode:
2281 case VOIDmode:
2282 return 0;
2283 default:
2284 abort ();
2285 }
2286 }
2287
2288 /* Examine the argument and return set number of register required in each
2289 class. Return 0 iff parameter should be passed in memory. */
2290 static int
examine_argument(enum machine_mode mode,tree type,int in_return,int * int_nregs,int * sse_nregs)2291 examine_argument (enum machine_mode mode, tree type, int in_return,
2292 int *int_nregs, int *sse_nregs)
2293 {
2294 enum x86_64_reg_class class[MAX_CLASSES];
2295 int n = classify_argument (mode, type, class, 0);
2296
2297 *int_nregs = 0;
2298 *sse_nregs = 0;
2299 if (!n)
2300 return 0;
2301 for (n--; n >= 0; n--)
2302 switch (class[n])
2303 {
2304 case X86_64_INTEGER_CLASS:
2305 case X86_64_INTEGERSI_CLASS:
2306 (*int_nregs)++;
2307 break;
2308 case X86_64_SSE_CLASS:
2309 case X86_64_SSESF_CLASS:
2310 case X86_64_SSEDF_CLASS:
2311 (*sse_nregs)++;
2312 break;
2313 case X86_64_NO_CLASS:
2314 case X86_64_SSEUP_CLASS:
2315 break;
2316 case X86_64_X87_CLASS:
2317 case X86_64_X87UP_CLASS:
2318 if (!in_return)
2319 return 0;
2320 break;
2321 case X86_64_MEMORY_CLASS:
2322 abort ();
2323 }
2324 return 1;
2325 }
2326 /* Construct container for the argument used by GCC interface. See
2327 FUNCTION_ARG for the detailed description. */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
		     int nintregs, int nsseregs, const int * intreg,
		     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* Classify the argument into per-8-byte register classes; n == 0 means
     memory.  */
  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	   fprintf (stderr, "\n");
	}
    }
  /* NULL means "pass in memory".  */
  if (!n)
    return NULL;
  /* Also fall back to memory when more registers are needed than remain
     available in CUM.  */
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  /* A full 16-byte SSE quantity fits a single XMM register.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  /* Adjacent integer registers can carry a 16-byte integer as one REG.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSEUP word following consumes the same XMM register, so
	     use TImode and skip the next word.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret =	 gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2463
2464 /* Update the data in CUM to advance over an argument
2465 of mode MODE and data type TYPE.
2466 (TYPE is null for libcalls where that information may not be available.) */
2467
2468 void
function_arg_advance(CUMULATIVE_ARGS * cum,enum machine_mode mode,tree type,int named)2469 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2470 enum machine_mode mode, /* current arg mode */
2471 tree type, /* type of the argument or 0 if lib support */
2472 int named) /* whether or not the argument was named */
2473 {
2474 int bytes =
2475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2477
2478 if (TARGET_DEBUG_ARG)
2479 fprintf (stderr,
2480 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2481 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2482 if (TARGET_64BIT)
2483 {
2484 int int_nregs, sse_nregs;
2485 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2486 cum->words += words;
2487 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2488 {
2489 cum->nregs -= int_nregs;
2490 cum->sse_nregs -= sse_nregs;
2491 cum->regno += int_nregs;
2492 cum->sse_regno += sse_nregs;
2493 }
2494 else
2495 cum->words += words;
2496 }
2497 else
2498 {
2499 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2500 && (!type || !AGGREGATE_TYPE_P (type)))
2501 {
2502 cum->sse_words += words;
2503 cum->sse_nregs -= 1;
2504 cum->sse_regno += 1;
2505 if (cum->sse_nregs <= 0)
2506 {
2507 cum->sse_nregs = 0;
2508 cum->sse_regno = 0;
2509 }
2510 }
2511 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2512 && (!type || !AGGREGATE_TYPE_P (type)))
2513 {
2514 cum->mmx_words += words;
2515 cum->mmx_nregs -= 1;
2516 cum->mmx_regno += 1;
2517 if (cum->mmx_nregs <= 0)
2518 {
2519 cum->mmx_nregs = 0;
2520 cum->mmx_regno = 0;
2521 }
2522 }
2523 else
2524 {
2525 cum->words += words;
2526 cum->nregs -= words;
2527 cum->regno += words;
2528
2529 if (cum->nregs <= 0)
2530 {
2531 cum->nregs = 0;
2532 cum->regno = 0;
2533 }
2534 }
2535 }
2536 return;
2537 }
2538
2539 /* Define where to put the arguments to a function.
2540 Value is zero to push the argument on the stack,
2541 or a hard register in which to store the argument.
2542
2543 MODE is the argument's machine mode.
2544 TYPE is the data type of the argument (as a tree).
2545 This is null for libcalls where that information may
2546 not be available.
2547 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2548 the preceding args and about the function being called.
2549 NAMED is nonzero if this argument is a named parameter
2550 (otherwise it is an extra parameter matching an ellipsis). */
2551
2552 rtx
function_arg(CUMULATIVE_ARGS * cum,enum machine_mode mode,tree type,int named)2553 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2554 enum machine_mode mode, /* current arg mode */
2555 tree type, /* type of the argument or 0 if lib support */
2556 int named) /* != 0 for normal args, == 0 for ... args */
2557 {
2558 rtx ret = NULL_RTX;
2559 int bytes =
2560 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2561 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2562 static bool warnedsse, warnedmmx;
2563
2564 /* Handle a hidden AL argument containing number of registers for varargs
2565 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2566 any AL settings. */
2567 if (mode == VOIDmode)
2568 {
2569 if (TARGET_64BIT)
2570 return GEN_INT (cum->maybe_vaarg
2571 ? (cum->sse_nregs < 0
2572 ? SSE_REGPARM_MAX
2573 : cum->sse_regno)
2574 : -1);
2575 else
2576 return constm1_rtx;
2577 }
2578 if (TARGET_64BIT)
2579 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2580 &x86_64_int_parameter_registers [cum->regno],
2581 cum->sse_regno);
2582 else
2583 switch (mode)
2584 {
2585 /* For now, pass fp/complex values on the stack. */
2586 default:
2587 break;
2588
2589 case BLKmode:
2590 if (bytes < 0)
2591 break;
2592 /* FALLTHRU */
2593 case DImode:
2594 case SImode:
2595 case HImode:
2596 case QImode:
2597 if (words <= cum->nregs)
2598 {
2599 int regno = cum->regno;
2600
2601 /* Fastcall allocates the first two DWORD (SImode) or
2602 smaller arguments to ECX and EDX. */
2603 if (cum->fastcall)
2604 {
2605 if (mode == BLKmode || mode == DImode)
2606 break;
2607
2608 /* ECX not EAX is the first allocated register. */
2609 if (regno == 0)
2610 regno = 2;
2611 }
2612 ret = gen_rtx_REG (mode, regno);
2613 }
2614 break;
2615 case TImode:
2616 case V16QImode:
2617 case V8HImode:
2618 case V4SImode:
2619 case V2DImode:
2620 case V4SFmode:
2621 case V2DFmode:
2622 if (!type || !AGGREGATE_TYPE_P (type))
2623 {
2624 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2625 {
2626 warnedsse = true;
2627 warning ("SSE vector argument without SSE enabled "
2628 "changes the ABI");
2629 }
2630 if (cum->sse_nregs)
2631 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2632 }
2633 break;
2634 case V8QImode:
2635 case V4HImode:
2636 case V2SImode:
2637 case V2SFmode:
2638 if (!type || !AGGREGATE_TYPE_P (type))
2639 {
2640 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2641 {
2642 warnedmmx = true;
2643 warning ("MMX vector argument without MMX enabled "
2644 "changes the ABI");
2645 }
2646 if (cum->mmx_nregs)
2647 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2648 }
2649 break;
2650 }
2651
2652 if (TARGET_DEBUG_ARG)
2653 {
2654 fprintf (stderr,
2655 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2656 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2657
2658 if (ret)
2659 print_simple_rtl (stderr, ret);
2660 else
2661 fprintf (stderr, ", stack");
2662
2663 fprintf (stderr, " )\n");
2664 }
2665
2666 return ret;
2667 }
2668
2669 /* A C expression that indicates when an argument must be passed by
2670 reference. If nonzero for an argument, a copy of that argument is
2671 made in memory and a pointer to the argument is passed instead of
2672 the argument itself. The pointer is passed in whatever way is
2673 appropriate for passing a pointer to that type. */
2674
2675 int
function_arg_pass_by_reference(CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,enum machine_mode mode ATTRIBUTE_UNUSED,tree type,int named ATTRIBUTE_UNUSED)2676 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2677 enum machine_mode mode ATTRIBUTE_UNUSED,
2678 tree type, int named ATTRIBUTE_UNUSED)
2679 {
2680 if (!TARGET_64BIT)
2681 return 0;
2682
2683 if (type && int_size_in_bytes (type) == -1)
2684 {
2685 if (TARGET_DEBUG_ARG)
2686 fprintf (stderr, "function_arg_pass_by_reference\n");
2687 return 1;
2688 }
2689
2690 return 0;
2691 }
2692
2693 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2694 ABI */
2695 static bool
contains_128bit_aligned_vector_p(tree type)2696 contains_128bit_aligned_vector_p (tree type)
2697 {
2698 enum machine_mode mode = TYPE_MODE (type);
2699 if (SSE_REG_MODE_P (mode)
2700 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2701 return true;
2702 if (TYPE_ALIGN (type) < 128)
2703 return false;
2704
2705 if (AGGREGATE_TYPE_P (type))
2706 {
2707 /* Walk the aggregates recursively. */
2708 if (TREE_CODE (type) == RECORD_TYPE
2709 || TREE_CODE (type) == UNION_TYPE
2710 || TREE_CODE (type) == QUAL_UNION_TYPE)
2711 {
2712 tree field;
2713
2714 if (TYPE_BINFO (type) != NULL
2715 && TYPE_BINFO_BASETYPES (type) != NULL)
2716 {
2717 tree bases = TYPE_BINFO_BASETYPES (type);
2718 int n_bases = TREE_VEC_LENGTH (bases);
2719 int i;
2720
2721 for (i = 0; i < n_bases; ++i)
2722 {
2723 tree binfo = TREE_VEC_ELT (bases, i);
2724 tree type = BINFO_TYPE (binfo);
2725
2726 if (contains_128bit_aligned_vector_p (type))
2727 return true;
2728 }
2729 }
2730 /* And now merge the fields of structure. */
2731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2732 {
2733 if (TREE_CODE (field) == FIELD_DECL
2734 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2735 return true;
2736 }
2737 }
2738 /* Just for use if some languages passes arrays by value. */
2739 else if (TREE_CODE (type) == ARRAY_TYPE)
2740 {
2741 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2742 return true;
2743 }
2744 else
2745 abort ();
2746 }
2747 return false;
2748 }
2749
2750 /* Gives the alignment boundary, in bits, of an argument with the
2751 specified mode and type. */
2752
2753 int
ix86_function_arg_boundary(enum machine_mode mode,tree type)2754 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2755 {
2756 int align;
2757 if (type)
2758 align = TYPE_ALIGN (type);
2759 else
2760 align = GET_MODE_ALIGNMENT (mode);
2761 if (align < PARM_BOUNDARY)
2762 align = PARM_BOUNDARY;
2763 if (!TARGET_64BIT)
2764 {
2765 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2766 make an exception for SSE modes since these require 128bit
2767 alignment.
2768
2769 The handling here differs from field_alignment. ICC aligns MMX
2770 arguments to 4 byte boundaries, while structure fields are aligned
2771 to 8 byte boundaries. */
2772 if (!type)
2773 {
2774 if (!SSE_REG_MODE_P (mode))
2775 align = PARM_BOUNDARY;
2776 }
2777 else
2778 {
2779 if (!contains_128bit_aligned_vector_p (type))
2780 align = PARM_BOUNDARY;
2781 }
2782 }
2783 if (align > 128)
2784 align = 128;
2785 return align;
2786 }
2787
2788 /* Return true if N is a possible register number of function value. */
2789 bool
ix86_function_value_regno_p(int regno)2790 ix86_function_value_regno_p (int regno)
2791 {
2792 if (!TARGET_64BIT)
2793 {
2794 return ((regno) == 0
2795 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2796 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2797 }
2798 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2799 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2800 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2801 }
2802
2803 /* Define how to find the value returned by a function.
2804 VALTYPE is the data type of the value (as a tree).
2805 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2806 otherwise, FUNC is 0. */
2807 rtx
ix86_function_value(tree valtype)2808 ix86_function_value (tree valtype)
2809 {
2810 if (TARGET_64BIT)
2811 {
2812 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2813 REGPARM_MAX, SSE_REGPARM_MAX,
2814 x86_64_int_return_registers, 0);
2815 /* For zero sized structures, construct_container return NULL, but we need
2816 to keep rest of compiler happy by returning meaningful value. */
2817 if (!ret)
2818 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2819 return ret;
2820 }
2821 else
2822 return gen_rtx_REG (TYPE_MODE (valtype),
2823 ix86_value_regno (TYPE_MODE (valtype)));
2824 }
2825
/* Return false iff type is returned in memory.  (Note the inverted
   sense: nonzero means "in memory".)  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = TYPE_MODE (type);

  /* The 64-bit ABI decides purely by whether the value can be split
     across the available return registers.  */
  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  /* 32-bit ABI: anything without a scalar/vector mode goes in memory.  */
  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  /* MS-compatible ABI returns small aggregates (<= 8 bytes) in registers.  */
  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned on the stack, since we've
	 got to EMMS/FEMMS before returning.  */
      if (size == 8)
	return 1;

      /* SSE values are returned in XMM0.  */
      /* ??? Except when it doesn't exist?  We have a choice of
	 either (1) being abi incompatible with a -march switch,
	 or (2) generating an error here.  Given no good solution,
	 I think the safest thing is one warning.  The user won't
	 be able to use -Werror, but....  */
      if (size == 16)
	{
	  static bool warned;

	  if (TARGET_SSE)
	    return 0;

	  /* Warn once, then fall back to returning in memory.  */
	  if (!warned)
	    {
	      warned = true;
	      warning ("SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	  return 1;
	}
    }

  /* long double is returned in %st(0).  */
  if (mode == XFmode)
    return 0;

  /* Everything larger than 12 bytes (e.g. big complex values) goes in
     memory; the rest fits in registers.  */
  if (size > 12)
    return 1;
  return 0;
}
2885
2886 /* Define how to find the value returned by a library function
2887 assuming the value has mode MODE. */
2888 rtx
ix86_libcall_value(enum machine_mode mode)2889 ix86_libcall_value (enum machine_mode mode)
2890 {
2891 if (TARGET_64BIT)
2892 {
2893 switch (mode)
2894 {
2895 case SFmode:
2896 case SCmode:
2897 case DFmode:
2898 case DCmode:
2899 return gen_rtx_REG (mode, FIRST_SSE_REG);
2900 case XFmode:
2901 case XCmode:
2902 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2903 case TFmode:
2904 case TCmode:
2905 return NULL;
2906 default:
2907 return gen_rtx_REG (mode, 0);
2908 }
2909 }
2910 else
2911 return gen_rtx_REG (mode, ix86_value_regno (mode));
2912 }
2913
2914 /* Given a mode, return the register to use for a return value. */
2915
2916 static int
ix86_value_regno(enum machine_mode mode)2917 ix86_value_regno (enum machine_mode mode)
2918 {
2919 /* Floating point return values in %st(0). */
2920 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2921 return FIRST_FLOAT_REG;
2922 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2923 we prevent this case when sse is not available. */
2924 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2925 return FIRST_SSE_REG;
2926 /* Everything else in %eax. */
2927 return 0;
2928 }
2929
2930 /* Create the va_list data type. */
2931
2932 static tree
ix86_build_builtin_va_list(void)2933 ix86_build_builtin_va_list (void)
2934 {
2935 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2936
2937 /* For i386 we use plain pointer to argument area. */
2938 if (!TARGET_64BIT)
2939 return build_pointer_type (char_type_node);
2940
2941 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2942 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2943
2944 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2945 unsigned_type_node);
2946 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2947 unsigned_type_node);
2948 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2949 ptr_type_node);
2950 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2951 ptr_type_node);
2952
2953 DECL_FIELD_CONTEXT (f_gpr) = record;
2954 DECL_FIELD_CONTEXT (f_fpr) = record;
2955 DECL_FIELD_CONTEXT (f_ovf) = record;
2956 DECL_FIELD_CONTEXT (f_sav) = record;
2957
2958 TREE_CHAIN (record) = type_decl;
2959 TYPE_NAME (record) = type_decl;
2960 TYPE_FIELDS (record) = f_gpr;
2961 TREE_CHAIN (f_gpr) = f_fpr;
2962 TREE_CHAIN (f_fpr) = f_ovf;
2963 TREE_CHAIN (f_ovf) = f_sav;
2964
2965 layout_type (record);
2966
2967 /* The correct type is an array type of one element. */
2968 return build_array_type (record, build_index_type (size_zero_node));
2969 }
2970
2971 /* Perform any needed actions needed for a function that is receiving a
2972 variable number of arguments.
2973
2974 CUM is as above.
2975
2976 MODE and TYPE are the mode and type of the current parameter.
2977
2978 PRETEND_SIZE is a variable that should be set to the amount of stack
2979 that must be pushed by the prolog to pretend that our caller pushed
2980 it.
2981
2982 Normally, this macro will push all remaining incoming registers on the
2983 stack and set PRETEND_SIZE to the length of the registers pushed. */
2984
2985 void
ix86_setup_incoming_varargs(CUMULATIVE_ARGS * cum,enum machine_mode mode,tree type,int * pretend_size ATTRIBUTE_UNUSED,int no_rtl)2986 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2987 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2988 int no_rtl)
2989 {
2990 CUMULATIVE_ARGS next_cum;
2991 rtx save_area = NULL_RTX, mem;
2992 rtx label;
2993 rtx label_ref;
2994 rtx tmp_reg;
2995 rtx nsse_reg;
2996 int set;
2997 tree fntype;
2998 int stdarg_p;
2999 int i;
3000
3001 if (!TARGET_64BIT)
3002 return;
3003
3004 /* Indicate to allocate space on the stack for varargs save area. */
3005 ix86_save_varrargs_registers = 1;
3006
3007 cfun->stack_alignment_needed = 128;
3008
3009 fntype = TREE_TYPE (current_function_decl);
3010 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3011 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3012 != void_type_node));
3013
3014 /* For varargs, we do not want to skip the dummy va_dcl argument.
3015 For stdargs, we do want to skip the last named argument. */
3016 next_cum = *cum;
3017 if (stdarg_p)
3018 function_arg_advance (&next_cum, mode, type, 1);
3019
3020 if (!no_rtl)
3021 save_area = frame_pointer_rtx;
3022
3023 set = get_varargs_alias_set ();
3024
3025 for (i = next_cum.regno; i < ix86_regparm; i++)
3026 {
3027 mem = gen_rtx_MEM (Pmode,
3028 plus_constant (save_area, i * UNITS_PER_WORD));
3029 set_mem_alias_set (mem, set);
3030 emit_move_insn (mem, gen_rtx_REG (Pmode,
3031 x86_64_int_parameter_registers[i]));
3032 }
3033
3034 if (next_cum.sse_nregs)
3035 {
3036 /* Now emit code to save SSE registers. The AX parameter contains number
3037 of SSE parameter registers used to call this function. We use
3038 sse_prologue_save insn template that produces computed jump across
3039 SSE saves. We need some preparation work to get this working. */
3040
3041 label = gen_label_rtx ();
3042 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3043
3044 /* Compute address to jump to :
3045 label - 5*eax + nnamed_sse_arguments*5 */
3046 tmp_reg = gen_reg_rtx (Pmode);
3047 nsse_reg = gen_reg_rtx (Pmode);
3048 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3049 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3050 gen_rtx_MULT (Pmode, nsse_reg,
3051 GEN_INT (4))));
3052 if (next_cum.sse_regno)
3053 emit_move_insn
3054 (nsse_reg,
3055 gen_rtx_CONST (DImode,
3056 gen_rtx_PLUS (DImode,
3057 label_ref,
3058 GEN_INT (next_cum.sse_regno * 4))));
3059 else
3060 emit_move_insn (nsse_reg, label_ref);
3061 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3062
3063 /* Compute address of memory block we save into. We always use pointer
3064 pointing 127 bytes after first byte to store - this is needed to keep
3065 instruction size limited by 4 bytes. */
3066 tmp_reg = gen_reg_rtx (Pmode);
3067 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3068 plus_constant (save_area,
3069 8 * REGPARM_MAX + 127)));
3070 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3071 set_mem_alias_set (mem, set);
3072 set_mem_align (mem, BITS_PER_WORD);
3073
3074 /* And finally do the dirty job! */
3075 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3076 GEN_INT (next_cum.sse_regno), label));
3077 }
3078
3079 }
3080
/* Implement va_start.  Initialize the four __va_list_tag fields for the
   64-bit ABI (the 32-bit ABI just uses a pointer into the stack).  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Walk the field chain built by ix86_build_builtin_va_list; the order
     gp_offset, fp_offset, overflow_arg_area, reg_save_area is fixed.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: byte offset of the first unused integer register slot
     (8 bytes per GP register).  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset: SSE slots (16 bytes each) start after the GP area.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
3143
/* Implement va_arg.  Emit RTL that fetches the next argument of TYPE
   from VALIST and return an rtx holding its address.  For the 64-bit
   ABI this tries the register save area first and falls back to the
   stack overflow area.  */
rtx
ix86_va_arg (tree valist, tree type)
{
  /* Identity map of the six integer argument register slots.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  /* Field chain order matches ix86_build_builtin_va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Passed by reference: fetch a pointer instead of the value.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER is NULL when the argument is passed entirely in memory.  */
  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      /* An over-aligned value cannot be read directly out of the save
	 area (GP slots are 8-byte, SSE slots 16-byte aligned).  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;
	  rtx x;

	  /* Never use the memory itself, as it has the alias set.  */
	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, x);
	  force_operand (x, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each piece out of its save-area slot into the temporary,
	     at the offset construct_container assigned to it.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Advance gp_offset/fp_offset past the registers just consumed.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Bump the overflow pointer past the argument just read.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  /* For pass-by-reference, dereference the pointer we just fetched.  */
  if (indirect_p)
    {
      r = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (r, get_varargs_alias_set ());
      emit_move_insn (addr_rtx, r);
    }

  return addr_rtx;
}
3383
/* Return nonzero if OP is either a i387 or SSE fp register.
   MODE is ignored; only the hard register class matters.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}
3390
/* Return nonzero if OP is an i387 fp register.
   MODE is ignored; only the hard register class matters.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}
3397
3398 /* Return nonzero if OP is a non-fp register_operand. */
3399 int
register_and_not_any_fp_reg_operand(rtx op,enum machine_mode mode)3400 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3401 {
3402 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3403 }
3404
3405 /* Return nonzero if OP is a register operand other than an
3406 i387 fp register. */
3407 int
register_and_not_fp_reg_operand(rtx op,enum machine_mode mode)3408 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3409 {
3410 return register_operand (op, mode) && !FP_REG_P (op);
3411 }
3412
3413 /* Return nonzero if OP is general operand representable on x86_64. */
3414
3415 int
x86_64_general_operand(rtx op,enum machine_mode mode)3416 x86_64_general_operand (rtx op, enum machine_mode mode)
3417 {
3418 if (!TARGET_64BIT)
3419 return general_operand (op, mode);
3420 if (nonimmediate_operand (op, mode))
3421 return 1;
3422 return x86_64_sign_extended_value (op);
3423 }
3424
3425 /* Return nonzero if OP is general operand representable on x86_64
3426 as either sign extended or zero extended constant. */
3427
3428 int
x86_64_szext_general_operand(rtx op,enum machine_mode mode)3429 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3430 {
3431 if (!TARGET_64BIT)
3432 return general_operand (op, mode);
3433 if (nonimmediate_operand (op, mode))
3434 return 1;
3435 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3436 }
3437
3438 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3439
3440 int
x86_64_nonmemory_operand(rtx op,enum machine_mode mode)3441 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3442 {
3443 if (!TARGET_64BIT)
3444 return nonmemory_operand (op, mode);
3445 if (register_operand (op, mode))
3446 return 1;
3447 return x86_64_sign_extended_value (op);
3448 }
3449
3450 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3451
3452 int
x86_64_movabs_operand(rtx op,enum machine_mode mode)3453 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3454 {
3455 if (!TARGET_64BIT || !flag_pic)
3456 return nonmemory_operand (op, mode);
3457 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3458 return 1;
3459 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3460 return 1;
3461 return 0;
3462 }
3463
3464 /* Return nonzero if OPNUM's MEM should be matched
3465 in movabs* patterns. */
3466
3467 int
ix86_check_movabs(rtx insn,int opnum)3468 ix86_check_movabs (rtx insn, int opnum)
3469 {
3470 rtx set, mem;
3471
3472 set = PATTERN (insn);
3473 if (GET_CODE (set) == PARALLEL)
3474 set = XVECEXP (set, 0, 0);
3475 if (GET_CODE (set) != SET)
3476 abort ();
3477 mem = XEXP (set, opnum);
3478 while (GET_CODE (mem) == SUBREG)
3479 mem = SUBREG_REG (mem);
3480 if (GET_CODE (mem) != MEM)
3481 abort ();
3482 return (volatile_ok || !MEM_VOLATILE_P (mem));
3483 }
3484
3485 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3486
3487 int
x86_64_szext_nonmemory_operand(rtx op,enum machine_mode mode)3488 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3489 {
3490 if (!TARGET_64BIT)
3491 return nonmemory_operand (op, mode);
3492 if (register_operand (op, mode))
3493 return 1;
3494 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3495 }
3496
3497 /* Return nonzero if OP is immediate operand representable on x86_64. */
3498
3499 int
x86_64_immediate_operand(rtx op,enum machine_mode mode)3500 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3501 {
3502 if (!TARGET_64BIT)
3503 return immediate_operand (op, mode);
3504 return x86_64_sign_extended_value (op);
3505 }
3506
/* Return nonzero if OP is immediate operand representable on x86_64
   as a zero-extended 32-bit constant.  MODE is ignored.  */

int
x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return x86_64_zero_extended_value (op);
}
3514
3515 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3516 for shift & compare patterns, as shifting by 0 does not change flags),
3517 else return zero. */
3518
3519 int
const_int_1_31_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3520 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3521 {
3522 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3523 }
3524
3525 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3526 reference and a constant. */
3527
3528 int
symbolic_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3529 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3530 {
3531 switch (GET_CODE (op))
3532 {
3533 case SYMBOL_REF:
3534 case LABEL_REF:
3535 return 1;
3536
3537 case CONST:
3538 op = XEXP (op, 0);
3539 if (GET_CODE (op) == SYMBOL_REF
3540 || GET_CODE (op) == LABEL_REF
3541 || (GET_CODE (op) == UNSPEC
3542 && (XINT (op, 1) == UNSPEC_GOT
3543 || XINT (op, 1) == UNSPEC_GOTOFF
3544 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3545 return 1;
3546 if (GET_CODE (op) != PLUS
3547 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3548 return 0;
3549
3550 op = XEXP (op, 0);
3551 if (GET_CODE (op) == SYMBOL_REF
3552 || GET_CODE (op) == LABEL_REF)
3553 return 1;
3554 /* Only @GOTOFF gets offsets. */
3555 if (GET_CODE (op) != UNSPEC
3556 || XINT (op, 1) != UNSPEC_GOTOFF)
3557 return 0;
3558
3559 op = XVECEXP (op, 0, 0);
3560 if (GET_CODE (op) == SYMBOL_REF
3561 || GET_CODE (op) == LABEL_REF)
3562 return 1;
3563 return 0;
3564
3565 default:
3566 return 0;
3567 }
3568 }
3569
3570 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3571
3572 int
pic_symbolic_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3573 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3574 {
3575 if (GET_CODE (op) != CONST)
3576 return 0;
3577 op = XEXP (op, 0);
3578 if (TARGET_64BIT)
3579 {
3580 if (GET_CODE (op) == UNSPEC
3581 && XINT (op, 1) == UNSPEC_GOTPCREL)
3582 return 1;
3583 if (GET_CODE (op) == PLUS
3584 && GET_CODE (XEXP (op, 0)) == UNSPEC
3585 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3586 return 1;
3587 }
3588 else
3589 {
3590 if (GET_CODE (op) == UNSPEC)
3591 return 1;
3592 if (GET_CODE (op) != PLUS
3593 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3594 return 0;
3595 op = XEXP (op, 0);
3596 if (GET_CODE (op) == UNSPEC)
3597 return 1;
3598 }
3599 return 0;
3600 }
3601
3602 /* Return true if OP is a symbolic operand that resolves locally. */
3603
3604 static int
local_symbolic_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3605 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3606 {
3607 if (GET_CODE (op) == CONST
3608 && GET_CODE (XEXP (op, 0)) == PLUS
3609 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3610 op = XEXP (XEXP (op, 0), 0);
3611
3612 if (GET_CODE (op) == LABEL_REF)
3613 return 1;
3614
3615 if (GET_CODE (op) != SYMBOL_REF)
3616 return 0;
3617
3618 if (SYMBOL_REF_LOCAL_P (op))
3619 return 1;
3620
3621 /* There is, however, a not insubstantial body of code in the rest of
3622 the compiler that assumes it can just stick the results of
3623 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3624 /* ??? This is a hack. Should update the body of the compiler to
3625 always create a DECL an invoke targetm.encode_section_info. */
3626 if (strncmp (XSTR (op, 0), internal_label_prefix,
3627 internal_label_prefix_len) == 0)
3628 return 1;
3629
3630 return 0;
3631 }
3632
3633 /* Test for various thread-local symbols. */
3634
3635 int
tls_symbolic_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3636 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3637 {
3638 if (GET_CODE (op) != SYMBOL_REF)
3639 return 0;
3640 return SYMBOL_REF_TLS_MODEL (op);
3641 }
3642
3643 static inline int
tls_symbolic_operand_1(rtx op,enum tls_model kind)3644 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3645 {
3646 if (GET_CODE (op) != SYMBOL_REF)
3647 return 0;
3648 return SYMBOL_REF_TLS_MODEL (op) == kind;
3649 }
3650
/* Match a symbol using the global-dynamic TLS access model.  */
int
global_dynamic_symbolic_operand (rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}
3657
/* Match a symbol using the local-dynamic TLS access model.  */
int
local_dynamic_symbolic_operand (rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}
3664
/* Match a symbol using the initial-exec TLS access model.  */
int
initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}
3670
/* Match a symbol using the local-exec TLS access model.  */
int
local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}
3676
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  The checks below
   are ordered: specific rejections and the SYMBOL_REF acceptance
   must be decided before falling back to general_operand.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
3705
/* Test for a valid operand for a sibling-call instruction.  Like
   call_insn_operand, but stricter: besides SYMBOL_REF only plain
   register operands are allowed (no memory addressing forms), and
   the arg pointer / virtual regs are rejected since they may decay
   into reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  */
  return register_operand (op, Pmode);
}
3729
/* Return nonzero if OP is a constant call address: a SYMBOL_REF,
   optionally wrapped as (const (plus SYMBOL_REF CONST_INT)).  */

int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Strip a constant integer offset, if present.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}
3739
/* Match exactly zero (the shared CONST0_RTX node for MODE).  */

int
const0_operand (rtx op, enum machine_mode mode)
{
  return op == CONST0_RTX (mode);
}
3747
/* Match exactly the integer constant one.  */

int
const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}
3753
3754 /* Match 2, 4, or 8. Used for leal multiplicands. */
3755
3756 int
const248_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3757 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3758 {
3759 return (GET_CODE (op) == CONST_INT
3760 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3761 }
3762
3763 int
const_0_to_3_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3764 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3765 {
3766 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3767 }
3768
3769 int
const_0_to_7_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3770 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3771 {
3772 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3773 }
3774
3775 int
const_0_to_15_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3776 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3777 {
3778 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3779 }
3780
3781 int
const_0_to_255_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3782 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3783 {
3784 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3785 }
3786
3787
/* True if this is a constant appropriate for an increment or decrement
   (i.e. exactly +1 or -1), and the inc/dec instructions are worth
   using on the current tuning target.  */

int
incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set; prefer add/sub
     unless optimizing for size.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}
3799
3800 /* Return nonzero if OP is acceptable as operand of DImode shift
3801 expander. */
3802
3803 int
shiftdi_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)3804 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805 {
3806 if (TARGET_64BIT)
3807 return nonimmediate_operand (op, mode);
3808 else
3809 return register_operand (op, mode);
3810 }
3811
3812 /* Return false if this is the stack pointer, or any other fake
3813 register eliminable to the stack pointer. Otherwise, this is
3814 a register operand.
3815
3816 This is used to prevent esp from being used as an index reg.
3817 Which would only happen in pathological cases. */
3818
3819 int
reg_no_sp_operand(rtx op,enum machine_mode mode)3820 reg_no_sp_operand (rtx op, enum machine_mode mode)
3821 {
3822 rtx t = op;
3823 if (GET_CODE (t) == SUBREG)
3824 t = SUBREG_REG (t);
3825 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3826 return 0;
3827
3828 return register_operand (op, mode);
3829 }
3830
/* Return nonzero if OP is a hard MMX register.  */

int
mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}
3836
/* Return false if this is any eliminable register (frame/arg pointer
   or any virtual register).  Otherwise work like general_operand.  */

int
general_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG to the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  /* Also reject the remaining virtual registers by number.  */
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}
3857
/* Return false if this is any eliminable register (frame/arg pointer
   or the listed virtual registers).  Otherwise accept a
   register_operand or a CONST_INT.  */

int
nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG to the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
3874
/* Return false if this is any eliminable register or the stack
   pointer, otherwise work like a register operand.  Used to validate
   index registers, where %esp is never legal.

   NOTE(review): the final test is general_operand, not
   register_operand as the comment above might suggest; by this point
   only REGs (possibly inside a SUBREG) can reach it -- confirm.  */

int
index_register_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  /* Look through a SUBREG to the underlying register.  */
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  return general_operand (op, mode);
}
3896
3897 /* Return true if op is a Q_REGS class register. */
3898
3899 int
q_regs_operand(rtx op,enum machine_mode mode)3900 q_regs_operand (rtx op, enum machine_mode mode)
3901 {
3902 if (mode != VOIDmode && GET_MODE (op) != mode)
3903 return 0;
3904 if (GET_CODE (op) == SUBREG)
3905 op = SUBREG_REG (op);
3906 return ANY_QI_REG_P (op);
3907 }
3908
/* Return true if OP is the flags register, in a mode other than
   VOIDmode.  */

int
flags_reg_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}
3918
3919 /* Return true if op is a NON_Q_REGS class register. */
3920
3921 int
non_q_regs_operand(rtx op,enum machine_mode mode)3922 non_q_regs_operand (rtx op, enum machine_mode mode)
3923 {
3924 if (mode != VOIDmode && GET_MODE (op) != mode)
3925 return 0;
3926 if (GET_CODE (op) == SUBREG)
3927 op = SUBREG_REG (op);
3928 return NON_QI_REG_P (op);
3929 }
3930
/* Return nonzero if OP is a constant-pool MEM holding a CONST_VECTOR
   whose elements are all zero except possibly element 0 -- i.e. a
   vector load that behaves like a zero-extended scalar load.  */

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  /* Resolve the MEM to the constant it loads, if any.  */
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Check elements n_elts-1 down to 1; element 0 (the scalar value
     itself) is deliberately left unconstrained.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}
3954
/* Return 1 when OP is an operand acceptable for a standard SSE move:
   any register-or-memory operand, or the all-zeros vector constant.  */

int
vector_move_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  /* The zero vector can be materialized without a load (e.g. xorps).  */
  return (op == CONST0_RTX (GET_MODE (op)));
}
3965
/* Return nonzero if OP is a valid address that does not contain a
   segment override.  */

int
no_seg_address_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  /* Any address that passed address_operand must decompose.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}
3982
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  The hardware encodes only a subset of comparison codes; the
   remainder are usable only when IEEE conformance is not required.  */

int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons
       (where NaN operands need not be distinguished).  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in a valid mode.
   For FP compares, accept only codes that need a single compare
   instruction (no bypass or second code).  For integer compares, the
   admissible codes depend on which flags the CC mode guarantees.  */

int
ix86_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      /* Usable only if no auxiliary comparison is required.  */
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned codes need the carry flag, valid only in full CCmode.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
4052
4053 /* Return 1 if OP is a valid comparison operator testing carry flag
4054 to be set. */
4055 int
ix86_carry_flag_operator(rtx op,enum machine_mode mode)4056 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4057 {
4058 enum machine_mode inmode;
4059 enum rtx_code code = GET_CODE (op);
4060
4061 if (mode != VOIDmode && GET_MODE (op) != mode)
4062 return 0;
4063 if (GET_RTX_CLASS (code) != '<')
4064 return 0;
4065 inmode = GET_MODE (XEXP (op, 0));
4066 if (GET_CODE (XEXP (op, 0)) != REG
4067 || REGNO (XEXP (op, 0)) != 17
4068 || XEXP (op, 1) != const0_rtx)
4069 return 0;
4070
4071 if (inmode == CCFPmode || inmode == CCFPUmode)
4072 {
4073 enum rtx_code second_code, bypass_code;
4074
4075 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4076 if (bypass_code != NIL || second_code != NIL)
4077 return 0;
4078 code = ix86_fp_compare_code_to_integer (code);
4079 }
4080 else if (inmode != CCmode)
4081 return 0;
4082 return code == LTU;
4083 }
4084
/* Return 1 if OP is a comparison operator that can be issued by fcmov.
   Like ix86_comparison_operator, FP codes must reduce to a single
   compare; the resulting integer code must be one of the few condition
   codes fcmov actually supports.  */

int
fcmov_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
4121
/* Return 1 if OP is a binary operator that can be promoted to a wider
   mode without changing the result of the low part.  */

int
promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_tune > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      /* The low bits of these depend only on the low bits of the
	 inputs, so widening is always safe.  */
      return 1;
    default:
      return 0;
    }
}
4143
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
4157
/* Match an SI or HImode register for a zero_extract (and DImode when
   64-bit).  Only registers whose high byte is addressable (hard regs
   0..3, i.e. those with %ah-style parts) or pseudos are accepted.  */

int
ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.
     Pseudos (regno > LAST_VIRTUAL_REGISTER) are fine since the
     allocator can still pick a suitable hard register for them.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
4175
4176 /* Return 1 if this is a valid binary floating-point operation.
4177 OP is the expression matched, and MODE is its mode. */
4178
4179 int
binary_fp_operator(rtx op,enum machine_mode mode)4180 binary_fp_operator (rtx op, enum machine_mode mode)
4181 {
4182 if (mode != VOIDmode && mode != GET_MODE (op))
4183 return 0;
4184
4185 switch (GET_CODE (op))
4186 {
4187 case PLUS:
4188 case MINUS:
4189 case MULT:
4190 case DIV:
4191 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4192
4193 default:
4194 return 0;
4195 }
4196 }
4197
/* Return nonzero if OP is a MULT rtx.  */

int
mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}
4203
/* Return nonzero if OP is a DIV rtx.  */

int
div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}
4209
4210 int
arith_or_logical_operator(rtx op,enum machine_mode mode)4211 arith_or_logical_operator (rtx op, enum machine_mode mode)
4212 {
4213 return ((mode == VOIDmode || GET_MODE (op) == mode)
4214 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4215 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4216 }
4217
/* Returns 1 if OP is a memory operand whose address contains a
   displacement.  */

int
memory_displacement_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  /* A valid memory operand's address must decompose.  */
  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
4233
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept the testqi_ext_ccno_0 shape:
     (and:SI (zero_extract:SI reg (const_int 8) (const_int 8))
	     (const_int mask)) -- a test of the high byte.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
4258
/* Returns 1 if OP is a memory operand that cannot be represented by
   the modRM byte alone (i.e. it needs extra address bytes).  */

int
long_memory_operand (rtx op, enum machine_mode mode)
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
4270
/* Return nonzero if the rtx OP is known to be (at least 4-byte)
   aligned.  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      /* index * scale with scale >= 4 is a multiple of 4 regardless
	 of the index's own alignment.  */
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a known constant multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
4322
/* Initialize the table of extra 80387 mathematical constants
   (ext_80387_constants_table), one entry per special load
   instruction, and set ext_80387_constants_init.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
4348
/* Return a code describing whether X can be loaded with a special
   80387 instruction: -1 if X is not a float CONST_DOUBLE, 0 if no
   special instruction applies, 1 for fldz, 2 for fld1, and 3..7 for
   the entries of ext_80387_constants_table (fldlg2..fldpi).  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction on
     those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && x86_ext_80387_constants & TUNEMASK)
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}
4382
4383 /* Return the opcode of the special instruction to be used to load
4384 the constant X. */
4385
4386 const char *
standard_80387_constant_opcode(rtx x)4387 standard_80387_constant_opcode (rtx x)
4388 {
4389 switch (standard_80387_constant_p (x))
4390 {
4391 case 1:
4392 return "fldz";
4393 case 2:
4394 return "fld1";
4395 case 3:
4396 return "fldlg2";
4397 case 4:
4398 return "fldln2";
4399 case 5:
4400 return "fldl2e";
4401 case 6:
4402 return "fldl2t";
4403 case 7:
4404 return "fldpi";
4405 }
4406 abort ();
4407 }
4408
4409 /* Return the CONST_DOUBLE representing the 80387 constant that is
4410 loaded by the specified special instruction. The argument IDX
4411 matches the return value from standard_80387_constant_p. */
4412
4413 rtx
standard_80387_constant_rtx(int idx)4414 standard_80387_constant_rtx (int idx)
4415 {
4416 int i;
4417
4418 if (! ext_80387_constants_init)
4419 init_ext_80387_constants ();
4420
4421 switch (idx)
4422 {
4423 case 3:
4424 case 4:
4425 case 5:
4426 case 6:
4427 case 7:
4428 i = idx - 3;
4429 break;
4430
4431 default:
4432 abort ();
4433 }
4434
4435 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4436 XFmode);
4437 }
4438
/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory (currently only zero qualifies, which can be
   generated with a register xor).  */

int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
4448
4449 /* Returns 1 if OP contains a symbol reference */
4450
4451 int
symbolic_reference_mentioned_p(rtx op)4452 symbolic_reference_mentioned_p (rtx op)
4453 {
4454 const char *fmt;
4455 int i;
4456
4457 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4458 return 1;
4459
4460 fmt = GET_RTX_FORMAT (GET_CODE (op));
4461 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4462 {
4463 if (fmt[i] == 'E')
4464 {
4465 int j;
4466
4467 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4468 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4469 return 1;
4470 }
4471
4472 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4473 return 1;
4474 }
4475
4476 return 0;
4477 }
4478
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  /* Simple epilogue: nothing to deallocate, no registers to restore.  */
  return frame.to_allocate == 0 && frame.nregs == 0;
}
4513
/* Return 1 if VALUE can be stored in the sign-extended 32-bit
   immediate field of an x86-64 instruction.  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and so all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  return 1;
	else
	  {
	    /* Fits iff the value survives a DImode -> SImode round trip.  */
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return trunc_int_for_mode (val, SImode) == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
      case SYMBOL_REF:
	/* TLS symbols are not constant.  */
	if (tls_symbolic_operand (value, Pmode))
	  return false;
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
		|| ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
      case CONST:
	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	  switch (XINT (XEXP (value, 0), 1))
	    {
	    case UNSPEC_GOTPCREL:
	    case UNSPEC_DTPOFF:
	    case UNSPEC_GOTNTPOFF:
	    case UNSPEC_NTPOFF:
	      return 1;
	    default:
	      break;
	    }
	if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
		case SYMBOL_REF:
		  /* NOTE(review): unlike the bare SYMBOL_REF case above,
		     there is no TLS check here -- confirm offsetted TLS
		     references cannot reach this point.  */
		  /* For CM_SMALL assume that latest object is 16MB before
		     end of 31bits boundary.  We may also accept pretty
		     large negative constants knowing that all objects are
		     in the positive half of address space.  */
		  if (ix86_cmodel == CM_SMALL
		      && offset < 16*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  /* For CM_KERNEL we know that all objects reside in the
		     negative half of the 32-bit address space.  We may not
		     accept negative offsets, since they may be just off
		     and we may accept pretty large positive ones.  */
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		case LABEL_REF:
		  /* These conditions are similar to SYMBOL_REF ones, just the
		     constraints for code models differ.  */
		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		      && offset < 16*1024*1024
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  if (ix86_cmodel == CM_KERNEL
		      && offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		  break;
		case UNSPEC:
		  /* TLS offsets are known to be positive and small.  */
		  switch (XINT (op1, 1))
		    {
		    case UNSPEC_DTPOFF:
		    case UNSPEC_NTPOFF:
		      if (offset > 0
			  && trunc_int_for_mode (offset, SImode) == offset)
			return 1;
		    }
		  break;
		default:
		  return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}
4624
4625 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4626 int
x86_64_zero_extended_value(rtx value)4627 x86_64_zero_extended_value (rtx value)
4628 {
4629 switch (GET_CODE (value))
4630 {
4631 case CONST_DOUBLE:
4632 if (HOST_BITS_PER_WIDE_INT == 32)
4633 return (GET_MODE (value) == VOIDmode
4634 && !CONST_DOUBLE_HIGH (value));
4635 else
4636 return 0;
4637 case CONST_INT:
4638 if (HOST_BITS_PER_WIDE_INT == 32)
4639 return INTVAL (value) >= 0;
4640 else
4641 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4642 break;
4643
4644 /* For certain code models, the symbolic references are known to fit. */
4645 case SYMBOL_REF:
4646 /* TLS symbols are not constant. */
4647 if (tls_symbolic_operand (value, Pmode))
4648 return false;
4649 return ix86_cmodel == CM_SMALL;
4650
4651 /* For certain code models, the code is near as well. */
4652 case LABEL_REF:
4653 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4654
4655 /* We also may accept the offsetted memory references in certain special
4656 cases. */
4657 case CONST:
4658 if (GET_CODE (XEXP (value, 0)) == PLUS)
4659 {
4660 rtx op1 = XEXP (XEXP (value, 0), 0);
4661 rtx op2 = XEXP (XEXP (value, 0), 1);
4662
4663 if (ix86_cmodel == CM_LARGE)
4664 return 0;
4665 switch (GET_CODE (op1))
4666 {
4667 case SYMBOL_REF:
4668 return 0;
4669 /* For small code model we may accept pretty large positive
4670 offsets, since one bit is available for free. Negative
4671 offsets are limited by the size of NULL pointer area
4672 specified by the ABI. */
4673 if (ix86_cmodel == CM_SMALL
4674 && GET_CODE (op2) == CONST_INT
4675 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4676 && (trunc_int_for_mode (INTVAL (op2), SImode)
4677 == INTVAL (op2)))
4678 return 1;
4679 /* ??? For the kernel, we may accept adjustment of
4680 -0x10000000, since we know that it will just convert
4681 negative address space to positive, but perhaps this
4682 is not worthwhile. */
4683 break;
4684 case LABEL_REF:
4685 /* These conditions are similar to SYMBOL_REF ones, just the
4686 constraints for code models differ. */
4687 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4688 && GET_CODE (op2) == CONST_INT
4689 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4690 && (trunc_int_for_mode (INTVAL (op2), SImode)
4691 == INTVAL (op2)))
4692 return 1;
4693 break;
4694 default:
4695 return 0;
4696 }
4697 }
4698 return 0;
4699 default:
4700 return 0;
4701 }
4702 }
4703
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  /* Profiling code needs the frame chain intact.  */
  if (current_function_profile)
    return 1;

  return 0;
}
4733
/* Record that the current function accesses previous call frames.
   Forces ix86_frame_pointer_required to return nonzero.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
4741
4742 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4743 # define USE_HIDDEN_LINKONCE 1
4744 #else
4745 # define USE_HIDDEN_LINKONCE 0
4746 #endif
4747
/* Bitmask over hard registers 0..7: bit N set means a PC-load thunk
   for register N has been requested and must be emitted by
   ix86_file_end.  */
static int pic_labels_used;
4749
/* Fills in the label name that should be used for a pc thunk for
   the given register.  NAME must have room for at least 32 bytes.
   With hidden-linkonce support the well-known __i686.get_pc_thunk.REG
   name is used (shared across objects); otherwise a file-local
   internal label is generated.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
4761
4762
4763 /* This function generates code for -fpic that loads %ebx with
4764 the return address of the caller and then returns. */
4765
void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  /* Emit a get-pc thunk for each of the eight integer registers that
     output_set_got recorded in the pic_labels_used bitmask.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a public, hidden, one-only function so
	     the linker keeps a single copy across object files.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Private copy: just a label in the text section.  */
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: load the caller's return address (at the top of
	 the stack) into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
4815
4816 /* Emit code for the SET_GOT patterns. */
4817
const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      /* Without PIC the GOT address is a link-time constant; with PIC
	 use the classic call-to-next-insn trick to get the pc.  */
      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      /* The call pushed the pc; pop it into the destination.  */
      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Deep-branch-prediction targets: call a per-register thunk
	 instead, so the return-stack predictor stays balanced.
	 Record the register so ix86_file_end emits the thunk.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Finally add the GOT offset to the loaded pc.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
4864
4865 /* Generate an "push" pattern for input ARG. */
4866
4867 static rtx
gen_push(rtx arg)4868 gen_push (rtx arg)
4869 {
4870 return gen_rtx_SET (VOIDmode,
4871 gen_rtx_MEM (Pmode,
4872 gen_rtx_PRE_DEC (Pmode,
4873 stack_pointer_rtx)),
4874 arg);
4875 }
4876
4877 /* Return >= 0 if there is an unused call-clobbered register available
4878 for the entire function. */
4879
4880 static unsigned int
ix86_select_alt_pic_regnum(void)4881 ix86_select_alt_pic_regnum (void)
4882 {
4883 if (current_function_is_leaf && !current_function_profile)
4884 {
4885 int i;
4886 for (i = 2; i >= 0; --i)
4887 if (!regs_ever_live[i])
4888 return i;
4889 }
4890
4891 return INVALID_REGNUM;
4892 }
4893
4894 /* Return 1 if we need to save REGNO. */
4895 static int
ix86_save_reg(unsigned int regno,int maybe_eh_return)4896 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4897 {
4898 if (pic_offset_table_rtx
4899 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4900 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4901 || current_function_profile
4902 || current_function_calls_eh_return
4903 || current_function_uses_const_pool))
4904 {
4905 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4906 return 0;
4907 return 1;
4908 }
4909
4910 if (current_function_calls_eh_return && maybe_eh_return)
4911 {
4912 unsigned i;
4913 for (i = 0; ; i++)
4914 {
4915 unsigned test = EH_RETURN_DATA_REGNO (i);
4916 if (test == INVALID_REGNUM)
4917 break;
4918 if (test == regno)
4919 return 1;
4920 }
4921 }
4922
4923 return (regs_ever_live[regno]
4924 && !call_used_regs[regno]
4925 && !fixed_regs[regno]
4926 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4927 }
4928
4929 /* Return number of registers to be saved on the stack. */
4930
4931 static int
ix86_nsaved_regs(void)4932 ix86_nsaved_regs (void)
4933 {
4934 int nregs = 0;
4935 int regno;
4936
4937 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4938 if (ix86_save_reg (regno, true))
4939 nregs++;
4940 return nregs;
4941 }
4942
4943 /* Return the offset between two registers, one to be eliminated, and the other
4944 its replacement, at the start of a routine. */
4945
4946 HOST_WIDE_INT
ix86_initial_elimination_offset(int from,int to)4947 ix86_initial_elimination_offset (int from, int to)
4948 {
4949 struct ix86_frame frame;
4950 ix86_compute_frame_layout (&frame);
4951
4952 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4953 return frame.hard_frame_pointer_offset;
4954 else if (from == FRAME_POINTER_REGNUM
4955 && to == HARD_FRAME_POINTER_REGNUM)
4956 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4957 else
4958 {
4959 if (to != STACK_POINTER_REGNUM)
4960 abort ();
4961 else if (from == ARG_POINTER_REGNUM)
4962 return frame.stack_pointer_offset;
4963 else if (from != FRAME_POINTER_REGNUM)
4964 abort ();
4965 else
4966 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4967 }
4968 }
4969
4970 /* Fill structure ix86_frame about frame of currently computed function. */
4971
4972 static void
ix86_compute_frame_layout(struct ix86_frame * frame)4973 ix86_compute_frame_layout (struct ix86_frame *frame)
4974 {
4975 HOST_WIDE_INT total_size;
4976 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4977 HOST_WIDE_INT offset;
4978 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4979 HOST_WIDE_INT size = get_frame_size ();
4980
4981 frame->nregs = ix86_nsaved_regs ();
4982 total_size = size;
4983
4984 /* During reload iteration the amount of registers saved can change.
4985 Recompute the value as needed. Do not recompute when amount of registers
4986 didn't change as reload does mutiple calls to the function and does not
4987 expect the decision to change within single iteration. */
4988 if (!optimize_size
4989 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4990 {
4991 int count = frame->nregs;
4992
4993 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4994 /* The fast prologue uses move instead of push to save registers. This
4995 is significantly longer, but also executes faster as modern hardware
4996 can execute the moves in parallel, but can't do that for push/pop.
4997
4998 Be careful about choosing what prologue to emit: When function takes
4999 many instructions to execute we may use slow version as well as in
5000 case function is known to be outside hot spot (this is known with
5001 feedback only). Weight the size of function by number of registers
5002 to save as it is cheap to use one or two push instructions but very
5003 slow to use many of them. */
5004 if (count)
5005 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5006 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5007 || (flag_branch_probabilities
5008 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5009 cfun->machine->use_fast_prologue_epilogue = false;
5010 else
5011 cfun->machine->use_fast_prologue_epilogue
5012 = !expensive_function_p (count);
5013 }
5014 if (TARGET_PROLOGUE_USING_MOVE
5015 && cfun->machine->use_fast_prologue_epilogue)
5016 frame->save_regs_using_mov = true;
5017 else
5018 frame->save_regs_using_mov = false;
5019
5020
5021 /* Skip return address and saved base pointer. */
5022 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5023
5024 frame->hard_frame_pointer_offset = offset;
5025
5026 /* Do some sanity checking of stack_alignment_needed and
5027 preferred_alignment, since i386 port is the only using those features
5028 that may break easily. */
5029
5030 if (size && !stack_alignment_needed)
5031 abort ();
5032 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5033 abort ();
5034 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5035 abort ();
5036 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5037 abort ();
5038
5039 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5040 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5041
5042 /* Register save area */
5043 offset += frame->nregs * UNITS_PER_WORD;
5044
5045 /* Va-arg area */
5046 if (ix86_save_varrargs_registers)
5047 {
5048 offset += X86_64_VARARGS_SIZE;
5049 frame->va_arg_size = X86_64_VARARGS_SIZE;
5050 }
5051 else
5052 frame->va_arg_size = 0;
5053
5054 /* Align start of frame for local function. */
5055 frame->padding1 = ((offset + stack_alignment_needed - 1)
5056 & -stack_alignment_needed) - offset;
5057
5058 offset += frame->padding1;
5059
5060 /* Frame pointer points here. */
5061 frame->frame_pointer_offset = offset;
5062
5063 offset += size;
5064
5065 /* Add outgoing arguments area. Can be skipped if we eliminated
5066 all the function calls as dead code.
5067 Skipping is however impossible when function calls alloca. Alloca
5068 expander assumes that last current_function_outgoing_args_size
5069 of stack frame are unused. */
5070 if (ACCUMULATE_OUTGOING_ARGS
5071 && (!current_function_is_leaf || current_function_calls_alloca))
5072 {
5073 offset += current_function_outgoing_args_size;
5074 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5075 }
5076 else
5077 frame->outgoing_arguments_size = 0;
5078
5079 /* Align stack boundary. Only needed if we're calling another function
5080 or using alloca. */
5081 if (!current_function_is_leaf || current_function_calls_alloca)
5082 frame->padding2 = ((offset + preferred_alignment - 1)
5083 & -preferred_alignment) - offset;
5084 else
5085 frame->padding2 = 0;
5086
5087 offset += frame->padding2;
5088
5089 /* We've reached end of stack frame. */
5090 frame->stack_pointer_offset = offset;
5091
5092 /* Size prologue needs to allocate. */
5093 frame->to_allocate =
5094 (size + frame->padding1 + frame->padding2
5095 + frame->outgoing_arguments_size + frame->va_arg_size);
5096
5097 if ((!frame->to_allocate && frame->nregs <= 1)
5098 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5099 frame->save_regs_using_mov = false;
5100
5101 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5102 && current_function_is_leaf)
5103 {
5104 frame->red_zone_size = frame->to_allocate;
5105 if (frame->save_regs_using_mov)
5106 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5107 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5108 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5109 }
5110 else
5111 frame->red_zone_size = 0;
5112 frame->to_allocate -= frame->red_zone_size;
5113 frame->stack_pointer_offset -= frame->red_zone_size;
5114 #if 0
5115 fprintf (stderr, "nregs: %i\n", frame->nregs);
5116 fprintf (stderr, "size: %i\n", size);
5117 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5118 fprintf (stderr, "padding1: %i\n", frame->padding1);
5119 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5120 fprintf (stderr, "padding2: %i\n", frame->padding2);
5121 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5122 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5123 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5124 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5125 frame->hard_frame_pointer_offset);
5126 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5127 #endif
5128 }
5129
5130 /* Emit code to save registers in the prologue. */
5131
5132 static void
ix86_emit_save_regs(void)5133 ix86_emit_save_regs (void)
5134 {
5135 int regno;
5136 rtx insn;
5137
5138 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5139 if (ix86_save_reg (regno, true))
5140 {
5141 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5142 RTX_FRAME_RELATED_P (insn) = 1;
5143 }
5144 }
5145
5146 /* Emit code to save registers using MOV insns. First register
5147 is restored from POINTER + OFFSET. */
5148 static void
ix86_emit_save_regs_using_mov(rtx pointer,HOST_WIDE_INT offset)5149 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5150 {
5151 int regno;
5152 rtx insn;
5153
5154 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5155 if (ix86_save_reg (regno, true))
5156 {
5157 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5158 Pmode, offset),
5159 gen_rtx_REG (Pmode, regno));
5160 RTX_FRAME_RELATED_P (insn) = 1;
5161 offset += UNITS_PER_WORD;
5162 }
5163 }
5164
5165 /* Expand prologue or epilogue stack adjustment.
5166 The pattern exist to put a dependency on all ebp-based memory accesses.
5167 STYLE should be negative if instructions should be marked as frame related,
5168 zero if %r11 register is live and cannot be freely used and positive
5169 otherwise. */
5170
static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* 64-bit offset too large for an immediate: materialize it in
	 %r11 first.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      /* Negative STYLE means mark the adjustment frame related.  */
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
5199
5200 /* Expand the prologue into a bunch of separate insns. */
5201
void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp; mov %esp, %ebp.  Both are frame related so unwind
	 info is generated for them.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* When saving with moves, the register save area is part of the
       single block allocated below.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing: call the allocation
	 worker with the size in %eax.  Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      /* %eax carries an incoming argument; save it around the call.  */
      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Reload the saved %eax from its slot at the old stack top.  */
      if (eax_live)
	{
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  /* Without a red zone the register saves follow the allocation.  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  /* Decide whether the GOT pointer must be set up, possibly in an
     alternate call-clobbered register found by
     ix86_select_alt_pic_regnum.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5311
5312 /* Emit code to restore saved registers using MOV insns. First register
5313 is restored from POINTER + OFFSET. */
5314 static void
ix86_emit_restore_regs_using_mov(rtx pointer,HOST_WIDE_INT offset,int maybe_eh_return)5315 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5316 int maybe_eh_return)
5317 {
5318 int regno;
5319 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5320
5321 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5322 if (ix86_save_reg (regno, maybe_eh_return))
5323 {
5324 /* Ensure that adjust_address won't be forced to produce pointer
5325 out of range allowed by x86-64 instruction set. */
5326 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5327 {
5328 rtx r11;
5329
5330 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5331 emit_move_insn (r11, GEN_INT (offset));
5332 emit_insn (gen_adddi3 (r11, r11, pointer));
5333 base_address = gen_rtx_MEM (Pmode, r11);
5334 offset = 0;
5335 }
5336 emit_move_insn (gen_rtx_REG (Pmode, regno),
5337 adjust_address (base_address, Pmode, offset));
5338 offset += UNITS_PER_WORD;
5339 }
5340 }
5341
5342 /* Restore function stack, frame, and registers. */
5343
void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the adjusted stack address into SA, restore
		 %ebp from its slot, then adjust sp by SA.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      /* Pop the frame plus the EH stack adjustment in one go.  */
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  /* Discrete equivalent of "leave": mov %ebp to %esp, pop %ebp.  */
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop the saved registers in ascending regno order (they were
	 pushed in descending order).  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
5508
5509 /* Reset from the function's potential modifications. */
5510
static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* ix86_expand_prologue may have redirected the PIC register to an
     alternate call-clobbered register; restore the canonical regno so
     the next function starts from a clean state.  */
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
5518
5519 /* Extract the parts of an RTL expression that is a valid memory address
5520 for an instruction. Return 0 if the structure of the address is
5521 grossly off. Return -1 if the address contains ASHIFT, so it is not
5522 strictly valid, but still used for computing length of lea instruction. */
5523
static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      /* Flatten the (possibly nested) PLUS into at most four addends;
	 more than four cannot be encoded.  */
      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      /* Classify each addend as index*scale, segment unspec, base,
	 index, or displacement — rejecting duplicates.  */
      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* Only the thread-pointer unspec is allowed, and only
		 when direct %fs/%gs references are enabled.  */
	      if (XINT (op, 1) == UNSPEC_TP
	          && TARGET_TLS_DIRECT_SEG_REFS
	          && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      /* -1: not strictly valid, but usable for lea length computation.  */
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
5672
5673 /* Return cost of the memory address x.
5674 For i386, it is better to use a complex address than let gcc copy
5675 the address into a reg and make a new pseudo. But not if the address
5676 requires to two regs - that would mean more pseudos with longer
5677 lifetimes. */
5678 static int
ix86_address_cost(rtx x)5679 ix86_address_cost (rtx x)
5680 {
5681 struct ix86_address parts;
5682 int cost = 1;
5683
5684 if (!ix86_decompose_address (x, &parts))
5685 abort ();
5686
5687 /* More complex memory references are better. */
5688 if (parts.disp && parts.disp != const0_rtx)
5689 cost--;
5690 if (parts.seg != SEG_DEFAULT)
5691 cost--;
5692
5693 /* Attempt to minimize number of registers in the address. */
5694 if ((parts.base
5695 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5696 || (parts.index
5697 && (!REG_P (parts.index)
5698 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5699 cost++;
5700
5701 if (parts.base
5702 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5703 && parts.index
5704 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5705 && parts.base != parts.index)
5706 cost++;
5707
5708 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5709 since it's predecode logic can't detect the length of instructions
5710 and it degenerates to vector decoded. Increase cost of such
5711 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5712 to split such addresses or even refuse such addresses at all.
5713
5714 Following addressing modes are affected:
5715 [base+scale*index]
5716 [scale*index+disp]
5717 [base+index]
5718
5719 The first and last case may be avoidable by explicitly coding the zero in
5720 memory address, but I don't have AMD-K6 machine handy to check this
5721 theory. */
5722
5723 if (TARGET_K6
5724 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5725 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5726 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5727 cost += 10;
5728
5729 return cost;
5730 }
5731
5732 /* If X is a machine specific address (i.e. a symbol or label being
5733 referenced as a displacement from the GOT implemented using an
5734 UNSPEC), then return the base term. Otherwise return X. */
5735
5736 rtx
ix86_find_base_term(rtx x)5737 ix86_find_base_term (rtx x)
5738 {
5739 rtx term;
5740
5741 if (TARGET_64BIT)
5742 {
5743 if (GET_CODE (x) != CONST)
5744 return x;
5745 term = XEXP (x, 0);
5746 if (GET_CODE (term) == PLUS
5747 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5748 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5749 term = XEXP (term, 0);
5750 if (GET_CODE (term) != UNSPEC
5751 || XINT (term, 1) != UNSPEC_GOTPCREL)
5752 return x;
5753
5754 term = XVECEXP (term, 0, 0);
5755
5756 if (GET_CODE (term) != SYMBOL_REF
5757 && GET_CODE (term) != LABEL_REF)
5758 return x;
5759
5760 return term;
5761 }
5762
5763 term = ix86_delegitimize_address (x);
5764
5765 if (GET_CODE (term) != SYMBOL_REF
5766 && GET_CODE (term) != LABEL_REF)
5767 return x;
5768
5769 return term;
5770 }
5771
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      /* Strip the CONST wrapper so we can look at the underlying
         symbolic term.  */
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          /* Only a literal integer may be added to a symbolic term.  */
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            return false;
          x = XEXP (x, 0);
        }

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            /* TP-relative offsets are constant only for local-exec
               TLS symbols.  */
            return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
          case UNSPEC_DTPOFF:
            /* DTP-relative offsets require local-dynamic symbols.  */
            return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (!symbolic_operand (x, Pmode))
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (tls_symbolic_operand (x, Pmode))
        return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5821
5822 /* Determine if it's legal to put X into the constant pool. This
5823 is not possible for the address of thread-local symbols, which
5824 is checked above. */
5825
5826 static bool
ix86_cannot_force_const_mem(rtx x)5827 ix86_cannot_force_const_mem (rtx x)
5828 {
5829 return !legitimate_constant_p (x);
5830 }
5831
5832 /* Determine if a given RTX is a valid constant address. */
5833
5834 bool
constant_address_p(rtx x)5835 constant_address_p (rtx x)
5836 {
5837 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5838 }
5839
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_TPOFF:
            /* Local-exec TLS offsets are link-time constants.  */
            return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbolic operands are OK only when they form a valid PIC
         displacement.  */
      return legitimate_pic_address_disp_p (x);

    default:
      /* Non-symbolic constants never need PIC treatment.  */
      return true;
    }
}
5873
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns nonzero when DISP is acceptable.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
        return 0;
      /* A local symbol under the small PIC model is directly
         addressable.  */
      if (GET_CODE (disp) == SYMBOL_REF
          && ix86_cmodel == CM_SMALL_PIC
          && SYMBOL_REF_LOCAL_P (disp))
        return 1;
      if (GET_CODE (disp) == LABEL_REF)
        return 1;
      /* Symbol-plus-offset, with the offset inside +/- 16MB so the
         whole reference stays within the small-model range.  */
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS)
        {
          rtx op0 = XEXP (XEXP (disp, 0), 0);
          rtx op1 = XEXP (XEXP (disp, 0), 1);

          /* TLS references should always be enclosed in UNSPEC.  */
          if (tls_symbolic_operand (op0, GET_MODE (op0)))
            return 0;
          if (((GET_CODE (op0) == SYMBOL_REF
                && ix86_cmodel == CM_SMALL_PIC
                && SYMBOL_REF_LOCAL_P (op0))
               || GET_CODE (op0) == LABEL_REF)
              && GET_CODE (op1) == CONST_INT
              && INTVAL (op1) < 16*1024*1024
              && INTVAL (op1) >= -16*1024*1024)
            return 1;
        }
    }

  /* Anything else must be wrapped in a CONST.  */
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here, as that would
         exceed the limited distance of GOT table entries.  We should
         not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || XINT (disp, 1) != UNSPEC_GOTPCREL)
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  /* 32-bit mode: strip a trailing integer offset, remembering that we
     saw one -- some unspecs below disallow it.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
              return 1;
          }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  /* Validate the specific GOT/TLS unspec against its symbol class.  */
  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* GOTOFF tolerates an added offset, but only for local symbols.  */
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
          || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
5982
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE when ADDR is valid, FALSE otherwise; STRICT selects
   between strict (hard-reg only) and non-strict register checks.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* Split ADDR into base + index*scale + disp (+ segment).  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor: only 1, 2, 4 and 8 are encodable, and a
     scale is meaningless without an index register.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_GOTPCREL:
            /* GOT-relative unspecs can only appear when PIC is on.  */
            if (!flag_pic)
              abort ();
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            /* TLS offsets are validated by the move patterns.  */
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
        {
          /* 64-bit addressing only sign-extends 32-bit displacements.  */
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
6203
6204 /* Return an unique alias set for the GOT. */
6205
6206 static HOST_WIDE_INT
ix86_GOT_alias_set(void)6207 ix86_GOT_alias_set (void)
6208 {
6209 static HOST_WIDE_INT set = -1;
6210 if (set == -1)
6211 set = new_alias_set ();
6212 return set;
6213 }
6214
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    /* Already valid as a 64-bit PIC displacement; nothing to do.  */
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      /* Reload cannot create new pseudos, so mark the PIC register
         live directly.  */
      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          /* Wrap only the symbol in the unspec; keep the offset
             outside so it can be folded by the assembler.  */
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          /* 64-bit global: load the address RIP-relative from the GOT
             (@GOTPCREL).  */
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use directly gen_movsi, otherwise the address is loaded
             into register for CSE.  We don't want to CSE this addresses,
             instead we CSE addresses from the GOT table, so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_rtx_MEM (Pmode, new);
          RTX_UNCHANGING_P (new) = 1;
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  /* 64-bit: offsets beyond +/- 16MB exceed the small
                     code model; force the offset into a register.  */
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
                }
            }
          else
            {
              /* Legitimize both halves recursively and re-add them,
                 keeping any constant term on the outside.  */
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
6382
6383 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6384
6385 static rtx
get_thread_pointer(int to_reg)6386 get_thread_pointer (int to_reg)
6387 {
6388 rtx tp, reg, insn;
6389
6390 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6391 if (!to_reg)
6392 return tp;
6393
6394 reg = gen_reg_rtx (Pmode);
6395 insn = gen_rtx_SET (VOIDmode, reg, tp);
6396 insn = emit_insn (insn);
6397
6398 return reg;
6399 }
6400
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol and MODEL selects which of the four TLS access
   models (global-dynamic, local-dynamic, initial-exec, local-exec)
   to emit code for.  Returns the legitimized address rtx.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call __tls_get_addr to compute the address at runtime.  */
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          /* Wrap the call so it can be CSEd against equal symbols.  */
          emit_libcall_block (insns, dest, rax, x);
        }
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* One call computes the module base; each symbol is then a
         link-time @DTPOFF offset from it.  */
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Load the TP-relative offset from the GOT; which relocation to
         use depends on bitness, PIC, and the GNU/Sun TLS dialect.  */
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_GNU_TLS)
        {
          /* Sun TLS without PIC still needs a GOT pointer.  */
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          /* Sun dialect: the offset is subtracted from the thread
             pointer rather than added.  */
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant; no GOT access needed.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      abort ();
    }

  return dest;
}
6525
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get their own legitimization path; LOG is reused here
     to hold the TLS model.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          /* The constant may be in either of two positions; find it
             and remember the non-constant partner.  */
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      /* Force remaining multiplications into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      /* Finally, force the non-register operand into a register so we
         end up with reg+reg, which is always valid.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
6711
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." (current location) is only meaningful in PIC output.  */
      if (flag_pic)
        putc ('.', file);
      else
        abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* Non-local symbols printed with the 'P' code go through the PLT.  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      /* NOTE(review): this writes to asm_out_file rather than the FILE
         parameter, unlike every other case -- presumably they are always
         the same stream here, but confirm against callers.  */
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      else
        abort ();
      break;

    case MINUS:
      /* Group the difference so the assembler evaluates it as one
         relocation; Intel dialect uses parens, AT&T uses brackets.  */
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
        abort ();
      /* Print the wrapped symbol followed by its relocation suffix.  */
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@TPOFF", file);
          else
            fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs ("@GOTTPOFF(%rip)", file);
          else
            fputs ("@GOTNTPOFF", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
6850
6851 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6852 We need to handle our special PIC relocations. */
6853
6854 void
i386_dwarf_output_addr_const(FILE * file,rtx x)6855 i386_dwarf_output_addr_const (FILE *file, rtx x)
6856 {
6857 #ifdef ASM_QUAD
6858 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6859 #else
6860 if (TARGET_64BIT)
6861 abort ();
6862 fprintf (file, "%s", ASM_LONG);
6863 #endif
6864 if (flag_pic)
6865 output_pic_addr_const (file, x, '\0');
6866 else
6867 output_addr_const (file, x);
6868 fputc ('\n', file);
6869 }
6870
6871 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6872 We need to emit DTP-relative relocations. */
6873
6874 void
i386_output_dwarf_dtprel(FILE * file,int size,rtx x)6875 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6876 {
6877 fputs (ASM_LONG, file);
6878 output_addr_const (file, x);
6879 fputs ("@DTPOFF", file);
6880 switch (size)
6881 {
6882 case 4:
6883 break;
6884 case 8:
6885 fputs (", 0", file);
6886 break;
6887 default:
6888 abort ();
6889 }
6890 }
6891
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   ORIG_X is either a plain address or a MEM.  On success, return the
   underlying SYMBOL_REF (possibly re-combined with an index register
   and/or constant offset); otherwise return ORIG_X unchanged.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  /* Look through a memory reference to the address inside it.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit PIC only loads through the GOT via
	 (mem (const (unspec [symbol] GOTPCREL))).  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  /* The 32-bit forms are always (plus pic-stuff (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF.  Y collects the non-PIC
	 part of the inner sum so it can be re-attached below.  */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      /* Only a bare register or a scaled index makes sense here.  */
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* A GOT reference is only valid as a load (orig_x a MEM);
     a GOTOFF reference is only valid as a plain address.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Same as above, but with an additional integer offset folded into
     the const wrapper.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
6967
/* Print to FILE the condition-code suffix (e.g. "e", "ge", "nbe")
   corresponding to comparison CODE performed in MODE.  REVERSE
   non-zero means emit the suffix for the reversed condition.  FP
   non-zero means the suffix is for an fcmov-style instruction, where
   some assemblers spell a few conditions differently.  Aborts if CODE
   is not representable in MODE.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      /* FP comparisons must already have been reduced to a single
	 test; a remaining bypass/second code is a caller bug.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      /* Signed greater-than needs full flags (or a known-no-overflow
	 variant).  */
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      /* When overflow is known clear, LT reduces to the sign flag.  */
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
7054
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   Any other CODE prints the register in its natural mode.  Aborts on
   registers that have no assembler name (arg/frame pointer, flags).  */

void
print_reg (rtx x, int code, FILE *file)
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the override letter into a byte size; MMX registers are
     always printed via the full-name table below.  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      /* 'y': spell the FP top-of-stack as "st(0)" rather than "st".  */
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* 32/64-bit integer registers get an 'e' or 'r' prefix on the
	 16-bit name; FP/SSE registers do not.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      /* Registers with no QImode name fall back to the full name.  */
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
7153
7154 /* Locate some local-dynamic symbol still in use by this function
7155 so that we can print its name in some tls_local_dynamic_base
7156 pattern. */
7157
7158 static const char *
get_some_local_dynamic_name(void)7159 get_some_local_dynamic_name (void)
7160 {
7161 rtx insn;
7162
7163 if (cfun->machine->some_ld_name)
7164 return cfun->machine->some_ld_name;
7165
7166 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7167 if (INSN_P (insn)
7168 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7169 return cfun->machine->some_ld_name;
7170
7171 abort ();
7172 }
7173
7174 static int
get_some_local_dynamic_name_1(rtx * px,void * data ATTRIBUTE_UNUSED)7175 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7176 {
7177 rtx x = *px;
7178
7179 if (GET_CODE (x) == SYMBOL_REF
7180 && local_dynamic_symbolic_operand (x, Pmode))
7181 {
7182 cfun->machine->some_ld_name = XSTR (x, 0);
7183 return 1;
7184 }
7185
7186 return 0;
7187 }
7188
7189 /* Meaning of CODE:
7190 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7191 C -- print opcode suffix for set/cmov insn.
7192 c -- like C, but print reversed condition
7193 F,f -- likewise, but for floating-point.
7194 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7195 otherwise nothing
7196 R -- print the prefix for register names.
7197 z -- print the opcode suffix for the size of the current operand.
7198 * -- print a star (in certain assembler syntax)
7199 A -- print an absolute memory reference.
7200 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7201 s -- print a shift double count, followed by the assemblers argument
7202 delimiter.
7203 b -- print the QImode name of the register for the indicated operand.
7204 %b0 would print %al if operands[0] is reg 0.
7205 w -- likewise, print the HImode name of the register.
7206 k -- likewise, print the SImode name of the register.
7207 q -- likewise, print the DImode name of the register.
7208 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7209 y -- print "st(0)" instead of "st" as a register.
7210 D -- print condition for SSE cmp instruction.
7211 P -- if PIC, print an @PLT suffix.
7212 X -- don't print any sort of PIC '@' suffix for a symbol.
7213 & -- print some in-use local-dynamic symbol name.
7214 */
7215
7216 void
print_operand(FILE * file,rtx x,int code)7217 print_operand (FILE *file, rtx x, int code)
7218 {
7219 if (code)
7220 {
7221 switch (code)
7222 {
7223 case '*':
7224 if (ASSEMBLER_DIALECT == ASM_ATT)
7225 putc ('*', file);
7226 return;
7227
7228 case '&':
7229 assemble_name (file, get_some_local_dynamic_name ());
7230 return;
7231
7232 case 'A':
7233 if (ASSEMBLER_DIALECT == ASM_ATT)
7234 putc ('*', file);
7235 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7236 {
7237 /* Intel syntax. For absolute addresses, registers should not
7238 be surrounded by braces. */
7239 if (GET_CODE (x) != REG)
7240 {
7241 putc ('[', file);
7242 PRINT_OPERAND (file, x, 0);
7243 putc (']', file);
7244 return;
7245 }
7246 }
7247 else
7248 abort ();
7249
7250 PRINT_OPERAND (file, x, 0);
7251 return;
7252
7253
7254 case 'L':
7255 if (ASSEMBLER_DIALECT == ASM_ATT)
7256 putc ('l', file);
7257 return;
7258
7259 case 'W':
7260 if (ASSEMBLER_DIALECT == ASM_ATT)
7261 putc ('w', file);
7262 return;
7263
7264 case 'B':
7265 if (ASSEMBLER_DIALECT == ASM_ATT)
7266 putc ('b', file);
7267 return;
7268
7269 case 'Q':
7270 if (ASSEMBLER_DIALECT == ASM_ATT)
7271 putc ('l', file);
7272 return;
7273
7274 case 'S':
7275 if (ASSEMBLER_DIALECT == ASM_ATT)
7276 putc ('s', file);
7277 return;
7278
7279 case 'T':
7280 if (ASSEMBLER_DIALECT == ASM_ATT)
7281 putc ('t', file);
7282 return;
7283
7284 case 'z':
7285 /* 387 opcodes don't get size suffixes if the operands are
7286 registers. */
7287 if (STACK_REG_P (x))
7288 return;
7289
7290 /* Likewise if using Intel opcodes. */
7291 if (ASSEMBLER_DIALECT == ASM_INTEL)
7292 return;
7293
7294 /* This is the size of op from size of operand. */
7295 switch (GET_MODE_SIZE (GET_MODE (x)))
7296 {
7297 case 2:
7298 #ifdef HAVE_GAS_FILDS_FISTS
7299 putc ('s', file);
7300 #endif
7301 return;
7302
7303 case 4:
7304 if (GET_MODE (x) == SFmode)
7305 {
7306 putc ('s', file);
7307 return;
7308 }
7309 else
7310 putc ('l', file);
7311 return;
7312
7313 case 12:
7314 case 16:
7315 putc ('t', file);
7316 return;
7317
7318 case 8:
7319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7320 {
7321 #ifdef GAS_MNEMONICS
7322 putc ('q', file);
7323 #else
7324 putc ('l', file);
7325 putc ('l', file);
7326 #endif
7327 }
7328 else
7329 putc ('l', file);
7330 return;
7331
7332 default:
7333 abort ();
7334 }
7335
7336 case 'b':
7337 case 'w':
7338 case 'k':
7339 case 'q':
7340 case 'h':
7341 case 'y':
7342 case 'X':
7343 case 'P':
7344 break;
7345
7346 case 's':
7347 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7348 {
7349 PRINT_OPERAND (file, x, 0);
7350 putc (',', file);
7351 }
7352 return;
7353
7354 case 'D':
7355 /* Little bit of braindamage here. The SSE compare instructions
7356 does use completely different names for the comparisons that the
7357 fp conditional moves. */
7358 switch (GET_CODE (x))
7359 {
7360 case EQ:
7361 case UNEQ:
7362 fputs ("eq", file);
7363 break;
7364 case LT:
7365 case UNLT:
7366 fputs ("lt", file);
7367 break;
7368 case LE:
7369 case UNLE:
7370 fputs ("le", file);
7371 break;
7372 case UNORDERED:
7373 fputs ("unord", file);
7374 break;
7375 case NE:
7376 case LTGT:
7377 fputs ("neq", file);
7378 break;
7379 case UNGE:
7380 case GE:
7381 fputs ("nlt", file);
7382 break;
7383 case UNGT:
7384 case GT:
7385 fputs ("nle", file);
7386 break;
7387 case ORDERED:
7388 fputs ("ord", file);
7389 break;
7390 default:
7391 abort ();
7392 break;
7393 }
7394 return;
7395 case 'O':
7396 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7397 if (ASSEMBLER_DIALECT == ASM_ATT)
7398 {
7399 switch (GET_MODE (x))
7400 {
7401 case HImode: putc ('w', file); break;
7402 case SImode:
7403 case SFmode: putc ('l', file); break;
7404 case DImode:
7405 case DFmode: putc ('q', file); break;
7406 default: abort ();
7407 }
7408 putc ('.', file);
7409 }
7410 #endif
7411 return;
7412 case 'C':
7413 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7414 return;
7415 case 'F':
7416 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7417 if (ASSEMBLER_DIALECT == ASM_ATT)
7418 putc ('.', file);
7419 #endif
7420 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7421 return;
7422
7423 /* Like above, but reverse condition */
7424 case 'c':
7425 /* Check to see if argument to %c is really a constant
7426 and not a condition code which needs to be reversed. */
7427 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7428 {
7429 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7430 return;
7431 }
7432 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7433 return;
7434 case 'f':
7435 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7436 if (ASSEMBLER_DIALECT == ASM_ATT)
7437 putc ('.', file);
7438 #endif
7439 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7440 return;
7441 case '+':
7442 {
7443 rtx x;
7444
7445 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7446 return;
7447
7448 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7449 if (x)
7450 {
7451 int pred_val = INTVAL (XEXP (x, 0));
7452
7453 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7454 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7455 {
7456 int taken = pred_val > REG_BR_PROB_BASE / 2;
7457 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7458
7459 /* Emit hints only in the case default branch prediction
7460 heuristics would fail. */
7461 if (taken != cputaken)
7462 {
7463 /* We use 3e (DS) prefix for taken branches and
7464 2e (CS) prefix for not taken branches. */
7465 if (taken)
7466 fputs ("ds ; ", file);
7467 else
7468 fputs ("cs ; ", file);
7469 }
7470 }
7471 }
7472 return;
7473 }
7474 default:
7475 output_operand_lossage ("invalid operand code `%c'", code);
7476 }
7477 }
7478
7479 if (GET_CODE (x) == REG)
7480 print_reg (x, code, file);
7481
7482 else if (GET_CODE (x) == MEM)
7483 {
7484 /* No `byte ptr' prefix for call instructions. */
7485 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7486 {
7487 const char * size;
7488 switch (GET_MODE_SIZE (GET_MODE (x)))
7489 {
7490 case 1: size = "BYTE"; break;
7491 case 2: size = "WORD"; break;
7492 case 4: size = "DWORD"; break;
7493 case 8: size = "QWORD"; break;
7494 case 12: size = "XWORD"; break;
7495 case 16: size = "XMMWORD"; break;
7496 default:
7497 abort ();
7498 }
7499
7500 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7501 if (code == 'b')
7502 size = "BYTE";
7503 else if (code == 'w')
7504 size = "WORD";
7505 else if (code == 'k')
7506 size = "DWORD";
7507
7508 fputs (size, file);
7509 fputs (" PTR ", file);
7510 }
7511
7512 x = XEXP (x, 0);
7513 /* Avoid (%rip) for call operands. */
7514 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7515 && GET_CODE (x) != CONST_INT)
7516 output_addr_const (file, x);
7517 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7518 output_operand_lossage ("invalid constraints for operand");
7519 else
7520 output_address (x);
7521 }
7522
7523 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7524 {
7525 REAL_VALUE_TYPE r;
7526 long l;
7527
7528 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7529 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7530
7531 if (ASSEMBLER_DIALECT == ASM_ATT)
7532 putc ('$', file);
7533 fprintf (file, "0x%08lx", l);
7534 }
7535
7536 /* These float cases don't actually occur as immediate operands. */
7537 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7538 {
7539 char dstr[30];
7540
7541 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7542 fprintf (file, "%s", dstr);
7543 }
7544
7545 else if (GET_CODE (x) == CONST_DOUBLE
7546 && GET_MODE (x) == XFmode)
7547 {
7548 char dstr[30];
7549
7550 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7551 fprintf (file, "%s", dstr);
7552 }
7553
7554 else
7555 {
7556 if (code != 'P')
7557 {
7558 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7559 {
7560 if (ASSEMBLER_DIALECT == ASM_ATT)
7561 putc ('$', file);
7562 }
7563 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7564 || GET_CODE (x) == LABEL_REF)
7565 {
7566 if (ASSEMBLER_DIALECT == ASM_ATT)
7567 putc ('$', file);
7568 else
7569 fputs ("OFFSET FLAT:", file);
7570 }
7571 }
7572 if (GET_CODE (x) == CONST_INT)
7573 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7574 else if (flag_pic)
7575 output_pic_addr_const (file, x, code);
7576 else
7577 output_addr_const (file, x);
7578 }
7579 }
7580
/* Print a memory operand whose address is ADDR.  Handles both AT&T
   syntax (disp(base,index,scale)) and Intel syntax
   ([base+index*scale+offset]), plus segment overrides and 64-bit
   RIP-relative addressing.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Break the address into base + index*scale + disp + segment.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit an explicit segment override prefix if there is one.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      /* Intel syntax needs an explicit segment for a bare
		 constant address.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.
	 Only non-TLS symbols, labels, and symbol+constant forms
	 qualify.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel: [base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  /* A negative value already prints its own '-'.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
7722
7723 bool
output_addr_const_extra(FILE * file,rtx x)7724 output_addr_const_extra (FILE *file, rtx x)
7725 {
7726 rtx op;
7727
7728 if (GET_CODE (x) != UNSPEC)
7729 return false;
7730
7731 op = XVECEXP (x, 0, 0);
7732 switch (XINT (x, 1))
7733 {
7734 case UNSPEC_GOTTPOFF:
7735 output_addr_const (file, op);
7736 /* FIXME: This might be @TPOFF in Sun ld. */
7737 fputs ("@GOTTPOFF", file);
7738 break;
7739 case UNSPEC_TPOFF:
7740 output_addr_const (file, op);
7741 fputs ("@TPOFF", file);
7742 break;
7743 case UNSPEC_NTPOFF:
7744 output_addr_const (file, op);
7745 if (TARGET_64BIT)
7746 fputs ("@TPOFF", file);
7747 else
7748 fputs ("@NTPOFF", file);
7749 break;
7750 case UNSPEC_DTPOFF:
7751 output_addr_const (file, op);
7752 fputs ("@DTPOFF", file);
7753 break;
7754 case UNSPEC_GOTNTPOFF:
7755 output_addr_const (file, op);
7756 if (TARGET_64BIT)
7757 fputs ("@GOTTPOFF(%rip)", file);
7758 else
7759 fputs ("@GOTNTPOFF", file);
7760 break;
7761 case UNSPEC_INDNTPOFF:
7762 output_addr_const (file, op);
7763 fputs ("@INDNTPOFF", file);
7764 break;
7765
7766 default:
7767 return false;
7768 }
7769
7770 return true;
7771 }
7772
7773 /* Split one or more DImode RTL references into pairs of SImode
7774 references. The RTL can be REG, offsettable MEM, integer constant, or
7775 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7776 split and "num" is its length. lo_half and hi_half are output arrays
7777 that parallel "operands". */
7778
7779 void
split_di(rtx operands[],int num,rtx lo_half[],rtx hi_half[])7780 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7781 {
7782 while (num--)
7783 {
7784 rtx op = operands[num];
7785
7786 /* simplify_subreg refuse to split volatile memory addresses,
7787 but we still have to handle it. */
7788 if (GET_CODE (op) == MEM)
7789 {
7790 lo_half[num] = adjust_address (op, SImode, 0);
7791 hi_half[num] = adjust_address (op, SImode, 4);
7792 }
7793 else
7794 {
7795 lo_half[num] = simplify_gen_subreg (SImode, op,
7796 GET_MODE (op) == VOIDmode
7797 ? DImode : GET_MODE (op), 0);
7798 hi_half[num] = simplify_gen_subreg (SImode, op,
7799 GET_MODE (op) == VOIDmode
7800 ? DImode : GET_MODE (op), 4);
7801 }
7802 }
7803 }
7804 /* Split one or more TImode RTL references into pairs of SImode
7805 references. The RTL can be REG, offsettable MEM, integer constant, or
7806 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7807 split and "num" is its length. lo_half and hi_half are output arrays
7808 that parallel "operands". */
7809
7810 void
split_ti(rtx operands[],int num,rtx lo_half[],rtx hi_half[])7811 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7812 {
7813 while (num--)
7814 {
7815 rtx op = operands[num];
7816
7817 /* simplify_subreg refuse to split volatile memory addresses, but we
7818 still have to handle it. */
7819 if (GET_CODE (op) == MEM)
7820 {
7821 lo_half[num] = adjust_address (op, DImode, 0);
7822 hi_half[num] = adjust_address (op, DImode, 8);
7823 }
7824 else
7825 {
7826 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7827 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7828 }
7829 }
7830 }
7831
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* Static buffer returned to the caller; valid until the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the 387 mnemonic root (integer form if either source is an
     integer in memory) and the SSE mnemonic root.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      /* Scalar SSE: pick the ss/sd suffix from the operand mode.  */
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the destination matches
	 operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: an integer-memory operand forces the reversed
	 form when it is the first source.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
8053
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards (both rounding-control
   bits set -- 0xc00, which on the 387 selects truncation toward
   zero).  */
void
emit_i387_cw_initialization (rtx normal, rtx round_down)
{
  rtx reg = gen_reg_rtx (HImode);

  /* Save the live control word, then copy it so it can be modified.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    /* NOTE(review): movsi_insv_1 presumably inserts 0xc into bits
       8..15 -- the same effect as the 0xc00 IOR below, but cheaper on
       targets without partial-register stalls; confirm in i386.md.  */
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
8071
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  operands[2] and operands[3] hold the
   saved and truncating control words respectively (presumably set up
   by emit_i387_cw_initialization -- see the fldcw uses below).  */

const char *
output_fix_trunc (rtx insn, rtx *operands)
{
  /* Non-zero when the value in st(0) dies with this insn.  */
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  /* The source must be in st(0) and the destination in memory.  */
  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word, store, then restore the
     original rounding mode.  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
8103
8104 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8105 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8106 when fucom should be used. */
8107
const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  /* Nonzero when either operand lives in an SSE register.  */
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For the fnstsw form, operand 0 is the status-word destination, so
     the values being compared are operands 1 and 2.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  /* SSE compares set EFLAGS directly via [u]comiss / [u]comisd.  */
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  /* 387 compares always operate on st(0).  */
  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the table index from the four selector bits; see the
	 encoding comment above the table.  NULL slots are invalid
	 combinations (e.g. unordered integer compares).  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
8231
8232 void
ix86_output_addr_vec_elt(FILE * file,int value)8233 ix86_output_addr_vec_elt (FILE *file, int value)
8234 {
8235 const char *directive = ASM_LONG;
8236
8237 if (TARGET_64BIT)
8238 {
8239 #ifdef ASM_QUAD
8240 directive = ASM_QUAD;
8241 #else
8242 abort ();
8243 #endif
8244 }
8245
8246 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8247 }
8248
/* Emit one entry of a relative address vector (PIC jump table).
   VALUE is the target label number, REL the base label number.  */
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    /* 64-bit: emit the 32-bit difference of the two local labels.  */
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    /* Let the assembler compute the GOT-relative offset directly.  */
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O: difference from the function's picbase label.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback: express the GOTOFF as GOT symbol plus a difference
       of the label and the current location.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
8269
8270 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8271 for the target. */
8272
8273 void
ix86_expand_clear(rtx dest)8274 ix86_expand_clear (rtx dest)
8275 {
8276 rtx tmp;
8277
8278 /* We play register width games, which are only valid after reload. */
8279 if (!reload_completed)
8280 abort ();
8281
8282 /* Avoid HImode and its attendant prefix byte. */
8283 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8284 dest = gen_rtx_REG (SImode, REGNO (dest));
8285
8286 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8287
8288 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8289 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8290 {
8291 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8292 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8293 }
8294
8295 emit_insn (tmp);
8296 }
8297
8298 /* X is an unchanging MEM. If it is a constant pool reference, return
8299 the constant pool rtx, else NULL. */
8300
8301 static rtx
maybe_get_pool_constant(rtx x)8302 maybe_get_pool_constant (rtx x)
8303 {
8304 x = ix86_delegitimize_address (XEXP (x, 0));
8305
8306 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8307 return get_pool_constant (x);
8308
8309 return NULL_RTX;
8310 }
8311
/* Expand a scalar move of MODE between OPERANDS[0] and OPERANDS[1],
   legitimizing TLS, PIC and constant operands as needed, and emit the
   resulting SET.  */
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  /* No new pseudos may be created once reload has started.  */
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need their address materialized per access model.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      /* force_operand may have stored directly into op0.  */
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  /* Legitimize the symbolic address, preferably straight into
	     the destination register.  */
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* Avoid mem->mem moves except for pushes, which the machine
	 can do directly.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  /* mem->mem again: go through a register.  */
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
8408
/* Expand a vector-mode move of MODE between OPERANDS[0] and
   OPERANDS[1] and emit the resulting SET.  */
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.
     NOTE(review): the condition fires only when NEITHER operand is a
     register (avoiding a direct mem->mem move), not merely when
     operand1 isn't one -- the comment understates the test.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
8433
8434 /* Attempt to expand a binary operator. Make the expansion closer to the
8435 actual machine, then just general_operand, which will allow 3 separate
8436 memory references (one output, two input) in a single insn. */
8437
void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  /* Which source already matches a memory destination: 0 = none,
     1 = src1, 2 = src2 (commutative only).  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination, force the other into a register.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Arithmetic patterns clobber the flags; make that explicit.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8521
8522 /* Return TRUE or FALSE depending on whether the binary operator meets the
8523 appropriate constraints. */
8524
8525 int
ix86_binary_operator_ok(enum rtx_code code,enum machine_mode mode ATTRIBUTE_UNUSED,rtx operands[3])8526 ix86_binary_operator_ok (enum rtx_code code,
8527 enum machine_mode mode ATTRIBUTE_UNUSED,
8528 rtx operands[3])
8529 {
8530 /* Both source operands cannot be in memory. */
8531 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8532 return 0;
8533 /* If the operation is not commutable, source 1 cannot be a constant. */
8534 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8535 return 0;
8536 /* If the destination is memory, we must have a matching source operand. */
8537 if (GET_CODE (operands[0]) == MEM
8538 && ! (rtx_equal_p (operands[0], operands[1])
8539 || (GET_RTX_CLASS (code) == 'c'
8540 && rtx_equal_p (operands[0], operands[2]))))
8541 return 0;
8542 /* If the operation is not commutable and the source 1 is memory, we must
8543 have a matching destination. */
8544 if (GET_CODE (operands[1]) == MEM
8545 && GET_RTX_CLASS (code) != 'c'
8546 && ! rtx_equal_p (operands[0], operands[1]))
8547 return 0;
8548 return 1;
8549 }
8550
8551 /* Attempt to expand a unary operator. Make the expansion closer to the
8552 actual machine, then just general_operand, which will allow 2 separate
8553 memory references (one output, one input) in a single insn. */
8554
void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  /* Nonzero when the memory destination matches the source, so the
     operation can be done in place.  */
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  NOT (unlike NEG) does not touch
         the flags, so it never needs the clobber.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* All other unary patterns clobber the flags.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8610
8611 /* Return TRUE or FALSE depending on whether the unary operator meets the
8612 appropriate constraints. */
8613
8614 int
ix86_unary_operator_ok(enum rtx_code code ATTRIBUTE_UNUSED,enum machine_mode mode ATTRIBUTE_UNUSED,rtx operands[2]ATTRIBUTE_UNUSED)8615 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8616 enum machine_mode mode ATTRIBUTE_UNUSED,
8617 rtx operands[2] ATTRIBUTE_UNUSED)
8618 {
8619 /* If one of operands is memory, source and destination must match. */
8620 if ((GET_CODE (operands[0]) == MEM
8621 || GET_CODE (operands[1]) == MEM)
8622 && ! rtx_equal_p (operands[0], operands[1]))
8623 return FALSE;
8624 return TRUE;
8625 }
8626
8627 /* Return TRUE or FALSE depending on whether the first SET in INSN
8628 has source and destination with matching CC modes, and that the
8629 CC mode is at least as constrained as REQ_MODE. */
8630
int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  /* The compare may be wrapped in a PARALLEL with clobbers; the SET
     of the CC register must come first.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* Each case below accepts REQ_MODE only when the insn's mode is at
     least as constrained; the fall-throughs encode the ordering
     CCm > CCGCm > CCGOCm > CCZm (less constrained modes reject more
     requests).  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNO also satisfies a plain CC request against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Source and destination CC modes must agree.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8675
8676 /* Generate insn patterns to do an integer compare of OPERANDS. */
8677
8678 static rtx
ix86_expand_int_compare(enum rtx_code code,rtx op0,rtx op1)8679 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8680 {
8681 enum machine_mode cmpmode;
8682 rtx tmp, flags;
8683
8684 cmpmode = SELECT_CC_MODE (code, op0, op1);
8685 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8686
8687 /* This is very simple, but making the interface the same as in the
8688 FP case makes the rest of the code easier. */
8689 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8690 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8691
8692 /* Return the test that should be put into the flags user, i.e.
8693 the bcc, scc, or cmov instruction. */
8694 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8695 }
8696
8697 /* Figure out whether to use ordered or unordered fp comparisons.
8698 Return the appropriate mode to use. */
8699
8700 enum machine_mode
ix86_fp_compare_mode(enum rtx_code code ATTRIBUTE_UNUSED)8701 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8702 {
8703 /* ??? In order to make all comparisons reversible, we do all comparisons
8704 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8705 all forms trapping and nontrapping comparisons, we can make inequality
8706 comparisons trapping again, since it results in better code when using
8707 FCOM based compares. */
8708 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8709 }
8710
/* Return the condition-code mode to use for comparing OP0 and OP1
   with CODE; the least constrained mode whose flags suffice.  */
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  /* Floating-point compares have their own mode selection.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8755
8756 /* Return the fixed registers used for condition codes. */
8757
static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  /* The integer flags register and the x87 status-word register.  */
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
8765
8766 /* If two condition code modes are compatible, return a condition code
8767 mode which is compatible with both. Otherwise, return
8768 VOIDmode. */
8769
static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  /* Only CC modes can be merged.  */
  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGOC merge to the more constrained CCGC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

      /* Any pair of the integer CC modes (other than the special
	 CCGC/CCGOC pair above) is satisfied by full CCmode.  */
    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
8813
8814 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8815
8816 int
ix86_use_fcomi_compare(enum rtx_code code ATTRIBUTE_UNUSED)8817 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8818 {
8819 enum rtx_code swapped_code = swap_condition (code);
8820 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8821 || (ix86_fp_comparison_cost (swapped_code)
8822 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8823 }
8824
8825 /* Swap, force into registers, or otherwise massage the two operands
8826 to a fp comparison. The operands are updated in place; the new
8827 comparison code is returned. */
8828
static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* NOTE(review): swap when op0 is an awkward first operand (not a
	 loadable 387 constant, or memory) and op1 would make a better
	 one -- confirm the exact semantics of standard_80387_constant_p
	 for non-constant operands before relying on this reading.  */
      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Standard 387 constants can be loaded directly; others go
	     through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8893
8894 /* Convert comparison codes we use to represent FP comparison to integer
8895 code that will result in proper branch. Return UNKNOWN if no such code
8896 is available. */
8897 static enum rtx_code
ix86_fp_compare_code_to_integer(enum rtx_code code)8898 ix86_fp_compare_code_to_integer (enum rtx_code code)
8899 {
8900 switch (code)
8901 {
8902 case GT:
8903 return GTU;
8904 case GE:
8905 return GEU;
8906 case ORDERED:
8907 case UNORDERED:
8908 return code;
8909 break;
8910 case UNEQ:
8911 return EQ;
8912 break;
8913 case UNLT:
8914 return LTU;
8915 break;
8916 case UNLE:
8917 return LEU;
8918 break;
8919 case LTGT:
8920 return NE;
8921 break;
8922 default:
8923 return UNKNOWN;
8924 }
8925 }
8926
8927 /* Split comparison code CODE into comparisons we can do using branch
8928 instructions. BYPASS_CODE is comparison code for branch that will
8929 branch around FIRST_CODE and SECOND_CODE. If some of branches
8930 is not required, set value to NIL.
8931 We never require more than two branches. */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  /* Default: a single branch on CODE suffices.  */
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* The remaining codes give the wrong answer for unordered
	 operands, so a second branch handles the unordered case:
	 either a bypass around the main test or an extra test.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance, unordered operands need not be honored,
     so the extra branches can be dropped.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
8993
8994 /* Return cost of comparison done fcom + arithmetics operations on AX.
8995 All following functions do use number of instructions as a cost metrics.
8996 In future this should be tweaked to compute bytes for optimize_size and
8997 take into account performance of various instructions on various CPUs. */
8998 static int
ix86_fp_comparison_arithmetics_cost(enum rtx_code code)8999 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9000 {
9001 if (!TARGET_IEEE_FP)
9002 return 4;
9003 /* The cost of code output by ix86_expand_fp_compare. */
9004 switch (code)
9005 {
9006 case UNLE:
9007 case UNLT:
9008 case LTGT:
9009 case GT:
9010 case GE:
9011 case UNORDERED:
9012 case ORDERED:
9013 case UNEQ:
9014 return 4;
9015 break;
9016 case LT:
9017 case NE:
9018 case EQ:
9019 case UNGE:
9020 return 5;
9021 break;
9022 case LE:
9023 case UNGT:
9024 return 6;
9025 break;
9026 default:
9027 abort ();
9028 }
9029 }
9030
9031 /* Return cost of comparison done using fcomi operation.
9032 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9033 static int
ix86_fp_comparison_fcomi_cost(enum rtx_code code)9034 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9035 {
9036 enum rtx_code bypass_code, first_code, second_code;
9037 /* Return arbitrarily high cost when instruction is not supported - this
9038 prevents gcc from using it. */
9039 if (!TARGET_CMOVE)
9040 return 1024;
9041 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9042 return (bypass_code != NIL || second_code != NIL) + 2;
9043 }
9044
9045 /* Return cost of comparison done using sahf operation.
9046 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9047 static int
ix86_fp_comparison_sahf_cost(enum rtx_code code)9048 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9049 {
9050 enum rtx_code bypass_code, first_code, second_code;
9051 /* Return arbitrarily high cost when instruction is not preferred - this
9052 avoids gcc from using it. */
9053 if (!TARGET_USE_SAHF && !optimize_size)
9054 return 1024;
9055 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9056 return (bypass_code != NIL || second_code != NIL) + 3;
9057 }
9058
9059 /* Compute cost of the comparison done using any method.
9060 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9061 static int
ix86_fp_comparison_cost(enum rtx_code code)9062 ix86_fp_comparison_cost (enum rtx_code code)
9063 {
9064 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9065 int min;
9066
9067 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9068 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9069
9070 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9071 if (min > sahf_cost)
9072 min = sahf_cost;
9073 if (min > fcomi_cost)
9074 min = fcomi_cost;
9075 return min;
9076 }
9077
9078 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9079
9080 static rtx
/* Emit the insns to compare floating point operands OP0 and OP1 with
   comparison CODE, and return the rtx of the flags test the consumer
   (bcc, scc or cmov) should use.  SCRATCH, if non-NULL, is an HImode
   register usable for the fnstsw result; a fresh pseudo is allocated
   when it is NULL and one is needed.  When the comparison cannot be
   expressed as a single flags test, the extra tests are stored through
   *SECOND_TEST and/or *BYPASS_TEST (both are reset to NULL_RTX on
   entry when the pointers are non-NULL).  */
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  /* Cost of the flags-based expansion, compared below against the
     arithmetic (fnstsw + bit-twiddling) expansion.  */
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi sets the flags directly.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* No fcomi available: store the FPU status word into SCRATCH
	     with fnstsw, then load AH into the flags with sahf.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 NOTE(review): the masks below (0x01, 0x04, 0x40, 0x45, ...)
	 appear to select the x87 condition bits C0/C2/C3 as they sit
	 in AH after fnstsw -- confirm against the x87 status word
	 layout.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
9264
9265 rtx
ix86_expand_compare(enum rtx_code code,rtx * second_test,rtx * bypass_test)9266 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9267 {
9268 rtx op0, op1, ret;
9269 op0 = ix86_compare_op0;
9270 op1 = ix86_compare_op1;
9271
9272 if (second_test)
9273 *second_test = NULL_RTX;
9274 if (bypass_test)
9275 *bypass_test = NULL_RTX;
9276
9277 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9278 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9279 second_test, bypass_test);
9280 else
9281 ret = ix86_expand_int_compare (code, op0, op1);
9282
9283 return ret;
9284 }
9285
9286 /* Return true if the CODE will result in nontrivial jump sequence. */
9287 bool
ix86_fp_jump_nontrivial_p(enum rtx_code code)9288 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9289 {
9290 enum rtx_code bypass_code, first_code, second_code;
9291 if (!TARGET_CMOVE)
9292 return true;
9293 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9294 return bypass_code != NIL || second_code != NIL;
9295 }
9296
/* Emit a conditional branch to LABEL for comparison CODE of the
   operands stashed in ix86_compare_op0/ix86_compare_op1.  Narrow
   integer modes are expanded directly; FP modes either split early
   (single-jump case) or emit a compound insn to be split later;
   non-64-bit DImode compares are decomposed into SImode
   compare+branch sequences.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    /* NOTE(review): hard-coded register numbers 18 and 17 -- these
	       look like the FP status and flags registers; confirm against
	       this port's register numbering (FPSR_REG/FLAGS_REG).  */
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    /* Without fcomi a fourth element clobbers an HImode scratch
	       for the fnstsw result.  */
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize the constant to the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as an SImode compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
9480
/* Split branch based on floating point condition.  CODE compares OP1
   against OP2; control transfers to TARGET1 when the condition holds
   and to TARGET2 otherwise (one of the two is pc_rtx, meaning
   fall-through).  TMP is an optional HImode scratch for the fnstsw
   result, passed through to ix86_expand_fp_compare.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx) edge.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;	/* NOTE: shadows the TMP parameter.  */
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* The bypass test jumps over the main branch to a local label.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional branch.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* An optional second branch to the same targets.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
9558
/* Expand a setcc of comparison CODE into DEST, a QImode register,
   using the operands stashed in ix86_compare_op0/ix86_compare_op1.
   Returns 1 on success (DONE) and 0 when the caller's expander should
   FAIL (non-64-bit DImode compares take a different path).  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* FP compares may need a second setcc combined into the result:
     a bypass test is reversed and ANDed in, a second test is ORed in.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  /* At most one of the two extra tests can be present.  */
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  /* NOTE: mutates the test rtx in place.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
				   GET_MODE (ix86_compare_op0),
				   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
9609
9610 /* Expand comparison setting or clearing carry flag. Return true when
9611 successful and set pop for the operation. */
9612 static bool
ix86_expand_carry_flag_compare(enum rtx_code code,rtx op0,rtx op1,rtx * pop)9613 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9614 {
9615 enum machine_mode mode =
9616 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9617
9618 /* Do not handle DImode compares that go trought special path. Also we can't
9619 deal with FP compares yet. This is possible to add. */
9620 if ((mode == DImode && !TARGET_64BIT))
9621 return false;
9622 if (FLOAT_MODE_P (mode))
9623 {
9624 rtx second_test = NULL, bypass_test = NULL;
9625 rtx compare_op, compare_seq;
9626
9627 /* Shortcut: following common codes never translate into carry flag compares. */
9628 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9629 || code == ORDERED || code == UNORDERED)
9630 return false;
9631
9632 /* These comparisons require zero flag; swap operands so they won't. */
9633 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9634 && !TARGET_IEEE_FP)
9635 {
9636 rtx tmp = op0;
9637 op0 = op1;
9638 op1 = tmp;
9639 code = swap_condition (code);
9640 }
9641
9642 /* Try to expand the comparison and verify that we end up with carry flag
9643 based comparison. This is fails to be true only when we decide to expand
9644 comparison using arithmetic that is not too common scenario. */
9645 start_sequence ();
9646 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9647 &second_test, &bypass_test);
9648 compare_seq = get_insns ();
9649 end_sequence ();
9650
9651 if (second_test || bypass_test)
9652 return false;
9653 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9654 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9655 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9656 else
9657 code = GET_CODE (compare_op);
9658 if (code != LTU && code != GEU)
9659 return false;
9660 emit_insn (compare_seq);
9661 *pop = compare_op;
9662 return true;
9663 }
9664 if (!INTEGRAL_MODE_P (mode))
9665 return false;
9666 switch (code)
9667 {
9668 case LTU:
9669 case GEU:
9670 break;
9671
9672 /* Convert a==0 into (unsigned)a<1. */
9673 case EQ:
9674 case NE:
9675 if (op1 != const0_rtx)
9676 return false;
9677 op1 = const1_rtx;
9678 code = (code == EQ ? LTU : GEU);
9679 break;
9680
9681 /* Convert a>b into b<a or a>=b-1. */
9682 case GTU:
9683 case LEU:
9684 if (GET_CODE (op1) == CONST_INT)
9685 {
9686 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9687 /* Bail out on overflow. We still can swap operands but that
9688 would force loading of the constant into register. */
9689 if (op1 == const0_rtx
9690 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9691 return false;
9692 code = (code == GTU ? GEU : LTU);
9693 }
9694 else
9695 {
9696 rtx tmp = op1;
9697 op1 = op0;
9698 op0 = tmp;
9699 code = (code == GTU ? LTU : GEU);
9700 }
9701 break;
9702
9703 /* Convert a>=0 into (unsigned)a<0x80000000. */
9704 case LT:
9705 case GE:
9706 if (mode == DImode || op1 != const0_rtx)
9707 return false;
9708 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9709 code = (code == LT ? GEU : LTU);
9710 break;
9711 case LE:
9712 case GT:
9713 if (mode == DImode || op1 != constm1_rtx)
9714 return false;
9715 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9716 code = (code == LE ? GEU : LTU);
9717 break;
9718
9719 default:
9720 return false;
9721 }
9722 /* Swapping operands may cause constant to appear as first operand. */
9723 if (!nonimmediate_operand (op0, VOIDmode))
9724 {
9725 if (no_new_pseudos)
9726 return false;
9727 op0 = force_reg (mode, op0);
9728 }
9729 ix86_compare_op0 = op0;
9730 ix86_compare_op1 = op1;
9731 *pop = ix86_expand_compare (code, NULL, NULL);
9732 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9733 abort ();
9734 return true;
9735 }
9736
9737 int
ix86_expand_int_movcc(rtx operands[])9738 ix86_expand_int_movcc (rtx operands[])
9739 {
9740 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9741 rtx compare_seq, compare_op;
9742 rtx second_test, bypass_test;
9743 enum machine_mode mode = GET_MODE (operands[0]);
9744 bool sign_bit_compare_p = false;;
9745
9746 start_sequence ();
9747 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9748 compare_seq = get_insns ();
9749 end_sequence ();
9750
9751 compare_code = GET_CODE (compare_op);
9752
9753 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9754 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9755 sign_bit_compare_p = true;
9756
9757 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9758 HImode insns, we'd be swallowed in word prefix ops. */
9759
9760 if ((mode != HImode || TARGET_FAST_PREFIX)
9761 && (mode != DImode || TARGET_64BIT)
9762 && GET_CODE (operands[2]) == CONST_INT
9763 && GET_CODE (operands[3]) == CONST_INT)
9764 {
9765 rtx out = operands[0];
9766 HOST_WIDE_INT ct = INTVAL (operands[2]);
9767 HOST_WIDE_INT cf = INTVAL (operands[3]);
9768 HOST_WIDE_INT diff;
9769
9770 diff = ct - cf;
9771 /* Sign bit compares are better done using shifts than we do by using
9772 sbb. */
9773 if (sign_bit_compare_p
9774 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9775 ix86_compare_op1, &compare_op))
9776 {
9777 /* Detect overlap between destination and compare sources. */
9778 rtx tmp = out;
9779
9780 if (!sign_bit_compare_p)
9781 {
9782 bool fpcmp = false;
9783
9784 compare_code = GET_CODE (compare_op);
9785
9786 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9787 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9788 {
9789 fpcmp = true;
9790 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9791 }
9792
9793 /* To simplify rest of code, restrict to the GEU case. */
9794 if (compare_code == LTU)
9795 {
9796 HOST_WIDE_INT tmp = ct;
9797 ct = cf;
9798 cf = tmp;
9799 compare_code = reverse_condition (compare_code);
9800 code = reverse_condition (code);
9801 }
9802 else
9803 {
9804 if (fpcmp)
9805 PUT_CODE (compare_op,
9806 reverse_condition_maybe_unordered
9807 (GET_CODE (compare_op)));
9808 else
9809 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9810 }
9811 diff = ct - cf;
9812
9813 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9814 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9815 tmp = gen_reg_rtx (mode);
9816
9817 if (mode == DImode)
9818 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9819 else
9820 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9821 }
9822 else
9823 {
9824 if (code == GT || code == GE)
9825 code = reverse_condition (code);
9826 else
9827 {
9828 HOST_WIDE_INT tmp = ct;
9829 ct = cf;
9830 cf = tmp;
9831 diff = ct - cf;
9832 }
9833 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9834 ix86_compare_op1, VOIDmode, 0, -1);
9835 }
9836
9837 if (diff == 1)
9838 {
9839 /*
9840 * cmpl op0,op1
9841 * sbbl dest,dest
9842 * [addl dest, ct]
9843 *
9844 * Size 5 - 8.
9845 */
9846 if (ct)
9847 tmp = expand_simple_binop (mode, PLUS,
9848 tmp, GEN_INT (ct),
9849 copy_rtx (tmp), 1, OPTAB_DIRECT);
9850 }
9851 else if (cf == -1)
9852 {
9853 /*
9854 * cmpl op0,op1
9855 * sbbl dest,dest
9856 * orl $ct, dest
9857 *
9858 * Size 8.
9859 */
9860 tmp = expand_simple_binop (mode, IOR,
9861 tmp, GEN_INT (ct),
9862 copy_rtx (tmp), 1, OPTAB_DIRECT);
9863 }
9864 else if (diff == -1 && ct)
9865 {
9866 /*
9867 * cmpl op0,op1
9868 * sbbl dest,dest
9869 * notl dest
9870 * [addl dest, cf]
9871 *
9872 * Size 8 - 11.
9873 */
9874 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9875 if (cf)
9876 tmp = expand_simple_binop (mode, PLUS,
9877 copy_rtx (tmp), GEN_INT (cf),
9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
9879 }
9880 else
9881 {
9882 /*
9883 * cmpl op0,op1
9884 * sbbl dest,dest
9885 * [notl dest]
9886 * andl cf - ct, dest
9887 * [addl dest, ct]
9888 *
9889 * Size 8 - 11.
9890 */
9891
9892 if (cf == 0)
9893 {
9894 cf = ct;
9895 ct = 0;
9896 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9897 }
9898
9899 tmp = expand_simple_binop (mode, AND,
9900 copy_rtx (tmp),
9901 gen_int_mode (cf - ct, mode),
9902 copy_rtx (tmp), 1, OPTAB_DIRECT);
9903 if (ct)
9904 tmp = expand_simple_binop (mode, PLUS,
9905 copy_rtx (tmp), GEN_INT (ct),
9906 copy_rtx (tmp), 1, OPTAB_DIRECT);
9907 }
9908
9909 if (!rtx_equal_p (tmp, out))
9910 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9911
9912 return 1; /* DONE */
9913 }
9914
9915 if (diff < 0)
9916 {
9917 HOST_WIDE_INT tmp;
9918 tmp = ct, ct = cf, cf = tmp;
9919 diff = -diff;
9920 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9921 {
9922 /* We may be reversing unordered compare to normal compare, that
9923 is not valid in general (we may convert non-trapping condition
9924 to trapping one), however on i386 we currently emit all
9925 comparisons unordered. */
9926 compare_code = reverse_condition_maybe_unordered (compare_code);
9927 code = reverse_condition_maybe_unordered (code);
9928 }
9929 else
9930 {
9931 compare_code = reverse_condition (compare_code);
9932 code = reverse_condition (code);
9933 }
9934 }
9935
9936 compare_code = NIL;
9937 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9938 && GET_CODE (ix86_compare_op1) == CONST_INT)
9939 {
9940 if (ix86_compare_op1 == const0_rtx
9941 && (code == LT || code == GE))
9942 compare_code = code;
9943 else if (ix86_compare_op1 == constm1_rtx)
9944 {
9945 if (code == LE)
9946 compare_code = LT;
9947 else if (code == GT)
9948 compare_code = GE;
9949 }
9950 }
9951
9952 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9953 if (compare_code != NIL
9954 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9955 && (cf == -1 || ct == -1))
9956 {
9957 /* If lea code below could be used, only optimize
9958 if it results in a 2 insn sequence. */
9959
9960 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9961 || diff == 3 || diff == 5 || diff == 9)
9962 || (compare_code == LT && ct == -1)
9963 || (compare_code == GE && cf == -1))
9964 {
9965 /*
9966 * notl op1 (if necessary)
9967 * sarl $31, op1
9968 * orl cf, op1
9969 */
9970 if (ct != -1)
9971 {
9972 cf = ct;
9973 ct = -1;
9974 code = reverse_condition (code);
9975 }
9976
9977 out = emit_store_flag (out, code, ix86_compare_op0,
9978 ix86_compare_op1, VOIDmode, 0, -1);
9979
9980 out = expand_simple_binop (mode, IOR,
9981 out, GEN_INT (cf),
9982 out, 1, OPTAB_DIRECT);
9983 if (out != operands[0])
9984 emit_move_insn (operands[0], out);
9985
9986 return 1; /* DONE */
9987 }
9988 }
9989
9990
9991 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9992 || diff == 3 || diff == 5 || diff == 9)
9993 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9994 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9995 {
9996 /*
9997 * xorl dest,dest
9998 * cmpl op1,op2
9999 * setcc dest
10000 * lea cf(dest*(ct-cf)),dest
10001 *
10002 * Size 14.
10003 *
10004 * This also catches the degenerate setcc-only case.
10005 */
10006
10007 rtx tmp;
10008 int nops;
10009
10010 out = emit_store_flag (out, code, ix86_compare_op0,
10011 ix86_compare_op1, VOIDmode, 0, 1);
10012
10013 nops = 0;
10014 /* On x86_64 the lea instruction operates on Pmode, so we need
10015 to get arithmetics done in proper mode to match. */
10016 if (diff == 1)
10017 tmp = copy_rtx (out);
10018 else
10019 {
10020 rtx out1;
10021 out1 = copy_rtx (out);
10022 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10023 nops++;
10024 if (diff & 1)
10025 {
10026 tmp = gen_rtx_PLUS (mode, tmp, out1);
10027 nops++;
10028 }
10029 }
10030 if (cf != 0)
10031 {
10032 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10033 nops++;
10034 }
10035 if (!rtx_equal_p (tmp, out))
10036 {
10037 if (nops == 1)
10038 out = force_operand (tmp, copy_rtx (out));
10039 else
10040 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10041 }
10042 if (!rtx_equal_p (out, operands[0]))
10043 emit_move_insn (operands[0], copy_rtx (out));
10044
10045 return 1; /* DONE */
10046 }
10047
10048 /*
10049 * General case: Jumpful:
10050 * xorl dest,dest cmpl op1, op2
10051 * cmpl op1, op2 movl ct, dest
10052 * setcc dest jcc 1f
10053 * decl dest movl cf, dest
10054 * andl (cf-ct),dest 1:
10055 * addl ct,dest
10056 *
10057 * Size 20. Size 14.
10058 *
10059 * This is reasonably steep, but branch mispredict costs are
10060 * high on modern cpus, so consider failing only if optimizing
10061 * for space.
10062 */
10063
10064 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10065 && BRANCH_COST >= 2)
10066 {
10067 if (cf == 0)
10068 {
10069 cf = ct;
10070 ct = 0;
10071 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10072 /* We may be reversing unordered compare to normal compare,
10073 that is not valid in general (we may convert non-trapping
10074 condition to trapping one), however on i386 we currently
10075 emit all comparisons unordered. */
10076 code = reverse_condition_maybe_unordered (code);
10077 else
10078 {
10079 code = reverse_condition (code);
10080 if (compare_code != NIL)
10081 compare_code = reverse_condition (compare_code);
10082 }
10083 }
10084
10085 if (compare_code != NIL)
10086 {
10087 /* notl op1 (if needed)
10088 sarl $31, op1
10089 andl (cf-ct), op1
10090 addl ct, op1
10091
10092 For x < 0 (resp. x <= -1) there will be no notl,
10093 so if possible swap the constants to get rid of the
10094 complement.
10095 True/false will be -1/0 while code below (store flag
10096 followed by decrement) is 0/-1, so the constants need
10097 to be exchanged once more. */
10098
10099 if (compare_code == GE || !cf)
10100 {
10101 code = reverse_condition (code);
10102 compare_code = LT;
10103 }
10104 else
10105 {
10106 HOST_WIDE_INT tmp = cf;
10107 cf = ct;
10108 ct = tmp;
10109 }
10110
10111 out = emit_store_flag (out, code, ix86_compare_op0,
10112 ix86_compare_op1, VOIDmode, 0, -1);
10113 }
10114 else
10115 {
10116 out = emit_store_flag (out, code, ix86_compare_op0,
10117 ix86_compare_op1, VOIDmode, 0, 1);
10118
10119 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10120 copy_rtx (out), 1, OPTAB_DIRECT);
10121 }
10122
10123 out = expand_simple_binop (mode, AND, copy_rtx (out),
10124 gen_int_mode (cf - ct, mode),
10125 copy_rtx (out), 1, OPTAB_DIRECT);
10126 if (ct)
10127 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10128 copy_rtx (out), 1, OPTAB_DIRECT);
10129 if (!rtx_equal_p (out, operands[0]))
10130 emit_move_insn (operands[0], copy_rtx (out));
10131
10132 return 1; /* DONE */
10133 }
10134 }
10135
10136 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10137 {
10138 /* Try a few things more with specific constants and a variable. */
10139
10140 optab op;
10141 rtx var, orig_out, out, tmp;
10142
10143 if (BRANCH_COST <= 2)
10144 return 0; /* FAIL */
10145
10146 /* If one of the two operands is an interesting constant, load a
10147 constant with the above and mask it in with a logical operation. */
10148
10149 if (GET_CODE (operands[2]) == CONST_INT)
10150 {
10151 var = operands[3];
10152 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10153 operands[3] = constm1_rtx, op = and_optab;
10154 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10155 operands[3] = const0_rtx, op = ior_optab;
10156 else
10157 return 0; /* FAIL */
10158 }
10159 else if (GET_CODE (operands[3]) == CONST_INT)
10160 {
10161 var = operands[2];
10162 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10163 operands[2] = constm1_rtx, op = and_optab;
10164 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10165 operands[2] = const0_rtx, op = ior_optab;
10166 else
10167 return 0; /* FAIL */
10168 }
10169 else
10170 return 0; /* FAIL */
10171
10172 orig_out = operands[0];
10173 tmp = gen_reg_rtx (mode);
10174 operands[0] = tmp;
10175
10176 /* Recurse to get the constant loaded. */
10177 if (ix86_expand_int_movcc (operands) == 0)
10178 return 0; /* FAIL */
10179
10180 /* Mask in the interesting variable. */
10181 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10182 OPTAB_WIDEN);
10183 if (!rtx_equal_p (out, orig_out))
10184 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10185
10186 return 1; /* DONE */
10187 }
10188
10189 /*
10190 * For comparison with above,
10191 *
10192 * movl cf,dest
10193 * movl ct,tmp
10194 * cmpl op1,op2
10195 * cmovcc tmp,dest
10196 *
10197 * Size 15.
10198 */
10199
10200 if (! nonimmediate_operand (operands[2], mode))
10201 operands[2] = force_reg (mode, operands[2]);
10202 if (! nonimmediate_operand (operands[3], mode))
10203 operands[3] = force_reg (mode, operands[3]);
10204
10205 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10206 {
10207 rtx tmp = gen_reg_rtx (mode);
10208 emit_move_insn (tmp, operands[3]);
10209 operands[3] = tmp;
10210 }
10211 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10212 {
10213 rtx tmp = gen_reg_rtx (mode);
10214 emit_move_insn (tmp, operands[2]);
10215 operands[2] = tmp;
10216 }
10217
10218 if (! register_operand (operands[2], VOIDmode)
10219 && (mode == QImode
10220 || ! register_operand (operands[3], VOIDmode)))
10221 operands[2] = force_reg (mode, operands[2]);
10222
10223 if (mode == QImode
10224 && ! register_operand (operands[3], VOIDmode))
10225 operands[3] = force_reg (mode, operands[3]);
10226
10227 emit_insn (compare_seq);
10228 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10229 gen_rtx_IF_THEN_ELSE (mode,
10230 compare_op, operands[2],
10231 operands[3])));
10232 if (bypass_test)
10233 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10234 gen_rtx_IF_THEN_ELSE (mode,
10235 bypass_test,
10236 copy_rtx (operands[3]),
10237 copy_rtx (operands[0]))));
10238 if (second_test)
10239 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10240 gen_rtx_IF_THEN_ELSE (mode,
10241 second_test,
10242 copy_rtx (operands[2]),
10243 copy_rtx (operands[0]))));
10244
10245 return 1; /* DONE */
10246 }
10247
/* Expand a floating point conditional move

     operands[0] = operands[1] ? operands[2] : operands[3]

   where operands[1] is a comparison of the global ix86_compare_op0
   against ix86_compare_op1.  Depending on the target, this emits SSE
   min/max, an SSE masked conditional move, or fcmov sequences.
   Always returns 1 to signal that the expansion is done.  */
int
ix86_expand_fp_movcc (rtx operands[])
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  /* Swap the comparison operands so the min/max match below can
	     test operands[2] against op0.  The condition adjustment pairs
	     with the UNLE/UNGE sub-swaps in the min/max paths.  */
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT || code == UNLE)
	    {
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      /* minss/minsd require the first operand in a register.  */
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  /* NOTE: this updates the global compare operands as well as
	     the local condition rtx.  */
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  /* Swap the move arms and negate the condition instead.  */
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Materialize the condition with setcc into a QImode register and
	 branch on that register being nonzero instead.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* When the extra bypass/second tests will read operands[0] after the
     first conditional move has clobbered it, copy the overlapping source
     to a fresh register first.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  /* Patch up the result with additional conditional moves for the extra
     conditions produced by ix86_expand_compare, if any.  */
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
10412
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only increments/decrements by 1 can be expressed as an add/sub
     of 0 or -1 with carry.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be representable as a carry flag test;
     otherwise let generic code handle it.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize to an LTU (carry set) test; when the condition is the
     other way around, add/sub -1 instead of 0 to compensate.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  abort ();
	}
    }
  return 1; /* DONE */
}
10495
10496
/* Split OPERAND into word-sized parts stored in PARTS.  Similar to
   split_di, but also works for floating point values and
   nonoffsetable memories.  For pushes, it returns just stack offsets;
   the values will be saved in the right order.  Maximally three parts
   are generated.  Returns the number of parts (2 or 3).  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit words on !TARGET_64BIT (XFmode
     counts as 3), 64-bit words otherwise (the +4 rounds 12-byte XFmode
     up to 2 parts).  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into word parts.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the same (pre-decrement) stack slot; the caller
	 emits them in the correct order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Splitting a hard register into consecutive registers is
		 only valid once register allocation is finished.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose the FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* XFmode's upper part is only 32 bits wide.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
10638
/* Emit insns to perform a move or push of DI, DF, and XF values.
   The value is split into up to three word-sized parts; operands 2-4
   receive the destination parts and operands 5-7 the corresponding
   source parts, arranged in an order that never clobbers a source
   part before it has been copied.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Pushes move the stack pointer, so each later part must be read
	 through the address of the part that follows it.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
				      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
				      plus_constant (base, 8));
	}
    }

  if (push)
    {
      /* Pushes are emitted from the highest part down so the values end
	 up on the stack in the right order.  */
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* 16-byte long doubles leave a 4-byte hole above the
		 12 bytes of data.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low: low destination would clobber a source part.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high (the natural order).  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
10830
10831 void
ix86_split_ashldi(rtx * operands,rtx scratch)10832 ix86_split_ashldi (rtx *operands, rtx scratch)
10833 {
10834 rtx low[2], high[2];
10835 int count;
10836
10837 if (GET_CODE (operands[2]) == CONST_INT)
10838 {
10839 split_di (operands, 2, low, high);
10840 count = INTVAL (operands[2]) & 63;
10841
10842 if (count >= 32)
10843 {
10844 emit_move_insn (high[0], low[1]);
10845 emit_move_insn (low[0], const0_rtx);
10846
10847 if (count > 32)
10848 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10849 }
10850 else
10851 {
10852 if (!rtx_equal_p (operands[0], operands[1]))
10853 emit_move_insn (operands[0], operands[1]);
10854 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10855 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10856 }
10857 }
10858 else
10859 {
10860 if (!rtx_equal_p (operands[0], operands[1]))
10861 emit_move_insn (operands[0], operands[1]);
10862
10863 split_di (operands, 1, low, high);
10864
10865 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10866 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10867
10868 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10869 {
10870 if (! no_new_pseudos)
10871 scratch = force_reg (SImode, const0_rtx);
10872 else
10873 emit_move_insn (scratch, const0_rtx);
10874
10875 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10876 scratch));
10877 }
10878 else
10879 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10880 }
10881 }
10882
/* Split a DImode arithmetic right shift into SImode operations.
   operands[0] is the destination, operands[1] the source and
   operands[2] the shift count; SCRATCH, when supplied, may be used
   for the cmove-based variable-count adjustment.  */
void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Low word becomes the high source word; high word is filled
	     with the sign (high source word shifted right by 31).  */
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      /* After reload the three-operand shift form may not be
		 available; copy first and shift in place.  */
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift in place, then fix up the case where the
	 count is >= 32 at run time.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Materialize the sign word in SCRATCH for the conditional
	     move adjustment.  */
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
10940
10941 void
ix86_split_lshrdi(rtx * operands,rtx scratch)10942 ix86_split_lshrdi (rtx *operands, rtx scratch)
10943 {
10944 rtx low[2], high[2];
10945 int count;
10946
10947 if (GET_CODE (operands[2]) == CONST_INT)
10948 {
10949 split_di (operands, 2, low, high);
10950 count = INTVAL (operands[2]) & 63;
10951
10952 if (count >= 32)
10953 {
10954 emit_move_insn (low[0], high[1]);
10955 emit_move_insn (high[0], const0_rtx);
10956
10957 if (count > 32)
10958 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10959 }
10960 else
10961 {
10962 if (!rtx_equal_p (operands[0], operands[1]))
10963 emit_move_insn (operands[0], operands[1]);
10964 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10965 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10966 }
10967 }
10968 else
10969 {
10970 if (!rtx_equal_p (operands[0], operands[1]))
10971 emit_move_insn (operands[0], operands[1]);
10972
10973 split_di (operands, 1, low, high);
10974
10975 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10976 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10977
10978 /* Heh. By reversing the arguments, we can reuse this pattern. */
10979 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10980 {
10981 if (! no_new_pseudos)
10982 scratch = force_reg (SImode, const0_rtx);
10983 else
10984 emit_move_insn (scratch, const0_rtx);
10985
10986 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10987 scratch));
10988 }
10989 else
10990 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10991 }
10992 }
10993
10994 /* Helper function for the string operations below. Dest VARIABLE whether
10995 it is aligned to VALUE bytes. If true, jump to the label. */
10996 static rtx
ix86_expand_aligntest(rtx variable,int value)10997 ix86_expand_aligntest (rtx variable, int value)
10998 {
10999 rtx label = gen_label_rtx ();
11000 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11001 if (GET_MODE (variable) == DImode)
11002 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11003 else
11004 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11005 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11006 1, label);
11007 return label;
11008 }
11009
11010 /* Adjust COUNTER by the VALUE. */
11011 static void
ix86_adjust_counter(rtx countreg,HOST_WIDE_INT value)11012 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11013 {
11014 if (GET_MODE (countreg) == DImode)
11015 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11016 else
11017 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11018 }
11019
11020 /* Zero extend possibly SImode EXP to Pmode register. */
11021 rtx
ix86_zero_extend_to_Pmode(rtx exp)11022 ix86_zero_extend_to_Pmode (rtx exp)
11023 {
11024 rtx r;
11025 if (GET_MODE (exp) == VOIDmode)
11026 return force_reg (Pmode, exp);
11027 if (GET_MODE (exp) == Pmode)
11028 return copy_to_mode_reg (Pmode, exp);
11029 r = gen_reg_rtx (Pmode);
11030 emit_insn (gen_zero_extendsidi2 (r, exp));
11031 return r;
11032 }
11033
11034 /* Expand string move (memcpy) operation. Use i386 string operations when
11035 profitable. expand_clrstr contains similar code. */
11036 int
ix86_expand_movstr(rtx dst,rtx src,rtx count_exp,rtx align_exp)11037 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11038 {
11039 rtx srcreg, destreg, countreg, srcexp, destexp;
11040 enum machine_mode counter_mode;
11041 HOST_WIDE_INT align = 0;
11042 unsigned HOST_WIDE_INT count = 0;
11043
11044 if (GET_CODE (align_exp) == CONST_INT)
11045 align = INTVAL (align_exp);
11046
11047 /* Can't use any of this if the user has appropriated esi or edi. */
11048 if (global_regs[4] || global_regs[5])
11049 return 0;
11050
11051 /* This simple hack avoids all inlining code and simplifies code below. */
11052 if (!TARGET_ALIGN_STRINGOPS)
11053 align = 64;
11054
11055 if (GET_CODE (count_exp) == CONST_INT)
11056 {
11057 count = INTVAL (count_exp);
11058 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11059 return 0;
11060 }
11061
11062 /* Figure out proper mode for counter. For 32bits it is always SImode,
11063 for 64bits use SImode when possible, otherwise DImode.
11064 Set count to number of bytes copied when known at compile time. */
11065 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11066 || x86_64_zero_extended_value (count_exp))
11067 counter_mode = SImode;
11068 else
11069 counter_mode = DImode;
11070
11071 if (counter_mode != SImode && counter_mode != DImode)
11072 abort ();
11073
11074 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11075 if (destreg != XEXP (dst, 0))
11076 dst = replace_equiv_address_nv (dst, destreg);
11077 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11078 if (srcreg != XEXP (src, 0))
11079 src = replace_equiv_address_nv (src, srcreg);
11080
11081 /* When optimizing for size emit simple rep ; movsb instruction for
11082 counts not divisible by 4. */
11083
11084 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11085 {
11086 emit_insn (gen_cld ());
11087 countreg = ix86_zero_extend_to_Pmode (count_exp);
11088 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11089 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11090 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11091 destexp, srcexp));
11092 }
11093
11094 /* For constant aligned (or small unaligned) copies use rep movsl
11095 followed by code copying the rest. For PentiumPro ensure 8 byte
11096 alignment to allow rep movsl acceleration. */
11097
11098 else if (count != 0
11099 && (align >= 8
11100 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11101 || optimize_size || count < (unsigned int) 64))
11102 {
11103 unsigned HOST_WIDE_INT offset = 0;
11104 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11105 rtx srcmem, dstmem;
11106
11107 emit_insn (gen_cld ());
11108 if (count & ~(size - 1))
11109 {
11110 countreg = copy_to_mode_reg (counter_mode,
11111 GEN_INT ((count >> (size == 4 ? 2 : 3))
11112 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11113 countreg = ix86_zero_extend_to_Pmode (countreg);
11114
11115 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11116 GEN_INT (size == 4 ? 2 : 3));
11117 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11118 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11119
11120 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11121 countreg, destexp, srcexp));
11122 offset = count & ~(size - 1);
11123 }
11124 if (size == 8 && (count & 0x04))
11125 {
11126 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11127 offset);
11128 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11129 offset);
11130 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11131 offset += 4;
11132 }
11133 if (count & 0x02)
11134 {
11135 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11136 offset);
11137 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11138 offset);
11139 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11140 offset += 2;
11141 }
11142 if (count & 0x01)
11143 {
11144 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11145 offset);
11146 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11147 offset);
11148 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11149 }
11150 }
11151 /* The generic code based on the glibc implementation:
11152 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11153 allowing accelerated copying there)
11154 - copy the data using rep movsl
11155 - copy the rest. */
11156 else
11157 {
11158 rtx countreg2;
11159 rtx label = NULL;
11160 rtx srcmem, dstmem;
11161 int desired_alignment = (TARGET_PENTIUMPRO
11162 && (count == 0 || count >= (unsigned int) 260)
11163 ? 8 : UNITS_PER_WORD);
11164 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11165 dst = change_address (dst, BLKmode, destreg);
11166 src = change_address (src, BLKmode, srcreg);
11167
11168 /* In case we don't know anything about the alignment, default to
11169 library version, since it is usually equally fast and result in
11170 shorter code.
11171
11172 Also emit call when we know that the count is large and call overhead
11173 will not be important. */
11174 if (!TARGET_INLINE_ALL_STRINGOPS
11175 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11176 return 0;
11177
11178 if (TARGET_SINGLE_STRINGOP)
11179 emit_insn (gen_cld ());
11180
11181 countreg2 = gen_reg_rtx (Pmode);
11182 countreg = copy_to_mode_reg (counter_mode, count_exp);
11183
11184 /* We don't use loops to align destination and to copy parts smaller
11185 than 4 bytes, because gcc is able to optimize such code better (in
11186 the case the destination or the count really is aligned, gcc is often
11187 able to predict the branches) and also it is friendlier to the
11188 hardware branch prediction.
11189
11190 Using loops is beneficial for generic case, because we can
11191 handle small counts using the loops. Many CPUs (such as Athlon)
11192 have large REP prefix setup costs.
11193
11194 This is quite costly. Maybe we can revisit this decision later or
11195 add some customizability to this code. */
11196
11197 if (count == 0 && align < desired_alignment)
11198 {
11199 label = gen_label_rtx ();
11200 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11201 LEU, 0, counter_mode, 1, label);
11202 }
11203 if (align <= 1)
11204 {
11205 rtx label = ix86_expand_aligntest (destreg, 1);
11206 srcmem = change_address (src, QImode, srcreg);
11207 dstmem = change_address (dst, QImode, destreg);
11208 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11209 ix86_adjust_counter (countreg, 1);
11210 emit_label (label);
11211 LABEL_NUSES (label) = 1;
11212 }
11213 if (align <= 2)
11214 {
11215 rtx label = ix86_expand_aligntest (destreg, 2);
11216 srcmem = change_address (src, HImode, srcreg);
11217 dstmem = change_address (dst, HImode, destreg);
11218 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11219 ix86_adjust_counter (countreg, 2);
11220 emit_label (label);
11221 LABEL_NUSES (label) = 1;
11222 }
11223 if (align <= 4 && desired_alignment > 4)
11224 {
11225 rtx label = ix86_expand_aligntest (destreg, 4);
11226 srcmem = change_address (src, SImode, srcreg);
11227 dstmem = change_address (dst, SImode, destreg);
11228 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11229 ix86_adjust_counter (countreg, 4);
11230 emit_label (label);
11231 LABEL_NUSES (label) = 1;
11232 }
11233
11234 if (label && desired_alignment > 4 && !TARGET_64BIT)
11235 {
11236 emit_label (label);
11237 LABEL_NUSES (label) = 1;
11238 label = NULL_RTX;
11239 }
11240 if (!TARGET_SINGLE_STRINGOP)
11241 emit_insn (gen_cld ());
11242 if (TARGET_64BIT)
11243 {
11244 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11245 GEN_INT (3)));
11246 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11247 }
11248 else
11249 {
11250 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11251 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11252 }
11253 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11254 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11255 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11256 countreg2, destexp, srcexp));
11257
11258 if (label)
11259 {
11260 emit_label (label);
11261 LABEL_NUSES (label) = 1;
11262 }
11263 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11264 {
11265 srcmem = change_address (src, SImode, srcreg);
11266 dstmem = change_address (dst, SImode, destreg);
11267 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11268 }
11269 if ((align <= 4 || count == 0) && TARGET_64BIT)
11270 {
11271 rtx label = ix86_expand_aligntest (countreg, 4);
11272 srcmem = change_address (src, SImode, srcreg);
11273 dstmem = change_address (dst, SImode, destreg);
11274 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11275 emit_label (label);
11276 LABEL_NUSES (label) = 1;
11277 }
11278 if (align > 2 && count != 0 && (count & 2))
11279 {
11280 srcmem = change_address (src, HImode, srcreg);
11281 dstmem = change_address (dst, HImode, destreg);
11282 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11283 }
11284 if (align <= 2 || count == 0)
11285 {
11286 rtx label = ix86_expand_aligntest (countreg, 2);
11287 srcmem = change_address (src, HImode, srcreg);
11288 dstmem = change_address (dst, HImode, destreg);
11289 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11290 emit_label (label);
11291 LABEL_NUSES (label) = 1;
11292 }
11293 if (align > 1 && count != 0 && (count & 1))
11294 {
11295 srcmem = change_address (src, QImode, srcreg);
11296 dstmem = change_address (dst, QImode, destreg);
11297 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11298 }
11299 if (align <= 1 || count == 0)
11300 {
11301 rtx label = ix86_expand_aligntest (countreg, 1);
11302 srcmem = change_address (src, QImode, srcreg);
11303 dstmem = change_address (dst, QImode, destreg);
11304 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11305 emit_label (label);
11306 LABEL_NUSES (label) = 1;
11307 }
11308 }
11309
11310 return 1;
11311 }
11312
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.

   DST is the destination memory reference, COUNT_EXP the number of bytes
   to clear and ALIGN_EXP the known alignment of DST.  Return 1 when the
   clear was expanded inline, 0 when the caller should emit a library
   call instead.  */
int
ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      /* Without -minline-all-stringops only small fixed-size clears are
	 worth expanding inline.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; stosb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Count known at compile time: clear the word-sized bulk with
	 rep stos and store the remaining 1..7 bytes individually.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
	  offset = count & ~(size - 1);
	}
      /* On 64-bit a leftover 4-byte chunk is possible before the
	 2- and 1-byte tails.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      /* General case: align the destination at runtime, clear the bulk
	 with rep stos, then clear the remaining tail bytes.  */
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* Runtime counts smaller than the desired alignment jump to LABEL,
	 which is emitted after the rep stos below, skipping both the
	 alignment prologue and the bulk clear.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: store 1, 2 and possibly 4 bytes as needed to
	 reach the desired alignment, adjusting the counter as we go.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Convert the byte count into a word count for rep stos
	 (shift by 3 for 8-byte words, by 2 for 4-byte words).  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear the bytes left over after the word-sized rep stos,
	 unconditionally when the count is known, otherwise behind runtime
	 tests of the counter's low bits.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
11538
/* Expand strlen.

   OUT is the result register, SRC the string MEM, EOSCHAR the terminator
   character and ALIGN the known alignment of SRC.  Return 1 when an
   inline expansion was emitted, 0 to fall back to a library call.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* Scan for EOSCHAR with a repnz scasb style sequence
	 (UNSPEC_SCAS) starting from a -1 counter in SCRATCH4.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      /* OUT = ~SCRATCH1 - 1, recovering the length from the counter
	 left behind by the scan.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
11610
11611 /* Expand the appropriate insns for doing strlen if not just doing
11612 repnz; scasb
11613
11614 out = result, initialized with the start address
11615 align_rtx = alignment of the address.
11616 scratch = scratch register, initialized with the startaddress when
11617 not aligned, otherwise undefined
11618
11619 This is just the body. It needs the initializations mentioned above and
11620 some address computing at the end. These things are done in i386.md. */
11621
11622 static void
ix86_expand_strlensi_unroll_1(rtx out,rtx src,rtx align_rtx)11623 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11624 {
11625 int align;
11626 rtx tmp;
11627 rtx align_2_label = NULL_RTX;
11628 rtx align_3_label = NULL_RTX;
11629 rtx align_4_label = gen_label_rtx ();
11630 rtx end_0_label = gen_label_rtx ();
11631 rtx mem;
11632 rtx tmpreg = gen_reg_rtx (SImode);
11633 rtx scratch = gen_reg_rtx (SImode);
11634 rtx cmp;
11635
11636 align = 0;
11637 if (GET_CODE (align_rtx) == CONST_INT)
11638 align = INTVAL (align_rtx);
11639
11640 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11641
11642 /* Is there a known alignment and is it less than 4? */
11643 if (align < 4)
11644 {
11645 rtx scratch1 = gen_reg_rtx (Pmode);
11646 emit_move_insn (scratch1, out);
11647 /* Is there a known alignment and is it not 2? */
11648 if (align != 2)
11649 {
11650 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11651 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11652
11653 /* Leave just the 3 lower bits. */
11654 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11655 NULL_RTX, 0, OPTAB_WIDEN);
11656
11657 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11658 Pmode, 1, align_4_label);
11659 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11660 Pmode, 1, align_2_label);
11661 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11662 Pmode, 1, align_3_label);
11663 }
11664 else
11665 {
11666 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11667 check if is aligned to 4 - byte. */
11668
11669 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11670 NULL_RTX, 0, OPTAB_WIDEN);
11671
11672 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11673 Pmode, 1, align_4_label);
11674 }
11675
11676 mem = change_address (src, QImode, out);
11677
11678 /* Now compare the bytes. */
11679
11680 /* Compare the first n unaligned byte on a byte per byte basis. */
11681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11682 QImode, 1, end_0_label);
11683
11684 /* Increment the address. */
11685 if (TARGET_64BIT)
11686 emit_insn (gen_adddi3 (out, out, const1_rtx));
11687 else
11688 emit_insn (gen_addsi3 (out, out, const1_rtx));
11689
11690 /* Not needed with an alignment of 2 */
11691 if (align != 2)
11692 {
11693 emit_label (align_2_label);
11694
11695 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11696 end_0_label);
11697
11698 if (TARGET_64BIT)
11699 emit_insn (gen_adddi3 (out, out, const1_rtx));
11700 else
11701 emit_insn (gen_addsi3 (out, out, const1_rtx));
11702
11703 emit_label (align_3_label);
11704 }
11705
11706 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11707 end_0_label);
11708
11709 if (TARGET_64BIT)
11710 emit_insn (gen_adddi3 (out, out, const1_rtx));
11711 else
11712 emit_insn (gen_addsi3 (out, out, const1_rtx));
11713 }
11714
11715 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11716 align this loop. It gives only huge programs, but does not help to
11717 speed up. */
11718 emit_label (align_4_label);
11719
11720 mem = change_address (src, SImode, out);
11721 emit_move_insn (scratch, mem);
11722 if (TARGET_64BIT)
11723 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11724 else
11725 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11726
11727 /* This formula yields a nonzero result iff one of the bytes is zero.
11728 This saves three branches inside loop and many cycles. */
11729
11730 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11731 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11732 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11733 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11734 gen_int_mode (0x80808080, SImode)));
11735 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11736 align_4_label);
11737
11738 if (TARGET_CMOVE)
11739 {
11740 rtx reg = gen_reg_rtx (SImode);
11741 rtx reg2 = gen_reg_rtx (Pmode);
11742 emit_move_insn (reg, tmpreg);
11743 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11744
11745 /* If zero is not in the first two bytes, move two bytes forward. */
11746 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11747 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11748 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11749 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11750 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11751 reg,
11752 tmpreg)));
11753 /* Emit lea manually to avoid clobbering of flags. */
11754 emit_insn (gen_rtx_SET (SImode, reg2,
11755 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11756
11757 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11758 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11759 emit_insn (gen_rtx_SET (VOIDmode, out,
11760 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11761 reg2,
11762 out)));
11763
11764 }
11765 else
11766 {
11767 rtx end_2_label = gen_label_rtx ();
11768 /* Is zero in the first two bytes? */
11769
11770 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11771 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11772 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11773 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11774 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11775 pc_rtx);
11776 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11777 JUMP_LABEL (tmp) = end_2_label;
11778
11779 /* Not in the first two. Move two bytes forward. */
11780 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11781 if (TARGET_64BIT)
11782 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11783 else
11784 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11785
11786 emit_label (end_2_label);
11787
11788 }
11789
11790 /* Avoid branch in fixing the byte. */
11791 tmpreg = gen_lowpart (QImode, tmpreg);
11792 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11793 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11794 if (TARGET_64BIT)
11795 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11796 else
11797 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11798
11799 emit_label (end_0_label);
11800 }
11801
/* Emit the RTL for a call.  RETVAL is the register receiving the return
   value (NULL for a void call), FNADDR the MEM wrapping the callee
   address, CALLARG1 the second operand of the CALL rtx, CALLARG2 a
   64-bit-only constant moved into AL before the call when nonnegative,
   POP the callee-popped byte count (NULL or const0_rtx when none) and
   SIBCALL nonzero for a sibling call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  /* Normalize "pop zero bytes" to no pop; callee-pop conventions do not
     exist on 64-bit.  */
  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* NOTE(review): CALLARG2 is loaded into AL (QImode hard reg 0) —
     presumably the x86-64 varargs vector-register count; confirm
     against the callers before relying on this.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Force the callee address into a register if it is not directly
     callable.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit indirect sibcalls go through R11.  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  /* Represent the callee-pop adjustment as a parallel SET of the stack
     pointer so the call and the adjustment stay one insn.  */
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
11861
11862
11863 /* Clear stack slot assignments remembered from previous functions.
11864 This is called from INIT_EXPANDERS once before RTL is emitted for each
11865 function. */
11866
11867 static struct machine_function *
ix86_init_machine_status(void)11868 ix86_init_machine_status (void)
11869 {
11870 struct machine_function *f;
11871
11872 f = ggc_alloc_cleared (sizeof (struct machine_function));
11873 f->use_fast_prologue_epilogue_nregs = -1;
11874
11875 return f;
11876 }
11877
11878 /* Return a MEM corresponding to a stack slot with mode MODE.
11879 Allocate a new slot if necessary.
11880
11881 The RTL for a function can have several slots available: N is
11882 which slot to use. */
11883
11884 rtx
assign_386_stack_local(enum machine_mode mode,int n)11885 assign_386_stack_local (enum machine_mode mode, int n)
11886 {
11887 struct stack_local_entry *s;
11888
11889 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11890 abort ();
11891
11892 for (s = ix86_stack_locals; s; s = s->next)
11893 if (s->mode == mode && s->n == n)
11894 return s->rtl;
11895
11896 s = (struct stack_local_entry *)
11897 ggc_alloc (sizeof (struct stack_local_entry));
11898 s->n = n;
11899 s->mode = mode;
11900 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11901
11902 s->next = ix86_stack_locals;
11903 ix86_stack_locals = s;
11904 return s->rtl;
11905 }
11906
11907 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11908
11909 static GTY(()) rtx ix86_tls_symbol;
11910 rtx
ix86_tls_get_addr(void)11911 ix86_tls_get_addr (void)
11912 {
11913
11914 if (!ix86_tls_symbol)
11915 {
11916 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11917 (TARGET_GNU_TLS && !TARGET_64BIT)
11918 ? "___tls_get_addr"
11919 : "__tls_get_addr");
11920 }
11921
11922 return ix86_tls_symbol;
11923 }
11924
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.

   ADDR is the address rtx of a MEM.  Returns the number of additional
   encoding bytes (displacement and/or the extra modrm/SIB byte);
   auto-modified addresses yield 0.  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-inc/dec addressing contributes no extra address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* A byte displacement suffices for small constants ('K'
	     constraint) when a base register is present.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
11996
11997 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11998 is set, expect that insn have 8bit immediate alternative. */
11999 int
ix86_attr_length_immediate_default(rtx insn,int shortform)12000 ix86_attr_length_immediate_default (rtx insn, int shortform)
12001 {
12002 int len = 0;
12003 int i;
12004 extract_insn_cached (insn);
12005 for (i = recog_data.n_operands - 1; i >= 0; --i)
12006 if (CONSTANT_P (recog_data.operand[i]))
12007 {
12008 if (len)
12009 abort ();
12010 if (shortform
12011 && GET_CODE (recog_data.operand[i]) == CONST_INT
12012 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12013 len = 1;
12014 else
12015 {
12016 switch (get_attr_mode (insn))
12017 {
12018 case MODE_QI:
12019 len+=1;
12020 break;
12021 case MODE_HI:
12022 len+=2;
12023 break;
12024 case MODE_SI:
12025 len+=4;
12026 break;
12027 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12028 case MODE_DI:
12029 len+=4;
12030 break;
12031 default:
12032 fatal_insn ("unknown insn mode", insn);
12033 }
12034 }
12035 }
12036 return len;
12037 }
12038 /* Compute default value for "length_address" attribute. */
12039 int
ix86_attr_length_address_default(rtx insn)12040 ix86_attr_length_address_default (rtx insn)
12041 {
12042 int i;
12043
12044 if (get_attr_type (insn) == TYPE_LEA)
12045 {
12046 rtx set = PATTERN (insn);
12047 if (GET_CODE (set) == SET)
12048 ;
12049 else if (GET_CODE (set) == PARALLEL
12050 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12051 set = XVECEXP (set, 0, 0);
12052 else
12053 {
12054 #ifdef ENABLE_CHECKING
12055 abort ();
12056 #endif
12057 return 0;
12058 }
12059
12060 return memory_address_length (SET_SRC (set));
12061 }
12062
12063 extract_insn_cached (insn);
12064 for (i = recog_data.n_operands - 1; i >= 0; --i)
12065 if (GET_CODE (recog_data.operand[i]) == MEM)
12066 {
12067 return memory_address_length (XEXP (recog_data.operand[i], 0));
12068 break;
12069 }
12070 return 0;
12071 }
12072
12073 /* Return the maximum number of instructions a cpu can issue. */
12074
12075 static int
ix86_issue_rate(void)12076 ix86_issue_rate (void)
12077 {
12078 switch (ix86_tune)
12079 {
12080 case PROCESSOR_PENTIUM:
12081 case PROCESSOR_K6:
12082 return 2;
12083
12084 case PROCESSOR_PENTIUMPRO:
12085 case PROCESSOR_PENTIUM4:
12086 case PROCESSOR_ATHLON:
12087 case PROCESSOR_K8:
12088 return 3;
12089
12090 default:
12091 return 1;
12092 }
12093 }
12094
12095 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12096 by DEP_INSN and nothing set by DEP_INSN. */
12097
12098 static int
ix86_flags_dependant(rtx insn,rtx dep_insn,enum attr_type insn_type)12099 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12100 {
12101 rtx set, set2;
12102
12103 /* Simplify the test for uninteresting insns. */
12104 if (insn_type != TYPE_SETCC
12105 && insn_type != TYPE_ICMOV
12106 && insn_type != TYPE_FCMOV
12107 && insn_type != TYPE_IBR)
12108 return 0;
12109
12110 if ((set = single_set (dep_insn)) != 0)
12111 {
12112 set = SET_DEST (set);
12113 set2 = NULL_RTX;
12114 }
12115 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12116 && XVECLEN (PATTERN (dep_insn), 0) == 2
12117 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12118 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12119 {
12120 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12121 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12122 }
12123 else
12124 return 0;
12125
12126 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12127 return 0;
12128
12129 /* This test is true if the dependent insn reads the flags but
12130 not any other potentially set register. */
12131 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12132 return 0;
12133
12134 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12135 return 0;
12136
12137 return 1;
12138 }
12139
12140 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12141 address with operands set by DEP_INSN. */
12142
12143 static int
ix86_agi_dependant(rtx insn,rtx dep_insn,enum attr_type insn_type)12144 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12145 {
12146 rtx addr;
12147
12148 if (insn_type == TYPE_LEA
12149 && TARGET_PENTIUM)
12150 {
12151 addr = PATTERN (insn);
12152 if (GET_CODE (addr) == SET)
12153 ;
12154 else if (GET_CODE (addr) == PARALLEL
12155 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12156 addr = XVECEXP (addr, 0, 0);
12157 else
12158 abort ();
12159 addr = SET_SRC (addr);
12160 }
12161 else
12162 {
12163 int i;
12164 extract_insn_cached (insn);
12165 for (i = recog_data.n_operands - 1; i >= 0; --i)
12166 if (GET_CODE (recog_data.operand[i]) == MEM)
12167 {
12168 addr = XEXP (recog_data.operand[i], 0);
12169 goto found;
12170 }
12171 return 0;
12172 found:;
12173 }
12174
12175 return modified_in_p (addr, dep_insn);
12176 }
12177
12178 static int
ix86_adjust_cost(rtx insn,rtx link,rtx dep_insn,int cost)12179 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12180 {
12181 enum attr_type insn_type, dep_insn_type;
12182 enum attr_memory memory, dep_memory;
12183 rtx set, set2;
12184 int dep_insn_code_number;
12185
12186 /* Anti and output dependencies have zero cost on all CPUs. */
12187 if (REG_NOTE_KIND (link) != 0)
12188 return 0;
12189
12190 dep_insn_code_number = recog_memoized (dep_insn);
12191
12192 /* If we can't recognize the insns, we can't really do anything. */
12193 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12194 return cost;
12195
12196 insn_type = get_attr_type (insn);
12197 dep_insn_type = get_attr_type (dep_insn);
12198
12199 switch (ix86_tune)
12200 {
12201 case PROCESSOR_PENTIUM:
12202 /* Address Generation Interlock adds a cycle of latency. */
12203 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12204 cost += 1;
12205
12206 /* ??? Compares pair with jump/setcc. */
12207 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12208 cost = 0;
12209
12210 /* Floating point stores require value to be ready one cycle earlier. */
12211 if (insn_type == TYPE_FMOV
12212 && get_attr_memory (insn) == MEMORY_STORE
12213 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12214 cost += 1;
12215 break;
12216
12217 case PROCESSOR_PENTIUMPRO:
12218 memory = get_attr_memory (insn);
12219 dep_memory = get_attr_memory (dep_insn);
12220
12221 /* Since we can't represent delayed latencies of load+operation,
12222 increase the cost here for non-imov insns. */
12223 if (dep_insn_type != TYPE_IMOV
12224 && dep_insn_type != TYPE_FMOV
12225 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12226 cost += 1;
12227
12228 /* INT->FP conversion is expensive. */
12229 if (get_attr_fp_int_src (dep_insn))
12230 cost += 5;
12231
12232 /* There is one cycle extra latency between an FP op and a store. */
12233 if (insn_type == TYPE_FMOV
12234 && (set = single_set (dep_insn)) != NULL_RTX
12235 && (set2 = single_set (insn)) != NULL_RTX
12236 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12237 && GET_CODE (SET_DEST (set2)) == MEM)
12238 cost += 1;
12239
12240 /* Show ability of reorder buffer to hide latency of load by executing
12241 in parallel with previous instruction in case
12242 previous instruction is not needed to compute the address. */
12243 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12244 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12245 {
12246 /* Claim moves to take one cycle, as core can issue one load
12247 at time and the next load can start cycle later. */
12248 if (dep_insn_type == TYPE_IMOV
12249 || dep_insn_type == TYPE_FMOV)
12250 cost = 1;
12251 else if (cost > 1)
12252 cost--;
12253 }
12254 break;
12255
12256 case PROCESSOR_K6:
12257 memory = get_attr_memory (insn);
12258 dep_memory = get_attr_memory (dep_insn);
12259 /* The esp dependency is resolved before the instruction is really
12260 finished. */
12261 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12262 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12263 return 1;
12264
12265 /* Since we can't represent delayed latencies of load+operation,
12266 increase the cost here for non-imov insns. */
12267 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12268 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12269
12270 /* INT->FP conversion is expensive. */
12271 if (get_attr_fp_int_src (dep_insn))
12272 cost += 5;
12273
12274 /* Show ability of reorder buffer to hide latency of load by executing
12275 in parallel with previous instruction in case
12276 previous instruction is not needed to compute the address. */
12277 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12278 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12279 {
12280 /* Claim moves to take one cycle, as core can issue one load
12281 at time and the next load can start cycle later. */
12282 if (dep_insn_type == TYPE_IMOV
12283 || dep_insn_type == TYPE_FMOV)
12284 cost = 1;
12285 else if (cost > 2)
12286 cost -= 2;
12287 else
12288 cost = 1;
12289 }
12290 break;
12291
12292 case PROCESSOR_ATHLON:
12293 case PROCESSOR_K8:
12294 memory = get_attr_memory (insn);
12295 dep_memory = get_attr_memory (dep_insn);
12296
12297 /* Show ability of reorder buffer to hide latency of load by executing
12298 in parallel with previous instruction in case
12299 previous instruction is not needed to compute the address. */
12300 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12301 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12302 {
12303 enum attr_unit unit = get_attr_unit (insn);
12304 int loadcost = 3;
12305
12306 /* Because of the difference between the length of integer and
12307 floating unit pipeline preparation stages, the memory operands
12308 for floating point are cheaper.
12309
12310 ??? For Athlon it the difference is most probably 2. */
12311 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12312 loadcost = 3;
12313 else
12314 loadcost = TARGET_ATHLON ? 2 : 0;
12315
12316 if (cost >= loadcost)
12317 cost -= loadcost;
12318 else
12319 cost = 0;
12320 }
12321
12322 default:
12323 break;
12324 }
12325
12326 return cost;
12327 }
12328
/* Per-scheduling-region state, keyed on the processor being tuned for.
   Only the PentiumPro model keeps state at present: the insns occupying
   the three decoder slots filled last cycle, and a count of insns issued
   so far in the current cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];		/* Insns in the three decode slots.  */
    int issued_this_cycle;	/* Insns issued so far this cycle.  */
  } ppro;
} ix86_sched_data;
12337
12338 static enum attr_ppro_uops
ix86_safe_ppro_uops(rtx insn)12339 ix86_safe_ppro_uops (rtx insn)
12340 {
12341 if (recog_memoized (insn) >= 0)
12342 return get_attr_ppro_uops (insn);
12343 else
12344 return PPRO_UOPS_MANY;
12345 }
12346
12347 static void
ix86_dump_ppro_packet(FILE * dump)12348 ix86_dump_ppro_packet (FILE *dump)
12349 {
12350 if (ix86_sched_data.ppro.decode[0])
12351 {
12352 fprintf (dump, "PPRO packet: %d",
12353 INSN_UID (ix86_sched_data.ppro.decode[0]));
12354 if (ix86_sched_data.ppro.decode[1])
12355 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12356 if (ix86_sched_data.ppro.decode[2])
12357 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12358 fputc ('\n', dump);
12359 }
12360 }
12361
12362 /* We're beginning a new block. Initialize data structures as necessary. */
12363
12364 static void
ix86_sched_init(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,int veclen ATTRIBUTE_UNUSED)12365 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12366 int sched_verbose ATTRIBUTE_UNUSED,
12367 int veclen ATTRIBUTE_UNUSED)
12368 {
12369 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12370 }
12371
12372 /* Shift INSN to SLOT, and shift everything else down. */
12373
12374 static void
ix86_reorder_insn(rtx * insnp,rtx * slot)12375 ix86_reorder_insn (rtx *insnp, rtx *slot)
12376 {
12377 if (insnp != slot)
12378 {
12379 rtx insn = *insnp;
12380 do
12381 insnp[0] = insnp[1];
12382 while (++insnp != slot);
12383 *insnp = insn;
12384 }
12385 }
12386
/* Reorder the ready list for the PentiumPro decoder model.  READY
   points at the lowest-priority entry of the scheduler's ready list
   and E_READY at the highest-priority one.  The code below fills
   decode slot 0 with either one complex (MANY-uop) insn or one FEW-uop
   insn, and slots 1-2 with single-uop insns, recording the number of
   insns issued in ix86_sched_data.ppro.issued_this_cycle.  */
static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  Scan from
	 the high-priority end down toward READY.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always report at least one insn issued, so variable_issue's
     decrement never takes the counter below zero spuriously.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
12469
12470 /* We are about to being issuing insns for this clock cycle.
12471 Override the default sort algorithm to better slot instructions. */
12472 static int
ix86_sched_reorder(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx * ready,int * n_readyp,int clock_var ATTRIBUTE_UNUSED)12473 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12474 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12475 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12476 {
12477 int n_ready = *n_readyp;
12478 rtx *e_ready = ready + n_ready - 1;
12479
12480 /* Make sure to go ahead and initialize key items in
12481 ix86_sched_data if we are not going to bother trying to
12482 reorder the ready queue. */
12483 if (n_ready < 2)
12484 {
12485 ix86_sched_data.ppro.issued_this_cycle = 1;
12486 goto out;
12487 }
12488
12489 switch (ix86_tune)
12490 {
12491 default:
12492 break;
12493
12494 case PROCESSOR_PENTIUMPRO:
12495 ix86_sched_reorder_ppro (ready, e_ready);
12496 break;
12497 }
12498
12499 out:
12500 return ix86_issue_rate ();
12501 }
12502
12503 /* We are about to issue INSN. Return the number of insns left on the
12504 ready queue that can be issued this cycle. */
12505
12506 static int
ix86_variable_issue(FILE * dump,int sched_verbose,rtx insn,int can_issue_more)12507 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12508 int can_issue_more)
12509 {
12510 int i;
12511 switch (ix86_tune)
12512 {
12513 default:
12514 return can_issue_more - 1;
12515
12516 case PROCESSOR_PENTIUMPRO:
12517 {
12518 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12519
12520 if (uops == PPRO_UOPS_MANY)
12521 {
12522 if (sched_verbose)
12523 ix86_dump_ppro_packet (dump);
12524 ix86_sched_data.ppro.decode[0] = insn;
12525 ix86_sched_data.ppro.decode[1] = NULL;
12526 ix86_sched_data.ppro.decode[2] = NULL;
12527 if (sched_verbose)
12528 ix86_dump_ppro_packet (dump);
12529 ix86_sched_data.ppro.decode[0] = NULL;
12530 }
12531 else if (uops == PPRO_UOPS_FEW)
12532 {
12533 if (sched_verbose)
12534 ix86_dump_ppro_packet (dump);
12535 ix86_sched_data.ppro.decode[0] = insn;
12536 ix86_sched_data.ppro.decode[1] = NULL;
12537 ix86_sched_data.ppro.decode[2] = NULL;
12538 }
12539 else
12540 {
12541 for (i = 0; i < 3; ++i)
12542 if (ix86_sched_data.ppro.decode[i] == NULL)
12543 {
12544 ix86_sched_data.ppro.decode[i] = insn;
12545 break;
12546 }
12547 if (i == 3)
12548 abort ();
12549 if (i == 2)
12550 {
12551 if (sched_verbose)
12552 ix86_dump_ppro_packet (dump);
12553 ix86_sched_data.ppro.decode[0] = NULL;
12554 ix86_sched_data.ppro.decode[1] = NULL;
12555 ix86_sched_data.ppro.decode[2] = NULL;
12556 }
12557 }
12558 }
12559 return --ix86_sched_data.ppro.issued_this_cycle;
12560 }
12561 }
12562
12563 static int
ia32_use_dfa_pipeline_interface(void)12564 ia32_use_dfa_pipeline_interface (void)
12565 {
12566 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12567 return 1;
12568 return 0;
12569 }
12570
12571 /* How many alternative schedules to try. This should be as wide as the
12572 scheduling freedom in the DFA, but no wider. Making this value too
12573 large results extra work for the scheduler. */
12574
12575 static int
ia32_multipass_dfa_lookahead(void)12576 ia32_multipass_dfa_lookahead (void)
12577 {
12578 if (ix86_tune == PROCESSOR_PENTIUM)
12579 return 2;
12580 else
12581 return 0;
12582 }
12583
12584
12585 /* Compute the alignment given to a constant that is being placed in memory.
12586 EXP is the constant and ALIGN is the alignment that the object would
12587 ordinarily have.
12588 The value of this function is used instead of that alignment to align
12589 the object. */
12590
12591 int
ix86_constant_alignment(tree exp,int align)12592 ix86_constant_alignment (tree exp, int align)
12593 {
12594 if (TREE_CODE (exp) == REAL_CST)
12595 {
12596 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12597 return 64;
12598 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12599 return 128;
12600 }
12601 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12602 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12603 return BITS_PER_WORD;
12604
12605 return align;
12606 }
12607
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  /* Aggregates of 256 bits or more (checked via TYPE_SIZE, which is in
     bits; a nonzero high word means the size is huge) get 256-bit
     alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* For the remaining cases, raise alignment when the element or field
     mode is DFmode (to 64) or one of the 128-bit modes (to 128).  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is examined here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
12671
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.

   Mirrors ix86_data_alignment, but with a lower (16-byte) size
   threshold for the 64-bit aggregate rule and without the 256-bit
   case.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Raise alignment when the element/field mode is DFmode (to 64) or a
     128-bit mode (to 128), as in ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is examined here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
12726
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   The byte sequences written below are x86 machine code; multi-byte
   immediates and opcode pairs are stored little-endian, so e.g. the
   HImode constant 0xbb41 lays down the bytes 0x41 0xbb.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit trampoline:
	   b9 <cxt>    movl $cxt, %ecx
	   e9 <disp>   jmp  fnaddr  (relative displacement)  */
      /* Compute offset from the end of the jmp to the target function;
	 the jmp ends 10 bytes into the trampoline (1+4+1+4).  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  /* 41 bb <imm32>   movl $fnaddr, %r11d  (zero-extends)  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb <imm64>   movabs $fnaddr, %r11  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10:
	   49 ba <imm64>   movabs $cxt, %r10  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11:
	   49 ff e3        jmp *%r11  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check that we did not overrun the allocated space.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  /* On targets that need it, mark the stack page executable.  */
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
12790
/* Register builtin NAME with type TYPE and code CODE, but only when the
   target-flag MASK is enabled (and, for MASK_64BIT builtins, only in
   64-bit mode).  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)

/* One entry of the builtin-description tables below: the target-flag
   mask gating the builtin, the insn pattern implementing it, its
   user-visible name (0 for builtins expanded specially), its
   IX86_BUILTIN code, and, for comparisons, the rtx comparison code
   plus a flag requesting swapped operands.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
12808
/* SSE/SSE2 scalar-compare builtins expanding to [u]comiss / [u]comisd
   patterns.  The comparison field gives the rtx code tested on the
   resulting flags.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
12836
12837 static const struct builtin_description bdesc_2arg[] =
12838 {
12839 /* SSE */
12840 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12841 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12842 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12843 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12844 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12845 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12847 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12848
12849 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12850 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12851 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12852 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12853 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12854 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12855 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12856 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12857 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12858 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12859 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12860 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12861 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12862 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12863 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12864 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12865 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12866 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12867 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12868 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12869
12870 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12871 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12872 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12873 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12874
12875 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12876 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12877 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12878 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12879
12880 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12881 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12882 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12883 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12884 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12885
12886 /* MMX */
12887 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12888 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12889 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12890 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12891 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12892 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12893 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12894 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12895
12896 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12897 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12898 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12899 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12900 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12901 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12903 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12904
12905 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12907 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12908
12909 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12913
12914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12915 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12916
12917 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12918 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12919 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12920 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12921 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12922 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12923
12924 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12925 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12926 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12927 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12928
12929 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12930 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12934 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12935
12936 /* Special. */
12937 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12940
12941 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12942 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12943 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12944
12945 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12947 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12948 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12949 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12950 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12951
12952 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12953 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12954 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12955 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12956 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12957 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12958
12959 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12960 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12961 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12962 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12963
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12965 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12966
12967 /* SSE2 */
12968 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12976
12977 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12978 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12979 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12980 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12981 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12982 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12983 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12984 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12985 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12986 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12987 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12988 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12989 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12990 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12991 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12992 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12993 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12994 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12995 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12996 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12997
12998 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13002
13003 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13007
13008 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13011
13012 /* SSE2 MMX */
13013 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13021
13022 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13023 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13024 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13025 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13026 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13027 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13028 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13029 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13030
13031 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13035
13036 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13040
13041 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13043
13044 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13050
13051 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13055
13056 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13064
13065 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13068
13069 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13071
13072 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13078
13079 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13080 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13084 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13085
13086 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13089 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13090
13091 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13092
13093 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13094 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13097
13098 /* SSE3 MMX */
13099 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13100 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13101 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13102 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13103 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13104 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13105 };
13106
/* Builtins taking a single operand.  Fields of each entry: the ISA flag
   mask that must be enabled for the builtin to exist, the insn code of
   the pattern that implements it, the C-level builtin name (0 when the
   builtin is instead registered by hand in ix86_init_mmx_sse_builtins),
   the IX86_BUILTIN_* enumerator, and the comparison rtx code plus
   swap-operands flag (both unused for unary operations, hence 0, 0).  */
static const struct builtin_description bdesc_1arg[] =
{
  /* SSE (pmovmskb also available on 3DNow! Athlon): bit-mask extraction.  */
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  /* SSE: square root and reciprocal approximations on V4SF.  */
  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* SSE: float <-> integer conversions ("tt" variants truncate;
     the *siq forms produce 64-bit results and need TARGET_64BIT).  */
  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  /* SSE2: mask extraction and MMX<->XMM register moves.  */
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  /* SSE2: packed integer -> packed float conversions.  */
  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  /* SSE2: packed double -> integer/float conversions.  */
  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  /* SSE2: scalar double -> 32/64-bit integer conversions.  */
  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  /* SSE2: packed single -> integer/double conversions.  */
  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};
13157
13158 void
ix86_init_builtins(void)13159 ix86_init_builtins (void)
13160 {
13161 if (TARGET_MMX)
13162 ix86_init_mmx_sse_builtins ();
13163 }
13164
13165 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13166 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13167 builtins. */
13168 static void
ix86_init_mmx_sse_builtins(void)13169 ix86_init_mmx_sse_builtins (void)
13170 {
13171 const struct builtin_description * d;
13172 size_t i;
13173
13174 tree pchar_type_node = build_pointer_type (char_type_node);
13175 tree pcchar_type_node = build_pointer_type (
13176 build_type_variant (char_type_node, 1, 0));
13177 tree pfloat_type_node = build_pointer_type (float_type_node);
13178 tree pcfloat_type_node = build_pointer_type (
13179 build_type_variant (float_type_node, 1, 0));
13180 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13181 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13182 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13183
13184 /* Comparisons. */
13185 tree int_ftype_v4sf_v4sf
13186 = build_function_type_list (integer_type_node,
13187 V4SF_type_node, V4SF_type_node, NULL_TREE);
13188 tree v4si_ftype_v4sf_v4sf
13189 = build_function_type_list (V4SI_type_node,
13190 V4SF_type_node, V4SF_type_node, NULL_TREE);
13191 /* MMX/SSE/integer conversions. */
13192 tree int_ftype_v4sf
13193 = build_function_type_list (integer_type_node,
13194 V4SF_type_node, NULL_TREE);
13195 tree int64_ftype_v4sf
13196 = build_function_type_list (long_long_integer_type_node,
13197 V4SF_type_node, NULL_TREE);
13198 tree int_ftype_v8qi
13199 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13200 tree v4sf_ftype_v4sf_int
13201 = build_function_type_list (V4SF_type_node,
13202 V4SF_type_node, integer_type_node, NULL_TREE);
13203 tree v4sf_ftype_v4sf_int64
13204 = build_function_type_list (V4SF_type_node,
13205 V4SF_type_node, long_long_integer_type_node,
13206 NULL_TREE);
13207 tree v4sf_ftype_v4sf_v2si
13208 = build_function_type_list (V4SF_type_node,
13209 V4SF_type_node, V2SI_type_node, NULL_TREE);
13210 tree int_ftype_v4hi_int
13211 = build_function_type_list (integer_type_node,
13212 V4HI_type_node, integer_type_node, NULL_TREE);
13213 tree v4hi_ftype_v4hi_int_int
13214 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13215 integer_type_node, integer_type_node,
13216 NULL_TREE);
13217 /* Miscellaneous. */
13218 tree v8qi_ftype_v4hi_v4hi
13219 = build_function_type_list (V8QI_type_node,
13220 V4HI_type_node, V4HI_type_node, NULL_TREE);
13221 tree v4hi_ftype_v2si_v2si
13222 = build_function_type_list (V4HI_type_node,
13223 V2SI_type_node, V2SI_type_node, NULL_TREE);
13224 tree v4sf_ftype_v4sf_v4sf_int
13225 = build_function_type_list (V4SF_type_node,
13226 V4SF_type_node, V4SF_type_node,
13227 integer_type_node, NULL_TREE);
13228 tree v2si_ftype_v4hi_v4hi
13229 = build_function_type_list (V2SI_type_node,
13230 V4HI_type_node, V4HI_type_node, NULL_TREE);
13231 tree v4hi_ftype_v4hi_int
13232 = build_function_type_list (V4HI_type_node,
13233 V4HI_type_node, integer_type_node, NULL_TREE);
13234 tree v4hi_ftype_v4hi_di
13235 = build_function_type_list (V4HI_type_node,
13236 V4HI_type_node, long_long_unsigned_type_node,
13237 NULL_TREE);
13238 tree v2si_ftype_v2si_di
13239 = build_function_type_list (V2SI_type_node,
13240 V2SI_type_node, long_long_unsigned_type_node,
13241 NULL_TREE);
13242 tree void_ftype_void
13243 = build_function_type (void_type_node, void_list_node);
13244 tree void_ftype_unsigned
13245 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13246 tree void_ftype_unsigned_unsigned
13247 = build_function_type_list (void_type_node, unsigned_type_node,
13248 unsigned_type_node, NULL_TREE);
13249 tree void_ftype_pcvoid_unsigned_unsigned
13250 = build_function_type_list (void_type_node, const_ptr_type_node,
13251 unsigned_type_node, unsigned_type_node,
13252 NULL_TREE);
13253 tree unsigned_ftype_void
13254 = build_function_type (unsigned_type_node, void_list_node);
13255 tree di_ftype_void
13256 = build_function_type (long_long_unsigned_type_node, void_list_node);
13257 tree v4sf_ftype_void
13258 = build_function_type (V4SF_type_node, void_list_node);
13259 tree v2si_ftype_v4sf
13260 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13261 /* Loads/stores. */
13262 tree void_ftype_v8qi_v8qi_pchar
13263 = build_function_type_list (void_type_node,
13264 V8QI_type_node, V8QI_type_node,
13265 pchar_type_node, NULL_TREE);
13266 tree v4sf_ftype_pcfloat
13267 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13268 /* @@@ the type is bogus */
13269 tree v4sf_ftype_v4sf_pv2si
13270 = build_function_type_list (V4SF_type_node,
13271 V4SF_type_node, pv2si_type_node, NULL_TREE);
13272 tree void_ftype_pv2si_v4sf
13273 = build_function_type_list (void_type_node,
13274 pv2si_type_node, V4SF_type_node, NULL_TREE);
13275 tree void_ftype_pfloat_v4sf
13276 = build_function_type_list (void_type_node,
13277 pfloat_type_node, V4SF_type_node, NULL_TREE);
13278 tree void_ftype_pdi_di
13279 = build_function_type_list (void_type_node,
13280 pdi_type_node, long_long_unsigned_type_node,
13281 NULL_TREE);
13282 tree void_ftype_pv2di_v2di
13283 = build_function_type_list (void_type_node,
13284 pv2di_type_node, V2DI_type_node, NULL_TREE);
13285 /* Normal vector unops. */
13286 tree v4sf_ftype_v4sf
13287 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13288
13289 /* Normal vector binops. */
13290 tree v4sf_ftype_v4sf_v4sf
13291 = build_function_type_list (V4SF_type_node,
13292 V4SF_type_node, V4SF_type_node, NULL_TREE);
13293 tree v8qi_ftype_v8qi_v8qi
13294 = build_function_type_list (V8QI_type_node,
13295 V8QI_type_node, V8QI_type_node, NULL_TREE);
13296 tree v4hi_ftype_v4hi_v4hi
13297 = build_function_type_list (V4HI_type_node,
13298 V4HI_type_node, V4HI_type_node, NULL_TREE);
13299 tree v2si_ftype_v2si_v2si
13300 = build_function_type_list (V2SI_type_node,
13301 V2SI_type_node, V2SI_type_node, NULL_TREE);
13302 tree di_ftype_di_di
13303 = build_function_type_list (long_long_unsigned_type_node,
13304 long_long_unsigned_type_node,
13305 long_long_unsigned_type_node, NULL_TREE);
13306
13307 tree v2si_ftype_v2sf
13308 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13309 tree v2sf_ftype_v2si
13310 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13311 tree v2si_ftype_v2si
13312 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13313 tree v2sf_ftype_v2sf
13314 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13315 tree v2sf_ftype_v2sf_v2sf
13316 = build_function_type_list (V2SF_type_node,
13317 V2SF_type_node, V2SF_type_node, NULL_TREE);
13318 tree v2si_ftype_v2sf_v2sf
13319 = build_function_type_list (V2SI_type_node,
13320 V2SF_type_node, V2SF_type_node, NULL_TREE);
13321 tree pint_type_node = build_pointer_type (integer_type_node);
13322 tree pcint_type_node = build_pointer_type (
13323 build_type_variant (integer_type_node, 1, 0));
13324 tree pdouble_type_node = build_pointer_type (double_type_node);
13325 tree pcdouble_type_node = build_pointer_type (
13326 build_type_variant (double_type_node, 1, 0));
13327 tree int_ftype_v2df_v2df
13328 = build_function_type_list (integer_type_node,
13329 V2DF_type_node, V2DF_type_node, NULL_TREE);
13330
13331 tree ti_ftype_void
13332 = build_function_type (intTI_type_node, void_list_node);
13333 tree v2di_ftype_void
13334 = build_function_type (V2DI_type_node, void_list_node);
13335 tree ti_ftype_ti_ti
13336 = build_function_type_list (intTI_type_node,
13337 intTI_type_node, intTI_type_node, NULL_TREE);
13338 tree void_ftype_pcvoid
13339 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13340 tree v2di_ftype_di
13341 = build_function_type_list (V2DI_type_node,
13342 long_long_unsigned_type_node, NULL_TREE);
13343 tree di_ftype_v2di
13344 = build_function_type_list (long_long_unsigned_type_node,
13345 V2DI_type_node, NULL_TREE);
13346 tree v4sf_ftype_v4si
13347 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13348 tree v4si_ftype_v4sf
13349 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13350 tree v2df_ftype_v4si
13351 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13352 tree v4si_ftype_v2df
13353 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13354 tree v2si_ftype_v2df
13355 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13356 tree v4sf_ftype_v2df
13357 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13358 tree v2df_ftype_v2si
13359 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13360 tree v2df_ftype_v4sf
13361 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13362 tree int_ftype_v2df
13363 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13364 tree int64_ftype_v2df
13365 = build_function_type_list (long_long_integer_type_node,
13366 V2DF_type_node, NULL_TREE);
13367 tree v2df_ftype_v2df_int
13368 = build_function_type_list (V2DF_type_node,
13369 V2DF_type_node, integer_type_node, NULL_TREE);
13370 tree v2df_ftype_v2df_int64
13371 = build_function_type_list (V2DF_type_node,
13372 V2DF_type_node, long_long_integer_type_node,
13373 NULL_TREE);
13374 tree v4sf_ftype_v4sf_v2df
13375 = build_function_type_list (V4SF_type_node,
13376 V4SF_type_node, V2DF_type_node, NULL_TREE);
13377 tree v2df_ftype_v2df_v4sf
13378 = build_function_type_list (V2DF_type_node,
13379 V2DF_type_node, V4SF_type_node, NULL_TREE);
13380 tree v2df_ftype_v2df_v2df_int
13381 = build_function_type_list (V2DF_type_node,
13382 V2DF_type_node, V2DF_type_node,
13383 integer_type_node,
13384 NULL_TREE);
13385 tree v2df_ftype_v2df_pv2si
13386 = build_function_type_list (V2DF_type_node,
13387 V2DF_type_node, pv2si_type_node, NULL_TREE);
13388 tree void_ftype_pv2si_v2df
13389 = build_function_type_list (void_type_node,
13390 pv2si_type_node, V2DF_type_node, NULL_TREE);
13391 tree void_ftype_pdouble_v2df
13392 = build_function_type_list (void_type_node,
13393 pdouble_type_node, V2DF_type_node, NULL_TREE);
13394 tree void_ftype_pint_int
13395 = build_function_type_list (void_type_node,
13396 pint_type_node, integer_type_node, NULL_TREE);
13397 tree void_ftype_v16qi_v16qi_pchar
13398 = build_function_type_list (void_type_node,
13399 V16QI_type_node, V16QI_type_node,
13400 pchar_type_node, NULL_TREE);
13401 tree v2df_ftype_pcdouble
13402 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13403 tree v2df_ftype_v2df_v2df
13404 = build_function_type_list (V2DF_type_node,
13405 V2DF_type_node, V2DF_type_node, NULL_TREE);
13406 tree v16qi_ftype_v16qi_v16qi
13407 = build_function_type_list (V16QI_type_node,
13408 V16QI_type_node, V16QI_type_node, NULL_TREE);
13409 tree v8hi_ftype_v8hi_v8hi
13410 = build_function_type_list (V8HI_type_node,
13411 V8HI_type_node, V8HI_type_node, NULL_TREE);
13412 tree v4si_ftype_v4si_v4si
13413 = build_function_type_list (V4SI_type_node,
13414 V4SI_type_node, V4SI_type_node, NULL_TREE);
13415 tree v2di_ftype_v2di_v2di
13416 = build_function_type_list (V2DI_type_node,
13417 V2DI_type_node, V2DI_type_node, NULL_TREE);
13418 tree v2di_ftype_v2df_v2df
13419 = build_function_type_list (V2DI_type_node,
13420 V2DF_type_node, V2DF_type_node, NULL_TREE);
13421 tree v2df_ftype_v2df
13422 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13423 tree v2df_ftype_double
13424 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13425 tree v2df_ftype_double_double
13426 = build_function_type_list (V2DF_type_node,
13427 double_type_node, double_type_node, NULL_TREE);
13428 tree int_ftype_v8hi_int
13429 = build_function_type_list (integer_type_node,
13430 V8HI_type_node, integer_type_node, NULL_TREE);
13431 tree v8hi_ftype_v8hi_int_int
13432 = build_function_type_list (V8HI_type_node,
13433 V8HI_type_node, integer_type_node,
13434 integer_type_node, NULL_TREE);
13435 tree v2di_ftype_v2di_int
13436 = build_function_type_list (V2DI_type_node,
13437 V2DI_type_node, integer_type_node, NULL_TREE);
13438 tree v4si_ftype_v4si_int
13439 = build_function_type_list (V4SI_type_node,
13440 V4SI_type_node, integer_type_node, NULL_TREE);
13441 tree v8hi_ftype_v8hi_int
13442 = build_function_type_list (V8HI_type_node,
13443 V8HI_type_node, integer_type_node, NULL_TREE);
13444 tree v8hi_ftype_v8hi_v2di
13445 = build_function_type_list (V8HI_type_node,
13446 V8HI_type_node, V2DI_type_node, NULL_TREE);
13447 tree v4si_ftype_v4si_v2di
13448 = build_function_type_list (V4SI_type_node,
13449 V4SI_type_node, V2DI_type_node, NULL_TREE);
13450 tree v4si_ftype_v8hi_v8hi
13451 = build_function_type_list (V4SI_type_node,
13452 V8HI_type_node, V8HI_type_node, NULL_TREE);
13453 tree di_ftype_v8qi_v8qi
13454 = build_function_type_list (long_long_unsigned_type_node,
13455 V8QI_type_node, V8QI_type_node, NULL_TREE);
13456 tree v2di_ftype_v16qi_v16qi
13457 = build_function_type_list (V2DI_type_node,
13458 V16QI_type_node, V16QI_type_node, NULL_TREE);
13459 tree int_ftype_v16qi
13460 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13461 tree v16qi_ftype_pcchar
13462 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13463 tree void_ftype_pchar_v16qi
13464 = build_function_type_list (void_type_node,
13465 pchar_type_node, V16QI_type_node, NULL_TREE);
13466 tree v4si_ftype_pcint
13467 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13468 tree void_ftype_pcint_v4si
13469 = build_function_type_list (void_type_node,
13470 pcint_type_node, V4SI_type_node, NULL_TREE);
13471 tree v2di_ftype_v2di
13472 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13473
13474 tree float80_type;
13475 tree float128_type;
13476
13477 /* The __float80 type. */
13478 if (TYPE_MODE (long_double_type_node) == XFmode)
13479 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13480 "__float80");
13481 else
13482 {
13483 /* The __float80 type. */
13484 float80_type = make_node (REAL_TYPE);
13485 TYPE_PRECISION (float80_type) = 96;
13486 layout_type (float80_type);
13487 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13488 }
13489
13490 float128_type = make_node (REAL_TYPE);
13491 TYPE_PRECISION (float128_type) = 128;
13492 layout_type (float128_type);
13493 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13494
13495 /* Add all builtins that are more or less simple operations on two
13496 operands. */
13497 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13498 {
13499 /* Use one of the operands; the target can have a different mode for
13500 mask-generating compares. */
13501 enum machine_mode mode;
13502 tree type;
13503
13504 if (d->name == 0)
13505 continue;
13506 mode = insn_data[d->icode].operand[1].mode;
13507
13508 switch (mode)
13509 {
13510 case V16QImode:
13511 type = v16qi_ftype_v16qi_v16qi;
13512 break;
13513 case V8HImode:
13514 type = v8hi_ftype_v8hi_v8hi;
13515 break;
13516 case V4SImode:
13517 type = v4si_ftype_v4si_v4si;
13518 break;
13519 case V2DImode:
13520 type = v2di_ftype_v2di_v2di;
13521 break;
13522 case V2DFmode:
13523 type = v2df_ftype_v2df_v2df;
13524 break;
13525 case TImode:
13526 type = ti_ftype_ti_ti;
13527 break;
13528 case V4SFmode:
13529 type = v4sf_ftype_v4sf_v4sf;
13530 break;
13531 case V8QImode:
13532 type = v8qi_ftype_v8qi_v8qi;
13533 break;
13534 case V4HImode:
13535 type = v4hi_ftype_v4hi_v4hi;
13536 break;
13537 case V2SImode:
13538 type = v2si_ftype_v2si_v2si;
13539 break;
13540 case DImode:
13541 type = di_ftype_di_di;
13542 break;
13543
13544 default:
13545 abort ();
13546 }
13547
13548 /* Override for comparisons. */
13549 if (d->icode == CODE_FOR_maskcmpv4sf3
13550 || d->icode == CODE_FOR_maskncmpv4sf3
13551 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13552 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13553 type = v4si_ftype_v4sf_v4sf;
13554
13555 if (d->icode == CODE_FOR_maskcmpv2df3
13556 || d->icode == CODE_FOR_maskncmpv2df3
13557 || d->icode == CODE_FOR_vmmaskcmpv2df3
13558 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13559 type = v2di_ftype_v2df_v2df;
13560
13561 def_builtin (d->mask, d->name, type, d->code);
13562 }
13563
13564 /* Add the remaining MMX insns with somewhat more complicated types. */
13565 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13566 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13567 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13568 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13569 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13570
13571 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13572 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13573 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13574
13575 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13576 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13577
13578 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13579 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13580
13581 /* comi/ucomi insns. */
13582 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13583 if (d->mask == MASK_SSE2)
13584 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13585 else
13586 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13587
13588 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13589 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13590 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13591
13592 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13593 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13594 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13596 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13597 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13598 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13599 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13600 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13601 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13602 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13603
13604 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13605 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13606
13607 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13608
13609 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13611 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13612 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13613 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13614 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13615
13616 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13617 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13618 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13619 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13620
13621 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13622 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13623 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13624 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13625
13626 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13627
13628 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13629
13630 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13631 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13632 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13633 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13634 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13635 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13636
13637 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13638
13639 /* Original 3DNow! */
13640 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13641 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13642 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13643 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13644 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13645 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13647 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13648 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13649 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13650 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13651 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13652 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13653 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13654 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13655 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13656 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13657 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13658 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13659 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13660
13661 /* 3DNow! extension as used in the Athlon CPU. */
13662 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13663 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13664 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13665 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13666 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13667 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13668
13669 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13670
13671 /* SSE2 */
13672 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13674
13675 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13678
13679 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13685
13686 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13690
13691 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13696
13697 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13701
13702 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13704
13705 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13706
13707 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13709
13710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13715
13716 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13717
13718 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13720 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13721 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13722
13723 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13726
13727 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13728 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13731
13732 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13734 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13739
13740 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13743
13744 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13745 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13750 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13751
13752 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13753
13754 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13755 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13756 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13757
13758 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13759 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13761
13762 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13764
13765 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13766 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13767 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13769
13770 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13771 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13772 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13774
13775 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13776 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13777
13778 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13779
13780 /* Prescott New Instructions. */
13781 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13782 void_ftype_pcvoid_unsigned_unsigned,
13783 IX86_BUILTIN_MONITOR);
13784 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13785 void_ftype_unsigned_unsigned,
13786 IX86_BUILTIN_MWAIT);
13787 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13788 v4sf_ftype_v4sf,
13789 IX86_BUILTIN_MOVSHDUP);
13790 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13791 v4sf_ftype_v4sf,
13792 IX86_BUILTIN_MOVSLDUP);
13793 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13794 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13795 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13796 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13797 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13798 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13799 }
13800
13801 /* Errors in the source file can cause expand_expr to return const0_rtx
13802 where we expect a vector. To avoid crashing, use one of the vector
13803 clear instructions. */
13804 static rtx
safe_vector_operand(rtx x,enum machine_mode mode)13805 safe_vector_operand (rtx x, enum machine_mode mode)
13806 {
13807 if (x != const0_rtx)
13808 return x;
13809 x = gen_reg_rtx (mode);
13810
13811 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13812 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13813 : gen_rtx_SUBREG (DImode, x, 0)));
13814 else
13815 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13816 : gen_rtx_SUBREG (V4SFmode, x, 0),
13817 CONST0_RTX (V4SFmode)));
13818 return x;
13819 }
13820
/* Subroutine of ix86_expand_builtin to take care of binop insns.
   ICODE is the insn to generate, ARGLIST holds the two argument trees,
   and TARGET is a suggested destination (may be reused if it fits).
   Returns the rtx holding the result, or 0 if no insn was generated.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Error recovery: const0_rtx from erroneous source is replaced with a
     cleared vector register of the expected mode.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode second operand feeding a TImode insn is widened by loading
     it into a V4SImode register and taking the TImode lowpart.  This must
     happen before the mode-mismatch check below, or it would abort.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13875
13876 /* Subroutine of ix86_expand_builtin to take care of stores. */
13877
13878 static rtx
ix86_expand_store_builtin(enum insn_code icode,tree arglist)13879 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13880 {
13881 rtx pat;
13882 tree arg0 = TREE_VALUE (arglist);
13883 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13884 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13885 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13886 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13887 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13888
13889 if (VECTOR_MODE_P (mode1))
13890 op1 = safe_vector_operand (op1, mode1);
13891
13892 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13893 op1 = copy_to_mode_reg (mode1, op1);
13894
13895 pat = GEN_FCN (icode) (op0, op1);
13896 if (pat)
13897 emit_insn (pat);
13898 return 0;
13899 }
13900
13901 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13902
13903 static rtx
ix86_expand_unop_builtin(enum insn_code icode,tree arglist,rtx target,int do_load)13904 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13905 rtx target, int do_load)
13906 {
13907 rtx pat;
13908 tree arg0 = TREE_VALUE (arglist);
13909 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13910 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13911 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13912
13913 if (! target
13914 || GET_MODE (target) != tmode
13915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13916 target = gen_reg_rtx (tmode);
13917 if (do_load)
13918 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13919 else
13920 {
13921 if (VECTOR_MODE_P (mode0))
13922 op0 = safe_vector_operand (op0, mode0);
13923
13924 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13925 op0 = copy_to_mode_reg (mode0, op0);
13926 }
13927
13928 pat = GEN_FCN (icode) (target, op0);
13929 if (! pat)
13930 return 0;
13931 emit_insn (pat);
13932 return target;
13933 }
13934
13935 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13936 sqrtss, rsqrtss, rcpss. */
13937
13938 static rtx
ix86_expand_unop1_builtin(enum insn_code icode,tree arglist,rtx target)13939 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13940 {
13941 rtx pat;
13942 tree arg0 = TREE_VALUE (arglist);
13943 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13944 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13945 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13946
13947 if (! target
13948 || GET_MODE (target) != tmode
13949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13950 target = gen_reg_rtx (tmode);
13951
13952 if (VECTOR_MODE_P (mode0))
13953 op0 = safe_vector_operand (op0, mode0);
13954
13955 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13956 op0 = copy_to_mode_reg (mode0, op0);
13957
13958 op1 = op0;
13959 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13960 op1 = copy_to_mode_reg (mode0, op1);
13961
13962 pat = GEN_FCN (icode) (target, op0, op1);
13963 if (! pat)
13964 return 0;
13965 emit_insn (pat);
13966 return target;
13967 }
13968
/* Subroutine of ix86_expand_builtin to take care of comparison insns.
   D describes the builtin (icode, comparison code, swap flag), ARGLIST
   holds the two vector arguments, TARGET is a suggested destination.
   Returns the rtx holding the mask result, or 0 on failure.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Error recovery: replace const0_rtx from erroneous source with a
     cleared vector register of the expected mode.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* The old second operand is copied into a fresh register before it
	 becomes the first operand — presumably so it meets operand 1's
	 predicate after the swap; TODO confirm.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The comparison rtx itself is passed as the insn's third operand,
     selecting which predicate the pattern emits.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
14018
14019 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14020
14021 static rtx
ix86_expand_sse_comi(const struct builtin_description * d,tree arglist,rtx target)14022 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14023 rtx target)
14024 {
14025 rtx pat;
14026 tree arg0 = TREE_VALUE (arglist);
14027 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14028 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14029 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14030 rtx op2;
14031 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14032 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14033 enum rtx_code comparison = d->comparison;
14034
14035 if (VECTOR_MODE_P (mode0))
14036 op0 = safe_vector_operand (op0, mode0);
14037 if (VECTOR_MODE_P (mode1))
14038 op1 = safe_vector_operand (op1, mode1);
14039
14040 /* Swap operands if we have a comparison that isn't available in
14041 hardware. */
14042 if (d->flag)
14043 {
14044 rtx tmp = op1;
14045 op1 = op0;
14046 op0 = tmp;
14047 }
14048
14049 target = gen_reg_rtx (SImode);
14050 emit_move_insn (target, const0_rtx);
14051 target = gen_rtx_SUBREG (QImode, target, 0);
14052
14053 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14054 op0 = copy_to_mode_reg (mode0, op0);
14055 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14056 op1 = copy_to_mode_reg (mode1, op1);
14057
14058 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14059 pat = GEN_FCN (d->icode) (op0, op1);
14060 if (! pat)
14061 return 0;
14062 emit_insn (pat);
14063 emit_insn (gen_rtx_SET (VOIDmode,
14064 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14065 gen_rtx_fmt_ee (comparison, QImode,
14066 SET_DEST (pat),
14067 const0_rtx)));
14068
14069 return SUBREG_REG (target);
14070 }
14071
14072 /* Expand an expression EXP that calls a built-in function,
14073 with result going to TARGET if that's convenient
14074 (and in mode MODE if that's convenient).
14075 SUBTARGET may be used as the target for computing one of EXP's operands.
14076 IGNORE is nonzero if the value is to be ignored. */
14077
14078 rtx
ix86_expand_builtin(tree exp,rtx target,rtx subtarget ATTRIBUTE_UNUSED,enum machine_mode mode ATTRIBUTE_UNUSED,int ignore ATTRIBUTE_UNUSED)14079 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14080 enum machine_mode mode ATTRIBUTE_UNUSED,
14081 int ignore ATTRIBUTE_UNUSED)
14082 {
14083 const struct builtin_description *d;
14084 size_t i;
14085 enum insn_code icode;
14086 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14087 tree arglist = TREE_OPERAND (exp, 1);
14088 tree arg0, arg1, arg2;
14089 rtx op0, op1, op2, pat;
14090 enum machine_mode tmode, mode0, mode1, mode2;
14091 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14092
14093 switch (fcode)
14094 {
14095 case IX86_BUILTIN_EMMS:
14096 emit_insn (gen_emms ());
14097 return 0;
14098
14099 case IX86_BUILTIN_SFENCE:
14100 emit_insn (gen_sfence ());
14101 return 0;
14102
14103 case IX86_BUILTIN_PEXTRW:
14104 case IX86_BUILTIN_PEXTRW128:
14105 icode = (fcode == IX86_BUILTIN_PEXTRW
14106 ? CODE_FOR_mmx_pextrw
14107 : CODE_FOR_sse2_pextrw);
14108 arg0 = TREE_VALUE (arglist);
14109 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14110 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14111 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14112 tmode = insn_data[icode].operand[0].mode;
14113 mode0 = insn_data[icode].operand[1].mode;
14114 mode1 = insn_data[icode].operand[2].mode;
14115
14116 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14117 op0 = copy_to_mode_reg (mode0, op0);
14118 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14119 {
14120 error ("selector must be an integer constant in the range 0..%i",
14121 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14122 return gen_reg_rtx (tmode);
14123 }
14124 if (target == 0
14125 || GET_MODE (target) != tmode
14126 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14127 target = gen_reg_rtx (tmode);
14128 pat = GEN_FCN (icode) (target, op0, op1);
14129 if (! pat)
14130 return 0;
14131 emit_insn (pat);
14132 return target;
14133
14134 case IX86_BUILTIN_PINSRW:
14135 case IX86_BUILTIN_PINSRW128:
14136 icode = (fcode == IX86_BUILTIN_PINSRW
14137 ? CODE_FOR_mmx_pinsrw
14138 : CODE_FOR_sse2_pinsrw);
14139 arg0 = TREE_VALUE (arglist);
14140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14141 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14142 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14143 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14144 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14145 tmode = insn_data[icode].operand[0].mode;
14146 mode0 = insn_data[icode].operand[1].mode;
14147 mode1 = insn_data[icode].operand[2].mode;
14148 mode2 = insn_data[icode].operand[3].mode;
14149
14150 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14151 op0 = copy_to_mode_reg (mode0, op0);
14152 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14153 op1 = copy_to_mode_reg (mode1, op1);
14154 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14155 {
14156 error ("selector must be an integer constant in the range 0..%i",
14157 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14158 return const0_rtx;
14159 }
14160 if (target == 0
14161 || GET_MODE (target) != tmode
14162 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14163 target = gen_reg_rtx (tmode);
14164 pat = GEN_FCN (icode) (target, op0, op1, op2);
14165 if (! pat)
14166 return 0;
14167 emit_insn (pat);
14168 return target;
14169
14170 case IX86_BUILTIN_MASKMOVQ:
14171 case IX86_BUILTIN_MASKMOVDQU:
14172 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14173 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14174 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14175 : CODE_FOR_sse2_maskmovdqu));
14176 /* Note the arg order is different from the operand order. */
14177 arg1 = TREE_VALUE (arglist);
14178 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14179 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14180 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14181 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14182 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14183 mode0 = insn_data[icode].operand[0].mode;
14184 mode1 = insn_data[icode].operand[1].mode;
14185 mode2 = insn_data[icode].operand[2].mode;
14186
14187 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14188 op0 = copy_to_mode_reg (mode0, op0);
14189 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14190 op1 = copy_to_mode_reg (mode1, op1);
14191 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14192 op2 = copy_to_mode_reg (mode2, op2);
14193 pat = GEN_FCN (icode) (op0, op1, op2);
14194 if (! pat)
14195 return 0;
14196 emit_insn (pat);
14197 return 0;
14198
14199 case IX86_BUILTIN_SQRTSS:
14200 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14201 case IX86_BUILTIN_RSQRTSS:
14202 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14203 case IX86_BUILTIN_RCPSS:
14204 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14205
14206 case IX86_BUILTIN_LOADAPS:
14207 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14208
14209 case IX86_BUILTIN_LOADUPS:
14210 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14211
14212 case IX86_BUILTIN_STOREAPS:
14213 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14214
14215 case IX86_BUILTIN_STOREUPS:
14216 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14217
14218 case IX86_BUILTIN_LOADSS:
14219 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14220
14221 case IX86_BUILTIN_STORESS:
14222 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14223
14224 case IX86_BUILTIN_LOADHPS:
14225 case IX86_BUILTIN_LOADLPS:
14226 case IX86_BUILTIN_LOADHPD:
14227 case IX86_BUILTIN_LOADLPD:
14228 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14229 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14230 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14231 : CODE_FOR_sse2_movsd);
14232 arg0 = TREE_VALUE (arglist);
14233 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14234 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14235 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14236 tmode = insn_data[icode].operand[0].mode;
14237 mode0 = insn_data[icode].operand[1].mode;
14238 mode1 = insn_data[icode].operand[2].mode;
14239
14240 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14241 op0 = copy_to_mode_reg (mode0, op0);
14242 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14243 if (target == 0
14244 || GET_MODE (target) != tmode
14245 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14246 target = gen_reg_rtx (tmode);
14247 pat = GEN_FCN (icode) (target, op0, op1);
14248 if (! pat)
14249 return 0;
14250 emit_insn (pat);
14251 return target;
14252
14253 case IX86_BUILTIN_STOREHPS:
14254 case IX86_BUILTIN_STORELPS:
14255 case IX86_BUILTIN_STOREHPD:
14256 case IX86_BUILTIN_STORELPD:
14257 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14258 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14259 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14260 : CODE_FOR_sse2_movsd);
14261 arg0 = TREE_VALUE (arglist);
14262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14263 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14264 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14265 mode0 = insn_data[icode].operand[1].mode;
14266 mode1 = insn_data[icode].operand[2].mode;
14267
14268 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14269 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14270 op1 = copy_to_mode_reg (mode1, op1);
14271
14272 pat = GEN_FCN (icode) (op0, op0, op1);
14273 if (! pat)
14274 return 0;
14275 emit_insn (pat);
14276 return 0;
14277
14278 case IX86_BUILTIN_MOVNTPS:
14279 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14280 case IX86_BUILTIN_MOVNTQ:
14281 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14282
14283 case IX86_BUILTIN_LDMXCSR:
14284 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14285 target = assign_386_stack_local (SImode, 0);
14286 emit_move_insn (target, op0);
14287 emit_insn (gen_ldmxcsr (target));
14288 return 0;
14289
14290 case IX86_BUILTIN_STMXCSR:
14291 target = assign_386_stack_local (SImode, 0);
14292 emit_insn (gen_stmxcsr (target));
14293 return copy_to_mode_reg (SImode, target);
14294
14295 case IX86_BUILTIN_SHUFPS:
14296 case IX86_BUILTIN_SHUFPD:
14297 icode = (fcode == IX86_BUILTIN_SHUFPS
14298 ? CODE_FOR_sse_shufps
14299 : CODE_FOR_sse2_shufpd);
14300 arg0 = TREE_VALUE (arglist);
14301 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14302 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14303 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14304 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14305 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14306 tmode = insn_data[icode].operand[0].mode;
14307 mode0 = insn_data[icode].operand[1].mode;
14308 mode1 = insn_data[icode].operand[2].mode;
14309 mode2 = insn_data[icode].operand[3].mode;
14310
14311 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14312 op0 = copy_to_mode_reg (mode0, op0);
14313 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14314 op1 = copy_to_mode_reg (mode1, op1);
14315 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14316 {
14317 /* @@@ better error message */
14318 error ("mask must be an immediate");
14319 return gen_reg_rtx (tmode);
14320 }
14321 if (target == 0
14322 || GET_MODE (target) != tmode
14323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14324 target = gen_reg_rtx (tmode);
14325 pat = GEN_FCN (icode) (target, op0, op1, op2);
14326 if (! pat)
14327 return 0;
14328 emit_insn (pat);
14329 return target;
14330
14331 case IX86_BUILTIN_PSHUFW:
14332 case IX86_BUILTIN_PSHUFD:
14333 case IX86_BUILTIN_PSHUFHW:
14334 case IX86_BUILTIN_PSHUFLW:
14335 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14336 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14337 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14338 : CODE_FOR_mmx_pshufw);
14339 arg0 = TREE_VALUE (arglist);
14340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14341 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14342 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14343 tmode = insn_data[icode].operand[0].mode;
14344 mode1 = insn_data[icode].operand[1].mode;
14345 mode2 = insn_data[icode].operand[2].mode;
14346
14347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14348 op0 = copy_to_mode_reg (mode1, op0);
14349 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14350 {
14351 /* @@@ better error message */
14352 error ("mask must be an immediate");
14353 return const0_rtx;
14354 }
14355 if (target == 0
14356 || GET_MODE (target) != tmode
14357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14358 target = gen_reg_rtx (tmode);
14359 pat = GEN_FCN (icode) (target, op0, op1);
14360 if (! pat)
14361 return 0;
14362 emit_insn (pat);
14363 return target;
14364
14365 case IX86_BUILTIN_PSLLDQI128:
14366 case IX86_BUILTIN_PSRLDQI128:
14367 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14368 : CODE_FOR_sse2_lshrti3);
14369 arg0 = TREE_VALUE (arglist);
14370 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14371 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14372 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14373 tmode = insn_data[icode].operand[0].mode;
14374 mode1 = insn_data[icode].operand[1].mode;
14375 mode2 = insn_data[icode].operand[2].mode;
14376
14377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14378 {
14379 op0 = copy_to_reg (op0);
14380 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14381 }
14382 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14383 {
14384 error ("shift must be an immediate");
14385 return const0_rtx;
14386 }
14387 target = gen_reg_rtx (V2DImode);
14388 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14389 if (! pat)
14390 return 0;
14391 emit_insn (pat);
14392 return target;
14393
14394 case IX86_BUILTIN_FEMMS:
14395 emit_insn (gen_femms ());
14396 return NULL_RTX;
14397
14398 case IX86_BUILTIN_PAVGUSB:
14399 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14400
14401 case IX86_BUILTIN_PF2ID:
14402 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14403
14404 case IX86_BUILTIN_PFACC:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14406
14407 case IX86_BUILTIN_PFADD:
14408 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14409
14410 case IX86_BUILTIN_PFCMPEQ:
14411 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14412
14413 case IX86_BUILTIN_PFCMPGE:
14414 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14415
14416 case IX86_BUILTIN_PFCMPGT:
14417 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14418
14419 case IX86_BUILTIN_PFMAX:
14420 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14421
14422 case IX86_BUILTIN_PFMIN:
14423 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14424
14425 case IX86_BUILTIN_PFMUL:
14426 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14427
14428 case IX86_BUILTIN_PFRCP:
14429 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14430
14431 case IX86_BUILTIN_PFRCPIT1:
14432 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14433
14434 case IX86_BUILTIN_PFRCPIT2:
14435 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14436
14437 case IX86_BUILTIN_PFRSQIT1:
14438 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14439
14440 case IX86_BUILTIN_PFRSQRT:
14441 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14442
14443 case IX86_BUILTIN_PFSUB:
14444 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14445
14446 case IX86_BUILTIN_PFSUBR:
14447 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14448
14449 case IX86_BUILTIN_PI2FD:
14450 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14451
14452 case IX86_BUILTIN_PMULHRW:
14453 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14454
14455 case IX86_BUILTIN_PF2IW:
14456 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14457
14458 case IX86_BUILTIN_PFNACC:
14459 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14460
14461 case IX86_BUILTIN_PFPNACC:
14462 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14463
14464 case IX86_BUILTIN_PI2FW:
14465 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14466
14467 case IX86_BUILTIN_PSWAPDSI:
14468 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14469
14470 case IX86_BUILTIN_PSWAPDSF:
14471 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14472
14473 case IX86_BUILTIN_SSE_ZERO:
14474 target = gen_reg_rtx (V4SFmode);
14475 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14476 return target;
14477
14478 case IX86_BUILTIN_MMX_ZERO:
14479 target = gen_reg_rtx (DImode);
14480 emit_insn (gen_mmx_clrdi (target));
14481 return target;
14482
14483 case IX86_BUILTIN_CLRTI:
14484 target = gen_reg_rtx (V2DImode);
14485 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14486 return target;
14487
14488
14489 case IX86_BUILTIN_SQRTSD:
14490 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14491 case IX86_BUILTIN_LOADAPD:
14492 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14493 case IX86_BUILTIN_LOADUPD:
14494 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14495
14496 case IX86_BUILTIN_STOREAPD:
14497 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14498 case IX86_BUILTIN_STOREUPD:
14499 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14500
14501 case IX86_BUILTIN_LOADSD:
14502 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14503
14504 case IX86_BUILTIN_STORESD:
14505 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14506
14507 case IX86_BUILTIN_SETPD1:
14508 target = assign_386_stack_local (DFmode, 0);
14509 arg0 = TREE_VALUE (arglist);
14510 emit_move_insn (adjust_address (target, DFmode, 0),
14511 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14512 op0 = gen_reg_rtx (V2DFmode);
14513 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14514 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14515 return op0;
14516
14517 case IX86_BUILTIN_SETPD:
14518 target = assign_386_stack_local (V2DFmode, 0);
14519 arg0 = TREE_VALUE (arglist);
14520 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14521 emit_move_insn (adjust_address (target, DFmode, 0),
14522 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14523 emit_move_insn (adjust_address (target, DFmode, 8),
14524 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14525 op0 = gen_reg_rtx (V2DFmode);
14526 emit_insn (gen_sse2_movapd (op0, target));
14527 return op0;
14528
14529 case IX86_BUILTIN_LOADRPD:
14530 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14531 gen_reg_rtx (V2DFmode), 1);
14532 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14533 return target;
14534
14535 case IX86_BUILTIN_LOADPD1:
14536 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14537 gen_reg_rtx (V2DFmode), 1);
14538 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14539 return target;
14540
14541 case IX86_BUILTIN_STOREPD1:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14543 case IX86_BUILTIN_STORERPD:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14545
14546 case IX86_BUILTIN_CLRPD:
14547 target = gen_reg_rtx (V2DFmode);
14548 emit_insn (gen_sse_clrv2df (target));
14549 return target;
14550
14551 case IX86_BUILTIN_MFENCE:
14552 emit_insn (gen_sse2_mfence ());
14553 return 0;
14554 case IX86_BUILTIN_LFENCE:
14555 emit_insn (gen_sse2_lfence ());
14556 return 0;
14557
14558 case IX86_BUILTIN_CLFLUSH:
14559 arg0 = TREE_VALUE (arglist);
14560 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14561 icode = CODE_FOR_sse2_clflush;
14562 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14563 op0 = copy_to_mode_reg (Pmode, op0);
14564
14565 emit_insn (gen_sse2_clflush (op0));
14566 return 0;
14567
14568 case IX86_BUILTIN_MOVNTPD:
14569 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14570 case IX86_BUILTIN_MOVNTDQ:
14571 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14572 case IX86_BUILTIN_MOVNTI:
14573 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14574
14575 case IX86_BUILTIN_LOADDQA:
14576 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14577 case IX86_BUILTIN_LOADDQU:
14578 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14579 case IX86_BUILTIN_LOADD:
14580 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14581
14582 case IX86_BUILTIN_STOREDQA:
14583 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14584 case IX86_BUILTIN_STOREDQU:
14585 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14586 case IX86_BUILTIN_STORED:
14587 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14588
14589 case IX86_BUILTIN_MONITOR:
14590 arg0 = TREE_VALUE (arglist);
14591 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14592 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14593 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14594 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14595 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14596 if (!REG_P (op0))
14597 op0 = copy_to_mode_reg (SImode, op0);
14598 if (!REG_P (op1))
14599 op1 = copy_to_mode_reg (SImode, op1);
14600 if (!REG_P (op2))
14601 op2 = copy_to_mode_reg (SImode, op2);
14602 emit_insn (gen_monitor (op0, op1, op2));
14603 return 0;
14604
14605 case IX86_BUILTIN_MWAIT:
14606 arg0 = TREE_VALUE (arglist);
14607 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14608 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14609 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14610 if (!REG_P (op0))
14611 op0 = copy_to_mode_reg (SImode, op0);
14612 if (!REG_P (op1))
14613 op1 = copy_to_mode_reg (SImode, op1);
14614 emit_insn (gen_mwait (op0, op1));
14615 return 0;
14616
14617 case IX86_BUILTIN_LOADDDUP:
14618 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14619
14620 case IX86_BUILTIN_LDDQU:
14621 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14622 1);
14623
14624 default:
14625 break;
14626 }
14627
14628 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14629 if (d->code == fcode)
14630 {
14631 /* Compares are treated specially. */
14632 if (d->icode == CODE_FOR_maskcmpv4sf3
14633 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14634 || d->icode == CODE_FOR_maskncmpv4sf3
14635 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14636 || d->icode == CODE_FOR_maskcmpv2df3
14637 || d->icode == CODE_FOR_vmmaskcmpv2df3
14638 || d->icode == CODE_FOR_maskncmpv2df3
14639 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14640 return ix86_expand_sse_compare (d, arglist, target);
14641
14642 return ix86_expand_binop_builtin (d->icode, arglist, target);
14643 }
14644
14645 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14646 if (d->code == fcode)
14647 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14648
14649 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14650 if (d->code == fcode)
14651 return ix86_expand_sse_comi (d, arglist, target);
14652
14653 /* @@@ Should really do something sensible here. */
14654 return 0;
14655 }
14656
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  /* Valid only after reload; before that assign_stack_local works.  */
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      /* The red zone below the stack pointer is guaranteed writable, so
	 store there without moving the stack pointer at all.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit, no red zone: push the value as one 64-bit word.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Widen to DImode so a single push suffices.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push the value in (at most two) 32-bit pieces.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Split into low/high SImode halves; push high word first so
	       the value ends up little-endian contiguous on the stack.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      /* The value now sits at the (decremented) stack pointer; the slot
	 is released later by ix86_free_from_memory.  */
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
14736
14737 /* Free operand from the memory. */
14738 void
ix86_free_from_memory(enum machine_mode mode)14739 ix86_free_from_memory (enum machine_mode mode)
14740 {
14741 if (!TARGET_RED_ZONE)
14742 {
14743 int size;
14744
14745 if (mode == DImode || TARGET_64BIT)
14746 size = 8;
14747 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14748 size = 2;
14749 else
14750 size = 4;
14751 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14752 to pop or add instruction if registers are available. */
14753 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14754 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14755 GEN_INT (size))));
14756 }
14757 }
14758
14759 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14760 QImode must go into class Q_REGS.
14761 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14762 movdf to do mem-to-mem moves through integer regs. */
14763 enum reg_class
ix86_preferred_reload_class(rtx x,enum reg_class class)14764 ix86_preferred_reload_class (rtx x, enum reg_class class)
14765 {
14766 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14767 return NO_REGS;
14768 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14769 {
14770 /* SSE can't load any constant directly yet. */
14771 if (SSE_CLASS_P (class))
14772 return NO_REGS;
14773 /* Floats can load 0 and 1. */
14774 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14775 {
14776 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14777 if (MAYBE_SSE_CLASS_P (class))
14778 return (reg_class_subset_p (class, GENERAL_REGS)
14779 ? GENERAL_REGS : FLOAT_REGS);
14780 else
14781 return class;
14782 }
14783 /* General regs can load everything. */
14784 if (reg_class_subset_p (class, GENERAL_REGS))
14785 return GENERAL_REGS;
14786 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14787 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14788 return NO_REGS;
14789 }
14790 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14791 return NO_REGS;
14792 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14793 return Q_REGS;
14794 return class;
14795 }
14796
14797 /* If we are copying between general and FP registers, we need a memory
14798 location. The same is true for SSE and MMX registers.
14799
14800 The macro can't work reliably when one of the CLASSES is class containing
14801 registers from multiple units (SSE, MMX, integer). We avoid this by never
14802 combining those units in single alternative in the machine description.
14803 Ensure that this constraint holds to avoid unexpected surprises.
14804
14805 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14806 enforce these sanity checks. */
14807 int
ix86_secondary_memory_needed(enum reg_class class1,enum reg_class class2,enum machine_mode mode,int strict)14808 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14809 enum machine_mode mode, int strict)
14810 {
14811 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14812 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14813 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14814 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14815 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14816 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14817 {
14818 if (strict)
14819 abort ();
14820 else
14821 return 1;
14822 }
14823 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14824 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14825 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14826 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14827 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14828 }
14829 /* Return the cost of moving data from a register in class CLASS1 to
14830 one in class CLASS2.
14831
14832 It is not required that the cost always equal 2 when FROM is the same as TO;
14833 on some machines it is expensive to move between registers if they are not
14834 general registers. */
14835 int
ix86_register_move_cost(enum machine_mode mode,enum reg_class class1,enum reg_class class2)14836 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14837 enum reg_class class2)
14838 {
14839 /* In case we require secondary memory, compute cost of the store followed
14840 by load. In order to avoid bad register allocation choices, we need
14841 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14842
14843 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14844 {
14845 int cost = 1;
14846
14847 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14848 MEMORY_MOVE_COST (mode, class1, 1));
14849 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14850 MEMORY_MOVE_COST (mode, class2, 1));
14851
14852 /* In case of copying from general_purpose_register we may emit multiple
14853 stores followed by single load causing memory size mismatch stall.
14854 Count this as arbitrarily high cost of 20. */
14855 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14856 cost += 20;
14857
14858 /* In the case of FP/MMX moves, the registers actually overlap, and we
14859 have to switch modes in order to treat them differently. */
14860 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14861 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14862 cost += 20;
14863
14864 return cost;
14865 }
14866
14867 /* Moves between SSE/MMX and integer unit are expensive. */
14868 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14869 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14870 return ix86_cost->mmxsse_to_integer;
14871 if (MAYBE_FLOAT_CLASS_P (class1))
14872 return ix86_cost->fp_move;
14873 if (MAYBE_SSE_CLASS_P (class1))
14874 return ix86_cost->sse_move;
14875 if (MAYBE_MMX_CLASS_P (class1))
14876 return ix86_cost->mmx_move;
14877 return 2;
14878 }
14879
14880 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14881 int
ix86_hard_regno_mode_ok(int regno,enum machine_mode mode)14882 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14883 {
14884 /* Flags and only flags can only hold CCmode values. */
14885 if (CC_REGNO_P (regno))
14886 return GET_MODE_CLASS (mode) == MODE_CC;
14887 if (GET_MODE_CLASS (mode) == MODE_CC
14888 || GET_MODE_CLASS (mode) == MODE_RANDOM
14889 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14890 return 0;
14891 if (FP_REGNO_P (regno))
14892 return VALID_FP_MODE_P (mode);
14893 if (SSE_REGNO_P (regno))
14894 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14895 if (MMX_REGNO_P (regno))
14896 return (TARGET_MMX
14897 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14898 /* We handle both integer and floats in the general purpose registers.
14899 In future we should be able to handle vector modes as well. */
14900 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14901 return 0;
14902 /* Take care for QImode values - they can be in non-QI regs, but then
14903 they do cause partial register stalls. */
14904 if (regno < 4 || mode != QImode || TARGET_64BIT)
14905 return 1;
14906 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14907 }
14908
14909 /* Return the cost of moving data of mode M between a
14910 register and memory. A value of 2 is the default; this cost is
14911 relative to those in `REGISTER_MOVE_COST'.
14912
14913 If moving between registers and memory is more expensive than
14914 between two registers, you should define this macro to express the
14915 relative cost.
14916
14917 Model also increased moving costs of QImode registers in non
14918 Q_REGS classes.
14919 */
14920 int
ix86_memory_move_cost(enum machine_mode mode,enum reg_class class,int in)14921 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14922 {
14923 if (FLOAT_CLASS_P (class))
14924 {
14925 int index;
14926 switch (mode)
14927 {
14928 case SFmode:
14929 index = 0;
14930 break;
14931 case DFmode:
14932 index = 1;
14933 break;
14934 case XFmode:
14935 index = 2;
14936 break;
14937 default:
14938 return 100;
14939 }
14940 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14941 }
14942 if (SSE_CLASS_P (class))
14943 {
14944 int index;
14945 switch (GET_MODE_SIZE (mode))
14946 {
14947 case 4:
14948 index = 0;
14949 break;
14950 case 8:
14951 index = 1;
14952 break;
14953 case 16:
14954 index = 2;
14955 break;
14956 default:
14957 return 100;
14958 }
14959 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14960 }
14961 if (MMX_CLASS_P (class))
14962 {
14963 int index;
14964 switch (GET_MODE_SIZE (mode))
14965 {
14966 case 4:
14967 index = 0;
14968 break;
14969 case 8:
14970 index = 1;
14971 break;
14972 default:
14973 return 100;
14974 }
14975 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14976 }
14977 switch (GET_MODE_SIZE (mode))
14978 {
14979 case 1:
14980 if (in)
14981 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14982 : ix86_cost->movzbl_load);
14983 else
14984 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14985 : ix86_cost->int_store[0] + 4);
14986 break;
14987 case 2:
14988 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14989 default:
14990 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14991 if (mode == TFmode)
14992 mode = XFmode;
14993 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14994 * (((int) GET_MODE_SIZE (mode)
14995 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14996 }
14997 }
14998
14999 /* Compute a (partial) cost for rtx X. Return true if the complete
15000 cost has been computed, and false if subexpressions should be
15001 scanned. In either case, *TOTAL contains the cost result. */
15002
15003 static bool
ix86_rtx_costs(rtx x,int code,int outer_code,int * total)15004 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15005 {
15006 enum machine_mode mode = GET_MODE (x);
15007
15008 switch (code)
15009 {
15010 case CONST_INT:
15011 case CONST:
15012 case LABEL_REF:
15013 case SYMBOL_REF:
15014 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15015 *total = 3;
15016 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15017 *total = 2;
15018 else if (flag_pic && SYMBOLIC_CONST (x)
15019 && (!TARGET_64BIT
15020 || (!GET_CODE (x) != LABEL_REF
15021 && (GET_CODE (x) != SYMBOL_REF
15022 || !SYMBOL_REF_LOCAL_P (x)))))
15023 *total = 1;
15024 else
15025 *total = 0;
15026 return true;
15027
15028 case CONST_DOUBLE:
15029 if (mode == VOIDmode)
15030 *total = 0;
15031 else
15032 switch (standard_80387_constant_p (x))
15033 {
15034 case 1: /* 0.0 */
15035 *total = 1;
15036 break;
15037 default: /* Other constants */
15038 *total = 2;
15039 break;
15040 case 0:
15041 case -1:
15042 /* Start with (MEM (SYMBOL_REF)), since that's where
15043 it'll probably end up. Add a penalty for size. */
15044 *total = (COSTS_N_INSNS (1)
15045 + (flag_pic != 0 && !TARGET_64BIT)
15046 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15047 break;
15048 }
15049 return true;
15050
15051 case ZERO_EXTEND:
15052 /* The zero extensions is often completely free on x86_64, so make
15053 it as cheap as possible. */
15054 if (TARGET_64BIT && mode == DImode
15055 && GET_MODE (XEXP (x, 0)) == SImode)
15056 *total = 1;
15057 else if (TARGET_ZERO_EXTEND_WITH_AND)
15058 *total = COSTS_N_INSNS (ix86_cost->add);
15059 else
15060 *total = COSTS_N_INSNS (ix86_cost->movzx);
15061 return false;
15062
15063 case SIGN_EXTEND:
15064 *total = COSTS_N_INSNS (ix86_cost->movsx);
15065 return false;
15066
15067 case ASHIFT:
15068 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15069 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15070 {
15071 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15072 if (value == 1)
15073 {
15074 *total = COSTS_N_INSNS (ix86_cost->add);
15075 return false;
15076 }
15077 if ((value == 2 || value == 3)
15078 && !TARGET_DECOMPOSE_LEA
15079 && ix86_cost->lea <= ix86_cost->shift_const)
15080 {
15081 *total = COSTS_N_INSNS (ix86_cost->lea);
15082 return false;
15083 }
15084 }
15085 /* FALLTHRU */
15086
15087 case ROTATE:
15088 case ASHIFTRT:
15089 case LSHIFTRT:
15090 case ROTATERT:
15091 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15092 {
15093 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15094 {
15095 if (INTVAL (XEXP (x, 1)) > 32)
15096 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15097 else
15098 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15099 }
15100 else
15101 {
15102 if (GET_CODE (XEXP (x, 1)) == AND)
15103 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15104 else
15105 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15106 }
15107 }
15108 else
15109 {
15110 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15111 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15112 else
15113 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15114 }
15115 return false;
15116
15117 case MULT:
15118 if (FLOAT_MODE_P (mode))
15119 *total = COSTS_N_INSNS (ix86_cost->fmul);
15120 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15121 {
15122 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15123 int nbits;
15124
15125 for (nbits = 0; value != 0; value >>= 1)
15126 nbits++;
15127
15128 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15129 + nbits * ix86_cost->mult_bit);
15130 }
15131 else
15132 {
15133 /* This is arbitrary */
15134 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15135 + 7 * ix86_cost->mult_bit);
15136 }
15137 return false;
15138
15139 case DIV:
15140 case UDIV:
15141 case MOD:
15142 case UMOD:
15143 if (FLOAT_MODE_P (mode))
15144 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15145 else
15146 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15147 return false;
15148
15149 case PLUS:
15150 if (FLOAT_MODE_P (mode))
15151 *total = COSTS_N_INSNS (ix86_cost->fadd);
15152 else if (!TARGET_DECOMPOSE_LEA
15153 && GET_MODE_CLASS (mode) == MODE_INT
15154 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15155 {
15156 if (GET_CODE (XEXP (x, 0)) == PLUS
15157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15158 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15159 && CONSTANT_P (XEXP (x, 1)))
15160 {
15161 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15162 if (val == 2 || val == 4 || val == 8)
15163 {
15164 *total = COSTS_N_INSNS (ix86_cost->lea);
15165 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15166 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15167 outer_code);
15168 *total += rtx_cost (XEXP (x, 1), outer_code);
15169 return true;
15170 }
15171 }
15172 else if (GET_CODE (XEXP (x, 0)) == MULT
15173 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15174 {
15175 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15176 if (val == 2 || val == 4 || val == 8)
15177 {
15178 *total = COSTS_N_INSNS (ix86_cost->lea);
15179 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15180 *total += rtx_cost (XEXP (x, 1), outer_code);
15181 return true;
15182 }
15183 }
15184 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15185 {
15186 *total = COSTS_N_INSNS (ix86_cost->lea);
15187 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15188 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15189 *total += rtx_cost (XEXP (x, 1), outer_code);
15190 return true;
15191 }
15192 }
15193 /* FALLTHRU */
15194
15195 case MINUS:
15196 if (FLOAT_MODE_P (mode))
15197 {
15198 *total = COSTS_N_INSNS (ix86_cost->fadd);
15199 return false;
15200 }
15201 /* FALLTHRU */
15202
15203 case AND:
15204 case IOR:
15205 case XOR:
15206 if (!TARGET_64BIT && mode == DImode)
15207 {
15208 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15209 + (rtx_cost (XEXP (x, 0), outer_code)
15210 << (GET_MODE (XEXP (x, 0)) != DImode))
15211 + (rtx_cost (XEXP (x, 1), outer_code)
15212 << (GET_MODE (XEXP (x, 1)) != DImode)));
15213 return true;
15214 }
15215 /* FALLTHRU */
15216
15217 case NEG:
15218 if (FLOAT_MODE_P (mode))
15219 {
15220 *total = COSTS_N_INSNS (ix86_cost->fchs);
15221 return false;
15222 }
15223 /* FALLTHRU */
15224
15225 case NOT:
15226 if (!TARGET_64BIT && mode == DImode)
15227 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15228 else
15229 *total = COSTS_N_INSNS (ix86_cost->add);
15230 return false;
15231
15232 case FLOAT_EXTEND:
15233 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15234 *total = 0;
15235 return false;
15236
15237 case ABS:
15238 if (FLOAT_MODE_P (mode))
15239 *total = COSTS_N_INSNS (ix86_cost->fabs);
15240 return false;
15241
15242 case SQRT:
15243 if (FLOAT_MODE_P (mode))
15244 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15245 return false;
15246
15247 case UNSPEC:
15248 if (XINT (x, 1) == UNSPEC_TP)
15249 *total = 0;
15250 return false;
15251
15252 default:
15253 return false;
15254 }
15255 }
15256
15257 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15258 static void
ix86_svr3_asm_out_constructor(rtx symbol,int priority ATTRIBUTE_UNUSED)15259 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15260 {
15261 init_section ();
15262 fputs ("\tpushl $", asm_out_file);
15263 assemble_name (asm_out_file, XSTR (symbol, 0));
15264 fputc ('\n', asm_out_file);
15265 }
15266 #endif
15267
15268 #if TARGET_MACHO
15269
15270 static int current_machopic_label_num;
15271
/* Given a symbol name and its associated stub, write out the
   definition of the stub.

   FILE is the assembler output stream, SYMB the name of the symbol
   being called, STUB the name already chosen for its stub.  Emits
   three pieces: the stub itself, the binder helper used on the first
   call, and the lazy pointer slot that caches the resolved address.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pure (indirect) PIC code needs the pic-base-relative stub section;
     otherwise the plain stub section will do.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Materialize the pic base in %eax via call/pop, then jump
	 through the lazy pointer addressed relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder: push the address of the lazy pointer and enter
     dyld_stub_binding_helper, which resolves the symbol and rewrites
     the lazy pointer with the real target address.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer starts out pointing at the binder so the first
     call goes through dyld; afterwards it holds the real address.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
15329 #endif /* TARGET_MACHO */
15330
15331 /* Order the registers for register allocator. */
15332
15333 void
x86_order_regs_for_local_alloc(void)15334 x86_order_regs_for_local_alloc (void)
15335 {
15336 int pos = 0;
15337 int i;
15338
15339 /* First allocate the local general purpose registers. */
15340 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15341 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15342 reg_alloc_order [pos++] = i;
15343
15344 /* Global general purpose registers. */
15345 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15346 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15347 reg_alloc_order [pos++] = i;
15348
15349 /* x87 registers come first in case we are doing FP math
15350 using them. */
15351 if (!TARGET_SSE_MATH)
15352 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15353 reg_alloc_order [pos++] = i;
15354
15355 /* SSE registers. */
15356 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15357 reg_alloc_order [pos++] = i;
15358 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15359 reg_alloc_order [pos++] = i;
15360
15361 /* x87 registers. */
15362 if (TARGET_SSE_MATH)
15363 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15364 reg_alloc_order [pos++] = i;
15365
15366 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15367 reg_alloc_order [pos++] = i;
15368
15369 /* Initialize the rest of array as we do not allocate some registers
15370 at all. */
15371 while (pos < FIRST_PSEUDO_REGISTER)
15372 reg_alloc_order [pos++] = 0;
15373 }
15374
15375 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15376 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15377 #endif
15378
15379 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15380 struct attribute_spec.handler. */
15381 static tree
ix86_handle_struct_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)15382 ix86_handle_struct_attribute (tree *node, tree name,
15383 tree args ATTRIBUTE_UNUSED,
15384 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15385 {
15386 tree *type = NULL;
15387 if (DECL_P (*node))
15388 {
15389 if (TREE_CODE (*node) == TYPE_DECL)
15390 type = &TREE_TYPE (*node);
15391 }
15392 else
15393 type = node;
15394
15395 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15396 || TREE_CODE (*type) == UNION_TYPE)))
15397 {
15398 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15399 *no_add_attrs = true;
15400 }
15401
15402 else if ((is_attribute_p ("ms_struct", name)
15403 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15404 || ((is_attribute_p ("gcc_struct", name)
15405 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15406 {
15407 warning ("`%s' incompatible attribute ignored",
15408 IDENTIFIER_POINTER (name));
15409 *no_add_attrs = true;
15410 }
15411
15412 return NULL_TREE;
15413 }
15414
15415 static bool
ix86_ms_bitfield_layout_p(tree record_type)15416 ix86_ms_bitfield_layout_p (tree record_type)
15417 {
15418 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15419 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15420 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15421 }
15422
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.

   On 64-bit targets it is in the first integer argument register, or
   the second when a hidden aggregate-return pointer occupies the
   first.  On 32-bit targets it is in a register for non-variadic
   regparm functions, otherwise in the first stack slot after the
   return address (shifted one slot when the hidden aggregate-return
   pointer is passed first).  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* N is 1 when a hidden return-slot pointer takes the first
	 argument register, 0 otherwise.  */
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  A void_type_node terminator means a fixed arglist.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  /* fastcall puts the first argument in %ecx (regno 2) rather
	     than %eax (regno 0).  */
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Otherwise THIS lives on the stack: offset 4 past the return
     address, or 8 when the hidden aggregate-return pointer comes
     first.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
15462
15463 /* Determine whether x86_output_mi_thunk can succeed. */
15464
15465 static bool
x86_can_output_mi_thunk(tree thunk ATTRIBUTE_UNUSED,HOST_WIDE_INT delta ATTRIBUTE_UNUSED,HOST_WIDE_INT vcall_offset,tree function)15466 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15467 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15468 HOST_WIDE_INT vcall_offset, tree function)
15469 {
15470 /* 64-bit can handle anything. */
15471 if (TARGET_64BIT)
15472 return true;
15473
15474 /* For 32-bit, everything's fine if we have one free register. */
15475 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15476 return true;
15477
15478 /* Need a free register for vcall_offset. */
15479 if (vcall_offset)
15480 return false;
15481
15482 /* Need a free register for GOT references. */
15483 if (flag_pic && !(*targetm.binds_local_p) (function))
15484 return false;
15485
15486 /* Otherwise ok. */
15487 return true;
15488 }
15489
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  Ends with a tail
   jump to FUNCTION.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      /* DELTA does not fit an immediate operand; stage it
		 through the call-clobbered scratch register %r10.  */
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  /* Use a scratch register not occupied by argument passing:
	     normally %ecx, but %eax for fastcall since that passes its
	     first argument in %ecx.  */
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer: tmp = *this.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET does not fit a displacement; form the address
	     with %r11 as an index instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Finally, tail-jump to the target function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local functions must be reached through the GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* On Darwin, jump through the machopic stub.  */
	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	  {
	    /* Load the GOT pointer into %ecx (clobberable here) and
	       jump through the target's GOT entry.  */
	    tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	    output_set_got (tmp);

	    xops[1] = tmp;
	    output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	    output_asm_insn ("jmp\t{*}%1", xops);
	  }
    }
}
15626
15627 static void
x86_file_start(void)15628 x86_file_start (void)
15629 {
15630 default_file_start ();
15631 if (X86_FILE_START_VERSION_DIRECTIVE)
15632 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15633 if (X86_FILE_START_FLTUSED)
15634 fputs ("\t.global\t__fltused\n", asm_out_file);
15635 if (ix86_asm_dialect == ASM_INTEL)
15636 fputs ("\t.intel_syntax\n", asm_out_file);
15637 }
15638
15639 int
x86_field_alignment(tree field,int computed)15640 x86_field_alignment (tree field, int computed)
15641 {
15642 enum machine_mode mode;
15643 tree type = TREE_TYPE (field);
15644
15645 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15646 return computed;
15647 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15648 ? get_inner_array_type (type) : type);
15649 if (mode == DFmode || mode == DCmode
15650 || GET_MODE_CLASS (mode) == MODE_INT
15651 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15652 return MIN (32, computed);
15653 return computed;
15654 }
15655
15656 /* Output assembler code to FILE to increment profiler label # LABELNO
15657 for profiling a function entry. */
15658 void
x86_function_profiler(FILE * file,int labelno ATTRIBUTE_UNUSED)15659 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15660 {
15661 if (TARGET_64BIT)
15662 if (flag_pic)
15663 {
15664 #ifndef NO_PROFILE_COUNTERS
15665 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15666 #endif
15667 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15668 }
15669 else
15670 {
15671 #ifndef NO_PROFILE_COUNTERS
15672 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15673 #endif
15674 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15675 }
15676 else if (flag_pic)
15677 {
15678 #ifndef NO_PROFILE_COUNTERS
15679 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15680 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15681 #endif
15682 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15683 }
15684 else
15685 {
15686 #ifndef NO_PROFILE_COUNTERS
15687 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15688 PROFILE_COUNT_REGISTER);
15689 #endif
15690 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15691 }
15692 }
15693
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Returns a conservative lower bound, in bytes, on the
   encoded length of INSN.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  /* Notes, deleted insns and other inactive rtl occupy no code bytes.  */
  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignment directives we've emitted ourselves, and jump
     tables.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* At least one opcode byte in front of the address bytes; with no
     address information assume a two byte encoding.  */
  if (l)
    return 1+l;
  else
    return 2;
}
15739
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Walk the insn stream and insert alignment padding so no
   16-byte window holds four or more jumps/calls.  */

static void
k8_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;   /* Nonzero when START itself left the window as a jump.  */

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (rtl_dump_file)
	fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Count jumps (excluding jump tables) and calls.  */
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the interval from the front until it holds at most
	 three jumps besides INSN.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      if (njumps < 0)
	abort ();
      if (rtl_dump_file)
	fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
		INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  /* NOTE(review): the block comment above derives a maxskip of
	     17 - NBYTES + sizeof (INSN) but the code uses 15 - nbytes;
	     confirm which constant is intended before changing either.  */
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (rtl_dump_file)
	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
15803
/* Implement machine specific optimizations.
   At the moment we implement single transformation: AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_reorg (void)
{
  edge e;

  /* Only worthwhile when tuning for Athlon/K8 and optimizing for
     speed.  */
  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  /* Every predecessor of the exit block ends in (or falls into) a
     return.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      /* A label directly before the return means the return is a jump
	 target; penalize only if some non-fallthru edge actually uses
	 it with nonzero frequency.  */
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* A conditional jump or call directly before the return also
	     triggers the penalty.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      /* Swap the plain return for the padded long form.  */
      if (replace)
	{
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
  k8_avoid_jump_misspredicts ();
}
15857
15858 /* Return nonzero when QImode register that must be represented via REX prefix
15859 is used. */
15860 bool
x86_extended_QIreg_mentioned_p(rtx insn)15861 x86_extended_QIreg_mentioned_p (rtx insn)
15862 {
15863 int i;
15864 extract_insn_cached (insn);
15865 for (i = 0; i < recog_data.n_operands; i++)
15866 if (REG_P (recog_data.operand[i])
15867 && REGNO (recog_data.operand[i]) >= 4)
15868 return true;
15869 return false;
15870 }
15871
15872 /* Return nonzero when P points to register encoded via REX prefix.
15873 Called via for_each_rtx. */
15874 static int
extended_reg_mentioned_1(rtx * p,void * data ATTRIBUTE_UNUSED)15875 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15876 {
15877 unsigned int regno;
15878 if (!REG_P (*p))
15879 return 0;
15880 regno = REGNO (*p);
15881 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15882 }
15883
/* Return true when INSN mentions register that must be encoded using REX
   prefix.  Walks the whole pattern with for_each_rtx, using
   extended_reg_mentioned_1 as the per-rtx predicate.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
15891
15892 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15893 optabs would emit if we didn't have TFmode patterns. */
15894
15895 void
x86_emit_floatuns(rtx operands[2])15896 x86_emit_floatuns (rtx operands[2])
15897 {
15898 rtx neglab, donelab, i0, i1, f0, in, out;
15899 enum machine_mode mode, inmode;
15900
15901 inmode = GET_MODE (operands[1]);
15902 if (inmode != SImode
15903 && inmode != DImode)
15904 abort ();
15905
15906 out = operands[0];
15907 in = force_reg (inmode, operands[1]);
15908 mode = GET_MODE (out);
15909 neglab = gen_label_rtx ();
15910 donelab = gen_label_rtx ();
15911 i1 = gen_reg_rtx (Pmode);
15912 f0 = gen_reg_rtx (mode);
15913
15914 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15915
15916 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15917 emit_jump_insn (gen_jump (donelab));
15918 emit_barrier ();
15919
15920 emit_label (neglab);
15921
15922 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15923 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15924 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15925 expand_float (f0, i0, 0);
15926 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15927
15928 emit_label (donelab);
15929 }
15930
15931 /* Return if we do not know how to pass TYPE solely in registers. */
15932 bool
ix86_must_pass_in_stack(enum machine_mode mode,tree type)15933 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15934 {
15935 if (default_must_pass_in_stack (mode, type))
15936 return true;
15937 return (!TARGET_64BIT && type && mode == TImode);
15938 }
15939
/* Initialize vector TARGET from the element values in VALS.

   All-constant vectors are loaded from the constant pool; vectors
   whose only non-constant element is the first are loaded from the
   pool and patched with a scalar move; everything else is built with
   unpack (interleave) sequences.  Only V2DFmode and V4SFmode reach
   the general case (others abort).  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Find the last non-constant element; I < 0 means all constant.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Zero the first slot so the remaining constant vector can come
	 from the pool, then overwrite that slot with the live value.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	/* One interleave of the two elements' low halves builds the
	   vector.  */
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	/* Two levels of unpcklps interleaving combine the four
	   elements: (1,3) and (0,2) first, then those two results.  */
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
16027
16028 #include "gt-i386.h"
16029