1 /*
2 * Copyright (c) 2005 - 2010, Nils R. Weller
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 *
27 * x86 backend
28 * (XXX much of this stuff can probably be adapted to different
29 * architectures)
30 */
31 #include "backend.h"
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <stdarg.h>
35 #include <assert.h>
36 #include <string.h>
37 #include <ctype.h>
38 #include <limits.h>
39 #include "scope.h"
40 #include "decl.h"
41 #include "type.h"
42 #include "decl.h"
43 #include "icode.h"
44 #include "functions.h"
45 #include "control.h"
46 #include "typemap.h"
47 #include "debug.h"
48 #include "token.h"
49 #include "error.h"
50 #include "functions.h"
51 #include "symlist.h"
52 #include "icode.h"
53 #include "stack.h"
54 #include "reg.h"
55 #include "subexpr.h"
56 #include "expr.h"
57 #include "features.h"
58 #include "x87_nonsense.h"
59 /* #include "x86_emit_gas.h" */
60 #include "inlineasm.h"
61 #include "x86_emit_nasm.h"
62 #include "x86_emit_gas.h"
63 #include "amd64_gen.h"
64 #include "amd64_emit_gas.h" /* XXX for SSE */
65 #include "cc1_main.h"
66 #include "n_libc.h"
67
static FILE *out;		/* Assembly output stream (set by init()) */
static struct scope *tunit;	/* Translation unit scope (set by init()) */
static int use_nasm = 1; /* XXX */
struct emitter_x86 *emit_x86;	/* Active x86-specific emitter (nasm or gas flavor) */

#if ! REMOVE_FLOATBUF
/* Scratch vreg buffer — presumably for x87 load/store conversions; confirm */
struct vreg floatbuf;
#endif

/* vregs holding the new and saved x87 FPU control word */
struct vreg x87cw_new;
struct vreg x87cw_old;

/* Flags and stack slots for callee-saved GPRs spilled in the prologue */
static int ebx_saved;
static int esi_saved;
static int edi_saved;
struct vreg csave_ebx;
struct vreg csave_esi;
struct vreg csave_edi;
/* Stack slot holding the return address copy when -stackprotect is used */
struct stack_block *saved_ret_addr;


/* Number of allocatable 32bit GPRs: eax, ebx, ecx, edx, esi, edi */
#define N_GPRS 6

struct reg x86_gprs[7];			/* 32bit GPRs; [6] is a NULL-name sentinel */
static struct reg x86_16bit_gprs[6];	/* ax, bx, cx, dx, si, di */
static struct reg x86_8bit_gprs[8];	/* ah, al, bh, bl, ch, cl, dh, dl */
static struct reg x86_esp;
static struct reg x86_ebp;
static struct reg x86_esp_16bit;
static struct reg x86_ebp_16bit;

struct reg x86_fprs[8];			/* x87 stack registers st0 - st7 */

/* 02/09/08: Moved to x86 backend from AMD64 (for OSX) */
struct reg x86_sse_regs[8];		/* SSE registers xmm0 - xmm7 */

/* No SSE register is callee-saved on x86 */
int sse_csave_map[] = {
	0, 0, 0, 0, 0, 0, 0, 0
};
107
108
109
110 static void
init_regs(void)111 init_regs(void) {
112 static struct reg nullreg;
113 int i;
114 static const struct {
115 struct reg *regs;
116 char *names[9];
117 } rps[] = {
118 { x86_gprs,
119 {"eax","ebx","ecx","edx","esi","edi",0,0,0}},
120 { x86_16bit_gprs,
121 {"ax","bx","cx","dx","si","di",0,0,0 }},
122 { x86_8bit_gprs,
123 {"ah","al","bh","bl","ch","cl","dh","dl",NULL}},
124 { x86_fprs,
125 { "st0", "st1", "st2", "st3", "st4", "st5",
126 "st6", "st7", NULL}},
127 { NULL, {0,0,0,0,0,0,0,0,0} }
128 };
129
130 for (i = 0; rps[i].regs != NULL; ++i) {
131 int j;
132 int size = i == 0? 4: i == 1? 2: 1;
133 int type;
134
135 if (rps[i].regs == x86_fprs) {
136 type = REG_FPR;
137 /*
138 * size was for some reason set to 8, with a comment
139 * saying it should be 10, which is factually correct
140 * because those really are 10 bytes big.. but we use
141 * 12 bytes for alignment, and that seems to work
142 */
143 size = 12;
144 } else {
145 type = REG_GPR;
146 }
147
148 nullreg.type = type;
149 nullreg.allocatable = 1;
150 for (j = 0; rps[i].names[j] != NULL; ++j) {
151 rps[i].regs[j] = nullreg;
152 rps[i].regs[j].size = size;
153 rps[i].regs[j].name = rps[i].names[j];
154 }
155 }
156
157 x86_gprs[6].name = NULL;
158
159 for (i = 0; i < 8; ++i) {
160 static char *names[] = {
161 "xmm0", "xmm1", "xmm2", "xmm3",
162 "xmm4", "xmm5", "xmm6", "xmm7",
163 };
164 x86_sse_regs[i].name = names[i];
165 x86_sse_regs[i].type = REG_FPR;
166 x86_sse_regs[i].size = 8; /* XXX */
167 x86_sse_regs[i].allocatable = 1;
168 }
169
170 }
171
172
/*
 * Reference counting for "optimizing" register allocation is not
 * implemented: every register reports zero references.
 */
static int
calc_total_refs(struct reg *unused) {
	(void) unused;
	return 0;
}
178
179 static void
do_invalidate(struct reg * r,struct icode_list * il,int save)180 do_invalidate(struct reg *r, struct icode_list *il, int save) {
181 #if FEAT_DEBUG_DUMP_BOGUS_STORES
182 struct icode_instr *tail = il? il->tail: NULL;
183 #endif
184 if (curfunc->pic_initialized
185 && r == &x86_gprs[1]) {
186 /* ebx is used for PIC access */
187 return;
188 }
189
190 free_preg(r, il, 1, save);
191 #if FEAT_DEBUG_DUMP_BOGUS_STORES
192 if (backend_warn_inv && tail != NULL && tail != il->tail) {
193 icode_make_debug(il, "previous save(s) may be unneeded");
194 }
195 #endif
196 }
197
198 /*
199 * XXX this shouldn't be saving esi/edi/ebx when we're invalidating
200 * because of a function call
201 */
202 static void
invalidate_gprs(struct icode_list * il,int saveregs,int for_fcall)203 invalidate_gprs(struct icode_list *il, int saveregs, int for_fcall) {
204 int i;
205
206 (void) for_fcall;
207 for (i = 0; i < N_GPRS; ++i) {
208 do_invalidate(&x86_gprs[i], il, saveregs);
209 }
210
211 /*
212 * 07/26/12: Dropped incomplete SSE usage check (could yield compiler
213 * crashes)
214 */
215 for (i = 0; i < 8; ++i) {
216 do_invalidate(&x86_sse_regs[i], il, saveregs);
217 }
218 }
219
220 /*
221 * AMD64 & x86
222 */
223 static void
invalidate_except(struct icode_list * il,int save,int for_fcall,...)224 invalidate_except(struct icode_list *il, int save, int for_fcall, ...) {
225 int i;
226 struct reg *except[8];
227 static struct reg *gprset;
228 struct reg *arg;
229 va_list va;
230
231 if (gprset == NULL) {
232 if (backend->arch == ARCH_X86) {
233 gprset = x86_gprs;
234 } else {
235 /* AMD64 */
236 gprset = amd64_x86_gprs;
237 }
238 }
239
240 va_start(va, for_fcall);
241 for (i = 0; (arg = va_arg(va, struct reg *)) != NULL; ++i) {
242 except[i] = arg;
243 }
244 va_end(va);
245 except[i] = NULL;
246
247 for (i = 0; i < N_GPRS; ++i) {
248 int j;
249
250 for (j = 0; except[j] != NULL; ++j) {
251 if (is_member_of_reg(&gprset[i], except[j])) {
252 /*
253 * XXX perhaps we would want to save
254 * part of a GPR in some cases.
255 */
256 break;
257 }
258 }
259 if (except[j] != NULL) {
260 continue;
261 }
262 do_invalidate(&gprset[i], il, save);
263 }
264
265 if (backend->abi == ARCH_AMD64) {
266 for (i = 1; i < 16; ++i) {
267 int j;
268
269 for (j = 0; except[j] != NULL; ++j) {
270 if (&amd64_gprs[i] == except[j]) {
271 break;
272 }
273 }
274 if (except[j] == NULL) {
275 do_invalidate(&amd64_gprs[i], il, save);
276 }
277 }
278 }
279 }
280
281 static int is_noesiedi;
282
283
/*
 * Allocate a 16bit or 32bit GPR (size = 2 or 4). First looks for an
 * unused allocatable register; failing that, spills the contents of an
 * in-use register ("save") and hands that out. Records callee-save
 * usage in the function and clears the vregs of all sub-registers of
 * the result. Returns NULL if nothing can be allocated at all.
 */
static struct reg *
alloc_16_or_32bit_reg(
	struct function *f,
	int size,
	struct icode_list *il,
	struct reg *dontwipe) {

	int i;
	int save = 0;
	int least = INT_MAX;
	int least_idx = -1;
	static int last_alloc;		/* index of previously handed-out GPR */
	struct reg *ret = NULL;
	struct reg *aset;		/* arch register set (x86 or AMD64) */
	struct reg *topreg = NULL;	/* enclosing AMD64 register of ret */
	int old_relax = backend->relax_alloc_gpr_order;

	/*
	 * 05/31/09: Now we always relax the GPR order when allocating
	 * non-ESI/non-EDI registers! This means that we allow this call
	 * to allocate the same register as the last successful call.
	 * This is probably a necessity when generating PIC code, because
	 * that limits us to only 3 registers that are usable for 8bit
	 * and 16bit allocations (ebx is taken as PIC pointer, so only
	 * eax, ecx, edx are allowed).
	 *
	 * If we then have a construct such as
	 *
	 *    ptr->member |= ptr2->value;
	 *
	 * ... then that will easily cause two registers to become
	 * unallocatable to hold the pointers, and with PIC ebx is
	 * taken anyway, so there is only one potential register left
	 * which we don't want to filter through the allocation ordering
	 * constraint
	 *
	 * Relaxing the constraint may work if we perform a sequence of:
	 *
	 *      - allocating register N
	 *      - looking to allocate register M, but finding that it is
	 *        already loaded with our desired value, so it can be
	 *        skipped
	 *      - allocating register N
	 *
	 * In this case, it will seem like we are allocating register N
	 * twice in a row, but there was effectively another allocation
	 * in between. If the first allocation is not needed anymore and
	 * we will work with M and second N, then it will work.
	 *
	 * It will generally if we really only have one register available
	 * but need two at once for an operation.
	 *
	 * XXX Can this happen? Are there any implicit register alloc
	 * ordering assumptions left?
	 */
	if (is_noesiedi) {
		backend->relax_alloc_gpr_order = 1;
	}

	if (backend->arch == ARCH_AMD64) {
		aset = amd64_x86_gprs;
	} else {
		aset = x86_gprs;
	}
	(void) f;

	/* First pass: look for a register that is free right now */
	for (i = 0; x86_gprs[i].name != NULL; ++i) {
		if (reg_unused( /*&x86_gprs[i]*/ &aset[i])
			&& reg_allocatable(/*&x86_gprs*/ &aset[i])) {
			ret = &x86_gprs[i];
			last_alloc = i;
			break;
		} else {
			int total;

			if (!optimizing /* || !reg_allocatable(...)*/) {
				continue;
			}
			/* Track the least-referenced candidate for spilling */
			total = calc_total_refs(&x86_gprs[i]);
			if (total < least) {
				least = total;
				least_idx = i;
			}
		}
	}
	if (ret == NULL) {
		/*
		 * Save and hand out register with least
		 * references
		 */
		save = 1;
		if (!optimizing) {
			static int cur;		/* round-robin position */
			int iterations = 0;

			do {
				if (cur == N_GPRS) cur = 0;
				if (cur == last_alloc) {
					/*
					 * Ensure two successive allocs always
					 * use different registers
					 */
					if (backend->relax_alloc_gpr_order
						&& iterations != 0) {
						/*
						 * 02/09/09: Lift the constraint
						 * that successive uses of the
						 * same GPR aren't allowed, but
						 * only do so in the second
						 * iteration (i.e. try other regs
						 * first and fall back if all
						 * fails)
						 */
						;
					} else {
						cur = (cur + 1) % N_GPRS;
					}
				}

				ret = &x86_gprs[cur /*++*/];
				topreg = &aset[cur++];
				/*
				 * 02/09/09: N_GPRS + 1 to allow for an extra
				 * iteration in case relax_alloc_gpr_order is
				 * set
				 */
				if (++iterations >= N_GPRS + 1) {
					/*
					 * Ouch, no register can be allocated.
					 * This will probably only ever happen
					 * with inline asm statements using too
					 * many registers .... HOPEFULLY!!
					 */
					if (is_noesiedi) {
						backend->relax_alloc_gpr_order = old_relax;
					}
					return NULL;
				}
			} while ((dontwipe != NULL && ret == dontwipe)
				|| !reg_allocatable(/*ret*/topreg));
			last_alloc = cur - 1;
		} else {
			int idx;

			unimpl(); /* XXX doesn;t work with amd64 */
			idx = least_idx == -1? 0: least_idx;
			if (idx == last_alloc) {
				idx = (idx + 1) % N_GPRS;
			}
			ret = &x86_gprs[idx];
			last_alloc = idx;
		}
	}

	/* Record callee-save usage so the prologue/epilogue spill the reg */
	if (ret == &x86_gprs[1]) {
		f->callee_save_used |= CSAVE_EBX;
	} else if (ret == &x86_gprs[4]) {
		f->callee_save_used |= CSAVE_ESI;
	} else if (ret == &x86_gprs[5]) {
		f->callee_save_used |= CSAVE_EDI;
	}

	if (save) {
		struct reg *freeme = ret;

		/*
		 * IMPORTANT: It is assumed that an allocatable register
		 * has a vreg, hence no ret->vreg != NULL check here.
		 * Reusing a preg without a vreg is obviously a bug
		 * because without a vreg, it cannot be saved anywhere.
		 * See reg_set_unallocatable()/vreg_faultin_protected()
		 */
		if (backend->arch == ARCH_AMD64) {
			/*
			 * 05/20/11: This ALWAYS attempted to free the
			 * surrounding register, so if we're allocating
			 * eax, it always tried to free rax. Instead we
			 * have to check whether the outer one is
			 * actually used!
			 */
			freeme = topreg;
		}
		free_preg(freeme, il, 1, 1);
	}

	/* Detach stale vregs from all sub-registers of the result */
	if (size == 2) {
		/* 16bit reg */
		ret = ret->composed_of[0];
		if (ret->composed_of) {
			ret->composed_of[0]->vreg = NULL;
			if (ret == x86_gprs[4].composed_of[0]
				|| ret == x86_gprs[5].composed_of[0]) {
				/*
				 * This means we're allocating si or di. It
				 * follows that there can only be one sub-
				 * register, namely sil or dil (on AMD64!)
				 * Hence composed_of[1] does not exist
				 */
				;
			} else {
				ret->composed_of[1]->vreg = NULL;
			}
		}
	} else {
		/* 32bit */
		ret->composed_of[0]->vreg = NULL;
		if (ret->composed_of[0]->composed_of) {
			/* eax - edx */
			ret->composed_of[0]->composed_of[0]->vreg = NULL;
			if (ret->composed_of[0]->composed_of[1]) {
				ret->composed_of[0]->composed_of[1]->vreg
					= NULL;
			}
		}
	}

	if (is_noesiedi) {
		backend->relax_alloc_gpr_order = old_relax;
	}

	ret->used = ret->allocatable = 1;
	return ret;
}
505
506 static struct reg *
alloc_16_or_32bit_noesiedi(struct function * f,size_t size,struct icode_list * il,struct reg * dontwipe)507 alloc_16_or_32bit_noesiedi(struct function *f, size_t size,
508 struct icode_list *il, struct reg *dontwipe) {
509 int esi_allocatable /*= x86_gprs[4].allocatable*/;
510 int edi_allocatable /* = x86_gprs[5].allocatable */;
511 struct reg *ret;
512 struct reg *esi_reg;
513 struct reg *edi_reg;
514
515 esi_reg = backend->arch == ARCH_AMD64? &amd64_x86_gprs[4]: &x86_gprs[4];
516 edi_reg = backend->arch == ARCH_AMD64? &amd64_x86_gprs[5]: &x86_gprs[5];
517
518 esi_allocatable = reg_allocatable(esi_reg);
519 edi_allocatable = reg_allocatable(edi_reg);
520
521 reg_set_unallocatable(esi_reg);
522 reg_set_unallocatable(edi_reg);
523
524 is_noesiedi = 1;
525 ret = ALLOC_GPR(f, size, il, dontwipe);
526 is_noesiedi = 0;
527
528 if (esi_allocatable) {
529 reg_set_allocatable(/*&x86_gprs[4]*/esi_reg);
530 }
531 if (edi_allocatable) {
532 reg_set_allocatable(/*&x86_gprs[5]*/edi_reg);
533 }
534 return ret;
535 }
536
/*
 * Allocate an 8bit GPR. Only eax-edx have 8bit sub-registers, so
 * esi/edi are never considered. First looks for a completely unused
 * register hierarchy; failing that, allocates a 16/32bit register
 * (excluding esi/edi) and hands out one of its byte halves.
 */
static struct reg *
alloc_8bit_reg(struct function *f, struct icode_list *il,
	struct reg *dontwipe) {
	int i;
	int least8 = INT_MAX;
	int least32 = INT_MAX;
	int total;
	struct reg *ret = NULL;
	struct reg *aset;	/* arch register set (x86 or AMD64) */

	if (backend->arch == ARCH_AMD64) {
		aset = amd64_x86_gprs;
	} else {
		aset = x86_gprs;
	}

	(void) dontwipe;

	/* Only eax - edx (indices 0..3) have 8bit parts */
	for (i = 0; i < 4; ++i) {
		int j;
		struct reg **r16bit;

		if (!reg_allocatable(&aset[i])) {
			continue;
		}
		/* Register, its 32bit part and its 16bit part must all be free */
		if (!aset[i].used
			&& !x86_gprs[i].used
			&& !((r16bit = x86_gprs[i].composed_of)[0])->used) {
			struct reg **r8bit;

			r8bit = r16bit[0]->composed_of;

			/*
			 * Beware - mov ah, byte [r8] doesn't work,
			 * but does with al on amd64! So never use
			 * ah on amd64. Oh, and also, composed_of[0]
			 * of ax is actually ah, not al. I'm afraid
			 * of reversing this because I think it will
			 * break other stuff that depends on it
			 */
			if (backend->arch == ARCH_AMD64) {
				j = 1;	/* skip ah; start at al */
			} else {
				j = 0;
			}
			for (; j < 2; ++j) {
				if (!r8bit[j]->used) {
					ret = r8bit[j];
					break;
				} else {
					/* Track least-referenced candidate */
					total = calc_total_refs(r8bit[j]);
					if (total < least8) {
						least8 = total;
					}
				}
			}
		} else {
			/* in use */
			total = calc_total_refs(/*&x86_gprs*/&aset[i]);
			if (total < least32) {
				least32 = total;
			}
		}
		if (ret != NULL) {
			break;
		}
	}

	if (ret == NULL) {
		/* Nothing directly free - take a 16bit reg and use a byte half */
		ret = alloc_16_or_32bit_noesiedi(f, 2, il, NULL);

		if (ret == NULL) {
			return NULL;
		}

		ret->used = 0;
		ret->allocatable = 1;
		if (backend->arch == ARCH_AMD64) {
			ret = ret->composed_of[1];	/* al etc. (never ah on AMD64) */
		} else {
			ret = ret->composed_of[0];
		}
		ret->used = 1;
	}
	return ret;
}
623
624
625 static struct reg *
alloc_gpr(struct function * f,int size,struct icode_list * il,struct reg * dontwipe,int line)626 alloc_gpr(struct function *f, int size, struct icode_list *il,
627 struct reg *dontwipe, int line) {
628 struct reg *ret;
629
630 (void) line;
631 if (f == NULL) abort(); /* using invalidate_gprs() now */
632
633 if (size == 0) {
634 /* 0 means GPR */
635 size = 4;
636 }
637
638 if (backend->multi_gpr_object) {
639 /* Previous gpr allocation request remains to be finished */
640 ret = alloc_16_or_32bit_reg(f, 4, il, dontwipe);
641 backend->multi_gpr_object = 0;
642 } else if (size == 8) {
643 /* long long ... ouch */
644 ret = alloc_16_or_32bit_reg(f, 4, il, dontwipe);
645 backend->multi_gpr_object = 1;
646 } else if (size == 4 || size == 2) {
647 ret = alloc_16_or_32bit_reg(f, size, il, dontwipe);
648 } else if (size == 1) {
649 ret = alloc_8bit_reg(f, il, dontwipe);
650 } else {
651 printf("REGISTER LOAD WITH BAD SIZE %d\n", size);
652 abort();
653 }
654
655 if (ret == NULL) {
656 debug_log_regstuff(ret, NULL, DEBUG_LOG_FAILEDALLOC);
657 #ifdef DEBUG6
658 printf("(alloc size was %d)\n", size);
659 #endif
660 } else {
661 debug_log_regstuff(ret, NULL, DEBUG_LOG_ALLOCGPR);
662 }
663 #ifdef DEBUG6
664 if (ret != NULL) {
665 ret->line = line;
666 ++ret->nallocs;
667 }
668 #endif
669
670 return ret;
671 }
672
673 #if 0
674 static int fpr_bos = 0; /* fpr bottom of stack */
675 #endif
676
677 struct reg *
alloc_sse_fpr(struct function * f,int size,struct icode_list * il,struct reg * dontwipe)678 alloc_sse_fpr(struct function *f, int size, struct icode_list *il,
679 struct reg *dontwipe) {
680
681 /*
682 * 06/18/09: Is this size trickery for long double still needed?
683 */
684 if (size == 10 || size == 12 || size == 16) {
685 if (backend->arch == ARCH_AMD64) {
686 return x86_backend.alloc_fpr(f, size, il, dontwipe);
687 } else {
688 return backend->alloc_fpr(f, size, il, dontwipe);
689 }
690 } else {
691 return generic_alloc_gpr(f, size, il, dontwipe,
692 x86_sse_regs, 8, sse_csave_map, 0);
693 }
694 }
695
696
697
698 static struct reg *
alloc_fpr(struct function * f,int size,struct icode_list * il,struct reg * dontwipe)699 alloc_fpr(struct function *f, int size, struct icode_list *il,
700 struct reg *dontwipe) {
701 (void) f; (void) size; (void) il; (void) dontwipe;
702
703 if (sysflag == OS_OSX && (size == 4 || size == 8)) {
704 return alloc_sse_fpr(f, size, il, dontwipe);
705 }
706 x86_fprs[0].used = 1;
707 return &x86_fprs[0];
708 #if 0
709 (void) f; (void) size; (void) il; (void) dontwipe;
710
711 if (fpr_bos == 7) {
712 (void) fprintf(stderr,
713 "x87 register stack overflow\n");
714 abort();
715 }
716
717 /*
718 * Allocated register is top of stack - relocate all allocated
719 * registers by one
720 */
721 for (i = 6; i >= 0; --i) {
722
723 if (x86_fprs[i].used) {
724 vreg_map_preg(x86_fprs[i].vreg, &x86_fprs[i+1]);
725 }
726 }
727
728 x86_fprs[0].used = 1;
729 return &x86_fprs[0];
730 #endif
731 }
732
/*
 * Freeing x87 registers is a no-op in the current scheme: alloc_fpr()
 * only ever hands out st0 and x87 evaluation is managed elsewhere
 * (see x87_nonsense.h). The old register-stack bookkeeping code that
 * used to live here was disabled.
 */
static void
x86_free_preg(struct reg *r, struct icode_list *il) {
	(void) r;
	(void) il;
}
770
/*
 * Backend initialization. Stores the output stream and translation
 * unit scope, selects the emitter according to the -asm flag, builds
 * the register objects and their composed_of hierarchies
 * (eax -> ax -> ah/al), and sets up esp/ebp.
 *
 * Also called from the AMD64 backend's init() - in that case the
 * generic "emit" pointer must not be overwritten here.
 */
static int
init(FILE *fd, struct scope *s) {
	int i;
	int j;

	out = fd;
	tunit = s;

	(void) use_nasm;

	if (sysflag == OS_OSX) {
		/*
		 * 02/09/09: Make AMD64 emitter available so that we
		 * can emit SSE instructions (required by OSX even on
		 * x86).
		 *
		 * XXX These instructions should be moved to the x86
		 * backend, like all other SSE things already have
		 * (GPR allocator, etc)
		 */
		emit_amd64 = &emit_amd64_gas;
		/*
		 * Also initialize FILE handle
		 */
		amd64_emit_gas.init(out, s);
	}

	/*
	 * Initialize registers and function pointer tables.
	 * It is important not to trash ``emit'' if this is
	 * called from AMD64 init()!!!
	 */
	if (asmflag == NULL) {
		/* Default is nasm */
		/* NOTE(review): comment says nasm but gas is selected - confirm */
		if (backend->arch != ARCH_AMD64) {
#if 0
			emit = &x86_emit_nasm;
#endif
			emit = &x86_emit_gas;
		}
		emit_x86 = &x86_emit_x86_gas;
	} else if (strcmp(asmname, "nasm") == 0
		|| strcmp(asmname, "nwasm") == 0
		|| strcmp(asmname, "yasm") == 0) {
		if (backend->arch != ARCH_AMD64) {
			emit = &x86_emit_nasm;
		}
		emit_x86 = &x86_emit_x86_nasm;
	} else if (strcmp(asmname, "as") == 0
		|| strcmp(asmname, "gas") == 0) {
		if (backend->arch != ARCH_AMD64) {
			emit = &x86_emit_gas;
		}
		emit_x86= &x86_emit_x86_gas;
	} else {
		(void) fprintf(stderr, "Unknown x86 assembler `%s'\n",
			asmflag);
		exit(EXIT_FAILURE);
	}

#if 0
	if (use_nasm) {
		emit = &x86_emit_nasm;
	} else {
		emit = &x86_emit_gas;
	}
	emit = &x86_emit_nasm;
	emit_x86 = &x86_emit_x86_nasm;
#endif

	init_regs();

	/* Link each 32bit GPR to its 16bit part, and (for eax-edx) the
	 * 16bit part to its two 8bit halves */
	for (i = 0, j = 0; i < N_GPRS; ++i) {
		struct reg *r16bit;

		x86_gprs[i].composed_of = n_xmalloc(2 * sizeof(struct reg *));
		r16bit = &x86_16bit_gprs[i];
		x86_gprs[i].composed_of[0] = r16bit;
		x86_gprs[i].composed_of[1] = NULL;
		if (i < 4) {
			r16bit->composed_of =
				n_xmalloc(3 * sizeof(struct reg *));
			r16bit->composed_of[0] = &x86_8bit_gprs[j++];
			r16bit->composed_of[1] = &x86_8bit_gprs[j++];
			r16bit->composed_of[2] = NULL;
		}
	}

	/* Stack and frame pointer registers (not allocatable) */
	x86_esp.type = REG_SP;
	x86_esp.size = 4;
	x86_esp.name = "esp";
	x86_esp.composed_of = n_xmalloc(2 * sizeof(struct reg *));
	*x86_esp.composed_of = &x86_esp_16bit;
	x86_esp.composed_of[1] = NULL;
	x86_esp_16bit.size = 2;
	x86_esp_16bit.name = "sp";
	x86_ebp.type = REG_BP;
	x86_ebp.size = 4;
	x86_ebp.name = "ebp";
	x86_ebp.composed_of = n_xmalloc(2 * sizeof(struct reg *));
	*x86_ebp.composed_of = &x86_ebp_16bit;
	x86_ebp.composed_of[1] = NULL;
	x86_ebp_16bit.size = 2;
	x86_ebp_16bit.name = "bp";

	if (backend->arch != ARCH_AMD64) {
		backend->emit = emit;
		return emit->init(out, tunit);
	}
	return 0;
}
881
/* Size of a data pointer in bytes (x86 is a 32bit target) */
static int
get_ptr_size(void) {
	return 4;
}
886
/* Type used for size_t on x86: unsigned int (32bit) */
static struct type *
get_size_t(void) {
	return make_basic_type(TY_UINT);
}
891
/* Type used for uintptr_t on x86: unsigned long (32bit) */
static struct type *
get_uintptr_t(void) {
	return make_basic_type(TY_ULONG);
}
896
/* Type used for wchar_t on x86: int (32bit) */
static struct type *
get_wchar_t(void) {
	return make_basic_type(TY_INT);
}
901
902 static size_t
get_sizeof_basic(int type)903 get_sizeof_basic(int type) {
904 switch (type) {
905 case TY_ENUM:
906 return 4; /* XXX */
907
908 case TY_INT:
909 case TY_UINT:
910 case TY_LONG:
911 case TY_ULONG:
912 return 4;
913
914 case TY_LLONG:
915 case TY_ULLONG:
916 return 8;
917
918 case TY_CHAR:
919 case TY_UCHAR:
920 case TY_SCHAR:
921 case TY_BOOL:
922 return 1;
923
924 case TY_SHORT:
925 case TY_USHORT:
926 return 2;
927
928 case TY_FLOAT:
929 return 4;
930
931 case TY_DOUBLE:
932 return 8;
933 case TY_LDOUBLE:
934 if (sysflag == OS_OSX) {
935 return 16;
936 } else {
937 if (backend->arch == ARCH_AMD64) {
938 return /*10 XXX */10;
939 } else {
940 return 10;
941 }
942 }
943 default:
944 printf("err sizeof cannot cope w/ it, wuz %d\n", type);
945 abort();
946 return 1; /* XXX */
947 }
948 }
949
950
951 static void
do_ret(struct function * f,struct icode_instr * ip)952 do_ret(struct function *f, struct icode_instr *ip) {
953 if (f->callee_save_used & CSAVE_EBX) {
954 emit->load(&x86_gprs[1], &csave_ebx);
955 }
956 if (f->callee_save_used & CSAVE_ESI) {
957 emit->load(&x86_gprs[4], &csave_esi);
958 }
959 if (f->callee_save_used & CSAVE_EDI) {
960 emit->load(&x86_gprs[5], &csave_edi);
961 }
962 if (saved_ret_addr) {
963 emit->check_ret_addr(f, saved_ret_addr);
964 }
965 if (f->alloca_head != NULL) {
966 struct stack_block *sb;
967 static struct vreg rvr;
968
969 rvr.stack_addr = f->alloca_regs;
970 rvr.size = 4;
971 backend_vreg_map_preg(&rvr, &x86_gprs[0]);
972 emit->store(&rvr, &rvr);
973 backend_vreg_unmap_preg(&x86_gprs[0]);
974 if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
975 rvr.stack_addr = f->alloca_regs->next;
976 backend_vreg_map_preg(&rvr, &x86_gprs[3]);
977 emit->store(&rvr, &rvr);
978 backend_vreg_unmap_preg(&x86_gprs[3]);
979 }
980
981 for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
982 emit->dealloca(sb, NULL);
983 }
984
985 rvr.stack_addr = f->alloca_regs;
986 backend_vreg_map_preg(&rvr, &x86_gprs[0]);
987 emit->load(&x86_gprs[0], &rvr);
988 backend_vreg_unmap_preg(&x86_gprs[0]);
989 if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
990 rvr.stack_addr = f->alloca_regs->next;
991 backend_vreg_map_preg(&rvr, &x86_gprs[3]);
992 emit->load(&x86_gprs[3], &rvr);
993 backend_vreg_unmap_preg(&x86_gprs[3]);
994 }
995 }
996 if (f->vla_head != NULL) {
997 struct stack_block *sb;
998 static struct vreg rvr;
999
1000 rvr.stack_addr = f->alloca_regs;
1001 rvr.size = 4;
1002 backend_vreg_map_preg(&rvr, &x86_gprs[0]);
1003 emit->store(&rvr, &rvr);
1004 backend_vreg_unmap_preg(&x86_gprs[0]);
1005 if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
1006 rvr.stack_addr = f->alloca_regs->next;
1007 backend_vreg_map_preg(&rvr, &x86_gprs[3]);
1008 emit->store(&rvr, &rvr);
1009 backend_vreg_unmap_preg(&x86_gprs[3]);
1010 }
1011
1012 for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
1013 emit->dealloc_vla(sb, NULL);
1014 }
1015
1016 rvr.stack_addr = f->alloca_regs;
1017 backend_vreg_map_preg(&rvr, &x86_gprs[0]);
1018 emit->load(&x86_gprs[0], &rvr);
1019 backend_vreg_unmap_preg(&x86_gprs[0]);
1020 if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
1021 rvr.stack_addr = f->alloca_regs->next;
1022 backend_vreg_map_preg(&rvr, &x86_gprs[3]);
1023 emit->load(&x86_gprs[3], &rvr);
1024 backend_vreg_unmap_preg(&x86_gprs[3]);
1025 }
1026 }
1027 emit->freestack(f, NULL);
1028 emit->ret(ip);
1029 }
1030
/*
 * Register used for the index-th function argument. The x86 ABI
 * passes all arguments on the stack, so there never is one.
 */
static struct reg *
get_abi_reg(int index, struct type *ty) {
	(void) index;
	(void) ty;
	return NULL;
}
1037
1038 static struct reg *
get_abi_ret_reg(struct type * ty)1039 get_abi_ret_reg(struct type *ty) {
1040 if (is_integral_type(ty) || ty->tlist != NULL) {
1041 return &x86_gprs[0];
1042 } else {
1043 unimpl();
1044 }
1045 /* NOTREACHED */
1046 return NULL;
1047 }
1048
1049 static int
gen_function(struct function * f)1050 gen_function(struct function *f) {
1051 struct ty_func *proto;
1052 struct scope *scope;
1053 struct icode_instr *lastret = NULL;
1054 struct stack_block *sb;
1055 size_t size;
1056 size_t alloca_bytes = 0;
1057 size_t vla_bytes = 0;
1058 int i;
1059 struct stupidtrace_entry *traceentry = NULL;
1060
1061 emit->setsection(SECTION_TEXT);
1062 proto = f->proto->dtype->tlist->tfunc;
1063
1064 emit->func_header(f); /* XXX */
1065 emit->label(f->proto->dtype->name, 1);
1066 emit->intro(f);
1067
1068 if (proto->nargs > 0) {
1069 struct sym_entry *se = proto->scope->slist;
1070 int i;
1071 long offset = 8; /* ebp, ret, was 0 */
1072
1073 if (f->proto->dtype->tlist->next == NULL
1074 && (f->proto->dtype->code == TY_STRUCT
1075 || f->proto->dtype->code == TY_UNION)) {
1076 /*
1077 * Function returns struct/union - accomodate for
1078 * hidden pointer (passed as first argument)
1079 */
1080 offset += 4;
1081 }
1082
1083 for (i = 0; i < proto->nargs; ++i, se = se->next) {
1084 size_t size;
1085
1086 size = backend->get_sizeof_type(se->dec->dtype, NULL);
1087 if (size < 4) {
1088 /*
1089 * 07/21/08: Ouch, this was missing! char and
1090 * short are passed as dwords, so make sure the
1091 * corresponding stack block is also 4 bytes
1092 *
1093 * Otherwise emit_addrof() will skip the wrong
1094 * byte count to get to the start of the
1095 * ellipsis in variadic functions
1096 */
1097 /*
1098 * 05/22/11: Account for empty structs (a GNU
1099 * C silliness) being passed
1100 */
1101 if (size > 0) {
1102 size = 4;
1103 }
1104 } else if ((size % 4) != 0) {
1105 /*
1106 * 08/09/08: Pad to boundary of 4! This was
1107 * already done for long double below, but not
1108 * for structs and unions
1109 */
1110 size += 4 - size % 4;
1111 }
1112
1113 sb = make_stack_block(offset, size);
1114 offset += size; /* was before makestackblock */
1115
1116 sb->is_func_arg = 1;
1117 se->dec->stack_addr = sb;
1118 }
1119 }
1120
1121 /* Make local variables */
1122 for (scope = f->scope; scope != NULL; scope = scope->next) {
1123 struct stack_block *sb;
1124 struct scope *tmp;
1125 struct decl **dec;
1126 size_t align;
1127
1128 for (tmp = scope; tmp != NULL; tmp = tmp->parent) {
1129 if (tmp == f->scope) {
1130 break;
1131 }
1132 }
1133
1134 if (tmp == NULL) {
1135 /* End of function reached */
1136 break;
1137 }
1138 if (scope->type != SCOPE_CODE) continue;
1139
1140 dec = scope->automatic_decls.data;
1141 for (i = 0; i < scope->automatic_decls.ndecls; ++i) {
1142 struct decl *alignfor;
1143
1144 if (dec[i]->stack_addr != NULL) { /* XXX sucks */
1145 continue;
1146 } else if (IS_VLA(dec[i]->dtype->flags)) {
1147 /*
1148 * 05/22/11: Handle pointers to VLAs properly;
1149 * We have to create a metadata block to
1150 * record dimension sizes, but we allocate
1151 * the pointers themselves on the stack
1152 *
1153 * char (*p)[N];
1154 *
1155 * ... "p" on stack, N in metadata block
1156 */
1157 if (dec[i]->dtype->tlist->type == TN_POINTER_TO) {
1158 ;
1159 } else {
1160 continue;
1161 }
1162 }
1163
1164 #if 0
1165 if (i+1 < scope->automatic_decls.ndecls
1166 && !IS_VLA(dec[i+1]->dtype->flags)) {
1167 align = calc_align_bytes(f->total_allocated,
1168 dec[i]->dtype,
1169 dec[i+1]->dtype);
1170 } else {
1171 align = 0;
1172 }
1173 #endif
1174
1175 alignfor = get_next_auto_decl_in_scope(scope, i);
1176 if (alignfor != NULL) {
1177 align = calc_align_bytes(f->total_allocated,
1178 dec[i]->dtype,
1179 alignfor->dtype, 0);
1180 } else {
1181 align = 0;
1182 }
1183
1184 size = backend->
1185 get_sizeof_decl(dec[i], NULL);
1186 sb = stack_malloc(f, size+align);
1187 sb->nbytes = size;
1188 dec[i]->stack_addr = sb;
1189 }
1190 stack_align(f, 4);
1191 }
1192
1193 /*
1194 * Allocate storage for saving callee-saved registers (ebx/esi/edi)
1195 * (but defer saving them until esp has been updated)
1196 *
1197 * 11/26/07: This unconditionally allocated storage for all regs
1198 * regardless of whether they were saved! Bad.
1199 */
1200 #if 0
1201 f->total_allocated += 12;
1202 f->callee_save_offset = f->total_allocated;
1203 #endif
1204
1205 if (f->callee_save_used & CSAVE_EBX) {
1206 ebx_saved = 1;
1207 f->total_allocated += 4;
1208 csave_ebx.stack_addr
1209 = make_stack_block(f->total_allocated /*callee_save_offset*/, 4);
1210 }
1211 if (f->callee_save_used & CSAVE_ESI) {
1212 esi_saved = 1;
1213 f->total_allocated += 4;
1214 csave_esi.stack_addr
1215 = make_stack_block(f->total_allocated /*callee_save_offset - 4 */, 4);
1216 }
1217 if (f->callee_save_used & CSAVE_EDI) {
1218 edi_saved = 1;
1219 f->total_allocated += 4;
1220 csave_edi.stack_addr
1221 = make_stack_block(f->total_allocated /*callee_save_offset - 8*/, 4);
1222 }
1223 f->callee_save_offset = f->total_allocated;
1224 if (stackprotectflag) {
1225 f->total_allocated += 4;
1226 /*
1227 * 08/03/11: The save_ret_addr stack block was cached here,
1228 * which caused the (later introduced) zone allocator to
1229 * trash the "frame pointer" flag while resetting memory
1230 */
1231 saved_ret_addr
1232 = make_stack_block(f->total_allocated, 4);
1233 }
1234
1235 /* Allocate storage for temporarily saving GPRs & patch offsets */
1236 for (sb = f->regs_head; sb != NULL; sb = sb->next) {
1237 stack_align(f, sb->nbytes);
1238 f->total_allocated += sb->nbytes;
1239 sb->offset = f->total_allocated;
1240 }
1241 /*
1242 * Allocate storage for saving alloca() pointers, and initialize
1243 * it to zero
1244 */
1245 for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
1246 f->total_allocated += sb->nbytes;
1247 alloca_bytes += sb->nbytes;
1248 sb->offset = f->total_allocated;
1249 }
1250 if (f->alloca_head != NULL || f->vla_head != NULL) {
1251 /*
1252 * Get stack for saving return value registers before
1253 * performing free() and alloca()ted blocks
1254 */
1255 f->alloca_regs = make_stack_block(0, 4);
1256 f->total_allocated += 4;
1257 f->alloca_regs->offset = f->total_allocated;
1258 f->alloca_regs->next = make_stack_block(0, 4);
1259 f->total_allocated += 4;
1260 f->alloca_regs->next->offset = f->total_allocated;
1261 }
1262
1263 /*
1264 * Allocate storage for saving VLA data, and initialize
1265 * it to zero
1266 */
1267 for (sb = f->vla_head; sb != NULL; sb = sb->next) {
1268 f->total_allocated += sb->nbytes;
1269 vla_bytes += sb->nbytes;
1270 sb->offset = f->total_allocated;
1271 }
1272
1273 if (sysflag == OS_OSX) {
1274 stack_align(f, 16);
1275 } else {
1276 stack_align(f, 4);
1277 }
1278 if (f->total_allocated > 0) {
1279 emit->allocstack(f, f->total_allocated);
1280 if (f->callee_save_used & CSAVE_EBX) {
1281 backend_vreg_map_preg(&csave_ebx, &x86_gprs[1]);
1282 emit->store(&csave_ebx, &csave_ebx);
1283 backend_vreg_unmap_preg(&x86_gprs[1]);
1284 x86_gprs[1].used = 0; /* unneeded now?!?! */
1285 }
1286 if (f->callee_save_used & CSAVE_ESI) {
1287 backend_vreg_map_preg(&csave_esi, &x86_gprs[4]);
1288 emit->store(&csave_esi, &csave_esi);
1289 backend_vreg_unmap_preg(&x86_gprs[4]);
1290 x86_gprs[4].used = 0; /* unneeded now!?!? */
1291 }
1292 if (f->callee_save_used & CSAVE_EDI) {
1293 backend_vreg_map_preg(&csave_edi, &x86_gprs[5]);
1294 emit->store(&csave_edi, &csave_edi);
1295 backend_vreg_unmap_preg(&x86_gprs[5]);
1296 x86_gprs[5].used = 0; /* unneded now?!?! */
1297 }
1298 }
1299 if (stackprotectflag) {
1300 emit->save_ret_addr(f, saved_ret_addr);
1301 }
1302 if (f->alloca_head) {
1303 /* 08/19/07: This wrongly used alloca_head! */
1304 emit->zerostack(f->alloca_tail, alloca_bytes);
1305 }
1306 if (f->vla_head) {
1307 /* 08/19/07: This wrongly used vla_head! */
1308 emit->zerostack(f->vla_tail, vla_bytes);
1309 }
1310
1311 if (stupidtraceflag && emit->stupidtrace != NULL) {
1312 traceentry = put_stupidtrace_list(f);
1313 emit->stupidtrace(traceentry);
1314 }
1315
1316 if (xlate_icode(f, f->icode, &lastret) != 0) {
1317 return -1;
1318 }
1319 if (lastret != NULL) {
1320 struct icode_instr *tmp;
1321
1322 for (tmp = lastret->next; tmp != NULL; tmp = tmp->next) {
1323 if (tmp->type != INSTR_SETITEM) {
1324 lastret = NULL;
1325 break;
1326 }
1327 }
1328 }
1329
1330 emit->outro(f);
1331
1332 if (traceentry != NULL) {
1333 emit->finish_stupidtrace(traceentry);
1334 }
1335
1336 return 0;
1337 }
1338
1339
1340 #if XLATE_IMMEDIATELY
1341
1342 static int
gen_prepare_output(void)1343 gen_prepare_output(void) {
1344 if (gflag) {
1345 /* Print file names */
1346 emit->dwarf2_files();
1347 }
1348 if (emit->support_decls) {
1349 emit->support_decls();
1350 }
1351 return 0;
1352 }
1353
1354 static int
gen_finish_output(void)1355 gen_finish_output(void) {
1356 /*
1357 * Emit remaining static initializd variables. Currently this
1358 * should only handle function name identifiers (__func__).
1359 */
1360 if (sysflag == OS_OSX) {
1361 emit->global_static_decls(global_scope.static_decls.data,
1362 global_scope.static_decls.ndecls);
1363 }
1364 /* emit->static_init_vars(static_init_vars);
1365 emit->static_init_thread_vars(static_init_thread_vars);*/
1366 emit->static_init_vars(static_init_vars);
1367 emit->static_init_thread_vars(static_init_thread_vars);
1368
1369 emit->static_uninit_vars(static_uninit_vars);
1370 emit->static_uninit_thread_vars(static_uninit_thread_vars);
1371 emit->global_extern_decls(global_scope.extern_decls.data,
1372 global_scope.extern_decls.ndecls);
1373 if (emit->extern_decls) {
1374 emit->extern_decls();
1375 }
1376
1377 /*
1378 * Support buffers at end because we may only now know
1379 * whether they are needed (used at all)
1380 */
1381 emit->support_buffers();
1382
1383 if (emit->finish_program) {
1384 emit->finish_program();
1385 }
1386 x_fflush(out);
1387 return 0;
1388 }
1389
1390
1391 #else
1392
/*
 * Translate and emit the whole program in one pass (only used when
 * XLATE_IMMEDIATELY is disabled): first all file-scope data and
 * constants, then every function body in funclist. Returns 0 on
 * success, -1 if translating any function failed.
 *
 * NOTE: the order of the emit->* calls below determines section layout
 * in the output file - do not reorder casually.
 */
static int
gen_program(void) {
	struct function *func;

	if (gflag) {
		/* Print file names */
		emit->dwarf2_files();
	}

	if (emit->support_decls) {
		emit->support_decls();
	}
	if (emit->extern_decls) {
		emit->extern_decls();
	}
#if 0
	emit->global_decls();
#endif
	emit->global_extern_decls(global_scope.extern_decls.data,
			global_scope.extern_decls.ndecls);
	emit->global_static_decls(global_scope.static_decls.data,
			global_scope.static_decls.ndecls);

#if 0
	emit->static_decls();
#endif
	/* Initialized/uninitialized and thread-local static data */
	emit->static_init_vars(static_init_vars);
	emit->static_uninit_vars(static_uninit_vars);
	emit->static_init_thread_vars(static_init_thread_vars);
	emit->static_uninit_thread_vars(static_uninit_thread_vars);

	emit->struct_inits(init_list_head);
	emit->empty();
	/* String and floating point constant pools */
	emit->strings(str_const);
	emit->fp_constants(float_const);
#if 0
	if (emit->llong_constants) {
		emit->llong_constants();
	}
#endif
	emit->support_buffers();
#if 0
	if (emit->pic_support) {
		emit->pic_support();
	}
#endif
	emit->empty();


#if 0
	if (emit->struct_defs) {
		emit->struct_defs();
	}
#endif

	/* All data has been emitted - now the code */
	emit->setsection(SECTION_TEXT);

	for (func = funclist; func != NULL; func = func->next) {
		/* curfunc is global state read by the code generators */
		curfunc = func;
		if (gen_function(func) != 0) {
			return -1;
		}
		emit->empty();
		emit->empty();
	}

	x_fflush(out);

	return 0;
}
1463
1464 #endif
1465
1466 static int
calc_x86_stack_bytes(struct fcall_data * fcall,struct vreg ** vrs,int nvrs,int start_value)1467 calc_x86_stack_bytes(struct fcall_data *fcall,
1468 struct vreg **vrs, int nvrs, int start_value) {
1469 int bytes = start_value + 8; /* ebp and return address */
1470 int i;
1471 int need_dap = 0;
1472
1473
1474 if (fcall->functype->nargs == -1
1475 /*|| ty->implicit*/) {
1476 /* Need default argument promotions */
1477 need_dap = 1;
1478 }
1479 for (i = nvrs - 1; i >= 0; --i) {
1480 if (fcall->functype->variadic
1481 && i >= fcall->functype->nargs) {
1482 need_dap = 1;
1483 }
1484 if (vrs[i]->type->tlist != NULL
1485 || is_integral_type(vrs[i]->type)) {
1486 bytes += vrs[i]->size < 4? 4: vrs[i]->size;
1487 } else if (is_floating_type(vrs[i]->type)) {
1488 if (vrs[i]->type->code == TY_FLOAT) {
1489 if (need_dap) {
1490 bytes += 8;
1491 } else {
1492 bytes += 4;
1493 }
1494 } else if (vrs[i]->type->code == TY_LDOUBLE) {
1495 bytes += 16; /* XXXXXXXXXXXXX 16 */
1496 } else {
1497 bytes += vrs[i]->size;
1498 }
1499 } else if (vrs[i]->type->code == TY_STRUCT
1500 || vrs[i]->type->code == TY_UNION) {
1501 /* 07/21/08: (left-)Align to boundary of 4 */
1502 if (vrs[i]->size & 3) {
1503 bytes += 4 - (vrs[i]->size % 4);
1504 }
1505 bytes += vrs[i]->size;
1506 }
1507 }
1508 return bytes;
1509 }
1510
1511
/*
 * Build the icode for an x86 function call: push all arguments right to
 * left (cdecl), handle struct-return via a hidden pointer argument,
 * invalidate caller-saved registers, emit the call itself, pop the
 * argument bytes, and return a vreg describing where the return value
 * lives (eax, eax:edx, st0/xmm0, or the struct return storage).
 *
 * fcall  - call descriptor (callee type, prototype, optional lvalue for
 *          a struct result)
 * vrs    - argument vregs, nvrs of them, in source order
 * il     - icode list appended to
 *
 * Returns the result vreg (type = callee's return type), or a vreg with
 * no register mapping for void functions.
 */
static struct vreg *
icode_make_fcall(struct fcall_data *fcall, struct vreg **vrs, int nvrs,
	struct icode_list *il)
{
	unsigned long allpushed = 0;	/* bytes we must pop after the call */
	struct vreg *tmpvr;
	struct vreg *ret = NULL;
	struct vreg *vr2;
	struct type *ty;
	struct icode_instr *ii;
	struct type_node *tn;
	struct vreg *struct_lvalue;
	int i;
	int need_dap = 0;	/* default argument promotions needed? */
	int was_struct;
	int was_float;
	int was_llong;
	int struct_return = 0;
	int ret_is_anon_struct = 0;

	ty = fcall->calltovr->type;
	tmpvr = fcall->calltovr;

	tn = ty->tlist;
	if (tn->type == TN_POINTER_TO) {
		/* Called thru function pointer */
		tn = tn->next;
	}

	struct_lvalue = fcall->lvalue;

	if ((ty->code == TY_STRUCT
		|| ty->code == TY_UNION)
		&& tn->next == NULL) {
		/* Callee returns a struct/union by value */
		struct_return = 1;
		if (struct_lvalue == NULL || fcall->need_anon) {
			struct type_node *tnsav;
			/*
			 * Result of function is not assigned so we need to
			 * allocate storage for the callee to store its
			 * result into
			 */

			tnsav = ty->tlist;
			ty->tlist = NULL;

			/*
			 * 08/05/08: Don't allocate anonymous struct return
			 * storage right here, but when creating the stack
			 * frame. This has already been done on MIPS, PPC
			 * and SPARC, but not on x86/AMD64. The reason is
			 * that it broke something that is long fogotten
			 * now. So we'll re-enable this and fix any bugs
			 * that may come up.
			 *
			 * The reason I ran into this again is that if we
			 * don't allocate the struct on the stack frame,
			 * then in
			 *
			 *     struct foo otherfunc() { return ...}
			 *     struct foo func() { return otherfunc(); }
			 *
			 * ... the anonymous storage is reclaimed before
			 * it can be copied as a return value, hence
			 * trashing it
			 */
			struct_lvalue = vreg_stack_alloc(ty, il, 1 /*0*/, NULL);

			ty->tlist = tnsav;
			/*
			 * 08/05/08: Don't add to allpushed since struct is
			 * created on frame
			 */
			/* allpushed += struct_lvalue->size;*/
			ret_is_anon_struct = 1;
		}
	}

	if (sysflag == OS_OSX) {
		int count;

		/*
		 * OSX requires 16-byte stack alignment at the call;
		 * pre-allocate padding so the final esp is aligned
		 */
		count = calc_x86_stack_bytes(fcall, vrs, nvrs, struct_return? 4: 0);
		if (count % 16 != 0) {
			unsigned long align = 16 - count % 16;
#if 0
			printf("aligning %lu\n", align);
#endif
			icode_make_allocstack(NULL, align, il);
			allpushed += align;
		}
	}

	/*
	 * 07/20/08: This wrongly took an implicit return type into account
	 * to determine whether default argument promotions are needed!
	 */
	if (fcall->functype->nargs == -1
		/*|| ty->implicit*/) {
		/* Need default argument promotions */
		need_dap = 1;
	}

	/* Push arguments right to left (cdecl push order) */
	for (i = nvrs - 1; i >= 0; --i) {
		struct vreg *dest;

		if (fcall->functype->variadic
			&& i >= fcall->functype->nargs) {
			need_dap = 1;
		}

		/*
		 * May have to be moved into
		 * register if we're dealing with
		 * pointer stuff, otherwise we may
		 * push with memory operand
		 */

		was_struct = was_float = was_llong = 0;
		if (vrs[i]->parent) {
			vr2 = get_parent_struct(vrs[i]);
		} else {
			vr2 = NULL;
		}

		if (vrs[i]->type->tlist != NULL) {
			/* Pointer/array - just load and push as a word */
			vreg_faultin(NULL, NULL, vrs[i], il, 0);
		} else {
			if (vrs[i]->from_ptr) {
				/* XXX not needed?! */
				vreg_faultin(NULL, NULL,
					vrs[i]->from_ptr, il, 0);
			}
			if (IS_CHAR(vrs[i]->type->code)
				|| IS_SHORT(vrs[i]->type->code)) {
				struct type *ty
					= make_basic_type(TY_INT);

				/*
				 * Bytes and halfwords are pushed as words
				 */
				vrs[i] = backend->
					icode_make_cast(vrs[i], ty, il);
			} else if (IS_LLONG(vrs[i]->type->code)) {
				/* long long occupies two words on the stack */
				vreg_faultin(NULL, NULL, vrs[i], il, 0);
				allpushed += 8;
				ii = icode_make_push(vrs[i], il);
				append_icode_list(il, ii);
				was_llong = 1;
			} else if (vrs[i]->type->code == TY_STRUCT
				|| vrs[i]->type->code == TY_UNION) {
				/*
				 * struct/union - memcpy() it onto stack,
				 * allocate storage manually (no push!)
				 */
				/*
				 * 05/22/11: Account for empty structs (a GNU
				 * C silliness) being passed
				 */
				if (vrs[i]->size > 0) {
					/* 07/21/08: (left-)Align to boundary of 4 */
					if (vrs[i]->size & 3) {
						icode_make_allocstack(NULL, 4 - (vrs[i]->size % 4), il);
						allpushed += 4 - (vrs[i]->size % 4);
					}

					dest = vreg_stack_alloc(vrs[i]->type, il, 0, NULL);
					allpushed += dest->size;
					backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
					vreg_faultin_ptr(vrs[i], il);
					icode_make_copystruct(dest, vrs[i], il);
				}
				was_struct = 1;
			} else {
				/* Floating point - goes thru x87/SSE */
				vreg_faultin_x87(NULL, NULL, vrs[i], il, 0);
				if (IS_FLOATING(vrs[i]->type->code)) {
					was_float = 1;
					if (need_dap
						&& vrs[i]->type->code
						== TY_FLOAT) {
#if 0
						struct type *ty
							= make_basic_type(TY_DOUBLE);

						vrs[i] = backend->
							icode_make_cast(vrs[i],ty,il);
#endif
						/*
						 * Promote float to double; on
						 * OSX with a real conversion,
						 * elsewhere just by relabeling
						 * the vreg (st0 holds the full
						 * precision value anyway)
						 */
						if (sysflag == OS_OSX
							&& vrs[i]->type->code == TY_FLOAT) {
							struct type *ty
								= make_basic_type(TY_DOUBLE);

							vrs[i] = backend->
								icode_make_cast(vrs[i],ty,il);
						} else {
							vrs[i] = n_xmemdup(vrs[i],
								sizeof *vrs[i]);
							vrs[i]->type = make_basic_type(
								TY_DOUBLE);
							vrs[i]->size = backend->
								get_sizeof_type(vrs[i]->
									type, NULL);
						}
					}
					if (vrs[i]->type->code == TY_LDOUBLE) {
						if (sysflag == OS_OSX) {
#if 0
							/* 6 bytes of padding */
							icode_make_allocstack(NULL, 6, il);
							allpushed += 6;
#endif
						} else {
#if 0
							/* 2 bytes of padding */
							icode_make_allocstack(NULL, 2, il);
							allpushed += 2;
#endif
						}
					}
					/*
					 * No push insn for fp values; store
					 * from the fp register into manually
					 * allocated stack space instead
					 */
					dest = vreg_stack_alloc(vrs[i]->type,
						il, 0, NULL);
					vreg_map_preg(dest, vrs[i]->pregs[0]);
					icode_make_store(NULL, dest, dest, il);

					if (vrs[i]->type->code == TY_DOUBLE) {
						allpushed += 8;
					} else if (vrs[i]->type->code == TY_FLOAT) {
						allpushed += 4;
					} else {
						allpushed += vrs[i]->size;
						/* allpushed += 10;*/
					}
				}
			}
		}

		/* Everything not handled above is pushed as a word */
		if (!was_struct && !was_float && !was_llong) {
			ii = icode_make_push(vrs[i], il);
			if (vrs[i]->size < 4) {
				/* bytes and shorts are passed as words */
				allpushed += 4;
			} else if (vrs[i]->type->tlist != NULL
				&& vrs[i]->type->tlist->type == TN_ARRAY_OF) {
				allpushed += 4;
			} else {
				allpushed += vrs[i]->size;
			}
			append_icode_list(il, ii);
		}

		free_pregs_vreg(vrs[i], il, 0, 0);
		if (vr2 && vr2->from_ptr && vr2->from_ptr->pregs[0]
			&& vr2->from_ptr->pregs[0]->vreg == vr2->from_ptr) {
			free_preg(vr2->from_ptr->pregs[0], il, 0, 0);
		}
	}

	if (struct_return) {
		/* Push hidden pointer to the struct return storage */
		struct vreg *addr = vreg_alloc(NULL, NULL, NULL, NULL);

		/*
		 * 06/15/09: icode_make_addrof() apparently happily used stale
		 * registers for parent struct pointers. Such invalid
		 * registers can happen if memcpy() is used to pass a struct
		 * by value. alloc_gpr() used by icode_make_addrof() requires
		 * a struct type rather than ``function returning struct'', so
		 * we temporarily set the type list to NULL.
		 * XXX Can this break in the backend?
		 */
		{
			struct reg *r;
			/*ii*/ r = make_addrof_structret(struct_lvalue, il);

			addr->pregs[0] = r /*ii->dat*/;
			addr->size = 4;
		}

		ii = icode_make_push(addr, il);
		append_icode_list(il, ii);

		/*
		 * Adjust amount of bytes allocated; the push above adds
		 * 4 to it but it's the callee that cleans up the hidden
		 * pointer, so the count needs to be fixed manually (as
		 * opposed to having emit_freestack do it.)
		 * XXX this is very ugly
		 */
		ii = icode_make_adj_allocated(-4);
		append_icode_list(il, ii);
		free_preg(addr->pregs[0], il, 0, 0);
	}


	/*
	 * In the x86 ABI, the caller is responsible for saving
	 * eax/ecx/edx (but not ebx, esi, edi), so that's what we
	 * do here
	 */
	backend->invalidate_gprs(il, 1, INV_FOR_FCALL);

	if (ty->tlist->type == TN_POINTER_TO) {
		/* Need to indirect thru function pointer */
		vreg_faultin(NULL, NULL, tmpvr, il, 0);
		ii = icode_make_call_indir(tmpvr->pregs[0]);
		tmpvr->pregs[0]->used = 0;
		tmpvr->pregs[0]->vreg = NULL;
	} else {
		ii = icode_make_call(ty->name);
		if (IS_ASM_RENAMED(ty->flags)) {
			ii->hints |= HINT_INSTR_RENAMED;
		}
	}
	append_icode_list(il, ii);
	/* cdecl: caller pops the argument bytes */
	ii = icode_make_freestack(allpushed);
	append_icode_list(il, ii);

	ret = vreg_alloc(NULL, NULL, NULL, NULL);
	ret->type = ty;

	/*
	 * 07/06/2007 What the HELL!??!?! This stuff still did
	 *    if (ty->tlist->next != NULL) {
	 * to check if the function returns a pointer, not
	 * taking into account that this could be a call thru
	 * a function pointer. I thought I had this fixed
	 * everywhere but apparently it was only done in AMD64
	 * and all other backends were broken :-(
	 */
#if 0
	if (ty->tlist->next != NULL) {
#endif

	/* Pick the register(s) holding the return value */
	if ((ty->tlist->type == TN_POINTER_TO
		&& ty->tlist->next->next != NULL)
		|| (ty->tlist->type == TN_FUNCTION
		&& ty->tlist->next != NULL)) {
		/* Must be pointer */
		ret->pregs[0] = &x86_gprs[0];
	} else {
		if (IS_CHAR(ty->code)) {
			/* al (low byte of eax) */
			ret->pregs[0] = x86_gprs[0].composed_of[0]->
				composed_of[1];
		} else if (IS_SHORT(ty->code)) {
			/* ax */
			ret->pregs[0] = x86_gprs[0].composed_of[0];
		} else if (IS_INT(ty->code)
			|| IS_LONG(ty->code)
			|| ty->code == TY_ENUM) { /* XXX */
			ret->pregs[0] = &x86_gprs[0];
		} else if (IS_LLONG(ty->code)) {
			/* eax:edx register pair */
			ret->pregs[0] = &x86_gprs[0];
			ret->is_multi_reg_obj = 2;
		} else if (ty->code == TY_FLOAT
			|| ty->code == TY_DOUBLE
			|| ty->code == TY_LDOUBLE) {
			if (sysflag == OS_OSX
				&& ty->code != TY_LDOUBLE) {
				ret->pregs[0] = &x86_sse_regs[0];
			} else {
				ret->pregs[0] = &x86_fprs[0];
			}
		} else if (ty->code == TY_STRUCT
			|| ty->code == TY_UNION) {
			/*
			 * 08/16/07: Added this
			 */
			if (ret_is_anon_struct) {
				ret = struct_lvalue;
			}
			ret->struct_ret = 1;
		} else if (ty->code == TY_VOID) {
			; /* Nothing! */
		}
	}

	if (ret->pregs[0] != NULL) {
		vreg_map_preg(ret, ret->pregs[0]);
		if (ret->is_multi_reg_obj) {
			vreg_map_preg2(ret, &x86_gprs[3]);
		}
	}

	/* Strip the function (pointer) node to get the return type */
	ret->type = n_xmemdup(ret->type, sizeof *ret->type);
	if (ret->type->tlist->type == TN_POINTER_TO) {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next->next);
	} else {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next);
	}
	if (ret->type->code != TY_VOID || ret->type->tlist) {
		ret->size = backend->get_sizeof_type(ret->type, NULL);
	}

	if (is_x87_trash(ret)) {
		/*
		 * Don't keep stuff in x87 registers, ever!!!
		 */
		free_preg(ret->pregs[0], il, 1, 1);
	}
	return ret;
}
1910
/*
 * Generate the icode for a return statement on x86: load the return
 * value (if any) into the ABI return location - eax (possibly a
 * sub-register for char/short), eax:edx for long long, st0 for
 * floating point, or the caller-provided hidden struct pointer for
 * struct/union returns - then append the ret instruction.
 * Always returns 0.
 */
static int
icode_make_return(struct vreg *vr, struct icode_list *il) {
	struct icode_instr *ii;
#if 0
	struct type *rtype = curfunc->proto->dtype;

#endif
	struct type *rtype = curfunc->rettype;

#if 0
	oldtn = curfunc->proto->dtype->tlist;
	rtype->tlist = rtype->tlist->next;
#endif
	/*
	 * 08/06/17: We were removing the first typenode, then performed
	 * the return, then restored the typenode. This is wrong because
	 * the generated icode may rightly depend on the type being stable
	 * instead of having it changed behind its back!
	 */
#if 0
	rtype = func_to_return_type(rtype);
#endif

	if (vr != NULL) {
		if (IS_CHAR(rtype->code)
			|| IS_SHORT(rtype->code)
			|| IS_INT(rtype->code)
			|| IS_LONG(rtype->code)
			|| rtype->code == TY_ENUM /* 06/15/09: Was missing?!? */
			|| rtype->tlist != NULL) {
			/* Integral or pointer: returned in (part of) eax */
			struct reg *r = &x86_gprs[0];
			int size = backend->get_sizeof_type(rtype,0);

			if (r->size > (unsigned long)size) {
				/* Use al/ax for sub-word types */
				r = get_smaller_reg(r, size);
			}
			vreg_faultin(r, NULL, vr, il, 0);
		} else if (IS_LLONG(rtype->code)) {
			/* long long: eax (low) : edx (high) */
			vreg_faultin(&x86_gprs[0], &x86_gprs[3],
				vr, il, 0);
		} else if (rtype->code == TY_FLOAT
			|| rtype->code == TY_DOUBLE
			|| rtype->code == TY_LDOUBLE) {
			/* Return in st0 */
			vreg_faultin_x87(NULL, NULL, vr, il, 0);
		} else if (rtype->code == TY_STRUCT
			|| rtype->code == TY_UNION) {
			struct stack_block *sb;
			struct vreg *dest;
			struct vreg *from_ptr;
			/* static: these back a synthesized vreg below */
			static struct decl dec;
			struct decl *decp;
			unsigned long offset;
			static struct type_node tn;

			/* Get hidden struct pointer for storing return */
			offset = 8; /* Move past ebp,eip */
			sb = make_stack_block(offset, 4);
			sb->is_func_arg = 1;
			dec.stack_addr = sb;
			dec.dtype = n_xmemdup(rtype, sizeof *rtype);
			decp = n_xmemdup(&dec, sizeof dec);
			from_ptr = vreg_alloc(decp, NULL, NULL, NULL);

			/* Make from_ptr a pointer-to-struct vreg */
			tn.type = TN_POINTER_TO;
			from_ptr->type->tlist = &tn;
			from_ptr->size = 4;
			vreg_faultin(NULL, NULL, from_ptr, il, 0);

			dest = vreg_alloc(NULL, NULL, NULL, NULL);
			dest->from_ptr = from_ptr;

			/* vr may come from pointer */
			vreg_faultin_ptr(vr, il);
			icode_make_copystruct(dest, vr, il);
		}
	}
	ii = icode_make_ret(vr);
	append_icode_list(il, ii);

#if 0
	rtype->tlist = oldtn;
#endif

	return 0;
}
1997
1998 /*
1999 * Deal with preparations necessary to make things work with the terrible
2000 * x86 design
2001 */
/*
 * Deal with preparations necessary to make things work with the terrible
 * x86 design
 */
/*
 * Satisfy x86 register constraints before a binary operation:
 *  - div/mod/mul need the destination in eax (rax on AMD64) and
 *    trash edx/rdx,
 *  - variable shift counts must be in cl,
 *  - x87/SSE floating point needs no preparation here.
 * dest0/src0 are in/out parameters: the vregs may be replaced (e.g. a
 * shift count cast to char), so callers must reread them afterwards.
 */
static void
icode_prepare_op(
	struct vreg **dest0,
	struct vreg **src0,
	int op,
	struct icode_list *il) {

	struct vreg *dest = *dest0;
	struct vreg *src = *src0;

	/*
	 * 05/30/11: This was missing! This function implicitly assumed both
	 * operands to be register-resident already (e.g. see the eax checks
	 * below which do not verify that eax is really mapped to the vreg)-
	 * which was true in most but not all cases. This broke compound
	 * assignment operators for VLAs, and may have caused bad code
	 * generation in other cases as well
	 */
	if (!is_floating_type(dest->type)) {
		vreg_faultin_protected(dest, NULL, NULL, src, il, 0);
		vreg_faultin_protected(src, NULL, NULL, dest, il, 0);
	}

	/*
	 * For long long, the preparations below only apply to shifting
	 */
	if (dest->is_multi_reg_obj && op != TOK_OP_BSHL && op != TOK_OP_BSHR) {
		return;
	}
	if (is_floating_type(dest->type)) {
		if (backend->arch == ARCH_X86
			|| dest->type->code == TY_LDOUBLE) {
#if 0
			/*
			 * As we can only write to memory from st0, it is
			 * desirable to store all results there
			 */
			if (dest->pregs[0] != &x86_fprs[/*0*/ 1]) {
				icode_make_x86_fxch(dest->pregs[0],
					&x86_fprs[0], il);
			}
#endif
			return;
		} else {
			/* Has to be SSE (AMD64) */
			return;
		}
	}

	if (op == TOK_OP_DIVIDE || op == TOK_OP_MOD || op == TOK_OP_MULTI) {
		/* Destination must be in eax, or rax */

		if (backend->arch == ARCH_AMD64 && dest->size == 8) {
			/* 64bit operation - use rax, protect rdx */
			if (dest->pregs[0] != &amd64_x86_gprs[0]) {
				free_preg(&amd64_x86_gprs[0], il, 1, 1);
				vreg_faultin(&amd64_x86_gprs[0], NULL, dest, il,
					0);
			}
			reg_set_unallocatable(&amd64_x86_gprs[3]);
			vreg_faultin_protected(dest, /*NULL*/
				NULL, NULL, src, il, 0);
			reg_set_allocatable(&amd64_x86_gprs[3]);
			return;
		}
		if (dest->pregs[0] != &x86_gprs[0]) {
			/*
			 * 05/20/11: This unconditionally freed eax for AMD64
			 * too, such that if rax had been in use, it was not
			 * saved but still marked as in use - which could lead
			 * to problems later on when both rax and eax were
			 * in use
			 */
			if (backend->arch == ARCH_AMD64) {
				free_preg(&amd64_x86_gprs[0], il, 1, 1);
			} else {
				free_preg(&x86_gprs[0], il, 1, 1);
			}
			vreg_faultin(&x86_gprs[0], NULL, dest, il, 0);
		}

		/*
		 * 04/13/08: Only load immediate value if there is no
		 * immediate instruction available!
		 */
		if (src->from_const == NULL
			|| !backend->have_immediate_op(dest->type, op)) {
			/* may not be edx for div */
			struct reg *srcreg = NULL;

			if (src->pregs[0] && src->pregs[0]->vreg == src) {
				if (src->pregs[0] == &x86_gprs[3]) {
					/* Have to move it elsewhere */

					/*
					 * 10/31/07: Pass size instead of 0.
					 * We want a 4 byte reg, but 0 gives
					 * us a full GPR. That breaks on
					 * AMD64, where we'll be getting an
					 * 8 byte reg instead
					 *
					 * 08/10/08: This was missing the
					 * possibility that the source could
					 * be loaded to eax, which is
					 * obviously wrong because that's
					 * the target location! Thus set eax
					 * unallocatable
					 * XXX what if it was unallocatable
					 * before?
					 */
					reg_set_unallocatable(&x86_gprs[0]);
					srcreg = ALLOC_GPR(curfunc, /*0*/4,
						il, NULL);
					reg_set_allocatable(&x86_gprs[0]);
				}
			}
			reg_set_unallocatable(&x86_gprs[3]);
			vreg_faultin_protected(dest, /*NULL*/
				srcreg, NULL, src, il, 0);
			reg_set_allocatable(&x86_gprs[3]);
		}

		/* edx is trashed in any case - save it */
		/*
		 * 05/20/11: This unconditionally freed edx for AMD64
		 * too, such that if rdx had been in use, it was not
		 * saved but still marked as in use - which could lead
		 * to problems later on when both rdx and edx were
		 * in use
		 */
		if (backend->arch == ARCH_AMD64) {
			free_preg(&amd64_x86_gprs[3], il, 1, 1);
		} else {
			free_preg(&x86_gprs[3], il, 1, 1);
		}
	} else if ((op == TOK_OP_BSHL || op == TOK_OP_BSHR)
		&& (src->from_const == NULL
		|| !backend->have_immediate_op(dest->type, op))) {
		/*
		 * Source must be in cl
		 *
		 * 04/13/08: Only load immediate value if there is no
		 * immediate instruction available!
		 */
		struct reg *reg_cl;

		/* cl = low byte of ecx */
		reg_cl = x86_gprs[2]
			.composed_of[0]
			->composed_of[1];
		if (src->pregs[0] != reg_cl
			|| reg_cl->vreg != src) {
			/*
			 * 05/20/11: This unconditionally freed ecx for
			 * AMD64
			 */
			if (backend->arch == ARCH_AMD64) {
				free_preg(&amd64_x86_gprs[2], il, 1, 1);
			} else {
				free_preg(&x86_gprs[2], il, 1, 1);
			}

			if (src->is_multi_reg_obj) {
				/* Narrow a long long shift count to char */
				reg_set_unallocatable(&x86_gprs[2]);
				src = backend->icode_make_cast(src,
					make_basic_type(TY_CHAR), il);
				*src0 = src;
				reg_set_allocatable(&x86_gprs[2]);
			}

			/*
			 * Need to ensure that the operand is loaded
			 * correctly regardless of its size.
			 * XXX this is really nasty, perhasp we should
			 * demand that callers guarantee a byte-sized
			 * or word-sized vreg?!
			 */
			if (src->size == 1) {
				vreg_faultin(reg_cl, NULL, src, il, 0);
			} else if (src->size == 2) {
				vreg_faultin(x86_gprs[2].composed_of[0],
					NULL, src, il, 0);
			} else {
				/*
				 * 05/20/11: This did not distinguish
				 * between x86 and AMD64, such that
				 * ecx was always used even for 64bit
				 * integers (resulting in assembler
				 * errors)
				 */
				if (backend->arch == ARCH_AMD64
					&& src->size == 8) {
					vreg_faultin(&amd64_x86_gprs[2],
						NULL, src, il, 0);
				} else {
					vreg_faultin(&x86_gprs[2],
						NULL, src, il, 0);
				}
			}
			vreg_faultin_protected(src, NULL, NULL,
				dest, il, 0);
		}
	}
}
2204
2205
2206 static void
2207 change_preg_size(
2208 struct vreg *vr,
2209 struct icode_list *il,
2210 struct type *to,
2211 struct type *from);
2212
2213 /*
2214 * ,==x87==x87====x87=======x87===============x87===========,
2215 * |~~*,.,*~{ 80x87 FLOATING POINT KLUDGERY DELUXE }~*,.,*~~|
2216 * `======87=87=87===========87=======87==87==========87===='
2217 *
2218 * ``struct vreg floatbuf'' is used as buffer to convert between integers
2219 * and floats because:
2220 * - fild cannot take a GPR or immediate operand
2221 * - fstp can only write to memory too
2222 *
2223 * ``struct vreg x87cw_new'' and ``struct vreg x87cw_old'' are used as buffers
2224 * for storing the x87 status control word. Converting a floating point value
2225 * to an integer with x87 by default rounds mathematically. However, in C,
2226 * ``(int)fp_value'' is required to *truncate* the fractional part. The x87
2227 * control word therefore has to be changed before and restored after
2228 * performing the fp-to-integer conversion in order to make the thing behave as
2229 * desired. (I couldn't believe it when I first saw the code gcc generated for
2230 * this exercise.)
2231 */
#if ! REMOVE_FLOATBUF
static void
#else
static struct vreg *
#endif
/*
 * Store an int/fp value held in register(s) to a memory buffer so that
 * x87 fild/fistp (which cannot take GPR or immediate operands) can
 * convert between integers and floating point.
 *
 * With REMOVE_FLOATBUF set (the current code path), a per-use stack
 * block is allocated and a vreg describing it is returned; `is_int'
 * distinguishes int->fp conversion (multi-reg mapping allowed) from
 * fp->int. Without REMOVE_FLOATBUF, a single static `floatbuf' vreg
 * is (ab)used as the buffer and nothing is returned.
 */
load_floatbuf(struct vreg *data,
#if REMOVE_FLOATBUF
		struct type *from,
#endif
		struct icode_list *il
#if REMOVE_FLOATBUF
		, int is_int
#endif
		) {


#if ! REMOVE_FLOATBUF
	if (floatbuf.var_backed == NULL) {
		/* Not allocated yet */
		static struct decl dec;
		static struct type ty;

		ty = *make_basic_type(/*TY_INT*/TY_LLONG);
		dec.dtype = &ty;
		ty.name = "_Floatbuf";
		floatbuf.var_backed = &dec;
		floatbuf.type = &ty;
		if (backend->arch == ARCH_AMD64) {
			/*
			 * Wow this used data->pregs[0]->size, which for an
			 * x87 fpr was 12... So stores to it did
			 *      movt val, _Floatbuf
			 * I guess 8 is invalid too? So use 4 always
			 */
			floatbuf.size = 4 ; /*data->pregs[0]->size;*/
		} else {
			floatbuf.size = 4; /* XXX long long :( */
		}
	}
	vreg_map_preg(&floatbuf, data->pregs[0]);
	/*
	 * NOTE(review): `tmp' is not declared anywhere in this branch;
	 * this legacy (!REMOVE_FLOATBUF) path presumably no longer
	 * compiles - confirm before ever re-enabling it
	 */
	tmp = n_xmemdup(&floatbuf, sizeof floatbuf);
	icode_make_store(curfunc, &floatbuf, &floatbuf, il);
#else
	/* REMOVE_FLOATBUF is set */
	static struct vreg vr;
	struct vreg *resvr;
	int res_type_changed_to_64bit = 0;	/* 06/15/08: Was 1!! */

	if (from->code < /*TY_INT*/ TY_LLONG) {
		/*
		 * Smaller than int isn't possible - must have been
		 * promoted
		 */
		from = make_basic_type( /*TY_INT*/ TY_LLONG);
	}

	vr.type = from;
	vr.size = backend->get_sizeof_type(from, NULL);

	vr.is_multi_reg_obj = data->is_multi_reg_obj;
	vr.pregs[0] = data->pregs[0];
	vr.pregs[1] = data->pregs[1];

	resvr = vreg_alloc(NULL,NULL,NULL,NULL);
	*resvr = vr;

	if ((IS_INT(data->type->code) || IS_LONG(data->type->code))
		&& data->type->sign == TOK_KEY_UNSIGNED) {
		/*
		 * 06/08/08: Unsigned integers require storing as 64bit
		 */
		vreg_set_new_type(resvr, make_basic_type(TY_LLONG));
		res_type_changed_to_64bit = 1;
	}

	vreg_map_preg(resvr, data->pregs[0]);

	/*
	 * 06/15/08: Multi-register mapping was incorrectly done for
	 * fp-to-int conversion, but is only correct the other way
	 * around!
	 */
	if (resvr->is_multi_reg_obj
		&& !res_type_changed_to_64bit
		&& is_int) {
		vreg_map_preg2(resvr, data->pregs[1]);
	}

	if (IS_FLOATING(from->code)) {
		/* Save and convert */
		/*
		   vreg_stack_alloc() doesn't work because it doesn't
		   immediately give us a stack_block which can be
		   assigned to other vregs too
		resvr = vreg_stack_alloc(from, il, 1, NULL);*/
		resvr->stack_addr = icode_alloc_reg_stack_block(curfunc, resvr->size);
		/*
		 * 06/15/08: Always use data->type instead of resvr->type!
		 * resvr->type is the source type...?
		 */

		icode_make_x86_fist(resvr->pregs[0], resvr,
			(res_type_changed_to_64bit
			/*&& is_integral_type(resvr->type)*/)?
				resvr->type: data->type, il);
	} else {
		/* Integer source: plain store to the buffer */
		/*	free_preg(resvr->pregs[0], il, 1, 1);*/
		icode_make_store(curfunc, resvr, resvr, il);
	}

	/* Yawn, another duplication to ensure the multi gpr flag is
	 * preserved for the stores above
	 *
	 * 06/08/08: This is now actually beneficial because if we stored
	 * an unsigned 32bit integer to a 64bit storage block (which is
	 * necessary to convert large values such as UINT_MAX correctly),
	 * then we can now set the type of that block to ``unsigned int'',
	 * thus ensuring that the subsequent load only looks at the lower
	 * double-word
	 */
	/*	return dup_vreg(resvr);*/
	resvr = dup_vreg(resvr);
	vreg_set_new_type(resvr, data->type);
	return resvr;
#endif /* REMOVE_FLOATBUF */
}
2359
2360 #if REMOVE_FLOATBUF
2361
static struct vreg *
load_integer_floatbuf(struct vreg *data, struct type *from,
	struct icode_list *il) {

	/*
	 * Convenience wrapper around load_floatbuf() for integer
	 * sources (is_int = 1): spills the integer value ``data'' of
	 * type ``from'' to a stack buffer so it can later be loaded
	 * into an x87 register (e.g. via fild)
	 */
	return load_floatbuf(data, from, il, 1);
}
2368
2369
static struct vreg *
load_floatval_floatbuf(struct vreg *data, struct type *from,
	struct icode_list *il) {

	/*
	 * Convenience wrapper around load_floatbuf() for floating
	 * point sources (is_int = 0): spills the fp value ``data'' of
	 * type ``from'' to a stack buffer for subsequent reloading
	 * as an integer
	 */
	return load_floatbuf(data, from, il, 0);
}
2376
2377 #endif
2378
2379
2380 /* Save FPU CW to memory */
2381 static void
2382 store_x87cw(struct icode_list *il) {
2383 if (x87cw_old.var_backed == NULL) {
2384 /* Not allocated yet */
2385 static struct decl dec_old;
2386 static struct decl dec_new;
2387 static struct type ty_old;
2388 static struct type ty_new;
2389
2390 ty_old = *make_basic_type(TY_SHORT);
2391 ty_old.name = "_X87CW_old";
2392 dec_old.dtype = &ty_old;
2393 x87cw_old.var_backed = &dec_old;
2394 x87cw_old.type = &ty_old;
2395 x87cw_old.size = 2;
2396
2397 ty_new = *make_basic_type(TY_SHORT);
2398 ty_new.name = "_X87CW_new";
2399 dec_new.dtype = &ty_new;
2400 x87cw_new.var_backed = &dec_new;
2401 x87cw_new.type = &ty_new;
2402 x87cw_new.size = 2;
2403 }
2404 icode_make_x86_store_x87cw(&x87cw_old, il);
2405 }
2406
2407 /* Create modified copy of in-memory CW */
2408 static void
2409 modify_x87cw(struct icode_list *il) {
2410 struct reg *r;
2411 struct icode_instr *ii;
2412
2413 r = alloc_16_or_32bit_noesiedi(curfunc, 2, il, NULL);
2414 vreg_faultin(r, NULL, &x87cw_old, il, 0);
2415 vreg_map_preg(&x87cw_new, r);
2416 ii = icode_make_setreg(r->composed_of[0], 12);
2417 append_icode_list(il, ii);
2418 icode_make_store(curfunc, &x87cw_new, &x87cw_new, il);
2419 r->used = 0;
2420 }
2421
2422 /* Load CW from memory */
2423 static void
2424 load_x87cw(struct vreg *which, struct icode_list *il) {
2425 icode_make_x86_load_x87cw(which, il);
2426 }
2427
2428
2429
2430 #define AMD64_OR_X86_REG(idx) \
2431 (backend->arch == ARCH_AMD64? &amd64_x86_gprs[idx]: &x86_gprs[idx])
2432
/*
 * Remap the register-resident value ``vr'' when an integral cast from
 * type ``from'' to type ``to'' changes the operand size. vr->size is
 * already the target size on entry; the source size is recomputed here.
 * Handles extension into a larger (parent or unrelated) register,
 * truncation into a sub register, and the x86 long long cases
 * (eax/edx pairs, cdq for sign extension).
 */
static void
change_preg_size(
	struct vreg *vr,
	struct icode_list *il,
	struct type *to,
	struct type *from) {

	int i;
	struct reg *extreg = NULL;
	struct icode_instr *ii;
	size_t from_size;
	int amd64_reg = 0;

	from_size = backend->get_sizeof_type(from, NULL);

	/* Find which GPR vr's current register belongs to */
	for (i = 0; i < N_GPRS; ++i) {
		if (is_member_of_reg(AMD64_OR_X86_REG(i), vr->pregs[0])) {
			break;
		}
	}
	if (i == N_GPRS) {
		if (backend->arch == ARCH_AMD64) {
			/*
			 * Not an alias of the classic x86 GPRs -
			 * presumably one of the AMD64-only registers
			 */
			amd64_reg = 1;
		} else {
			printf("FATAL ERROR: %s is not member of any gpr\n",
				vr->pregs[0]->name);
			abort();
		}
	}


	if (vr->size > from_size
		&& (!IS_LLONG(to->code) || from_size != 4)) {
		/*
		 * A sub register is extended to a bigger register
		 */
		vr->pregs[0]->used = 0;
		if (i < N_GPRS && reg_unused(AMD64_OR_X86_REG(i))) {
			/* Use parent reg, e.g. movsx ax, al */
			if (backend->arch == ARCH_AMD64
				&& vr->size == 8) {
				;
			} else {
				extreg = vr->size == 4 || vr->size == 8?
					&x86_gprs[i]: x86_gprs[i].composed_of[0];
			}
		} else {
			/* Use unrelated reg */
			size_t size;

			if (vr->size == 8 && backend->arch != ARCH_AMD64) {
				size = 4;
			} else {
				size = vr->size;
			}
			if (from_size == 1) {
				/* 8bit source - esi/edi have no 8bit parts */
				extreg = backend->alloc_16_or_32bit_noesiedi
					(curfunc, size, il, NULL);
			} else {
				extreg = ALLOC_GPR(curfunc, size, il, NULL);
			}
		}
	}

	if (vr->size == 2) {
		if (from_size == 1) {
			/* char -> short: extend into extreg */
			free_preg(vr->pregs[0], il, 1, 0);
			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
			vreg_map_preg(vr, extreg);
		} else if (from_size == 4) {
			/* 4 - truncate */
			free_preg(vr->pregs[0], il, 1, 0);
			vreg_map_preg(vr, vr->pregs[0]->composed_of[0]);
		} else if (from_size == 8) {
			/* long long or long on amd64 */
			if (backend->arch == ARCH_X86) {
				free_preg(vr->pregs[0], il, 1, 0);
				free_preg(vr->pregs[1], il, 1, 0);
				vreg_map_preg(vr, vr->pregs[0]->composed_of[0]);
			} else {
				free_preg(vr->pregs[0], il, 1, 0);
				vreg_map_preg(vr,
					vr->pregs[0]
					->composed_of[0]
					->composed_of[0]);
			}
		}
	} else if (vr->size == 4) {
		if (from_size == 8) {
			/*
			 * long long! Truncate - low-order 32bits are in
			 * first preg, on x86
			 */
			if (backend->arch == ARCH_X86) {
				free_preg(vr->pregs[1], il, 1, 0);
				vreg_map_preg(vr, vr->pregs[0]);
			} else {
				free_preg(vr->pregs[0], il, 1, 0);
				vreg_map_preg(vr, vr->pregs[0]->
					composed_of[0]);
			}
		} else {
			/* extend */
			/*
			 * Is this sub register the only used one? If not,
			 * the other one must be saved
			 */
			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
			free_preg(vr->pregs[0], il, 1, 0);
			vreg_map_preg(vr, extreg);
		}
	} else if (vr->size == 8) {
		/* long long! */
		if (backend->arch == ARCH_AMD64) {
			if (extreg == NULL) {
				extreg = ALLOC_GPR(curfunc, 0, il, NULL);
			}
			free_preg(vr->pregs[0], il, 1, 0);
			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
			vreg_map_preg(vr, extreg);
			return;
		}

#if 0
		if (to->code == TY_ULLONG) {
			if (extreg != NULL) {
				icode_make_copyreg(extreg, vr->pregs[0],
					to, from, il);
				vreg_map_preg(vr, extreg);
			} else {
				/*
				 * dword being converted to long long -
				 * keep mapping
				 */
				vreg_map_preg(vr, vr->pregs[0]);
			}
			reg_set_unallocatable(vr->pregs[0]);
			r = ALLOC_GPR(curfunc, 4, il, NULL);
			reg_set_allocatable(vr->pregs[0]);
			vreg_map_preg2(vr, r);
			ii = icode_make_setreg(r, 0);
			append_icode_list(il, ii);
		} else {
#endif
			/* signed long long */
			/*
			 * On x86 the 64bit result must live in eax:edx
			 * (for cdq); move the source dword to eax first
			 * if necessary
			 */
			if (vr->pregs[0] != &x86_gprs[0]) {
				if (!reg_unused(&x86_gprs[0])) {
					free_preg(&x86_gprs[0], il, 1, 1);
				}

				/* Source may be associated with a variable?! */
				free_preg(vr->pregs[0], il, 1, 0);
				icode_make_copyreg(&x86_gprs[0], vr->pregs[0],
					from, from, il);
			}
			if (!reg_unused(&x86_gprs[3])) {
				free_preg(&x86_gprs[3], il, 1, 1);
			}
			if (from->sign == TOK_KEY_SIGNED) {
				/* Sign-extend eax into edx */
				icode_make_x86_cdq(il);
			} else {
				/* Zero-extend: clear the upper dword reg */
				ii = icode_make_setreg(&x86_gprs[3], 0);
				append_icode_list(il, ii);
			}
			vreg_map_preg(vr, &x86_gprs[0]);
			vreg_map_preg2(vr, &x86_gprs[3]);
#if 0
		}
#endif
	} else {
		/* Must be 1 - truncate */
		struct reg *r;


		if (backend->arch == ARCH_AMD64
			&& from_size == 8) {
			free_preg(vr->pregs[0], il, 1, 0);
			vreg_map_preg(vr, vr->pregs[0]->
				composed_of[0]->	/* 32bit */
				composed_of[0]->	/* 16bit */
				composed_of[amd64_reg? 0: 1]);
			return;
		}

		/*
		 * 08/18/08: This was missing the check for amd64_reg, so it
		 * would fail for i = 6
		 */
		if (i >= 4 && !amd64_reg) {
			/*
			 * Whoops - source resides in esi/edi, which
			 * do not have 8bit sub registers
			 */
			free_preg(&x86_gprs[i], il, 1, 0);
			/*
			 * 06/20/08: This used to pass the source size, i.e.
			 * possibly 8 for long long! This was wrong because
			 * it ended up setting multi-reg state in the backend
			 * and expecting a second alloc_*() for the second
			 * dword. This is wrong because we only want a single
			 * 32bit part register
			 */
			r = alloc_16_or_32bit_noesiedi(curfunc,
				/*from_size*/4, il, NULL);
			icode_make_copyreg(r, &x86_gprs[i], from, from, il);
			free_preg(r, il, 1, 0);
		} else if (!amd64_reg) {
			r = &x86_gprs[i];
			free_preg(r, il, 1, 0);
		} else {
			/*
			 * 08/18/08: This was missing?!!?!?!??
			 */
			r = vr->pregs[0];
		}
#if 0
		free_pregs_vreg(vr, il, 1, 0);
#endif

		/*
		 * 09/30/07: Wow, this unconditionally assumed that
		 * r is a 32bit register! That broke short-to-char
		 * conversion, but apparently only in some cases
		 *
		 * XXX we have to use r->size instead of from_size
		 * here... otherwise e.g. on AMD64 a
		 *
		 *    *charp++ = *ushortp;
		 *
		 * ... assignment gives the short source value in
		 * eax, which may be a conversion/promotion issue
		 */
		if (r->size >= 4) {
			if (r->size == 8 && from_size == 8) {
				/* AMD64 */
				vreg_map_preg(vr,
					r->
					composed_of[0]->
					composed_of[0]->
					/* was missing amd64_reg case */
					composed_of[amd64_reg? 0: 1]);
			} else {
				vreg_map_preg(vr,
					r->
					composed_of[0]->
					/* was missing amd64_reg case */
					composed_of[amd64_reg? 0: 1]);
			}
		} else {
			/* was missing amd64_reg case */
			vreg_map_preg(vr,
				r->composed_of[amd64_reg? 0: 1]);
		}
	}
}
2688
2689
/*
 * Convert between floating point types on AMD64 (and OSX/SSE x86):
 * float/double live in SSE registers, long double in x87 registers,
 * so conversions involving long double must bounce through memory.
 *
 * Returns 1 if the result ends up in an x87 register (the caller is
 * expected to spill it so the x87 stack does not fill up), else 0.
 */
static int
convert_amd64_fp(
	struct type *to,
	struct type *from,
	struct vreg *ret,
	struct icode_list *il) {

	struct vreg *fbvr;
	int rc = 0;

	if (from->code == to->code) {
		/*
		 * 07/29/08: This didn't return the ``is long double''
		 * indicator, so the x87 register was not freed and the
		 * register stack filled up
		 *
		 * XXX This raises the question of why can a conversion
		 * of type T to itself (no-op) get this far and doesn't
		 * cause a very early return in icode_make_cast()?
		 */
		if (to->code == TY_LDOUBLE) {
			return 1;
		}
		return 0;
	}

	if (from->code == TY_LDOUBLE) {
		/*
		 * long double, resident in an x87 register, to
		 * float or double
		 */


#if ! REMOVE_FLOATBUF
		fbvr = n_xmemdup(&floatbuf, sizeof floatbuf);
#else
		fbvr = vreg_alloc(NULL,NULL,NULL,NULL);
#endif

		if (to->code == TY_DOUBLE) {
			fbvr->size = 8;
			fbvr->type = make_basic_type(TY_DOUBLE);
		} else {
			/* float */
			fbvr->size = 4;
			fbvr->type = make_basic_type(TY_FLOAT);
		}
		/* Store from x87 with the target size (fstps/fstpl) */
		vreg_map_preg(fbvr, ret->pregs[0]);
#if ! REMOVE_FLOATBUF
		icode_make_store(curfunc, fbvr, fbvr, il);
		free_preg(fbvr->pregs[0], il, 1, 0);
#else
		free_preg(fbvr->pregs[0], il, 1, 1);
#endif

		/* Now into SSE register */
		vreg_faultin(NULL, NULL, fbvr, il, 0);
		vreg_map_preg(ret, fbvr->pregs[0]);
	} else if (to->code == TY_LDOUBLE) {
		/*
		 * float or double, resident in an SSE register, to
		 * long double
		 */
		struct reg *r;
		struct vreg *tmp;

#if 0
		r = backend->alloc_fpr(curfunc, 12, il, NULL);
#endif
		r = &x86_fprs[0];

		/*
		 * 04/12/08: Fixed this
		 */
		tmp = dup_vreg(ret);
		tmp->type = from;
		tmp->size = backend->get_sizeof_type(from, NULL);
		vreg_map_preg(tmp, ret->pregs[0]);

		free_preg(/*ret->pregs[0]*/tmp->pregs[0], il, 1, 1); /* causes store */
		/*
		 * XXX hmm another temp adhoc vars :(
		 * There are lots of problems because we always
		 * work with ``ret'' which already has the target
		 * type set. We should use the source vreg more,
		 * which ahs the correct ype for loading
		 */
		tmp = dup_vreg(tmp /*, sizeof *ret*/);
		tmp->type = from;
		tmp->size = backend->get_sizeof_type(from, NULL);

		vreg_faultin_x87(r, NULL, tmp, il, 0);
		vreg_map_preg(ret, r);
		rc = 1;
	} else if (to->code == TY_DOUBLE) {
		/* float to double, purely in SSE */
		icode_make_amd64_cvtss2sd(ret->pregs[0], il);
	} else { /* double to float */
		icode_make_amd64_cvtsd2ss(ret->pregs[0], il);
	}
	return rc;
}
2791
2792 /*
2793 * Most of the time, instructions give meaning to data. This function
2794 * generates code required to convert virtual register ``src'' to type
2795 * ``to'' where necessary
2796 */
static struct vreg *
icode_make_cast(struct vreg *src, struct type *to, struct icode_list *il) {
	/*
	 * Convert ``src'' to type ``to'', emitting whatever icode the
	 * conversion requires (integer extension/truncation, int<->fp,
	 * x87<->SSE moves), and return a vreg of the target type.
	 * ``src'' itself is anonymified/disconnected, never mutated in
	 * place for struct/union types.
	 */
	struct reg *r;
	struct reg *r2;
	struct vreg *ret;
	struct type *from = src->type;
	struct type *orig_to = to;	/* ``to'' may be rewritten to uintptr_t below */
	size_t size;
	int res_is_x87_reg = 0;		/* result in x87 reg -> must be spilled at end */

	ret = src;
	if (ret->pregs[0] != NULL
		&& ret->pregs[0]->vreg == ret) {
		/* Item is already resident in a register */
		r = NULL;
	} else {
		/*
		 * Item is not resident yet so we get to choose
		 * a suitable register
		 */
#if 0
		if (IS_FLOATING(to->code)) {
			r = backend->alloc_fpr(curfunc, 0, il, NULL);
		} else {
			size = backend->get_sizeof_type(to, NULL);
			r = backend->alloc_gpr(curfunc, size, il, NULL);
		}
#endif
		r = 0;
	}

	if (is_x87_trash(ret)) {
		ret = x87_anonymify(ret, il);
		if (ret == src) {
			ret = n_xmemdup(ret, sizeof *ret);
		}
	} else {
		if (ret->type->tlist != NULL
			|| (ret->type->code != TY_STRUCT
			&& ret->type->code != TY_UNION)) {
			vreg_anonymify(&ret, NULL, NULL /*r*/, il);
		}

		if (ret == src) {
			/* XXX anonymify is broken */
			ret = vreg_disconnect(src);
		}
	}

	ret->type = to;

	if (to->code == TY_VOID) {
		if (to->tlist == NULL) {
			/* Cast to void discards the value entirely */
			ret->size = 0;
			free_pregs_vreg(ret, il, 0, 0);
			return ret;
		}
	} else {
		ret->is_nullptr_const = 0;
	}

	ret->size = backend->get_sizeof_type(to, NULL);

	if (from->tlist != NULL && to->tlist != NULL) {
		/*
		 * Pointers are always of same size
		 * and use same registers
		 */
		return ret;
	} else if (to->tlist != NULL) {
		/*
		 * Integral type to pointer type - cast to
		 * uintptr_t to get it to the same size
		 */
		to = backend->get_uintptr_t();
	}


	/*
	 * We may have to move the item to a different
	 * register as a result of the conversion
	 */
	if (is_floating_type(to)) {
		if (!is_floating_type(from)) {
			/* Integer to floating point */
			int from_size;

			from_size = backend->get_sizeof_type(from, NULL);
			/*
			 * 04/17/08: Convert to 64bit integer, so that
			 * 64bit fildq is used instead of 32bit fild!
			 * This is necessary for large (unsigned) 32bit
			 * values that are otherwise not converted
			 * properly
			 */
			if (from_size < 8) {
				/* Need to sign-extend first*/
				struct vreg *tmp =
					n_xmemdup(ret, sizeof *ret);
				tmp->size = 8;
				change_preg_size(tmp, il, /*to*/
					make_basic_type(TY_LLONG), from);
				ret = n_xmemdup(ret, sizeof *ret);
				vreg_map_preg(ret, tmp->pregs[0]);
				if (backend->arch == ARCH_X86) {
					vreg_map_preg2(ret, tmp->pregs[1]);
				}
				ret->type = make_basic_type(TY_LLONG);
				ret->size = 8;

				/*
				 * 07/24/08: This wrongly set the multi-reg
				 * flag for AMD64 as well
				 */
				if (backend->arch != ARCH_AMD64) {
					ret->is_multi_reg_obj = 2;
				}
				from = ret->type;
			}

			/*
			 * 08/04/08: Don't perform x86-like u-integer to long
			 * double conversion for 64bit integers on AMD64
			 * anymore
			 */
			if (backend->arch == ARCH_X86
				&& from->code == TY_ULLONG) {
				/*
				 * 08/05/09: Request 4 bytes instead of 8.
				 * 8 byte requests are always treated as
				 * multi-register requests, but we only
				 * want to allocate a single register
				 * (since we already have ret->pregs[0]).
				 * So the next ALLOC_GPR() - which may be
				 * for a 16bit or 8bit item - would
				 * wrongly return a 32bit GPR
				 */
				struct reg *temp =
					ALLOC_GPR(curfunc, /*8*/4, il, NULL);
				struct vreg *tempfb;

				r = backend->alloc_fpr(curfunc,
					0, il, NULL);

				tempfb = dup_vreg(ret);
				vreg_map_preg(tempfb, ret->pregs[0]);
				vreg_map_preg2(tempfb, ret->pregs[1]);
				vreg_set_new_type(tempfb, from);
				free_preg(ret->pregs[0], il, 1, 1);
				icode_make_x86_fild(r, tempfb, il);

				icode_make_amd64_ulong_to_float(
					ret->pregs[1], /* pass upper dword as source reg */
					temp,
					r,
					to->code, /* is float */
					il);
				free_preg(temp, il, 1, 0);
				free_preg(ret->pregs[0], il, 1, 0);
				vreg_map_preg(ret, r);
				res_is_x87_reg = 1;
			} else if (backend->arch == ARCH_X86
				|| (to->code == TY_LDOUBLE
					&& (ret->pregs[0]->size <= 4
					|| from->sign != TOK_KEY_UNSIGNED))) {
				/* x87 kludgery */
#if ! REMOVE_FLOATBUF
				load_floatbuf(ret, il);
				free_preg(ret->pregs[0], il, 1, 0);
				if (ret->is_multi_reg_obj) {
					free_preg(ret->pregs[1], il, 1, 0);
				}
#else
				/* Spill the integer, then fild it */
				struct vreg *tempfb =
					load_integer_floatbuf(ret,from,il);

				free_preg(tempfb->pregs[0], il, 1, 0);
				if (tempfb->is_multi_reg_obj) {
					free_preg(tempfb->pregs[1], il, 1, 0);
				}

				tempfb->is_multi_reg_obj = 0;
#endif
#if 0
				r = backend->alloc_fpr(curfunc, 0, il, NULL);
#endif
				r = &x86_fprs[0];
#if ! REMOVE_FLOATBUF
				floatbuf.pregs[0] = NULL;
				vreg_faultin_x87(r, NULL, &floatbuf, il, 0);
				free_preg(ret->pregs[0], il, 1, 0);
#else
				tempfb->pregs[0] = NULL;
			/*	vreg_faultin_x87(r, NULL, tempfb, il, 0);*/
				tempfb = dup_vreg(tempfb);
				vreg_set_new_type(tempfb, from);
				icode_make_x86_fild(r, tempfb, il);

#endif
				ret = dup_vreg(ret);
				vreg_map_preg(ret, r);
				ret->size = backend->get_sizeof_type(to, NULL);
				res_is_x87_reg = 1;
				ret->stack_addr = NULL;
			} else {
				/*
				 * SSE (AMD64) integer to floating point
				 * conversion
				 */
				if (ret->pregs[0]->size > 4) {
					/*
					 * 64bit int to fp conversion.
					 *
					 * 04/11/08: Use qword SSE
					 * instructions instead of the
					 * utter x87 nonsense. There was
					 * a comment here that said 64bit
					 * conv instructions don't exist,
					 * maybe they were overlooked?
					 */
					if (to->code == TY_LDOUBLE) {
						struct reg *temp =
							ALLOC_GPR(curfunc, 8, il, NULL);
						struct vreg *tempfb;

						/*
						 * Note that we can only get
						 * here for unsigned 64bit
						 * integers
						 */
						r = backend->alloc_fpr(curfunc,
							16, il, NULL);
						/*
						 * 08/02/08: Unsigned long to
						 * float is a bit more
						 * complicated than we made it
						 * out to be
						 */
					/*	free_preg(ret->pregs[0], il, 1, 1);*/
						tempfb = dup_vreg(ret);
						vreg_map_preg(tempfb, ret->pregs[0]);
						vreg_set_new_type(tempfb, from);
						free_preg(ret->pregs[0], il, 1, 1);
						icode_make_x86_fild(r, tempfb, il);

						icode_make_amd64_ulong_to_float(
							ret->pregs[0],
							temp,
							r,
							TY_LDOUBLE, /* is double */
							il);
						free_preg(temp, il, 1, 0);
						vreg_map_preg(ret, r);
						ret->size = backend->get_sizeof_type(to, NULL);
						res_is_x87_reg = 1;
					} else if (to->code == TY_DOUBLE) {
						r = backend->alloc_fpr(curfunc,
							8, il, NULL);
						if (from->sign == TOK_KEY_UNSIGNED) {
							/*
							 * 08/02/08: Unsigned long to
							 * float is a bit more
							 * complicated than we made it
							 * out to be
							 */
							struct reg *temp =
								ALLOC_GPR(curfunc, 8, il, NULL);
							icode_make_amd64_ulong_to_float(
								ret->pregs[0],
								temp,
								r,
								TY_DOUBLE, /* is double */
								il);
							free_preg(temp, il, 1, 0);
						} else {
							icode_make_amd64_cvtsi2sdq(
								r, ret, il);
						}
					} else {
						/* Has to be float */
						/*
						 * 08/02/08: Unsigned long to
						 * float is a bit more
						 * complicated than we made it
						 * out to be
						 */
						r = backend->alloc_fpr(curfunc,
							4, il, NULL);


						if (from->sign == TOK_KEY_UNSIGNED) {
							struct reg *temp =
								ALLOC_GPR(curfunc, 8, il, NULL);
							icode_make_amd64_ulong_to_float(
								ret->pregs[0],
								temp,
								r,
								TY_FLOAT, /* is float */
								il);
							free_preg(temp, il, 1, 0);
						} else {
							icode_make_amd64_cvtsi2ssq(
								r, ret, il);
						}
					}
					free_preg(ret->pregs[0], il, 1, 0);
					vreg_map_preg(ret, r);
				} else {
					/* 32bit (or smaller) integer source */
					if (to->code == TY_DOUBLE) {
						r = backend->alloc_fpr(curfunc,
							backend->get_sizeof_type
							(to, NULL), il, NULL);
						icode_make_amd64_cvtsi2sd(
							r, ret, il);
						res_is_x87_reg = 1;
					} else {
						/* Has to be float */
						r = backend->alloc_fpr(curfunc,
							ret->size, il, 0);
						icode_make_amd64_cvtsi2ss(
							r, ret, il);
					}
					free_preg(ret->pregs[0], il, 1, 0);
					vreg_map_preg(ret, r);
				}
			}
		} else if (backend->arch == ARCH_AMD64
			|| sysflag == OS_OSX) {
			/*
			 * On AMD64, the item may be in an x87 or
			 * SSE register, and has to be moved into
			 * SSE or x87, respectively
			 */
			if (is_x87_trash(src)) {
				vreg_faultin_x87(NULL, NULL, src, il, 0);
				vreg_map_preg(ret, src->pregs[0]);
#if 0
				free_preg(vrtmp->pregs[0], il, 1, 1);
#endif
			}
			res_is_x87_reg = convert_amd64_fp(to, from, ret, il);
		} else {
			/*
			 * x87 to x87... this is not a no-op anymore! Because:
			 * the source fp value is stored on the stack, so we
			 * have to load it to a register and create a new
			 * stack buffer of different size to store it
			 * (remember we never want to keep stuff in x87 regs)
			 */
			struct vreg *vrtmp;

			vreg_faultin_x87(NULL, NULL, src, il, 0);
			vrtmp = vreg_alloc(NULL,NULL,NULL,NULL);
			vrtmp->type = to;
			vrtmp->size = backend->get_sizeof_type(to, NULL);
			vreg_map_preg(vrtmp, src->pregs[0]);
			free_preg(vrtmp->pregs[0], il, 1, 1);
			ret = vrtmp;
		}
	} else if (is_floating_type(from)) {
		if (!is_floating_type(to)) {
			/* Floating point to integer */
			if ((backend->arch == ARCH_X86 && sysflag != OS_OSX)
				|| from->code == TY_LDOUBLE) {
				/*
				 * We have to change the status control word,
				 * perform the conversion by writing the value
				 * to the float buffer, then save it in a GPR,
				 * then reset the CW
				 */
#if REMOVE_FLOATBUF
				struct vreg *tempfb;
				struct stack_block *sb;
#endif
				store_x87cw(il);
				modify_x87cw(il);
				load_x87cw(&x87cw_new, il);
				size = backend->get_sizeof_type(to, NULL);

				vreg_faultin_x87(NULL, NULL, src, il, 0);
				vreg_map_preg(ret, src->pregs[0]);
				src->pregs[0] = NULL;
#if ! REMOVE_FLOATBUF
				load_floatbuf(ret, il);
				free_preg(floatbuf.pregs[0], il, 1, 0);
				if (ret->is_multi_reg_obj) {
					free_preg(floatbuf.pregs[1], il, 1, 0);
				}
#else
				ret = dup_vreg(ret);
				vreg_set_new_type(ret, to);
				tempfb = load_floatval_floatbuf(ret, from, il);
#if 0
				free_preg(tempfb->pregs[0], il, 1, 0);
				if (ret->is_multi_reg_obj) {
					free_preg(tempfb->pregs[1], il, 1, 0);
				}
#endif
#endif

#if 0
				floatbuf.pregs[0] = NULL;
#endif

				if (size < 4) {
					/*
					 * fistp cannot output shorts or chars -
					 * so get an int and convert it
					 */

					r = alloc_16_or_32bit_noesiedi(curfunc,
						4, il, NULL);
				} else {
					r = ALLOC_GPR(curfunc, size, il, NULL);
				}

				if (backend->arch == ARCH_X86
					&& IS_LLONG(to->code)
					&& to->tlist == NULL) {
					/* Need a second reg for the upper dword */
					r2 = ALLOC_GPR(curfunc, size, il, NULL);
				} else {
					r2 = NULL;
				}
#if ! REMOVE_FLOATBUF
				fbvr = n_xmemdup(&floatbuf, sizeof floatbuf);
				if (size > 4 && backend->arch == ARCH_AMD64) {
					fbvr->size = 8;
					fbvr->type = make_basic_type(TY_LONG);
				} else {
					fbvr->size = 4;
					fbvr->type = make_basic_type(TY_INT);
				}

				vreg_faultin(r, r2, fbvr, il, 0);
#else
				sb = tempfb->stack_addr;

				/* Reload the stored integer with a suitable type */
				tempfb = vreg_alloc(NULL,NULL,NULL,NULL);
				if (size > 4 && backend->arch == ARCH_AMD64) {
					tempfb->size = 8;
					tempfb->type = make_basic_type(TY_LONG);
				} else if (size == 8) {
					/*
					 * 06/04/08: This was missing - why? It
					 * broke double to long long conversion
					 * since the long was treated as two
					 * individual ints instead of one llong
					 */
					tempfb->type = make_basic_type(TY_LLONG);
					tempfb->size = 8;
					tempfb->is_multi_reg_obj = 2;
				} else {
					tempfb->size = 4;
					tempfb->type = make_basic_type(TY_INT);
				}
				tempfb->stack_addr = sb;

				vreg_faultin(r, r2, tempfb, il, 0);
#endif

				if (size < 4) {
					/* Truncate the int to short/char */
					if (size == 1) {
						free_preg(r, il, 0, 0);
						r = r->composed_of[0]
							->composed_of[/*0*/1];
					} else {
						/* 2 */
						free_preg(r, il, 0, 0);
						r = r->composed_of[0];
					}
				}
				vreg_map_preg(ret, r);
				if (r2 != NULL) {
					vreg_map_preg2(ret, r2);
				}
				/* Restore the original control word */
				load_x87cw(&x87cw_old, il);
			} else {
				/*
				 * SSE (AMD64) floating point to integer
				 * conversion
				 */
				int siz;
				int to_quad = 0;
				int is_64bit = 0;


				/*
				 * 08/01/08: When converting to unsigned
				 * 32bit integers, we first have to convert
				 * to a 64bit integer, then chop off the
				 * desired part!
				 */
				if (backend->arch == ARCH_X86) {
					/*
					 * 02/15/09: SSE on x86 (for OSX) cannot use
					 * 64bit GPRs, so for now we just always use
					 * 32bit results
					 */
					is_64bit = 0;
					to_quad = 0;
				} else {
					if (!IS_LONG(to->code) && !IS_LLONG(to->code)) {
						is_64bit = 0;
						if (to->sign == TOK_KEY_UNSIGNED) {
							to_quad = 1;
						}
					} else {
						is_64bit = 1;
						to_quad = 1;
					}
				}

				r = ALLOC_GPR(curfunc, to_quad? 8: 4, il, NULL);
				if (from->code == TY_DOUBLE) {
					if (to_quad) {
						icode_make_amd64_cvttsd2siq(
							r, ret->pregs[0], il);
					} else {
						icode_make_amd64_cvttsd2si(
							r, ret->pregs[0], il);
					}
				} else {
					if (to_quad) {
						icode_make_amd64_cvttss2siq(
							r, ret->pregs[0], il);
					} else {
						icode_make_amd64_cvttss2si(
							r, ret->pregs[0], il);
					}
				}
				siz = backend->get_sizeof_type(to, NULL);
				/*
				 * 08/01/08: < 4 instead of == 4
				 */
				if (siz < 4 || (to_quad && !is_64bit)) {
					struct reg *r2;

					r2 = ALLOC_GPR(curfunc, siz, il, NULL);
					icode_make_copyreg(r2, r, to,
						to->sign !=
						TOK_KEY_UNSIGNED?
						make_basic_type(TY_INT)
						: make_basic_type(TY_UINT),
						il);
					free_preg(r, il, 0, 0);
					r = r2;
				}

				vreg_map_preg(ret, r);
				if (backend->arch == ARCH_X86 && IS_LLONG(to->code)) {
					/*
					 * The result is 32bit, so sign- or zero-extend
					 * it if we are converting to long long
					 */
					change_preg_size(ret, il, to, make_basic_type(TY_INT));
				}
			}
		} else if (backend->arch == ARCH_AMD64) {
			/*
			 * x87 vs SSE maybe?
			 */
			res_is_x87_reg = convert_amd64_fp(to, from, ret, il);
		}
	} else if (ret->pregs[0]->size != ret->size && to->code != from->code) {
		/*
		 * XXX change_preg_size() was being called for ``long long''
		 * versus ``unsigned long long'' because the preg size check
		 * above yields 4 for those types!
		 * Thus only call the function if one or both types are not
		 * llong
		 */
		if ( (!IS_LLONG(from->code) || from->tlist != NULL)
			|| (!IS_LLONG(to->code) || to->tlist != NULL) ) {
			change_preg_size(ret, il, to, from);
		}
	}

	to = orig_to;  /* because of uintptr_t stuff */
	ret->type = to;
	ret->size = backend->get_sizeof_type(to, NULL);

	if (res_is_x87_reg) {
		/*
		 * Save to stack so that the god awful x87 regs are
		 * all free
		 */
		ret->is_multi_reg_obj = 0;
		vreg_map_preg(ret, ret->pregs[0]);
		free_preg(ret->pregs[0], il, 1, 1);
		ret->pregs[0] = NULL;
	} else if (ret->pregs[0] != NULL) {
		/*
		 * The non-null check is to avoid mapping to a null
		 * pointer register, which can happen if source and
		 * target type are x87 fp types, such that no
		 * conversion is actually performed and no register
		 * is ever loaded
		 */
		vreg_map_preg(ret, ret->pregs[0]);
	}

	/* Update multi-register information */
	if (backend->arch == ARCH_X86
		&& IS_LLONG(to->code)
		&& to->tlist == NULL) {
		ret->is_multi_reg_obj = 2;
		vreg_map_preg2(ret, ret->pregs[1]);
	} else {
		ret->is_multi_reg_obj = 0;
		ret->pregs[1] = NULL;
	}
	if (ret->type->code == TY_BOOL && ret->type->tlist == NULL) {
		boolify_result(ret, il);
	}

	return ret;
}
3412
3413 static void
3414 icode_initialize_pic(struct function *f, struct icode_list *il) {
3415 /*
3416 * We only have to do the first initialization, because ebx is
3417 * callee-save, and so even after function calls it remains
3418 * loaded with the GOT address
3419 */
3420 if (!f->pic_initialized) {
3421 free_preg(&x86_gprs[1], il, 1, 1);
3422 reg_set_unallocatable(&x86_gprs[1]);
3423 f->callee_save_used |= CSAVE_EBX;
3424 icode_make_initialize_pic(f, il);
3425 }
3426 }
3427
3428 static void
3429 icode_complete_func(struct function *f, struct icode_list *il) {
3430 (void) il;
3431
3432 if (f->pic_initialized) {
3433 /* PIC register ebx was used - free it again */
3434 reg_set_allocatable(&x86_gprs[1]);
3435 x86_gprs[1].used = 0;
3436 }
3437 }
3438
3439 static void
3440 do_print_gpr(struct reg *r) {
3441 printf("%s=%d(%d) ", r->name, r->used, reg_allocatable(r));
3442 if (r->vreg && r->vreg->pregs[0] == r) {
3443 printf("<-> %p", r->vreg);
3444 }
3445 }
3446
3447 static void
3448 debug_print_gprs(void) {
3449 int i;
3450
3451 for (i = 0; i < 6; ++i) {
3452 printf("\t\t");
3453 do_print_gpr(&x86_gprs[i]);
3454 putchar('\t');
3455 do_print_gpr(x86_gprs[i].composed_of[0]);
3456 if (i < 4) {
3457 putchar('\t');
3458 do_print_gpr(x86_gprs[i].composed_of[0]->
3459 composed_of[0]);
3460 putchar('\t');
3461 do_print_gpr(x86_gprs[i].composed_of[0]->
3462 composed_of[1]);
3463 }
3464 putchar('\n');
3465 }
3466 }
3467
3468 static int
3469 is_multi_reg_obj(struct type *t) {
3470 return (t->tlist == NULL && IS_LLONG(t->code))? 2: 0;
3471 }
3472
3473 static struct reg *
3474 name_to_reg(const char *name) {
3475 int i;
3476 size_t len;
3477
3478 if (*name == '%') ++name;
3479
3480 if (strncmp(name, "st", 2) == 0) {
3481 /* Floating point registers */
3482 if (name[2] == 0) {
3483 /* st = st(0) */
3484 return &x86_fprs[0];
3485 } else if (name[2] != '(' || name[4] != ')'
3486 || name[5] != 0 || !isdigit((unsigned char)name[3])
3487 || name[3] > '7') {
3488 return NULL;
3489 } else {
3490 return &x86_fprs[name[3] - '0'];
3491 }
3492 } else if ((len = strlen(name)) == 2) {
3493 if (name[1] == 'i') {
3494 if (strcmp(x86_gprs[4].name, name) == 0) {
3495 return &x86_gprs[4];
3496 } else if (strcmp(x86_gprs[5].name, name) == 0) {
3497 return &x86_gprs[5];
3498 }
3499 }
3500 for (i = 0; i < 4; ++i) {
3501 if (name[1] == 'x') {
3502 /* Must be 16bit */
3503 if (strcmp(x86_gprs[i].composed_of[0]->name,
3504 name) == 0) {
3505 return x86_gprs[i].composed_of[0];
3506 }
3507 } else {
3508 /* Must be 8bit */
3509 if (strcmp(x86_gprs[i].composed_of[0]->
3510 composed_of[0]->name, name) == 0) {
3511 return x86_gprs[i].composed_of[0]
3512 ->composed_of[0];
3513 }
3514 if (strcmp(x86_gprs[i].composed_of[0]->
3515 composed_of[0]->name, name) == 0) {
3516 return x86_gprs[i].composed_of[0]
3517 ->composed_of[0];
3518 }
3519 }
3520 }
3521 if (strcmp(x86_esp.composed_of[0]->name, name) == 0) {
3522 return x86_esp.composed_of[0];
3523 }
3524 if (strcmp(x86_ebp.composed_of[0]->name, name) == 0) {
3525 return x86_esp.composed_of[0];
3526 }
3527 } else if (len == 3) {
3528 for (i = 0; i < N_GPRS; ++i) {
3529 if (strcmp(x86_gprs[i].name, name) == 0) {
3530 return &x86_gprs[i];
3531 }
3532 }
3533 if (strcmp(x86_esp.name, name) == 0) {
3534 return &x86_esp;
3535 } else if (strcmp(x86_ebp.name, name) == 0) {
3536 return &x86_ebp;
3537 }
3538 if (backend->arch == ARCH_AMD64) {
3539 for (i = 0; i < N_GPRS; ++i) {
3540 if (strcmp(amd64_x86_gprs[i].name, name) == 0) {
3541 return &amd64_x86_gprs[i];
3542 }
3543 }
3544 for (i = 0; i < 8; ++i) {
3545 if (strcmp(amd64_gprs[i].name, name) == 0) {
3546 return &amd64_gprs[i];
3547 }
3548 }
3549 }
3550 }
3551 return NULL;
3552 }
3553
/*
 * Get suitably sized register for storing item vr, where ch dictates which
 * 32bit register to choose from. For use with inline asm constraints
 *
 * On success *vr0 is replaced with a disconnected copy of the vreg and
 * the chosen (sub) register is returned; on any error NULL is returned
 * after issuing a diagnostic.
 *
 * XXX this does handle the amd64, but not completely
 */
static struct reg *
asmvreg_to_reg(
	struct vreg **vr0,		/* in/out: item to place in a register */
	int ch,				/* constraint char - a/b/c/d/S/D/q/Q/r */
	struct inline_asm_io *io,	/* asm operand, for error reporting */
	struct icode_list *il,		/* icode list to append spill/load code to */
	int faultin) {			/* nonzero = actually load the value now */

	struct reg	*r = NULL;
	struct vreg	*vr = *vr0;
	size_t		size = vr->size;
	struct vreg	*newvr;

	/* Reject items that can never live in a single register */
	if ((vr->type->code == TY_STRUCT || vr->type->code == TY_UNION)
		&& vr->type->tlist == NULL) {
		errorfl(io->expr->tok,
			"Cannot load struct/union into register");
		return NULL;
	} else if (IS_LLONG(vr->type->code)
		&& vr->type->tlist == NULL
		&& backend->arch == ARCH_X86) {
		/* long long takes two GPRs on 32bit x86 */
		errorfl(io->expr->tok,
			"Cannot load long long into register");
		return NULL;
	} else if (vr->type->tlist != NULL) {
		/* Pointer types are always full word sized */
		size = backend->arch == ARCH_AMD64? 8: 4;
	}

	/*
	 * For a/b/c/d/S/D input must be moved to a specific register. For
	 * q more or less as well, and for r to any GPR
	 */
	if (ch == 'b') {
		curfunc->callee_save_used |= CSAVE_EBX;
	} else if (ch == 'S') {
		curfunc->callee_save_used |= CSAVE_ESI;
	} else if (ch == 'D') {
		curfunc->callee_save_used |= CSAVE_EDI;
	}
	switch (ch) {
	case 'a': /* eax */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[0];
		else r = &x86_gprs[0];
		break;
	case 'b': /* ebx */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[1];
		else r = &x86_gprs[1];
		break;
	case 'c': /* ecx */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[2];
		else r = &x86_gprs[2];
		break;
	case 'd': /* edx */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[3];
		else r = &x86_gprs[3];
		break;
	case 'S': /* esi */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[4];
		else r = &x86_gprs[4];
		break;
	case 'D': /* edi */
		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[5];
		else r = &x86_gprs[5];
		break;
	case 'q':
	case 'Q':
		/* XXX amd64 */
		/* Must be any of eax/ebx/ecx/edx - exclude esi/edi */
		if (backend->arch == ARCH_X86) {
			r = alloc_16_or_32bit_noesiedi(curfunc, 0, il, NULL);
		} else {
			/* XXX maybe need 64bit x86 allocator :-( */
			r = x86_backend.alloc_gpr(curfunc, 0, il, NULL, 0);
			/* Promote the allocated register to its 64bit parent */
			if (is_member_of_reg(&amd64_x86_gprs[0], r)) {
				r = &amd64_x86_gprs[0];
			} else if (is_member_of_reg(&amd64_x86_gprs[1], r)) {
				r = &amd64_x86_gprs[1];
			} else if (is_member_of_reg(&amd64_x86_gprs[2], r)) {
				r = &amd64_x86_gprs[2];
			} else if (is_member_of_reg(&amd64_x86_gprs[3], r)) {
				r = &amd64_x86_gprs[3];
			}
		}
		break;
	case 'r':
		if (size == 1) {
			if (backend->arch == ARCH_X86) {
				/* esi/edi have no 1byte sub registers ... */
				r = alloc_16_or_32bit_noesiedi(curfunc, 1,
					il, NULL);
			} else {
				/* amd64 */
				r = ALLOC_GPR(curfunc, 1, il, NULL);
			}
		} else {
			if (backend->arch == ARCH_X86) {
				r = alloc_16_or_32bit_reg(curfunc, size,
					il, NULL);
			} else {
				r = ALLOC_GPR(curfunc, size, il, NULL);
			}
		}
		break;
	default:
		printf("BAD CHAR FOR asmvreg_to_reg(): %c(%d)\n", ch, ch);
		abort();
	}

	if (r == NULL) {
		errorfl(io->expr->tok, "Too many inline asm operands - "
			"cannot allocate register");
		return NULL;
	} else if (faultin && !reg_allocatable(r)) {
		/*
		 * XXX this isn't quite correct... use of ``faultin'' above
		 * causes output registers to be assigned even if those are
		 * used for input, which is good. Problem is that clobbered
		 * registers should not be used for output.
		 */
		errorfl(io->expr->tok, "Cannot allocate %s (in clobber list?)",
			r->name);
		return NULL;
	}
	/* Spill any current contents so the register is free for this item */
	free_preg(r, il, 1, 1);
	if (size == 1 && (ch == 'S' || ch == 'D')
		&& backend->arch == ARCH_X86) {
		/* esi/edi have no 8bit sub registers */
		errorfl(io->expr->tok,
			"Cannot store 1-byte item to "
			"%s", r->name);
		return NULL;
	} else if (size != r->size) {
		/* Walk down to the sub register matching the item size */
		if (r->size == 8) {
			/* amd64 */
			r = r->composed_of[0];
		}

		if (size == 1) {
			r = r->composed_of[0]->composed_of[0];
		} else if (size == 2) {
			r = r->composed_of[0];
		} else if (size == 4) {
			/* amd64 - 64 to 32 bit, already done above */
			;
		}
	}

	newvr = vreg_disconnect(vr);

	if (faultin) {
		vreg_faultin(r, NULL, newvr, il, 0);
		/* Keep the register pinned while the asm statement uses it */
		reg_set_unallocatable(r);
	}
	*vr0 = newvr;
	return r;
}
3715
/*
 * Build the internal name for an inline asm label by prepending
 * "inlasm" to the user-supplied label text. The result is allocated
 * with n_xmalloc and owned by the caller.
 */
static char *
get_inlineasm_label(const char *tmpl) {
	static const char	prefix[] = "inlasm";
	size_t			tmpl_len = strlen(tmpl);
	char			*label;

	label = n_xmalloc(tmpl_len + sizeof prefix);
	memcpy(label, prefix, sizeof prefix - 1);
	/* +1 copies the terminating NUL as well */
	memcpy(label + sizeof prefix - 1, tmpl, tmpl_len + 1);
	return label;
}
3722
3723 /*
3724 * Print inline asm instruction operand
3725 */
3726 void
3727 print_asmitem_x86(FILE *out, void *item, int item_type, int postfix, int a) {
3728 char *p = NULL;
3729 struct reg *r = NULL;
3730 struct gas_token *gt;
3731 struct inline_asm_io *io;
3732 int idx;
3733 int applied_constraint = 0;
3734
3735 switch (item_type) {
3736 case ITEM_NUMBER:
3737 if (a == TO_GAS) x_fputc('$', out);
3738 gt = item;
3739 p = gt->data;
3740 break;
3741 case ITEM_REG:
3742 if (a == TO_GAS) x_fputc('%', out);
3743 gt = item;
3744 p = gt->data;
3745 break;
3746 case ITEM_SUBREG_B:
3747 case ITEM_SUBREG_H:
3748 case ITEM_SUBREG_W:
3749 io = item;
3750 if (io->outreg) {
3751 r = io->outreg;
3752 } else if (io->inreg) {
3753 r = io->inreg;
3754 } else {
3755 r = io->vreg->pregs[0];
3756 }
3757 if (r == NULL/* || r->vreg != io->vreg*/) { /* XXX!!! */
3758 errorfl(io->expr->tok,
3759 "Operand not in register but used with %h or %b");
3760 return;
3761 }
3762
3763 if (backend->arch == ARCH_X86) {
3764 if (!is_member_of_reg(&x86_gprs[0], r)
3765 && !is_member_of_reg(&x86_gprs[1], r)
3766 && !is_member_of_reg(&x86_gprs[2], r)
3767 && !is_member_of_reg(&x86_gprs[3], r)) {
3768 errorfl(io->expr->tok,
3769 "`%s' does not have a 8bit register for use with %%h or %%b",
3770 r->name);
3771 return;
3772 }
3773 } else {
3774 /* AMD64 */
3775 int i;
3776
3777 for (i = 0; i < 4; ++i) {
3778 if (is_member_of_reg(&amd64_x86_gprs[i], r)) {
3779 break;
3780 }
3781 }
3782 if (i == 4) {
3783 for (i = 8; i < 16; ++i) {
3784 if (is_member_of_reg(&amd64_gprs[i],
3785 r)) {
3786 errorfl(io->expr->tok,
3787 "`%s' doesn't make sense with %%h or %%b",
3788 r->name);
3789 return;
3790 }
3791 }
3792 if (i == 16) {
3793 errorfl(io->expr->tok,
3794 "`%s' does not have a 8bit register for use with %h or %b",
3795 r->name);
3796 return;
3797 }
3798 }
3799 }
3800 if (item_type == ITEM_SUBREG_B) {
3801 idx = 1;
3802 } else {
3803 idx = 0;
3804 }
3805 if (r->size == 2) {
3806 if (item_type == ITEM_SUBREG_W) {
3807 ; /* OK - already 16bit */
3808 } else {
3809 r = r->composed_of[idx];
3810 }
3811 } else if (r->size == 1) {
3812 /*
3813 * XXX this unimpl() was probably here because
3814 * I didn't know what this means if used with
3815 * 8 bit regs!
3816 */
3817 #if 0
3818 unimpl();
3819 #endif
3820 } else {
3821 /* Must be 4 */
3822 if (item_type == ITEM_SUBREG_W) {
3823 r = r->composed_of[0];
3824 } else {
3825 r = r->composed_of[0]->composed_of[idx];
3826 }
3827 }
3828 if (a == TO_GAS) x_fputc('%', out);
3829 x_fprintf(out, "%s", r->name);
3830 break;
3831 case ITEM_VARIABLE:
3832 gt = item;
3833 if (a == TO_NASM) {
3834 x_fputc('$', out);
3835 }
3836 p = gt->data;
3837 break;
3838 case ITEM_LABEL:
3839 x_fprintf(out, ".%s", item);
3840 break;
3841 case ITEM_INPUT:
3842 case ITEM_OUTPUT:
3843 io = item;
3844 for (p = io->constraints; *p != 0; ++p) {
3845 struct vreg *vr = io->vreg;
3846
3847
3848 r = NULL;
3849 /*
3850 * If this constraint uses a register (even with
3851 * "m" we may have a register holding a pointer
3852 * value), map it to the vreg
3853 */
3854 if (strchr("qrabcdSDm", *p) != 0) {
3855 if (item_type == ITEM_INPUT) {
3856 r = io->inreg;
3857 } else {
3858 /* Output */
3859 r = io->outreg;
3860 }
3861 if (vr->from_ptr != NULL) {
3862 /*
3863 * Register is pointer value
3864 */
3865 backend_vreg_map_preg(vr->from_ptr, r);
3866 } else {
3867 /* Register references vreg */
3868 backend_vreg_map_preg(vr, r);
3869 }
3870 }
3871
3872 if (*p == '+' || *p == '=' || *p == '&') {
3873 continue;
3874 } else if (applied_constraint) {
3875 /*
3876 * 05/17/09: For things like "rm", after
3877 * having chosen r, we do not want to print
3878 * an m item as well. Because it's just one
3879 * operand.
3880 * XXX Here we always use the first one,
3881 * i.e. r in "rm" and m in "mr". Probably
3882 * should pick it depending on the other
3883 * instruction operands
3884 */
3885 continue;
3886 } else if (strchr("qrabcdSD", *p) != 0) {
3887 if (a == TO_GAS) x_fputc('%', out);
3888 if (item_type == ITEM_INPUT) {
3889 r = io->inreg;
3890 x_fprintf(out, "%s",
3891 r->name);
3892 } else {
3893 /* output */
3894 r = io->outreg;
3895 x_fprintf(out, "%s", r->name);
3896 }
3897 } else if (*p == 'm') {
3898 if (postfix != 0 && a == TO_NASM) {
3899 char *p = NULL;
3900
3901 switch (postfix) {
3902 /*
3903 * XXX what about floating point
3904 * l and t :(
3905 */
3906 case 'b': p = "byte"; break;
3907 case 'w': p = "word"; break;
3908 case 'l': p = "dword"; break;
3909 case 'q': p = "qword"; break;
3910 default:
3911 unimpl();
3912 }
3913 x_fprintf(out, "%s ", p);
3914 }
3915 emit->print_mem_operand(io->vreg, NULL);
3916 } else if (*p == 'i') {
3917 if (eval_const_expr(io->expr, 0, NULL) != 0) {
3918 return;
3919 }
3920 if (io->vreg->type->sign != TOK_KEY_UNSIGNED) {
3921 #if 0
3922 x_fprintf(out, "%ld",
3923 *(long *)io->expr->const_value
3924 ->value);
3925 #endif
3926 cross_print_value_by_type(out,
3927 io->expr->const_value->value,
3928 TY_LONG, 'd');
3929 } else {
3930 #if 0
3931 x_fprintf(out, "%lu",
3932 *(long *)io->expr->const_value
3933 ->value);
3934 #endif
3935 cross_print_value_by_type(out,
3936 io->expr->const_value->value,
3937 TY_ULONG, 'd');
3938 }
3939 } else if (*p == 'o') {
3940 unimpl();
3941 } else if (*p == 'v') {
3942 unimpl();
3943 } else {
3944 printf("WHA?? %c\n", *p);
3945 unimpl();
3946 }
3947 applied_constraint = 1;
3948
3949 if (r != NULL) {
3950 backend_vreg_unmap_preg(r);
3951 }
3952 }
3953 p = NULL;
3954 }
3955
3956 if (p != NULL) {
3957 x_fprintf(out, "%s", p);
3958 }
3959 }
3960
3961 int
3962 x86_have_immediate_op(struct type *ty, int op) {
3963 if (Oflag == -1) { /* XXX really want this here? */
3964 return 0;
3965 }
3966 if (op == TOK_OP_BSHL
3967 || op == TOK_OP_BSHR
3968 || op == TOK_OP_BAND
3969 || op == TOK_OP_BOR
3970 || op == TOK_OP_BXOR
3971 || op == TOK_OP_COBSHL
3972 || op == TOK_OP_COBSHR
3973 || op == TOK_OP_COBOR
3974 || op == TOK_OP_COBAND
3975 || op == TOK_OP_COBXOR) {
3976 if (backend->arch == ARCH_X86
3977 && IS_LLONG(ty->code)) {
3978 return 0;
3979 }
3980 return 1;
3981 }
3982 return 0;
3983 }
3984
/*
 * Backend interface record for x86. The initializer is positional -
 * entry order must match the member declaration order of struct
 * backend (backend.h). Most slots are function pointers defined
 * earlier in this file and named after their role.
 */
struct backend x86_backend = {
	ARCH_X86,
	0, /* ABI */
	0, /* multi_gpr_object */
	4, /* structure alignment */
	1, /* need pic initialization (ebx) */
	0, /* emulate long double */
	0, /* relax alloc gpr order */
	0, /* max displacement */
	0, /* min displacement */
	x86_have_immediate_op,
	init,
	is_multi_reg_obj,
	get_ptr_size,
	get_size_t,
	get_uintptr_t,
	get_wchar_t,
	get_sizeof_basic,
	get_sizeof_type,
	get_sizeof_elem_type,
	get_sizeof_decl,
	get_sizeof_const,
	get_sizeof_vla_type,
	get_align_type,
	gen_function,
#if XLATE_IMMEDIATELY
	gen_prepare_output,
	gen_finish_output,
#else
	gen_program,
#endif
	NULL, /* NOTE(review): slot unused here - check struct backend for its meaning */
	&x86_esp, /* presumably the stack pointer register - TODO confirm against struct backend */
	invalidate_gprs,
	invalidate_except,
	alloc_gpr,
	alloc_16_or_32bit_noesiedi,
	alloc_fpr,
	x86_free_preg,
	icode_make_fcall,
	icode_make_return,
	NULL, /* NOTE(review): slot unused here - check struct backend for its meaning */
	icode_prepare_op,
	NULL, /* prepare_load_addrlabel */
	icode_make_cast,
	NULL, /* icode_make_structreloc */
	icode_initialize_pic,
	icode_complete_func,
	make_null_block,
	make_init_name,
	debug_print_gprs,
	name_to_reg,
	asmvreg_to_reg,
	get_inlineasm_label,
	do_ret,
	get_abi_reg,
	get_abi_ret_reg,
	generic_same_representation
};
4044
4045