1 /*
2 * Copyright (c) 2006 - 2010, Nils R. Weller
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 *
27  * AMD64 backend
28 * (XXX much of this stuff can probably be adapted to different
29 * architectures)
30 */
31 #include "amd64_gen.h"
32 #include "backend.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdarg.h>
36 #include <assert.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <limits.h>
40 #include "scope.h"
41 #include "decl.h"
42 #include "type.h"
43 #include "decl.h"
44 #include "icode.h"
45 #include "functions.h"
46 #include "control.h"
47 #include "debug.h"
48 #include "token.h"
49 #include "error.h"
50 #include "functions.h"
51 #include "symlist.h"
52 #include "icode.h"
53 #include "cc_main.h"
54 #include "stack.h"
55 #include "reg.h"
56 #include "subexpr.h"
57 #include "expr.h"
58 /* #include "x86_emit_gas.h" */
59 #include "inlineasm.h"
60 #include "x86_emit_nasm.h"
61 #include "x86_emit_gas.h"
62 #include "x86_gen.h"
63 #include "amd64_emit_yasm.h"
64 #include "amd64_emit_gas.h"
65 #include "cc1_main.h"
66 #include "n_libc.h"
67
68
69
70 static FILE *out;
71 static struct scope *tunit;
72 static int use_nasm = 1; /* XXX */
73
74 static int rbx_saved;
75 struct vreg csave_rbx;
76 struct emitter_amd64 *emit_amd64;
77
78 int amd64_need_negmask;
79 int amd64_need_ulong_float_mask;
80
81
82 #define N_GPRS 6
83 #define N_ARGREGS 6
84
85 struct reg *amd64_argregs[] = {
86 /* rdi, rsi, rdx, rcx, r8, r9 */
87 &amd64_x86_gprs[5], &amd64_x86_gprs[4],
88 &amd64_x86_gprs[3], &amd64_x86_gprs[2],
89 &amd64_gprs[8], &amd64_gprs[9]
90 };
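/*
 * (Indexing follows the name table in init_regs(): amd64_x86_gprs[0] is
 * rax, [1] rbx, [2] rcx, [3] rdx, [4] rsi, [5] rdi.)
 */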
91
92 struct reg amd64_x86_gprs[7];
93 struct reg amd64_gprs[16];
94 struct reg amd64_gprs_32bit[16];
95 struct reg amd64_gprs_16bit[16];
96 struct reg amd64_gprs_8bit[16];
97 struct reg amd64_sil;
98 struct reg amd64_dil;
99
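/*
 * Callee-save map for the r8 - r15 block handed to generic_alloc_gpr()
 * in alloc_gpr() below; per the System V AMD64 ABI, r8 - r11 are
 * caller-save and r12 - r15 are callee-save
 */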
100 static int callee_save_map[] = {
101 0, 0, 0, 0, /* r8 - r11 */
102 1, 1, 1, 1 /* r12 - r15 */
103 };
104
105
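/*
 * Rough sketch of the register hierarchy built below via the composed_of
 * links (64-bit register -> progressively smaller sub registers):
 *
 *   rax -> eax -> ax -> { ah, al }     (reusing the x86 backend's x86_gprs)
 *   rsi -> esi -> si -> sil            (sil/dil only exist in 64-bit mode)
 *   r8  -> r8d -> r8w -> r8b           (likewise for r9 - r15)
 */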
106 static void
107 init_regs(void) {
108 static struct reg nullreg;
109 int i, j;
110 static const struct {
111 struct reg *regs;
112 char *names[9];
113 } rps[] = {
114 { amd64_x86_gprs,
115 {"rax","rbx","rcx","rdx","rsi","rdi",0,0,0}},
116 { NULL, {0,0,0,0,0,0,0,0,0} }
117 };
118
119 for (i = 0; rps[i].regs != NULL; ++i) {
120 nullreg.type = REG_GPR;
121 nullreg.allocatable = 1;
122 for (j = 0; rps[i].names[j] != NULL; ++j) {
123 rps[i].regs[j] = nullreg;
124 rps[i].regs[j].composed_of =
125 n_xmalloc(2 * sizeof(struct reg *));
126 rps[i].regs[j].composed_of[0] = &x86_gprs[j];
127 rps[i].regs[j].composed_of[1] = NULL;
128 rps[i].regs[j].size = 8;
129 rps[i].regs[j].name = rps[i].names[j];
130 }
131 }
132
133 amd64_sil.size = 1;
134 amd64_sil.name = "sil";
135 amd64_sil.type = REG_GPR;
136 amd64_sil.allocatable = 1;
137 x86_gprs[4].composed_of[0]->composed_of =
138 n_xmalloc(2 * sizeof(struct reg *));
139 x86_gprs[4].composed_of[0]->composed_of[0] = &amd64_sil;
140 x86_gprs[4].composed_of[0]->composed_of[1] = NULL;
141
142 amd64_dil.size = 1;
143 amd64_dil.name = "dil";
144 amd64_dil.type = REG_GPR;
145 amd64_dil.allocatable = 1;
146 x86_gprs[5].composed_of[0]->composed_of =
147 n_xmalloc(2 * sizeof(struct reg *));
148 x86_gprs[5].composed_of[0]->composed_of[0] = &amd64_dil;
149 x86_gprs[5].composed_of[0]->composed_of[1] = NULL;
150
151 for (i = 8; i < 16; ++i) {
152 static char *new_gpr_names[] = {
153 "r8", "r9", "r10", "r11",
154 "r12", "r13", "r14", "r15"
155 };
156 static char *new_gpr_names_32[] = {
157 "r8d", "r9d", "r10d", "r11d",
158 "r12d", "r13d", "r14d", "r15d"
159 };
160 static char *new_gpr_names_16[] = {
161 "r8w", "r9w", "r10w", "r11w",
162 "r12w", "r13w", "r14w", "r15w"
163 };
164 static char *new_gpr_names_8[] = {
165 "r8b", "r9b", "r10b", "r11b",
166 "r12b", "r13b", "r14b", "r15b"
167 };
168 amd64_gprs[i].name = new_gpr_names[i-8];
169 amd64_gprs[i].size = 8;
170 amd64_gprs[i].type = REG_GPR;
171 amd64_gprs[i].allocatable = 1;
172 amd64_gprs[i].composed_of = n_xmalloc(2 * sizeof(struct reg*));
173 amd64_gprs[i].composed_of[0] = &amd64_gprs_32bit[i];
174 amd64_gprs[i].composed_of[1] = NULL;
175
176 amd64_gprs_32bit[i].name = new_gpr_names_32[i-8];
177 amd64_gprs_32bit[i].size = 4;
178 amd64_gprs_32bit[i].type = REG_GPR;
179 amd64_gprs_32bit[i].allocatable = 1;
180 amd64_gprs_32bit[i].composed_of
181 = n_xmalloc(2 * sizeof(struct reg*));
182 amd64_gprs_32bit[i].composed_of[0] = &amd64_gprs_16bit[i];
183 amd64_gprs_32bit[i].composed_of[1] = NULL;
184
185 amd64_gprs_16bit[i].name = new_gpr_names_16[i-8];
186 amd64_gprs_16bit[i].size = 2;
187 amd64_gprs_16bit[i].type = REG_GPR;
188 amd64_gprs_16bit[i].allocatable = 1;
189 amd64_gprs_16bit[i].composed_of
190 = n_xmalloc(2 * sizeof(struct reg*));
191 amd64_gprs_16bit[i].composed_of[0] = &amd64_gprs_8bit[i];
192 amd64_gprs_16bit[i].composed_of[1] = NULL;
193
194 amd64_gprs_8bit[i].name = new_gpr_names_8[i-8];
195 amd64_gprs_8bit[i].size = 1;
196 amd64_gprs_8bit[i].type = REG_GPR;
197 amd64_gprs_8bit[i].allocatable = 1;
198 amd64_gprs_8bit[i].composed_of = NULL;
199 }
200
201 amd64_x86_gprs[6].name = NULL;
202 }
203
204 struct reg *
205 find_top_reg(struct reg *r) {
206 int i;
207
208 for (i = 0; i < 6; ++i) {
209 if (is_member_of_reg(&amd64_x86_gprs[i], r)) {
210 return &amd64_x86_gprs[i];
211 }
212 }
213
214 /*
215 * 10/30/07: Added this. I don't know yet why it is possible to
216 * get an r8-r15 sub register and to have to find the top, since
217 * the register allocator only uses rax - rdi for small items
218 * currently. It may be because conversion uses sub registers
219 */
220 for (i = 8; i < 16; ++i) {
221 if (is_member_of_reg(&amd64_gprs[i], r)) {
222 return &amd64_gprs[i];
223 }
224 }
225 fprintf(stderr, "Failed to find top preg for %s\n", r->name);
226 abort();
227 return NULL;
228 }
229
230
231 static void
232 do_invalidate(struct reg *r, struct icode_list *il, int save) {
233 free_preg(r, il, 1, save);
234 }
235
236
237 /*
238 * XXX this shouldn't be saving esi/edi/ebx and r12 - r15 when we're
239 * invalidating because of a function call, because those regs are
240 * callee-save
241 */
242 static void
243 invalidate_gprs(struct icode_list *il, int saveregs, int for_fcall) {
244 int i;
245
246 (void) for_fcall;
247 for (i = 0; i < N_GPRS; ++i) {
248 do_invalidate(&amd64_x86_gprs[i], il, saveregs);
249 }
250 for (i = 8; i < 16; ++i) {
251 do_invalidate(&amd64_gprs[i], il, saveregs);
252 }
253
254 /*
255 * 07/26/12: Dropped incomplete SSE usage check, could
256 * yield compiler crashes
257 */
258 for (i = 0; i < 8; ++i) {
259 do_invalidate(&x86_sse_regs[i], il, saveregs);
260 }
261 }
262
263
264 static struct reg *
265 alloc_gpr(struct function *f, int size, struct icode_list *il,
266 struct reg *dontwipe, int line) {
267 struct reg *ret;
268
269 if (size == 0) {
270 /* 0 means GPR */
271 size = 8;
272 }
273
274 if (size < 8) {
275 ret = x86_backend.alloc_gpr(f, size, il, dontwipe, line);
276 } else {
277 /*
278 * Notice how only r8 - r15 are used for 64bit register
279 * allocations. This is because the x86 gpr extensions
280 * (rax, rbx, etc) are used for argument passing, so
281 * thrashing should be avoided. Note that emit_copystruct()
282 * will use those regs too, so it is absolutely critical
283 * that struct pointers used by it are never stored in
284 * rdi/rsi/rdx
285 */
286 ret = generic_alloc_gpr(f, size, il, dontwipe,
287 &amd64_gprs[8], 8, callee_save_map, line);
288 }
289
290 return ret;
291 }
292
293
294 static struct reg *
295 alloc_fpr(struct function *f, int size, struct icode_list *il,
296 struct reg *dontwipe) {
297 return alloc_sse_fpr(f, size, il, dontwipe);
298 }
299
300 static void
301 x86_free_preg(struct reg *r, struct icode_list *il) {
302 x86_backend.free_preg(r, il);
303 }
304
305 /*
306 * IMPORTANT: The x86 backend and the x86 emitter that corresponds to
307  * this emitter (currently only yasm) also have to be initialized (init()),
308 * because some code is shared between x86 and amd64
309 */
310 static int
311 init(FILE *fd, struct scope *s) {
312 out = fd;
313 tunit = s;
314
315 (void) use_nasm;
316 if (asmflag == NULL
317 || strcmp(asmname, "gas") == 0
318 || strcmp(asmname, "as") == 0) {
319 emit = &amd64_emit_gas;
320 emit_x86 = &x86_emit_x86_gas;
321 x86_backend.init(fd, s);
322 x86_emit_gas.init(out, tunit);
323 emit_amd64 = &emit_amd64_gas;
324 } else if (strcmp(asmname, "yasm") == 0) {
325 /* Default is yasm */
326 emit = &amd64_emit_yasm;
327 emit_x86 = &x86_emit_x86_nasm; /* XXX */
328 x86_backend.init(fd, s);
329 x86_emit_nasm.init(out, tunit);
330 emit_amd64 = &emit_amd64_yasm;
331 } else {
332 (void) fprintf(stderr, "Unknown AMD64 assembler `%s'\n",
333 asmflag);
334 exit(EXIT_FAILURE);
335 }
336
337 init_regs();
338
339 /* Setup code sharing between x86 and amd64 */
340 amd64_backend.invalidate_except = x86_backend.invalidate_except;
341 amd64_backend.name_to_reg = x86_backend.name_to_reg;
342 amd64_backend.get_inlineasm_label = x86_backend.get_inlineasm_label;
343 amd64_backend.asmvreg_to_reg = x86_backend.asmvreg_to_reg;
344 amd64_backend.alloc_16_or_32bit_noesiedi =
345 x86_backend.alloc_16_or_32bit_noesiedi;
346 backend->emit = emit;
347 return emit->init(out, tunit);
348 }
349
350 static int
351 get_ptr_size(void) {
352 return 8;
353 }
354
355 static struct type *
356 get_size_t(void) {
357 return make_basic_type(TY_ULONG);
358 }
359
360 static struct type *
361 get_uintptr_t(void) {
362 return make_basic_type(TY_ULONG);
363 }
364
365 static struct type *
366 get_wchar_t(void) {
367 return make_basic_type(TY_INT);
368 }
369
370
371 static size_t
372 get_sizeof_basic(int type) {
373 switch (type) {
374 case TY_ENUM:
375 return 4; /* XXX */
376
377 case TY_INT:
378 case TY_UINT:
379 return 4;
380 case TY_LONG:
381 case TY_ULONG:
382 case TY_LLONG:
383 case TY_ULLONG:
384 return 8;
385
386 case TY_CHAR:
387 case TY_UCHAR:
388 case TY_SCHAR:
389 case TY_BOOL:
390 return 1;
391
392 case TY_SHORT:
393 case TY_USHORT:
394 return 2;
395
396 case TY_FLOAT:
397 return 4;
398
399 case TY_DOUBLE:
400 return 8; /* XXX contradicts abi */
401
402 case TY_LDOUBLE:
403 return /*10*/12;
404 default:
405 printf("err sizeof cannot cope w/ it, wuz %d\n", type);
406 abort();
407 return 1; /* XXX */
408 }
409 }
410
411 static struct vreg saved_gprs[4]; /* r12 - r15 */
412 static struct stack_block *saved_gprs_sb[4];
413
414 static void
415 do_ret(struct function *f, struct icode_instr *ip) {
416 int i;
417
418 if (f->alloca_head != NULL) {
419 struct stack_block *sb;
420 static struct vreg rvr;
421
422 rvr.stack_addr = f->alloca_regs;
423 rvr.size = 8;
424 backend_vreg_map_preg(&rvr, &amd64_x86_gprs[0]);
425 emit->store(&rvr, &rvr);
426
427 for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
428 emit->dealloca(sb, NULL);
429 }
430
431 emit->load(&amd64_x86_gprs[0], &rvr);
432 backend_vreg_unmap_preg(&amd64_x86_gprs[0]);
433 }
434 if (f->vla_head != NULL) {
435 struct stack_block *sb;
436 static struct vreg rvr;
437
438 rvr.stack_addr = f->alloca_regs;
439 rvr.size = 8;
440 backend_vreg_map_preg(&rvr, &amd64_x86_gprs[0]);
441 emit->store(&rvr, &rvr);
442
443 for (sb = f->vla_head; sb != NULL; sb = sb->next) {
444 emit->dealloc_vla(sb, NULL);
445 }
446
447 emit->load(&amd64_x86_gprs[0], &rvr);
448 backend_vreg_unmap_preg(&amd64_x86_gprs[0]);
449 }
450 if (f->callee_save_used & CSAVE_EBX) {
451 emit->load(&amd64_x86_gprs[1], &csave_rbx);
452 }
453 for (i = 12; i < 16; ++i) {
454 if (saved_gprs[i-12].stack_addr != NULL) {
455 emit->load(&amd64_gprs[i], &saved_gprs[i-12]);
456 }
457 }
458
459 if (saved_ret_addr) {
460 emit->check_ret_addr(f, saved_ret_addr);
461 }
462 emit->freestack(f, NULL);
463 emit->ret(ip);
464 }
465
466 static struct reg *
467 get_abi_reg(int index, struct type *ty) {
468 if (index == 0
469 && (is_integral_type(ty)
470 || ty->tlist != NULL)) {
471 int size = backend->get_sizeof_type(ty, NULL);
472 if (size == 8) {
473 return amd64_argregs[0];
474 } else if (size == 4) {
475 return amd64_argregs[0]->composed_of[0];
476 } else {
477 unimpl();
478 }
479 } else {
480 unimpl();
481 }
482 return NULL;
483 }
484
485 static struct reg *
486 get_abi_ret_reg(struct type *ty) {
487 if (is_integral_type(ty) || ty->tlist != NULL) {
488 return &amd64_x86_gprs[0];
489 } else {
490 unimpl();
491 }
492 /* NOTREACHED */
493 return NULL;
494 }
495
496 static void
497 map_parameters(struct function *f, struct ty_func *proto) {
498 struct sym_entry *se = proto->scope->slist;
499 struct stack_block *sb;
500 int i;
501 long offset = 16; /* rbp */
502 int gprs_used = 0;
503 int fprs_used = 0;
504 struct reg *curreg;
505 int stack_bytes_used = 0;
506
507 if (f->fty->variadic) {
508 /*
509 * Same story as usual - allocate space for argument
510 * registers; those are then followed by any possibly
511 * stack-passed variadic arguments.
512 * 6 arg regs * 8 = 48 bytes
513 * XXX floating point!! mmm..sse registers.mmm
514 */
515 f->fty->lastarg = alloc_decl();
516
517 /* Allocate 48 bytes for gprs, followed by 64 for fprs */
518 f->fty->lastarg->stack_addr = stack_malloc(f, /*48*/112);
519 }
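/*
 * Layout sketch of the 112-byte register save area allocated above
 * (gen_function() spills the argument registers into it):
 *
 *   bytes   0 -  47   rdi, rsi, rdx, rcx, r8, r9   (6 GPRs * 8 bytes)
 *   bytes  48 - 111   xmm0 - xmm7, 8 bytes each (only the low half of
 *                     every SSE register is saved here)
 *
 * The gp_offset/fp_offset values patched into the va_start() data below
 * are byte offsets into this area
 */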
520
521 if (f->proto->dtype->tlist->next == NULL
522 && (f->proto->dtype->code == TY_STRUCT
523 || f->proto->dtype->code == TY_UNION)) {
524 /*
525  * Function returns struct/union - accommodate the
526 * hidden pointer (passed as first argument)
527 * XXX duplicates mips code
528 */
529 struct vreg *hp;
530 hp = vreg_alloc(NULL,NULL,NULL,NULL);
531 hp->size = 8;
532 hp->var_backed = alloc_decl();
533 hp->var_backed->dtype =
534 n_xmemdup(f->proto->dtype, sizeof(struct type));
535 hp->var_backed->dtype->tlist = alloc_type_node();
536 hp->var_backed->dtype->tlist->type = TN_POINTER_TO;
537 hp->var_backed->stack_addr = stack_malloc(f, 8);
538 f->hidden_pointer = hp;
539 ++gprs_used;
540 }
541
542 for (i = 0; i < proto->nargs; ++i, se = se->next) {
543 size_t size;
544 long last_offset = offset;
545
546 size = backend->get_sizeof_type(se->dec->dtype, NULL);
547 if (is_integral_type(se->dec->dtype)
548 || se->dec->dtype->tlist) {
549 if (gprs_used < N_ARGREGS) {
550 /* passed in register */
551 curreg = amd64_argregs[gprs_used++];
552 sb = stack_malloc(f, size);
553 se->dec->stack_addr = sb;
554 if (size == 4) {
555 curreg = curreg->composed_of[0];
556 } else if (size == 2) {
557 curreg = curreg->composed_of[0]
558 ->composed_of[0];
559 } else if (size == 1) {
560 if (curreg->composed_of[0]
561 ->composed_of[0]
562 ->composed_of[1]) {
563 curreg = curreg->composed_of[0]
564 ->composed_of[0]
565 ->composed_of[1];
566 } else {
567 curreg = curreg->composed_of[0]
568 ->composed_of[0]
569 ->composed_of[0];
570 }
571 }
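/*
 * (The composed_of[1] check above distinguishes argument registers
 * whose 16-bit part splits into high/low byte halves - cl/dl appear to
 * be composed_of[1] - from rsi/rdi/r8/r9, whose only byte-sized sub
 * register is composed_of[0], i.e. sil/dil/r8b/r9b.)
 */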
572
573 se->dec->stack_addr->from_reg = curreg;
574 } else {
575 /* passed on stack */
576 /* XXX alignment */
577 /*assert(size == se->dec->vreg->size);*/
578
579
580 se->dec->stack_addr =
581 make_stack_block(offset,
582 /*se->dec->vreg->size*/ size);
583 se->dec->stack_addr->is_func_arg = 1;
584 offset += size/*se->dec->vreg->size*/;
585 while (offset % 8) {
586 ++offset;
587 }
588 }
589 } else if (IS_FLOATING(se->dec->dtype->code)) {
590 if (se->dec->dtype->code == TY_LDOUBLE) {
591 /* XXXX woah... what's the deal with size vs se->dec->vreg->size? */
592 /*assert(se->dec->vreg->size == size);*/
593 if (offset % 16) {
594 /* First align to 16-byte boundary */
595 offset += 16 - (offset % 16);
596 }
597 sb = make_stack_block(offset, size);
598 sb->is_func_arg = 1;
599 offset += size; /*se->dec->vreg->size;*/
600 se->dec->stack_addr = sb;
601 stack_bytes_used += /*16*/ offset - last_offset;
602 } else {
603 if (fprs_used < 8) {
604 /* passed in register */
605 curreg = &x86_sse_regs[fprs_used++];
606 sb = stack_malloc(f, size);
607 se->dec->stack_addr = sb;
608 sb->from_reg = curreg;
609 } else {
610 /*assert(size == se->dec->vreg->size); */
611 /* passed on stack */
612 /* XXX alignment */
613 se->dec->stack_addr =
614 make_stack_block(offset,
615 /*se->dec->vreg->size*/ size);
616 se->dec->stack_addr->is_func_arg = 1;
617 offset += size /*se->dec->vreg->size*/;
618 if (offset % 8) {
619 offset += 8 - (offset % 8);
620 }
621 }
622 }
623 } else if (se->dec->dtype->code == TY_STRUCT
624 || se->dec->dtype->code == TY_UNION) {
625 if (1 /*size > 16 || has_unaligned_members() */) {
626 /*
627 * 07/26/12: Align for struct first. This may
628 * require 8 bytes of padding if the struct
629 * contains long double
630 */
631 int align = backend->get_align_type(se->dec->dtype);
632 if (offset % align) {
633 offset += align - (offset % align);
634 }
635 sb = make_stack_block(offset, size);
636 offset += size; /* was before makestackblock */
637 if (offset % 8) {
638 offset += 8 - (offset % 8);
639 }
640 sb->is_func_arg = 1;
641 se->dec->stack_addr = sb;
642
643 #if 0
644 if (size % 8) {
645 stack_bytes_used += size + (8 - size % 8);
646 } else {
647 stack_bytes_used += size;
648 }
649 #endif
650 stack_bytes_used += offset - last_offset;
651 }
652 } else {
653 unimpl();
654 }
655 }
656 if (f->fty->variadic) {
657 /* Patch varargs block to real address */
658 struct stack_block *save_area;
659
660 save_area = f->fty->lastarg->stack_addr;
661 if (gprs_used == 6) {
662 /* All variadic stuff passed on stack */
663 f->fty->lastarg->stack_addr =
664 make_stack_block(offset, 0);
665 f->fty->lastarg->stack_addr->is_func_arg = 1;
666 } else {
667 f->fty->lastarg->stack_addr->from_reg =
668 (void *)&amd64_argregs[gprs_used]; /* XXX */
669 }
670 if (f->patchme) {
671 struct amd64_va_patches *p = f->patchme;
672 int n;
673
674 /*
675 * 08/07/08: Use a loop because there may be
676 * multiple items to be patched! (Multiple
677 * va_start() calls in the function)
678 */
679 for (; p != NULL; p = p->next) {
680 if (gprs_used == 6) {
681 n = 48;
682 } else {
683 n = (&amd64_argregs[gprs_used] -
684 amd64_argregs) * 8;
685 }
686 *p->gp_offset = n;
687 if (fprs_used == 8) {
688 n = 64+48;
689 } else {
690 n = (&x86_sse_regs[fprs_used] -
691 x86_sse_regs) * 8;
692 n += 48;
693 }
694
695 *p->reg_save_area = *save_area;
696 if (gprs_used == 6) {
697 /*
698 * The last argument is definitely passed
699 * on the stack so we can use that as
700 * base address
701 */
702 *p->overflow_arg_area =
703 *f->fty->lastarg->stack_addr;
704 } else {
705 /*
706 * 07/25/12: The stack area begins at [rbp + 16],
707 * but nwcc passes long double and struct
708 * arguments on the stack as well, such
709 * that we may have to advance the varargs
710 * start offset. Example:
711 *
712 * void foo(int x, struct foo f, char *fmt, ...);
713 *
714 * ... fmt and x are passed in registers, as
715 * are the first couple of varargs arguments,
716 * but since "f" is passed on the stack the
717 * last varargs arguments begin at
718 * [rbp + 16 + sizeof f] (with suitable
719 * alignment)
720 * Traditionally we always assumed excess
721 * args at rbp+16
722 */
723 int offset = 16 + stack_bytes_used;
724 *p->overflow_arg_area =
725 *make_stack_block(offset, 0);
726 p->overflow_arg_area->is_func_arg = 1;
727 }
728 }
729 #if 0
730 printf("gp offset = %d\n", n);
731 printf("reg save area = %d\n", p->reg_save_area->offset);
732 printf("overflow area = %d\n", p->overflow_arg_area->offset);
733 #endif
734 }
735 }
736 }
737
738 void store_preg_to_var(struct decl *, size_t, struct reg *);
739
740 static int
741 gen_function(struct function *f) {
742 struct ty_func *proto;
743 struct scope *scope;
744 struct icode_instr *lastret = NULL;
745 struct stack_block *sb;
746 struct sym_entry *se;
747 size_t size;
748 size_t alloca_bytes = 0;
749 size_t vla_bytes = 0;
750 int i;
751 unsigned mask;
752
753 emit->setsection(SECTION_TEXT);
754 proto = f->proto->dtype->tlist->tfunc;
755
756 emit->func_header(f);
757 emit->label(f->proto->dtype->name, 1);
758 emit->intro(f);
759
760 map_parameters(f, proto);
761
762 /* Make local variables */
763 for (scope = f->scope; scope != NULL; scope = scope->next) {
764 struct stack_block *sb;
765 struct scope *tmp;
766 struct decl **dec;
767 size_t align;
768
769 for (tmp = scope; tmp != NULL; tmp = tmp->parent) {
770 if (tmp == f->scope) {
771 break;
772 }
773 }
774
775 if (tmp == NULL) {
776 /* End of function reached */
777 break;
778 }
779 if (scope->type != SCOPE_CODE) continue;
780
781 dec = scope->automatic_decls.data;
782 for (i = 0; i < scope->automatic_decls.ndecls; ++i) {
783 struct decl *alignfor;
784
785 if (dec[i]->stack_addr != NULL) { /* XXX sucks */
786 continue;
787 } else if (IS_VLA(dec[i]->dtype->flags)) {
788 /*
789 * 05/22/11: Handle pointers to VLAs properly;
790 * We have to create a metadata block to
791 * record dimension sizes, but we allocate
792 * the pointers themselves on the stack
793 *
794 * char (*p)[N];
795 *
796 * ... "p" on stack, N in metadata block
797 */
798 if (dec[i]->dtype->tlist->type == TN_POINTER_TO) {
799 ;
800 } else {
801 continue;
802 }
803 }
804
805 alignfor = get_next_auto_decl_in_scope(scope, i);
806 if (alignfor != NULL) {
807 align = calc_align_bytes(f->total_allocated,
808 dec[i]->dtype,
809 alignfor->dtype, 0);
810 } else {
811 align = 0;
812 }
813
814 size = backend->
815 get_sizeof_decl(dec[i], NULL);
816 sb = stack_malloc(f, size+align);
817 sb->nbytes = size;
818 dec[i]->stack_addr = sb;
819 }
820 }
821 stack_align(f, 8);
822
823 /*
824  * Allocate storage for saving callee-saved registers (rbx here,
825  * r12 - r15 below), but defer saving them until rsp has been updated
826 */
827 f->total_allocated += 8;
828 if (f->callee_save_used & CSAVE_EBX) {
829 rbx_saved = 1;
830 csave_rbx.stack_addr
831 = make_stack_block(f->total_allocated, 8);
832 }
833
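/*
 * Save those of r12 - r15 that this function actually uses; the mask
 * below assumes that bit 11 of callee_save_used corresponds to r12
 */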
834 for (i = 12, mask = 1 << 11; i < 16; ++i, mask <<= 1) {
835 if (f->callee_save_used & mask) {
836 if (saved_gprs_sb[i-12] == NULL) {
837 saved_gprs_sb[i-12] = make_stack_block(0, 8);
838 }
839 f->total_allocated += 8;
840 saved_gprs[i-12].stack_addr = saved_gprs_sb[i-12];
841 saved_gprs[i-12].size = 8;
842 saved_gprs[i-12].stack_addr->offset =
843 f->total_allocated;
844 } else {
845 saved_gprs[i-12].stack_addr = NULL;
846 }
847 }
848 f->callee_save_offset = f->total_allocated;
849
850 if (stackprotectflag) {
851 f->total_allocated += 4;
852 /*
853 * 08/03/11: The save_ret_addr stack block was cached here,
854 * which caused the (later introduced) zone allocator to
855 * trash the "frame pointer" flag while resetting memory
856 */
857 saved_ret_addr
858 = make_stack_block(f->total_allocated, 4);
859 }
860
861 /* Allocate storage for temporarily saving GPRs & patch offsets */
862 for (sb = f->regs_head; sb != NULL; sb = sb->next) {
863 stack_align(f, sb->nbytes);
864 f->total_allocated += sb->nbytes;
865 sb->offset = f->total_allocated;
866 }
867 /*
868 * Allocate storage for saving alloca() pointers, and initialize
869 * it to zero
870 */
871 stack_align(f, 8);
872 for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
873 f->total_allocated += sb->nbytes;
874 alloca_bytes += sb->nbytes;
875 sb->offset = f->total_allocated;
876 }
877
878 /*
879 * Allocate storage for saving VLA data, and initialize
880 * it to zero
881 */
882 for (sb = f->vla_head; sb != NULL; sb = sb->next) {
883 f->total_allocated += sb->nbytes;
884 vla_bytes += sb->nbytes;
885 sb->offset = f->total_allocated;
886 }
887 if (f->alloca_head != NULL || f->vla_head != NULL) {
888 /*
889 * Get stack for saving return value register (rax)
890 * before performing free() on alloca()ted blocks
891 */
892 f->alloca_regs = make_stack_block(0, 8);
893 f->total_allocated += 8;
894 f->alloca_regs->offset = f->total_allocated;
895 }
896
897 if (f->total_allocated > 0) {
898 stack_align(f, 16);
899 emit->allocstack(f, f->total_allocated);
900 if (f->callee_save_used & CSAVE_EBX) {
901 backend_vreg_map_preg(&csave_rbx, &amd64_x86_gprs[1]);
902 emit->store(&csave_rbx, &csave_rbx);
903 backend_vreg_unmap_preg(&amd64_x86_gprs[1]);
904 x86_gprs[1].used = 0;
905 amd64_gprs[1].used = 0;
906 }
907 for (i = 0; i < 4; ++i) {
908 if (saved_gprs[i].stack_addr != NULL) {
909 backend_vreg_map_preg(&saved_gprs[i],
910 &amd64_gprs[12+i]);
911 emit->store(&saved_gprs[i], &saved_gprs[i]);
912 backend_vreg_unmap_preg(
913 &amd64_gprs[12+i]);
914 amd64_gprs[12+i].used = 0;
915 }
916 }
917 if (f->hidden_pointer) {
918 backend_vreg_map_preg(f->hidden_pointer, &amd64_x86_gprs[5]);
919 emit->store(f->hidden_pointer, f->hidden_pointer);
920 backend_vreg_unmap_preg(&amd64_x86_gprs[5]);
921 }
922 se = proto->scope->slist;
923 for (i = 0; i < proto->nargs; ++i, se = se->next) {
924 if (se->dec->stack_addr->from_reg != NULL) {
925 static struct vreg tempvr;
926
927 tempvr.var_backed = se->dec;
928 tempvr.size = backend->get_sizeof_type(
929 se->dec->dtype, NULL);
930 tempvr.type = se->dec->dtype;
931
932 backend_vreg_map_preg(&tempvr,
933 se->dec->stack_addr->from_reg);
934 emit->store(&tempvr, &tempvr);
935 backend_vreg_unmap_preg(
936 se->dec->stack_addr->from_reg);
937 }
938 }
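/*
 * For variadic functions, spill the unused argument GPRs and all eight
 * SSE argument registers into the register save area so that va_arg()
 * can read register-passed variadic arguments from memory later on.
 * The save offset is adjusted by 8 for every register stored and
 * restored afterwards
 */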
939 if (f->fty->variadic
940 && f->fty->lastarg->stack_addr->from_reg != NULL) {
941 struct reg **r;
942 size_t saved_offset =
943 f->fty->lastarg->stack_addr->offset;
944
945 r = (struct reg **)f->fty->
946 lastarg->stack_addr->from_reg; /* XXX */
947 f->fty->lastarg->stack_addr->offset -=
948 (r - amd64_argregs) * 8;
949 for (i = r - amd64_argregs; i < N_ARGREGS; ++i) {
950 store_preg_to_var(f->fty->lastarg, 8,
951 amd64_argregs[i]);
952 f->fty->lastarg->stack_addr->offset -= 8;
953 }
954
955 /* XXX ... */
956 for (i = 0; i < 8; ++i) {
957 f->fty->lastarg->dtype =
958 make_basic_type(TY_DOUBLE);
959 store_preg_to_var(f->fty->lastarg, 8,
960 &x86_sse_regs[i]);
961 f->fty->lastarg->stack_addr->offset -= 8;
962 }
963
964 f->fty->lastarg->stack_addr->offset = saved_offset;
965 }
966 }
967 if (stackprotectflag) {
968 emit->save_ret_addr(f, saved_ret_addr);
969 }
970 if (curfunc->alloca_head != NULL) {
971 emit->zerostack(curfunc->alloca_tail, alloca_bytes);
972 }
973 if (curfunc->vla_head != NULL) {
974 emit->zerostack(curfunc->vla_tail, vla_bytes);
975 }
976
977 if (xlate_icode(f, f->icode, &lastret) != 0) {
978 return -1;
979 }
980 emit->outro(f);
981 return 0;
982 }
983
984
985 #if XLATE_IMMEDIATELY
986
987 static int
988 gen_prepare_output(void) {
989 if (gflag) {
990 /* Print file names */
991 emit->dwarf2_files();
992 }
993 if (emit->support_decls) {
994 emit->support_decls();
995 }
996 return 0;
997 }
998
999 static int
1000 gen_finish_output(void) {
1001 emit->static_init_vars(static_init_vars);
1002 emit->static_init_thread_vars(static_init_thread_vars);
1003
1004 emit->static_uninit_vars(static_uninit_vars);
1005 emit->static_uninit_thread_vars(static_uninit_thread_vars);
1006 emit->global_extern_decls(global_scope.extern_decls.data,
1007 global_scope.extern_decls.ndecls);
1008 if (emit->extern_decls) {
1009 emit->extern_decls();
1010 }
1011 emit->support_buffers();
1012 if (emit->finish_program) {
1013 emit->finish_program();
1014 }
1015 x_fflush(out);
1016 return 0;
1017 }
1018
1019 #else
1020
1021 static int
1022 gen_program(void) {
1023 struct function *func;
1024
1025 if (gflag) {
1026 /* Print file names */
1027 emit->dwarf2_files();
1028 }
1029
1030 if (emit->support_decls) {
1031 emit->support_decls();
1032 }
1033 if (emit->extern_decls) {
1034 emit->extern_decls();
1035 }
1036
1037 #if 0
1038 emit->global_decls();
1039 #endif
1040 emit->global_extern_decls(global_scope.extern_decls.data,
1041 global_scope.extern_decls.ndecls);
1042 emit->global_static_decls(global_scope.static_decls.data,
1043 global_scope.static_decls.ndecls);
1044 #if 0
1045 emit->static_decls();
1046 #endif
1047 emit->static_init_vars(static_init_vars);
1048 emit->static_uninit_vars(static_uninit_vars);
1049 emit->static_init_thread_vars(static_init_thread_vars);
1050 emit->static_uninit_thread_vars(static_uninit_thread_vars);
1051
1052 emit->struct_inits(init_list_head);
1053
1054 emit->empty();
1055 emit->strings(str_const);
1056 emit->fp_constants(float_const);
1057 emit->support_buffers();
1058 emit->empty();
1059
1060 if (emit->struct_defs) {
1061 emit->struct_defs();
1062 }
1063
1064 emit->setsection(SECTION_TEXT);
1065
1066 for (func = funclist; func != NULL; func = func->next) {
1067 curfunc = func;
1068 if (gen_function(func) != 0) {
1069 return -1;
1070 }
1071 emit->empty();
1072 emit->empty();
1073 }
1074 x_fflush(out);
1075
1076 return 0;
1077 }
1078
1079 #endif
1080
1081
1082 /*
1083 * 10/30/07: This stuff was quite wrong because it did
1084 * not align correctly and did not count the long double
1085 * size properly (it may still not be right, but seems
1086 * better now)
1087 */
1088 static void
1089 pass_ldouble_stack(
1090 struct vreg *vr,
1091 unsigned long *allpushed,
1092 struct icode_list *il) {
1093 struct vreg *dest;
1094
1095 /* We will use at least 16 bytes to pass the long double itself */
1096 *allpushed += 16;
1097
1098 dest = vreg_alloc(NULL, NULL, NULL, vr->type);
1099 dest->stack_addr = make_stack_block(vr->addr_offset, 16);
1100 dest->stack_addr->use_frame_pointer = 0;
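/*
 * (use_frame_pointer = 0: this block presumably addresses the argument
 * area allocated by pass_args_stack() relative to the stack pointer
 * rather than rbp)
 */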
1101
1102 vreg_faultin_x87(NULL, NULL, vr, il, 0);
1103 vreg_map_preg(dest, vr->pregs[0]);
1104 icode_make_store(curfunc, dest, dest, il);
1105 }
1106
1107 static unsigned long
1108 pass_args_stack(struct vreg **vrs, int nvrs,
1109 unsigned long preceding_allpushed,
1110 unsigned long precalc,
1111 struct icode_list *il) {
1112 int j;
1113 /*
1114 * 07/26/12: The stack usage calculation already allocated 8 bytes of
1115 * storage if necessary in order to ensure 16-byte-alignment for the
1116 * callee. We failed to take that alignment into account by assuming
1117 * we're starting at 0, thereby messing up alignment decisions for
1118 * long double
1119 */
1120 unsigned long allpushed = preceding_allpushed; /*0;*/
1121 int ignore_integral = 0;
1122 int ignore_floating = 0;
1123
1124 /*
1125 * 07/26/12: A bunch of clutter and highly dubious decisions have
1126 * been removed. We now take the results of offset and alignment
1127 * calculations that are performed prior to calling this function
1128  * rather than duplicating them here (error-prone)
1129 */
1130
1131 /*
1132 * 07/26/12: The argument placement is done from right to left because
1133 * we allocate storage as we go, with the stack growing "downward" and
1134  * ending up at the leftmost argument.
1135 * Because of this, the code used to handle padding improperly. Given
1136 * for example a long double argument followed by a double, a traversal
1137 * from left to right in the stack size calculation loop prior to
1138  * this function will decide, correctly:
1139 *
1140 * stack is 16-byte aligned, no changes necessary
1141 * slot 1-2 place long double (leftmost arg)
1142 * slot 3 place double
1143 * slot 4 final padding
1144 *
1145 * This right to left iteration here incorrectly did it like this
1146 * instead:
1147 *
1148 * slot 4 place double (alignment is uninteresting, will be 8 at least)
1149 * slot 3 stack is not 16-byte aligned, allocate 8 bytes padding!!!
1150 * slot 1-2 place long double
1151 *
1152  * This was incompatible with map_parameters(), which also assigns
1153  * slots in left-to-right order.
1154 *
1155 * So now, as a
1156 * XXX TEMPORARY UGLY KLUDGE XXX
1157 * we keep passing over the arguments from right to left, but check
1158  * whether the preceding item would, if we were to store the current item
1159 * at this location, require another 8 bytes of padding between itself
1160 * and this item. If that's the case, we allocate 8 bytes of padding
1161 * at the current slot instead and just move the current item 8 bytes
1162 * ahead.
1163 *
1164 * A better solution would be to:
1165 *
1166 * - Set all offset and alignment allocations in stone in the
1167 * stack size calculation iteration that is performed prior to calling
1168 * this function in order to determine alignment
1169 * - Allocate all storage in one block
1170  *   - Use the precalculated values here instead of reproducing them
1171  *     (which is very error-prone anyway)
1172 * - Store items to their corresponding locations in that storage
1173 * block
1174 */
1175
1176
1177 if (precalc > 0) {
1178 /* Allocate all storage used for passing arguments (inc alignment) */
1179 icode_make_allocstack(NULL, precalc, il);
1180 }
1181
1182 for (j = nvrs - 1; j >= 0; --j) {
1183 int remaining = 0;
1184 struct vreg *dest;
1185 size_t tysize;
1186 size_t align;
1187 int is_struct = 0;
1188 int is_ldouble = 0;
1189
1190 if (vrs[j]->addr_offset == -1) {
1191 /*
1192 * 07/26/12: This argument is not passed on the stack
1193 */
1194 continue;
1195 }
1196
1197 if ((IS_CHAR(vrs[j]->type->code)
1198 || IS_SHORT(vrs[j]->type->code))
1199 && vrs[j]->type->tlist == NULL) {
1200 vrs[j] = backend->
1201 icode_make_cast(vrs[j],
1202 make_basic_type(TY_INT), il);
1203 } else {
1204 if (vrs[j]->type->code == TY_LDOUBLE
1205 && vrs[j]->type->tlist == NULL) {
1206 is_ldouble = 1;
1207 } else {
1208 if (!is_basic_agg_type(vrs[j]->type)) {
1209 vreg_faultin_x87(NULL, NULL, vrs[j], il, 0);
1210 } else {
1211 is_struct = 1;
1212 }
1213 }
1214 }
1215
1216
1217 if (is_ldouble) {
1218 /*
1219 * 07/23/08: Do long double here as well instead of
1220 * separately, since offsets were wrong
1221 */
1222 pass_ldouble_stack(vrs[j], &allpushed, il);
1223 } else {
1224 dest = vreg_alloc(NULL, NULL, NULL, vrs[j]->type);
1225 dest->stack_addr = make_stack_block(vrs[j]->addr_offset, backend->get_sizeof_type(vrs[j]->type, NULL));
1226 dest->stack_addr->use_frame_pointer = 0;
1227 allpushed += dest->size;
1228 }
1229
1230 if (is_struct) {
1231 /*
1232 * 07/22/08: Invalidation was missing. There were
1233 * no visible known bugs, but pass_struct_union()
1234 * also called invalidate_gprs(), and it really
1235 * should be done for copystruct
1236 */
1237 backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
1238 vreg_faultin_ptr(vrs[j], il);
1239
1240 /* 04/06/08: This was missing! */
1241 icode_make_copystruct(dest, vrs[j], il);
1242 } else if (is_ldouble) {
1243 ; /* Already passed above */
1244 } else {
1245 /*
1246 * 04/06/08: Note that the store frees the x87 reg, if used!
1247 */
1248 vreg_map_preg(dest, vrs[j]->pregs[0]);
1249 icode_make_store(curfunc, dest, dest, il);
1250 }
1251 }
1252
1253 return precalc;
1254 }
1255
1256
1257
1258 static struct vreg *
1259 icode_make_fcall(struct fcall_data *fcall, struct vreg **vrs, int nvrs,
1260 struct icode_list *il)
1261 {
1262 size_t allpushed = 0;
1263 size_t would_use_stack_bytes = 0;
1264 struct vreg *tmpvr;
1265 struct vreg *ret = NULL;
1266 struct type *ty;
1267 struct icode_instr *ii;
1268 struct type_node *tn;
1269 struct vreg *struct_lvalue;
1270 struct reg *fptr_reg = NULL;
1271 int i;
1272 int need_dap = 0;
1273 int regs_used = 0;
1274 int fp_regs_used = 0;
1275 int ret_is_anon_struct = 0;
1276 int saved_regs_used;
1277 int saved_fp_regs_used;
1278
1279 ty = fcall->calltovr->type;
1280 tmpvr = fcall->calltovr;
1281
1282 tn = ty->tlist;
1283 if (tn->type == TN_POINTER_TO) {
1284 /* Called thru function pointer */
1285 tn = tn->next;
1286 }
1287
1288 struct_lvalue = fcall->lvalue;
1289
1290 if ((ty->code == TY_STRUCT
1291 || ty->code == TY_UNION)
1292 && tn->next == NULL) {
1293 if (struct_lvalue == NULL || fcall->need_anon) {
1294 struct type_node *tnsav;
1295 /*
1296 * Result of function is not assigned so we need to
1297 * allocate storage for the callee to store its
1298 * result into
1299 */
1300
1301 #if 1 /* XXX: This should go, use rettype! */
1302 tnsav = ty->tlist;
1303 ty->tlist = NULL;
1304 #endif
1305 /*
1306 * 08/05/08: Don't allocate anonymous struct return
1307 * storage right here, but when creating the stack
1308 * frame. This has already been done on MIPS, PPC
1309 * and SPARC, but not on x86/AMD64. The reason is
1310 * that it broke something that is long forgotten
1311 * now. So we'll re-enable this and fix any bugs
1312 * that may come up.
1313 *
1314 * The reason I ran into this again is that if we
1315 * don't allocate the struct on the stack frame,
1316 * then in
1317 *
1318 * struct foo otherfunc() { return ...}
1319 * struct foo func() { return otherfunc(); }
1320 *
1321 * ... the anonymous storage is reclaimed before
1322 * it can be copied as a return value, hence
1323 * trashing it
1324 */
1325 struct_lvalue = vreg_stack_alloc(ty, il, 1 /*0*/, NULL);
1326
1327 #if 1 /* XXX: This should go, use rettype! */
1328 ty->tlist = tnsav;
1329 #endif
1330 /*
1331 * 08/05/08: Don't add to allpushed since struct is
1332 * created on frame
1333 */
1334 /* allpushed += struct_lvalue->size;*/
1335 ret_is_anon_struct = 1;
1336 }
1337
1338 /* Hidden pointer is passed in first GPR! */
1339 #if 0
1340 ii = icode_make_addrof(NULL, struct_lvalue, il);
1341 append_icode_list(il, ii);
1342 #endif
1343 {
1344 struct reg *r;
1345 /*ii*/ r = make_addrof_structret(struct_lvalue, il);
1346
1347 free_preg(amd64_argregs[0], il, 1, 1);
1348 icode_make_copyreg(amd64_argregs[0], r /*ii->dat*/, NULL, NULL, il);
1349 ++regs_used;
1350 }
1351 }
1352
1353 /*
1354 * 07/20/08: This wrongly took an implicit return type into account
1355 * to determine whether default argument promotions are needed!
1356 */
1357 if (fcall->functype->nargs == -1
1358 /*|| ty->implicit*/) {
1359 /* Need default argument promotions */
1360 need_dap = 1;
1361 }
1362
1363
1364 /*
1365 * 07/24/08: Now we make three passes over all arguments; The first
1366  * pass determines which integral and non-long-double arguments need
1367 * to be passed on the stack (struct-by-value and long double always
1368 * go there), the second pass performs the passing of the stack
1369 * arguments, and the third pass passes all register arguments.
1370 *
1371 * By doing stack arguments first, we can minimize register saving
1372 * problems (since struct-by-value may need to call memcpy(), which
1373 * invalidates most GPRs)
1374 */
1375 saved_regs_used = regs_used;
1376 saved_fp_regs_used = fp_regs_used;
1377
1378 /*
1379 * First determine the amount of stack usage
1380 */
1381 for (i = 0; i < nvrs; ++i) {
1382 /* First mark the argument as not being passed on stack (may change later) */
1383 vrs[i]->addr_offset = -1;
1384
1385 if (vrs[i]->type->tlist != NULL
1386 || is_integral_type(vrs[i]->type)) {
1387 if (regs_used < N_ARGREGS) {
1388 ++regs_used;
1389 } else {
1390 vrs[i]->addr_offset = would_use_stack_bytes;
1391
1392 /*
1393  * An integral or pointer type is always
1394 * rounded up to 8 bytes if necessary
1395 */
1396 would_use_stack_bytes += 8;
1397 }
1398 } else if (IS_FLOATING(vrs[i]->type->code)) {
1399 if (vrs[i]->type->code == TY_LDOUBLE) {
1400 /*
1401 * long double is always passed on stack and
1402 * takes up two quad-word argument slots
1403 * 07/26/12: It might also require a slot of
1404 * padding in order to ensure 16-byte
1405 * alignment
1406 */
1407 if (would_use_stack_bytes % 16) {
1408 would_use_stack_bytes += 8;
1409 }
1410 vrs[i]->addr_offset = would_use_stack_bytes;
1411 would_use_stack_bytes += 16;
1412 } else {
1413 /* float or double */
1414 if (fp_regs_used < 8) {
1415 ++fp_regs_used;
1416 } else {
1417 vrs[i]->addr_offset = would_use_stack_bytes;
1418 /*
1419 * A floating point type is always
1420 * padded to 8 bytes if necessary
1421 */
1422 would_use_stack_bytes += 8;
1423 }
1424 }
1425 } else if ((vrs[i]->type->code == TY_STRUCT
1426 || vrs[i]->type->code == TY_UNION)
1427 && vrs[i]->type->tlist == NULL) {
1428 int size = backend->get_sizeof_type(vrs[i]->type, NULL);
1429 int align = backend->get_align_type(vrs[i]->type);
1430
1431 if (size % 8) {
1432 size += 8 - size % 8;
1433 }
1434
1435
1436 /*
1437 * 07/26/12: Account for possibility of 16-byte alignment
1438 * (long double members)
1439 */
1440 if (would_use_stack_bytes % align) {
1441 would_use_stack_bytes += 8;
1442 }
1443 vrs[i]->addr_offset = would_use_stack_bytes;
1444 would_use_stack_bytes += size;
1445 }
1446 }
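/*
 * Worked example (illustration only): for a call such as
 *
 *    f(l1, l2, l3, l4, l5, l6, l7, ld)    ... seven longs, one long double
 *
 * l1 - l6 go into rdi/rsi/rdx/rcx/r8/r9, l7 gets stack offset 0
 * (8 bytes), and ld is aligned up to offset 16 and takes 16 bytes, so
 * would_use_stack_bytes ends up as 32 before the final 16-byte
 * alignment adjustment below
 */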
1447
1448 /*
1449 * Reset register counters (we have to use the saved vars since the
1450 * values may not have started out as 0, e.g. if the function returns
1451 * a struct, regs_used begins counting at 1)
1452 */
1453 regs_used = saved_regs_used;
1454 fp_regs_used = saved_fp_regs_used;
1455
1456 /*
1457 * 07/27/08: As required by the ABI, ensure that the stack ends
1458 * up being 16-byte-aligned eventually
1459 */
1460 if (would_use_stack_bytes % 16) {
1461 size_t align = 16 - would_use_stack_bytes % 16;
1462
1463 allpushed += align;
1464 would_use_stack_bytes += align;
1465 }
1466
1467
1468 /*
1469 * 07/23/08: Pass all struct args in one go here!
1470 */
1471 allpushed = pass_args_stack(vrs, /*i*/ nvrs, allpushed, would_use_stack_bytes, il);
1472
1473 for (i = 0; i < nvrs; ++i) {
1474 struct reg *curreg;
1475
1476 if (fcall->functype->variadic
1477 && i >= fcall->functype->nargs) {
1478 need_dap = 1;
1479 }
1480
1481 if (vrs[i]->type->tlist != NULL
1482 || is_integral_type(vrs[i]->type)) {
1483 if (regs_used < N_ARGREGS) {
1484 curreg = amd64_argregs[regs_used];
1485 } else {
1486 curreg = NULL;
1487 }
1488
1489 /*
1490 * 07/23/08: Don't fault-in if we pass on the stack
1491 * later
1492 */
1493 if (curreg != NULL) {
1494 if ((IS_CHAR(vrs[i]->type->code)
1495 || IS_SHORT(vrs[i]->type->code))
1496 && vrs[i]->type->tlist == NULL) {
1497 vrs[i] = backend->
1498 icode_make_cast(vrs[i],
1499 make_basic_type(TY_INT), il);
1500 } else {
1501 vreg_faultin(NULL, NULL, vrs[i], il, 0);
1502 }
1503 }
1504
1505 if (curreg != NULL) {
1506 struct reg *topcurreg = curreg;
1507
1508 if (curreg->size > vrs[i]->size) {
1509 if (vrs[i]->type != NULL
1510 && vrs[i]->type->tlist != NULL
1511 && vrs[i]->type->tlist->type == TN_VARARRAY_OF) {
1512 /*
1513 * 02/23/09: The vreg size was 0 because
1514 * we are passing a VLA - don't cut off
1515 * the upper word! XXX Note that the real
1516 * question is why we are not doing VLA
1517 * array type to pointer decay when passing
1518 * it to a function - maybe that would be
1519 * the correct fix in expr_to_icode()?
1520 */
1521 ;
1522 } else {
1523 curreg = curreg->composed_of[0];
1524 }
1525 }
1526 if (vrs[i]->pregs[0] != curreg) {
1527 free_preg(topcurreg, il, 1, 1);
1528 icode_make_copyreg(curreg,
1529 vrs[i]->pregs[0],
1530 vrs[i]->type,
1531 vrs[i]->type, il);
1532 }
1533 reg_set_unallocatable(curreg);
1534 amd64_argregs[regs_used]->used = 0;
1535 ++regs_used;
1536 } else {
1537 /* Pass remaining args on stack */
1538 /*
1539 * 07/23/08: Don't pass now, and don't break,
1540 * since there may be remaining FP args which
1541 * can go into registers! Do all stack args
1542 * in one go later
1543 */
1544 }
1545 } else if (IS_FLOATING(vrs[i]->type->code)) {
1546 if (vrs[i]->type->code == TY_LDOUBLE) {
1547 /* long double is always passed on stack */
1548 ;
1549 } else {
1550 /* float or double */
1551 struct reg *curfpreg;
1552
1553 if (vrs[i]->type->code == TY_FLOAT
1554 && need_dap) {
1555 struct type *ty =
1556 make_basic_type(TY_DOUBLE);
1557 vrs[i] = backend->icode_make_cast(
1558 vrs[i], ty, il);
1559 }
1560
1561 if (fp_regs_used < 8) {
1562 curfpreg = &x86_sse_regs
1563 [fp_regs_used];
1564 if (vrs[i]->pregs[0] != curfpreg
1565 || vrs[i]->pregs[0]->vreg
1566 != vrs[i]) {
1567 free_preg(curfpreg,
1568 il, 1, 1);
1569 }
1570 vreg_faultin(curfpreg, NULL,
1571 vrs[i], il, 0);
1572 ++fp_regs_used;
1573 } else {
1574 ; /* Passed on stack */
1575 }
1576 }
1577 } else if (vrs[i]->type->code == TY_STRUCT
1578 || vrs[i]->type->code == TY_UNION) {
1579 ;
1580 } else {
1581 unimpl();
1582 }
1583 }
1584
1585 /*
1586 * In the x86 ABI, the caller is responsible for saving
1587 * eax/ecx/edx (but not ebx, esi, edi), so that's what we
1588 * do here
1589 */
1590 if (ty->tlist->type == TN_POINTER_TO) {
1591 /*
1592 * Need to indirect thru function pointer.
1593 * 07/10/15: This stuff used to come after the invalidate
1594 * below. Thus it trashed an argument register
1595 */
1596 vreg_faultin(NULL, NULL, tmpvr, il, 0);
1597 fptr_reg = tmpvr->pregs[0];
1598 tmpvr->pregs[0]->used = 0;
1599 }
1600
1601 backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
1602 if (fcall->functype->variadic || need_dap) {
1603 /* rax = number of sse registers used for call */
1604 ii = icode_make_setreg(&amd64_x86_gprs[0], fp_regs_used);
1605 append_icode_list(il, ii);
1606 reg_set_unallocatable(&amd64_x86_gprs[0]);
1607 amd64_x86_gprs[0].used = 0;
1608 }
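/*
 * (The callee's prologue only uses %al to decide how many SSE registers
 * to spill for va_arg(), so an upper bound on the number of vector
 * registers actually used would suffice here)
 */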
1609
1610
1611 if (ty->tlist->type == TN_POINTER_TO) {
1612 /* Need to indirect thru function pointer */
1613 ii = icode_make_call_indir(fptr_reg);
1614 } else {
1615 ii = icode_make_call(ty->name);
1616 if (IS_ASM_RENAMED(ty->flags)) {
1617 /*
1618 * 02/21/09: Pass renaming as icode instr kludge
1619 * to OSX AMD64 emitter
1620 */
1621 ii->hints |= HINT_INSTR_RENAMED;
1622 }
1623 }
1624 append_icode_list(il, ii);
1625 ii = icode_make_freestack(allpushed);
1626 append_icode_list(il, ii);
1627
1628 for (i = 0; i < N_ARGREGS; ++i) {
1629 reg_set_allocatable(amd64_argregs[i]);
1630 }
1631 reg_set_allocatable(&amd64_x86_gprs[0]);
1632
1633 ret = vreg_alloc(NULL, NULL, NULL, NULL);
1634 ret->type = ty;
1635
1636 /* XXX man, this pointer stuff is painful and error prone */
1637 if ((ty->tlist->type == TN_POINTER_TO
1638 && ty->tlist->next->next != NULL)
1639 || (ty->tlist->type == TN_FUNCTION
1640 && ty->tlist->next != NULL)) {
1641 /* Must be pointer */
1642 ret->pregs[0] = &amd64_x86_gprs[0];
1643 } else {
1644 if (IS_CHAR(ty->code)) {
1645 ret->pregs[0] = x86_gprs[0].composed_of[0]->
1646 composed_of[1];
1647 } else if (IS_SHORT(ty->code)) {
1648 ret->pregs[0] = x86_gprs[0].composed_of[0];
1649 } else if (IS_INT(ty->code)
1650 || ty->code == TY_ENUM) { /* XXX */
1651 ret->pregs[0] = &x86_gprs[0];
1652 } else if (IS_LONG(ty->code) || IS_LLONG(ty->code)) {
1653 ret->pregs[0] = &amd64_x86_gprs[0];
1654 } else if (ty->code == TY_FLOAT
1655 || ty->code == TY_DOUBLE) {
1656 ret->pregs[0] = &x86_sse_regs[0];
1657 } else if (ty->code == TY_LDOUBLE) {
1658 ret->pregs[0] = &x86_fprs[0];
1659 } else if (ty->code == TY_STRUCT
1660 || ty->code == TY_UNION) {
1661 if (ret_is_anon_struct) {
1662 /*
1663 * 08/16/07: Added this
1664 */
1665 ret = struct_lvalue;
1666 }
1667 ret->struct_ret = 1;
1668 } else if (ty->code == TY_VOID) {
1669 ; /* Nothing! */
1670 }
1671 }
1672
1673 ret->type = n_xmemdup(ret->type, sizeof *ret->type);
1674 if (ret->type->tlist->type == TN_POINTER_TO) {
1675 copy_tlist(&ret->type->tlist, ret->type->tlist->next->next);
1676 } else {
1677 copy_tlist(&ret->type->tlist, ret->type->tlist->next);
1678 }
1679 if (ret->type->code != TY_VOID || ret->type->tlist) {
1680 ret->size = backend->get_sizeof_type(ret->type, NULL);
1681 }
1682
1683 if (ret->pregs[0] != NULL) {
1684 vreg_map_preg(ret, ret->pregs[0]);
1685 }
1686
1687 if (is_x87_trash(ret)) {
1688 /*
1689 * Don't keep stuff in x87 registers, ever!!
1690 */
1691 free_preg(ret->pregs[0], il, 1, 1);
1692 }
1693 return ret;
1694 }
1695
1696 static int
1697 icode_make_return(struct vreg *vr, struct icode_list *il) {
1698 struct icode_instr *ii;
1699 struct type *rtype = curfunc->rettype; /*proto->dtype;*/
1700
1701 /* 06/17/08: Use rettype instead of (wrongly) changing function type! */
1702 #if 0
1703 oldtn = rtype->tlist;
1704 rtype->tlist = rtype->tlist->next;
1705 #endif
1706
1707 if (vr != NULL) {
1708 if (IS_CHAR(rtype->code)
1709 || IS_SHORT(rtype->code)
1710 || IS_INT(rtype->code)
1711 || IS_LONG(rtype->code)
1712 || IS_LLONG(rtype->code)
1713 || rtype->code == TY_ENUM /* 06/15/09: Was missing?!? */
1714 || rtype->tlist != NULL) {
1715 struct reg *r = &amd64_x86_gprs[0];
1716 if (r->size > vr->size) {
1717 r = get_smaller_reg(r, vr->size);
1718 }
1719 vreg_faultin(r, NULL, vr, il, 0);
1720 } else if (rtype->code == TY_FLOAT
1721 || rtype->code == TY_DOUBLE) {
1722 /* Return in xmm0 */
1723 vreg_faultin(&x86_sse_regs[0], NULL, vr, il, 0);
1724 } else if (rtype->code == TY_LDOUBLE) {
1725 /* Return in st0 */
1726 vreg_faultin_x87(NULL, NULL, vr, il, 0);
1727 } else if (rtype->code == TY_STRUCT
1728 || rtype->code == TY_UNION) {
1729
1730 /* vr may come from pointer */
1731 vreg_faultin_ptr(vr, il);
1732 icode_make_copystruct(/*dest*/NULL, vr, il);
1733 }
1734 }
1735 ii = icode_make_ret(vr);
1736 append_icode_list(il, ii);
1737
1738 #if 0
1739 rtype->tlist = oldtn;
1740 #endif
1741
1742 return 0;
1743 }
1744
1745 /*
1746 * Deal with preparations necessary to make things work with the terrible
1747 * x86 design
1748 */
1749 static void
1750 icode_prepare_op(
1751 struct vreg **dest0,
1752 struct vreg **src0,
1753 int op,
1754 struct icode_list *il) {
1755
1756 x86_backend.icode_prepare_op(dest0, src0, op, il);
1757 }
1758
1759
1760
1761 /*
1762 * Most of the time, instructions give meaning to data. This function
1763 * generates code required to convert virtual register ``src'' to type
1764 * ``to'' where necessary
1765 */
1766 static struct vreg *
1767 icode_make_cast(struct vreg *src, struct type *to, struct icode_list *il) {
1768 return x86_backend.icode_make_cast(src, to, il);
1769 }
1770
1771 static void
1772 do_print_gpr(struct reg *r) {
1773 printf("%s=%d ", r->name, r->used);
1774 if (r->vreg && r->vreg->pregs[0] == r) {
1775 printf("<-> %p", r->vreg);
1776 }
1777 }
1778
1779 static void
1780 debug_print_gprs(void) {
1781 int i;
1782
1783 for (i = 0; i < 6; ++i) {
1784 printf("\t");
1785 do_print_gpr(&amd64_x86_gprs[i]);
1786 printf("\t");
1787 do_print_gpr(&x86_gprs[i]);
1788 putchar('\t');
1789 do_print_gpr(x86_gprs[i].composed_of[0]);
1790 if (i < 4) {
1791 putchar('\t');
1792 do_print_gpr(x86_gprs[i].composed_of[0]->
1793 composed_of[0]);
1794 putchar('\t');
1795 do_print_gpr(x86_gprs[i].composed_of[0]->
1796 composed_of[1]);
1797 }
1798 putchar('\n');
1799 }
1800 for (i = 8; i < 16; i += 4) {
1801 printf("\t");
1802 do_print_gpr(&amd64_gprs[i]);
1803 printf("\t");
1804 do_print_gpr(&amd64_gprs[i+1]);
1805 printf("\t");
1806 do_print_gpr(&amd64_gprs[i+2]);
1807 printf("\t");
1808 do_print_gpr(&amd64_gprs[i+3]);
1809 }
1810 }
1811
1812 static int
1813 is_multi_reg_obj(struct type *t) {
1814 (void) t;
1815 return 0;
1816 }
1817
1818
1819 struct backend amd64_backend = {
1820 ARCH_AMD64,
1821 0, /* ABI */
1822 0, /* multi_gpr_object */
1823 4, /* structure alignment */
1824 0, /* need pic initialization? */
1825 0, /* emulate long double? */
1826 0, /* relax alloc gpr order */
1827 0, /* max displacement */
1828 0, /* min displacement */
1829 x86_have_immediate_op,
1830 init,
1831 is_multi_reg_obj,
1832 get_ptr_size,
1833 get_size_t,
1834 get_uintptr_t,
1835 get_wchar_t,
1836 get_sizeof_basic,
1837 get_sizeof_type,
1838 get_sizeof_elem_type,
1839 get_sizeof_decl,
1840 get_sizeof_const,
1841 get_sizeof_vla_type,
1842 get_align_type,
1843 gen_function,
1844 #if XLATE_IMMEDIATELY
1845 gen_prepare_output,
1846 gen_finish_output,
1847 #else
1848 gen_program,
1849 #endif
1850 NULL,
1851 NULL,
1852 invalidate_gprs,
1853 /*invalidate_except*/NULL,
1854 alloc_gpr,
1855 /*alloc_16_or_32bit_noesiedi*/NULL,
1856 alloc_fpr,
1857 x86_free_preg,
1858 icode_make_fcall,
1859 icode_make_return,
1860 NULL,
1861 icode_prepare_op,
1862 NULL, /* prepare_load_addrlabel */
1863 icode_make_cast,
1864 NULL, /* icode_make_structreloc */
1865 NULL, /* icode_initialize_pic */
1866 NULL, /* icode_complete_func */
1867 make_null_block,
1868 make_init_name,
1869 debug_print_gprs,
1870 /*name_to_reg XXX */ NULL,
1871 /*asmvreg_to_reg*/ NULL,
1872 /*get_inlineasm_label*/NULL,
1873 do_ret,
1874 get_abi_reg,
1875 get_abi_ret_reg,
1876 generic_same_representation
1877 };
1878
1879