1 /*
2  * Copyright (c) 2006 - 2010, Nils R. Weller
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  *
27  * x86 backend
28  * (XXX much of this stuff can probably be adapted to different
29  * architectures)
30  */
31 #include "amd64_gen.h"
32 #include "backend.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdarg.h>
36 #include <assert.h>
37 #include <string.h>
38 #include <ctype.h>
39 #include <limits.h>
40 #include "scope.h"
41 #include "decl.h"
42 #include "type.h"
43 #include "decl.h"
44 #include "icode.h"
45 #include "functions.h"
46 #include "control.h"
47 #include "debug.h"
48 #include "token.h"
49 #include "error.h"
50 #include "functions.h"
51 #include "symlist.h"
52 #include "icode.h"
53 #include "cc_main.h"
54 #include "stack.h"
55 #include "reg.h"
56 #include "subexpr.h"
57 #include "expr.h"
58 /* #include "x86_emit_gas.h" */
59 #include "inlineasm.h"
60 #include "x86_emit_nasm.h"
61 #include "x86_emit_gas.h"
62 #include "x86_gen.h"
63 #include "amd64_emit_yasm.h"
64 #include "amd64_emit_gas.h"
65 #include "cc1_main.h"
66 #include "n_libc.h"
67 
68 
69 
/* Assembly output stream and the translation unit being compiled */
static FILE			*out;
static struct scope		*tunit;
static int			use_nasm = 1; /* XXX */

/* Set when rbx must be preserved; csave_rbx is its stack save slot */
static int			rbx_saved;
struct vreg			csave_rbx;
struct emitter_amd64		*emit_amd64;

/* Flags requesting emission of FP support masks in the data section */
int				amd64_need_negmask;
int				amd64_need_ulong_float_mask;


/* Number of classic GPRs used (rax - rdi) and of integer argument registers */
#define N_GPRS	6
#define N_ARGREGS 6

/*
 * Integer argument registers in System V AMD64 ABI order:
 * rdi, rsi, rdx, rcx, r8, r9
 */
struct reg		*amd64_argregs[] = {
	/* rdi, rsi, rdx, rcx, r8, r9 */
	&amd64_x86_gprs[5], &amd64_x86_gprs[4],
	&amd64_x86_gprs[3], &amd64_x86_gprs[2],
	&amd64_gprs[8], &amd64_gprs[9]
};

/*
 * Register tables. amd64_x86_gprs holds the 64bit extensions of the
 * classic x86 registers (rax..rdi, NULL-terminated via slot 6);
 * amd64_gprs and the _32/_16/_8bit tables hold r8 - r15 and their
 * sub-registers (only indices 8-15 are initialized in init_regs()).
 */
struct reg		amd64_x86_gprs[7];
struct reg		amd64_gprs[16];
struct reg		amd64_gprs_32bit[16];
struct reg		amd64_gprs_16bit[16];
struct reg		amd64_gprs_8bit[16];
struct reg		amd64_sil;
struct reg		amd64_dil;

/* Callee-save flags for r8 - r15, indexed from r8 (ABI: r12-r15 are saved) */
static int		callee_save_map[] = {
	0, 0, 0, 0, /* r8 - r11 */
	1, 1, 1, 1 /* r12 - r15 */
};
104 
105 
/*
 * Build the AMD64 register tables. Wires up:
 *  - rax..rdi (amd64_x86_gprs), each composed of the corresponding
 *    32bit x86 register from the shared x86 backend
 *  - the sil/dil byte registers, hung off the x86 16bit sub-registers
 *  - r8 - r15 with their 32/16/8bit sub-register chains
 * Must run after the x86 backend's own init, since it links into x86_gprs.
 */
static void
init_regs(void) {
	static struct reg	nullreg;
	int			i, j;
	static const struct {
		struct reg	*regs;
		char		*names[9];
	} rps[] = {
		{ amd64_x86_gprs,
			{"rax","rbx","rcx","rdx","rsi","rdi",0,0,0}},
		{ NULL, {0,0,0,0,0,0,0,0,0} }
	};

	for (i = 0; rps[i].regs != NULL; ++i) {
		nullreg.type = REG_GPR;
		nullreg.allocatable = 1;
		for (j = 0; rps[i].names[j] != NULL; ++j) {
			/* Start from a blank template, then link to the
			 * 32bit x86 register it contains */
			rps[i].regs[j] = nullreg;
			rps[i].regs[j].composed_of =
				n_xmalloc(2 * sizeof(struct reg *));
			rps[i].regs[j].composed_of[0] = &x86_gprs[j];
			rps[i].regs[j].composed_of[1] = NULL;
			rps[i].regs[j].size = 8;
			rps[i].regs[j].name = rps[i].names[j];
		}
	}

	/* sil: low byte of rsi (x86_gprs[4] is esi, its sub-reg is si) */
	amd64_sil.size = 1;
	amd64_sil.name = "sil";
	amd64_sil.type = REG_GPR;
	amd64_sil.allocatable = 1;
	x86_gprs[4].composed_of[0]->composed_of =
		n_xmalloc(2 * sizeof(struct reg *));
	x86_gprs[4].composed_of[0]->composed_of[0] = &amd64_sil;
	x86_gprs[4].composed_of[0]->composed_of[1] = NULL;

	/* dil: low byte of rdi */
	amd64_dil.size = 1;
	amd64_dil.name = "dil";
	amd64_dil.type = REG_GPR;
	amd64_dil.allocatable = 1;
	x86_gprs[5].composed_of[0]->composed_of =
		n_xmalloc(2 * sizeof(struct reg *));
	x86_gprs[5].composed_of[0]->composed_of[0] = &amd64_dil;
	x86_gprs[5].composed_of[0]->composed_of[1] = NULL;

	/* r8 - r15 plus 32/16/8bit sub-register chains (r8 -> r8d -> r8w -> r8b) */
	for (i = 8; i < 16; ++i) {
		static char	*new_gpr_names[] = {
			"r8", "r9", "r10", "r11",
			"r12", "r13", "r14", "r15"
		};
		static char	*new_gpr_names_32[] = {
			"r8d", "r9d", "r10d", "r11d",
			"r12d", "r13d", "r14d", "r15d"
		};
		static char	*new_gpr_names_16[] = {
			"r8w", "r9w", "r10w", "r11w",
			"r12w", "r13w", "r14w", "r15w"
		};
		static char	*new_gpr_names_8[] = {
			"r8b", "r9b", "r10b", "r11b",
			"r12b", "r13b", "r14b", "r15b"
		};
		amd64_gprs[i].name = new_gpr_names[i-8];
		amd64_gprs[i].size = 8;
		amd64_gprs[i].type = REG_GPR;
		amd64_gprs[i].allocatable = 1;
		amd64_gprs[i].composed_of = n_xmalloc(2 * sizeof(struct reg*));
		amd64_gprs[i].composed_of[0] = &amd64_gprs_32bit[i];
		amd64_gprs[i].composed_of[1] = NULL;

		amd64_gprs_32bit[i].name = new_gpr_names_32[i-8];
		amd64_gprs_32bit[i].size = 4;
		amd64_gprs_32bit[i].type = REG_GPR;
		amd64_gprs_32bit[i].allocatable = 1;
		amd64_gprs_32bit[i].composed_of
			= n_xmalloc(2 * sizeof(struct reg*));
		amd64_gprs_32bit[i].composed_of[0] = &amd64_gprs_16bit[i];
		amd64_gprs_32bit[i].composed_of[1] = NULL;

		amd64_gprs_16bit[i].name = new_gpr_names_16[i-8];
		amd64_gprs_16bit[i].size = 2;
		amd64_gprs_16bit[i].type = REG_GPR;
		amd64_gprs_16bit[i].allocatable = 1;
		amd64_gprs_16bit[i].composed_of
			= n_xmalloc(2 * sizeof(struct reg*));
		amd64_gprs_16bit[i].composed_of[0] = &amd64_gprs_8bit[i];
		amd64_gprs_16bit[i].composed_of[1] = NULL;

		amd64_gprs_8bit[i].name = new_gpr_names_8[i-8];
		amd64_gprs_8bit[i].size = 1;
		amd64_gprs_8bit[i].type = REG_GPR;
		amd64_gprs_8bit[i].allocatable = 1;
		amd64_gprs_8bit[i].composed_of = NULL;
	}

	/* Terminate the rax..rdi table */
	amd64_x86_gprs[6].name = NULL;
}
203 
204 struct reg *
find_top_reg(struct reg * r)205 find_top_reg(struct reg *r) {
206 	int	i;
207 
208 	for (i = 0; i < 6; ++i) {
209 		if (is_member_of_reg(&amd64_x86_gprs[i], r)) {
210 			return &amd64_x86_gprs[i];
211 		}
212 	}
213 
214 	/*
215 	 * 10/30/07: Added this. I don't know yet why it is possible to
216 	 * get an r8-r15 sub register and to have to find the top, since
217 	 * the register allocator only uses rax - rdi for small items
218 	 * currently. It may be because conversion uses sub registers
219 	 */
220 	for (i = 8; i < 16; ++i) {
221 		if (is_member_of_reg(&amd64_gprs[i], r)) {
222 			return &amd64_gprs[i];
223 		}
224 	}
225 	fprintf(stderr, "Failed to find top preg for %s\n", r->name);
226 	abort();
227 	return NULL;
228 }
229 
230 
/*
 * Invalidate a single register: forwards to free_preg() with
 * invalidate=1; the register's contents are written back to their
 * home location only if ``save'' is nonzero.
 */
static void
do_invalidate(struct reg *r, struct icode_list *il, int save) {
	free_preg(r, il, 1, save);
}
235 
236 
237 /*
238  * XXX this shouldn't be saving esi/edi/ebx and r12 - r15 when we're
239  * invalidating because of a function call, because those regs are
240  * callee-save
241  */
242 static void
invalidate_gprs(struct icode_list * il,int saveregs,int for_fcall)243 invalidate_gprs(struct icode_list *il, int saveregs, int for_fcall) {
244 	int	i;
245 
246 	(void) for_fcall;
247 	for (i = 0; i < N_GPRS; ++i) {
248 		do_invalidate(&amd64_x86_gprs[i], il, saveregs);
249 	}
250 	for (i = 8; i < 16; ++i) {
251 		do_invalidate(&amd64_gprs[i], il, saveregs);
252 	}
253 
254 	/*
255 	 * 07/26/12: Dropped incomplete SSE usage check, could
256 	 * yield compiler crashes
257 	 */
258 	for (i = 0; i < 8; ++i) {
259 		do_invalidate(&x86_sse_regs[i], il, saveregs);
260 	}
261 }
262 
263 
264 static struct reg *
alloc_gpr(struct function * f,int size,struct icode_list * il,struct reg * dontwipe,int line)265 alloc_gpr(struct function *f, int size, struct icode_list *il,
266 struct reg *dontwipe, int line) {
267 	struct reg	*ret;
268 
269 	if (size == 0) {
270 		/* 0 means GPR */
271 		size = 8;
272 	}
273 
274 	if (size < 8) {
275 		ret = x86_backend.alloc_gpr(f, size, il, dontwipe, line);
276 	} else {
277 		/*
278 		 * Notice how only r8 - r15 are used for 64bit register
279 		 * allocations. This is because the x86 gpr extensions
280 		 * (rax, rbx, etc) are used for argument passing, so
281 		 * thrashing should be avoided. Note that emit_copystruct()
282 		 * will use those regs too, so it is absolutely critical
283 		 * that struct pointers used by it are never stored in
284 		 * rdi/rsi/rdx
285 		 */
286 		ret = generic_alloc_gpr(f, size, il, dontwipe,
287 			&amd64_gprs[8], 8, callee_save_map, line);
288 	}
289 
290 	return ret;
291 }
292 
293 
/*
 * Allocate a floating point register; on AMD64 all FP values handled
 * here live in SSE registers, so just forward to the SSE allocator.
 */
static struct reg *
alloc_fpr(struct function *f, int size, struct icode_list *il,
struct reg *dontwipe) {
	return alloc_sse_fpr(f, size, il, dontwipe);
}
299 
/* Free a register by delegating to the shared x86 backend */
static void
x86_free_preg(struct reg *r, struct icode_list *il) {
	x86_backend.free_preg(r, il);
}
304 
305 /*
306  * IMPORTANT: The x86 backend and the x86 emitter that corresponds to
307  * this emitter (currently only yasm) also has to be initialized (init()),
308  * because some code is shared between x86 and amd64
309  */
/*
 * Backend init: record output stream and translation unit, select the
 * emitter based on the -asm flag (gas/as or yasm; gas is the default),
 * initialize the x86 backend/emitter pair that code is shared with,
 * build the register tables, and patch shared entry points into
 * amd64_backend. Returns the selected emitter's init() result.
 */
static int
init(FILE *fd, struct scope *s) {
	out = fd;
	tunit  = s;

	(void) use_nasm;
	if (asmflag == NULL
		|| strcmp(asmname, "gas") == 0
		|| strcmp(asmname, "as") == 0) {
		/* GNU as (default when no assembler was requested) */
		emit = &amd64_emit_gas;
		emit_x86 = &x86_emit_x86_gas;
		x86_backend.init(fd, s);
		x86_emit_gas.init(out, tunit);
		emit_amd64 = &emit_amd64_gas;
	} else if (strcmp(asmname, "yasm") == 0) {
		/* Default is yasm */
		emit = &amd64_emit_yasm;
		emit_x86 = &x86_emit_x86_nasm; /* XXX */
		x86_backend.init(fd, s);
		x86_emit_nasm.init(out, tunit);
		emit_amd64 = &emit_amd64_yasm;
	} else {
		(void) fprintf(stderr, "Unknown AMD64 assembler `%s'\n",
			asmflag);
		exit(EXIT_FAILURE);
	}

	init_regs();

	/* Setup code sharing between x86 and amd64 */
	amd64_backend.invalidate_except = x86_backend.invalidate_except;
	amd64_backend.name_to_reg = x86_backend.name_to_reg;
	amd64_backend.get_inlineasm_label = x86_backend.get_inlineasm_label;
	amd64_backend.asmvreg_to_reg = x86_backend.asmvreg_to_reg;
	amd64_backend.alloc_16_or_32bit_noesiedi =
		x86_backend.alloc_16_or_32bit_noesiedi;
	backend->emit = emit;
	return emit->init(out, tunit);
}
349 
/*
 * Size of a data pointer on AMD64 - always 8 bytes.
 */
static int
get_ptr_size(void) {
	enum { AMD64_POINTER_BYTES = 8 };

	return AMD64_POINTER_BYTES;
}
354 
/* size_t maps to unsigned long (64bit) on AMD64 */
static struct type *
get_size_t(void) {
	return make_basic_type(TY_ULONG);
}
359 
/* uintptr_t maps to unsigned long (64bit) on AMD64 */
static struct type *
get_uintptr_t(void) {
	return make_basic_type(TY_ULONG);
}
364 
/* wchar_t maps to plain int (32bit) on AMD64 */
static struct type *
get_wchar_t(void) {
	return make_basic_type(TY_INT);
}
369 
370 
371 static size_t
get_sizeof_basic(int type)372 get_sizeof_basic(int type) {
373 	switch (type) {
374 	case TY_ENUM:
375 		return 4; /* XXX */
376 
377 	case TY_INT:
378 	case TY_UINT:
379 		return 4;
380 	case TY_LONG:
381 	case TY_ULONG:
382 	case TY_LLONG:
383 	case TY_ULLONG:
384 		return 8;
385 
386 	case TY_CHAR:
387 	case TY_UCHAR:
388 	case TY_SCHAR:
389 	case TY_BOOL:
390 		return 1;
391 
392 	case TY_SHORT:
393 	case TY_USHORT:
394 		return 2;
395 
396 	case TY_FLOAT:
397 		return 4;
398 
399 	case TY_DOUBLE:
400 		return 8; /* XXX contradicts abi */
401 
402 	case TY_LDOUBLE:
403 		return /*10*/12;
404 	default:
405 	printf("err sizeof cannot cope w/ it, wuz %d\n", type);
406 	abort();
407 		return 1; /* XXX */
408 	}
409 }
410 
/* Save slots for the callee-saved r12 - r15, set up in gen_function()
 * and restored in do_ret() */
static struct vreg		saved_gprs[4]; /* r12 - r15 */
static struct stack_block	*saved_gprs_sb[4];
413 
/*
 * Emit the function epilogue for ``f'' before the return instruction
 * ``ip'': free alloca()/VLA blocks (preserving rax, which may hold the
 * return value), restore callee-saved registers (rbx, r12 - r15),
 * optionally verify the saved return address (stack protector), free
 * the stack frame and emit the actual ret.
 */
static void
do_ret(struct function *f, struct icode_instr *ip) {
	int	i;

	if (f->alloca_head != NULL) {
		struct stack_block	*sb;
		static struct vreg	rvr;

		/* Save rax across the dealloca calls */
		rvr.stack_addr = f->alloca_regs;
		rvr.size = 8;
		backend_vreg_map_preg(&rvr, &amd64_x86_gprs[0]);
		emit->store(&rvr, &rvr);

		for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
			emit->dealloca(sb, NULL);
		}

		emit->load(&amd64_x86_gprs[0], &rvr);
		backend_vreg_unmap_preg(&amd64_x86_gprs[0]);
	}
	if (f->vla_head != NULL) {
		struct stack_block	*sb;
		static struct vreg	rvr;

		/* Same dance for VLA metadata blocks */
		rvr.stack_addr = f->alloca_regs;
		rvr.size = 8;
		backend_vreg_map_preg(&rvr, &amd64_x86_gprs[0]);
		emit->store(&rvr, &rvr);

		for (sb = f->vla_head; sb != NULL; sb = sb->next) {
			emit->dealloc_vla(sb, NULL);
		}

		emit->load(&amd64_x86_gprs[0], &rvr);
		backend_vreg_unmap_preg(&amd64_x86_gprs[0]);
	}
	if (f->callee_save_used & CSAVE_EBX) {
		emit->load(&amd64_x86_gprs[1], &csave_rbx);
	}
	/* Restore r12 - r15 if they were saved in the prologue */
	for (i = 12; i < 16; ++i) {
		if (saved_gprs[i-12].stack_addr != NULL) {
			emit->load(&amd64_gprs[i], &saved_gprs[i-12]);
		}
	}

	if (saved_ret_addr) {
		emit->check_ret_addr(f, saved_ret_addr);
	}
	emit->freestack(f, NULL);
	emit->ret(ip);
}
465 
466 static struct reg *
get_abi_reg(int index,struct type * ty)467 get_abi_reg(int index, struct type *ty) {
468 	if (index == 0
469 		&& (is_integral_type(ty)
470 			|| ty->tlist != NULL)) {
471 		int	size = backend->get_sizeof_type(ty, NULL);
472 		if (size == 8) {
473 			return amd64_argregs[0];
474 		} else if (size == 4) {
475 			return amd64_argregs[0]->composed_of[0];
476 		} else {
477 			unimpl();
478 		}
479 	} else {
480 		unimpl();
481 	}
482 	return NULL;
483 }
484 
485 static struct reg *
get_abi_ret_reg(struct type * ty)486 get_abi_ret_reg(struct type *ty) {
487 	if (is_integral_type(ty) || ty->tlist != NULL) {
488 		return &amd64_x86_gprs[0];
489 	} else {
490 		unimpl();
491 	}
492 	/* NOTREACHED */
493 	return NULL;
494 }
495 
/*
 * Assign a storage location to every parameter of function ``f''
 * according to the System V AMD64 calling convention as implemented
 * here:
 *  - the first 6 integer/pointer args go in rdi,rsi,rdx,rcx,r8,r9
 *    (spilled to freshly allocated local stack slots)
 *  - the first 8 non-long-double FP args go in xmm0-xmm7
 *  - long double, struct/union, and excess args live on the caller's
 *    stack starting at [rbp + 16]
 * For variadic functions a register save area is reserved and any
 * recorded va_start() sites are patched with the computed offsets.
 */
static void
map_parameters(struct function *f, struct ty_func *proto) {
	struct sym_entry	*se = proto->scope->slist;
	struct stack_block	*sb;
	int			i;
	long			offset = 16; /* rbp */
	int			gprs_used = 0;
	int			fprs_used = 0;
	struct reg		*curreg;
	int			stack_bytes_used = 0;

	if (f->fty->variadic) {
		/*
		 * Same story as usual - allocate space for argument
		 * registers; those are then followed by any possibly
		 * stack-passed variadic arguments.
		 * 6 arg regs * 8 = 48 bytes
		 * XXX floating point!! mmm..sse registers.mmm
		 */
		f->fty->lastarg = alloc_decl();

		/* Allocate 48 bytes for gprs, followed by 64 for fprs */
		f->fty->lastarg->stack_addr = stack_malloc(f, /*48*/112);
	}

	if (f->proto->dtype->tlist->next == NULL
		&& (f->proto->dtype->code == TY_STRUCT
		|| f->proto->dtype->code == TY_UNION)) {
		/*
		 * Function returns struct/union - accomodate for
		 * hidden pointer (passed as first argument)
		 * XXX duplicates mips code
		 */
		struct vreg	*hp;
		hp = vreg_alloc(NULL,NULL,NULL,NULL);
		hp->size = 8;
		hp->var_backed = alloc_decl();
		hp->var_backed->dtype =
			n_xmemdup(f->proto->dtype, sizeof(struct type));
		hp->var_backed->dtype->tlist = alloc_type_node();
		hp->var_backed->dtype->tlist->type = TN_POINTER_TO;
		hp->var_backed->stack_addr = stack_malloc(f, 8);
		f->hidden_pointer = hp;
		++gprs_used;
	}

	/* Walk the parameter list and assign each one a location */
	for (i = 0; i < proto->nargs; ++i, se = se->next) {
		size_t		size;
		long		last_offset = offset;

		size = backend->get_sizeof_type(se->dec->dtype, NULL);
		if (is_integral_type(se->dec->dtype)
			|| se->dec->dtype->tlist) {
			if (gprs_used < N_ARGREGS) {
				/* passed in register */
				curreg = amd64_argregs[gprs_used++];
				sb = stack_malloc(f, size);
				se->dec->stack_addr = sb;
				/*
				 * Select the sub-register matching the
				 * parameter size; for 1-byte args prefer
				 * the high-byte sibling when one exists
				 * (ah/bh-style), else the low byte
				 */
				if (size == 4) {
					curreg = curreg->composed_of[0];
				} else if (size == 2) {
					curreg = curreg->composed_of[0]
						->composed_of[0];
				} else if (size == 1) {
					if (curreg->composed_of[0]
						->composed_of[0]
						->composed_of[1]) {
						curreg = curreg->composed_of[0]
							->composed_of[0]
							->composed_of[1];
					} else {
						curreg = curreg->composed_of[0]
							->composed_of[0]
							->composed_of[0];
					}
				}

				se->dec->stack_addr->from_reg = curreg;
			} else {
				/* passed on stack */
				/* XXX alignment */
/*assert(size == se->dec->vreg->size);*/


				se->dec->stack_addr =
					make_stack_block(offset,
						/*se->dec->vreg->size*/ size);
				se->dec->stack_addr->is_func_arg = 1;
				offset += size/*se->dec->vreg->size*/;
				while (offset % 8) {
					++offset;
				}
			}
		} else if (IS_FLOATING(se->dec->dtype->code)) {
			if (se->dec->dtype->code == TY_LDOUBLE) {
/* XXXX woah... what's the deal with size vs se->dec->vreg->size? */
/*assert(se->dec->vreg->size == size);*/
				if (offset % 16) {
					/* First align to 16-byte boundary */
					offset += 16 - (offset % 16);
				}
				sb = make_stack_block(offset, size);
				sb->is_func_arg = 1;
				offset += size;  /*se->dec->vreg->size;*/
				se->dec->stack_addr = sb;
				stack_bytes_used += /*16*/ offset - last_offset;
			} else {
				if (fprs_used < 8) {
					/* passed in register */
					curreg = &x86_sse_regs[fprs_used++];
					sb = stack_malloc(f, size);
					se->dec->stack_addr = sb;
					sb->from_reg = curreg;
				} else {
/*assert(size == se->dec->vreg->size);	*/
					/* passed on stack */
					/* XXX alignment */
					se->dec->stack_addr =
						make_stack_block(offset,
							/*se->dec->vreg->size*/ size);
					se->dec->stack_addr->is_func_arg = 1;
					offset += size /*se->dec->vreg->size*/;
					if (offset % 8) {
						offset += 8 - (offset % 8);
					}
				}
			}
		} else if (se->dec->dtype->code == TY_STRUCT
			|| se->dec->dtype->code == TY_UNION) {
			if (1 /*size > 16  || has_unaligned_members() */) {
				/*
				 * 07/26/12: Align for struct first. This may
				 * require 8 bytes of padding if the struct
				 * contains long double
				 */
				int	align = backend->get_align_type(se->dec->dtype);
				if (offset % align) {
					offset += align - (offset % align);
				}
				sb = make_stack_block(offset, size);
				offset += size; /* was before makestackblock */
				if (offset % 8) {
					offset += 8 - (offset % 8);
				}
				sb->is_func_arg = 1;
				se->dec->stack_addr = sb;

#if 0
				if (size % 8) {
					stack_bytes_used += size + (8 - size % 8);
				} else {
					stack_bytes_used += size;
				}
				#endif
				stack_bytes_used += offset - last_offset;
			}
		} else {
			unimpl();
		}
	}
	if (f->fty->variadic) {
		/* Patch varargs block to real address */
		struct stack_block	*save_area;

		save_area = f->fty->lastarg->stack_addr;
		if (gprs_used == 6) {
			/* All variadic stuff passed on stack */
			f->fty->lastarg->stack_addr =
				make_stack_block(offset, 0);
			f->fty->lastarg->stack_addr->is_func_arg = 1;
		} else {
			f->fty->lastarg->stack_addr->from_reg =
				(void *)&amd64_argregs[gprs_used]; /* XXX */
		}
		if (f->patchme) {
			struct amd64_va_patches	*p = f->patchme;
			int			n;

			/*
			 * 08/07/08: Use a loop because there may be
			 * multiple items to be patched! (Multiple
			 * va_start() calls in the function)
			 */
			for (; p != NULL; p = p->next) {
				/* gp_offset: bytes of GPR save area consumed */
				if (gprs_used == 6) {
					n = 48;
				} else {
					n = (&amd64_argregs[gprs_used] -
						amd64_argregs) * 8;
				}
				*p->gp_offset = n;
				/*
				 * NOTE(review): the fp_offset value computed
				 * below is never stored anywhere - ``n'' is
				 * overwritten without a corresponding
				 * *p->fp_offset assignment; confirm whether
				 * that field exists and should be patched
				 */
				if (fprs_used == 8) {
					n = 64+48;
				} else {
					n = (&x86_sse_regs[fprs_used] -
						x86_sse_regs) * 8;
					n += 48;
				}

				*p->reg_save_area = *save_area;
				if (gprs_used == 6) {
					/*
					 * The last argument is definitely passed
					 * on the stack so we can use that as
					 * base address
					 */
					*p->overflow_arg_area =
						*f->fty->lastarg->stack_addr;
				} else {
					/*
					 * 07/25/12: The stack area begins at [rbp + 16],
					 * but nwcc passes long double and struct
					 * arguments on the stack as well, such
					 * that we may have to advance the varargs
					 * start offset. Example:
					 *
					 * void foo(int x, struct foo f, char *fmt, ...);
					 *
					 * ... fmt and x are passed in registers, as
					 * are the first couple of varargs arguments,
					 * but since "f" is passed on the stack the
					 * last varargs arguments begin at
					 * [rbp + 16 + sizeof f] (with suitable
					 * alignment)
					 * Traditionally we always assumed excess
					 * args at rbp+16
					 */
					int	offset = 16 + stack_bytes_used;
					*p->overflow_arg_area =
						*make_stack_block(offset, 0);
					p->overflow_arg_area->is_func_arg = 1;
				}
			}
#if 0
printf("gp offset = %d\n", n);
printf("reg save area = %d\n", p->reg_save_area->offset);
printf("overflow area = %d\n", p->overflow_arg_area->offset);
#endif
		}
	}
}
737 
738 void	store_preg_to_var(struct decl *, size_t, struct reg *);
739 
/*
 * Generate code for one function: emit header/label/prologue, map
 * parameters, lay out the stack frame (locals, callee-save slots,
 * spill areas, alloca/VLA bookkeeping), store argument registers into
 * their home slots, then translate the function's icode list.
 * Returns 0 on success, -1 on translation failure.
 */
static int
gen_function(struct function *f) {
	struct ty_func		*proto;
	struct scope		*scope;
	struct icode_instr	*lastret = NULL;
	struct stack_block	*sb;
	struct sym_entry	*se;
	size_t			size;
	size_t			alloca_bytes = 0;
	size_t			vla_bytes = 0;
	int			i;
	unsigned		mask;

	emit->setsection(SECTION_TEXT);
	proto = f->proto->dtype->tlist->tfunc;

	emit->func_header(f);
	emit->label(f->proto->dtype->name, 1);
	emit->intro(f);

	map_parameters(f, proto);

	/* Make local variables */
	for (scope = f->scope; scope != NULL; scope = scope->next) {
		struct stack_block	*sb;
		struct scope		*tmp;
		struct decl		**dec;
		size_t			align;

		/* Only consider scopes nested inside this function */
		for (tmp = scope; tmp != NULL; tmp = tmp->parent) {
			if (tmp == f->scope) {
				break;
			}
		}

		if (tmp == NULL) {
			/* End of function reached */
			break;
		}
		if (scope->type != SCOPE_CODE) continue;

		dec = scope->automatic_decls.data;
		for (i = 0; i < scope->automatic_decls.ndecls; ++i) {
			struct decl	*alignfor;

			if (dec[i]->stack_addr != NULL) { /* XXX sucks */
				continue;
			} else if (IS_VLA(dec[i]->dtype->flags)) {
                                /*
                                 * 05/22/11: Handle pointers to VLAs properly;
                                 * We have to create a metadata block to
                                 * record dimension sizes, but we allocate
                                 * the pointers themselves on the stack
                                 *
                                 *   char (*p)[N];
                                 *
                                 * ... "p" on stack, N in metadata block
                                 */
                                if (dec[i]->dtype->tlist->type == TN_POINTER_TO) {
                                        ;
                                } else {
                                        continue;
                                }
			}

			/* Pad so the NEXT declaration ends up aligned */
			alignfor = get_next_auto_decl_in_scope(scope, i);
			if (alignfor != NULL) {
				align = calc_align_bytes(f->total_allocated,
					dec[i]->dtype,
					alignfor->dtype, 0);
			} else {
				align = 0;
			}

			size = backend->
				get_sizeof_decl(dec[i], NULL);
			sb = stack_malloc(f, size+align);
			sb->nbytes = size;
			dec[i]->stack_addr = sb;
		}
	}
	stack_align(f, 8);

	/*
	 * Allocate storage for saving callee-saved registers (ebx/esi/edi)
	 * (but defer saving them until esp has been updated)
	 */
	f->total_allocated += 8;
	if (f->callee_save_used & CSAVE_EBX) {
		rbx_saved = 1;
		csave_rbx.stack_addr
			= make_stack_block(f->total_allocated, 8);
	}

	/*
	 * Reserve save slots for r12 - r15; the mask presumably matches
	 * the callee_save_used bit encoding (bit 11 <-> r12) - TODO confirm
	 */
	for (i = 12, mask = 1 << 11; i < 16; ++i, mask <<= 1) {
		if (f->callee_save_used & mask) {
			if (saved_gprs_sb[i-12] == NULL) {
				saved_gprs_sb[i-12] = make_stack_block(0, 8);
			}
			f->total_allocated += 8;
			saved_gprs[i-12].stack_addr = saved_gprs_sb[i-12];
			saved_gprs[i-12].size = 8;
			saved_gprs[i-12].stack_addr->offset =
				f->total_allocated;
		} else {
			saved_gprs[i-12].stack_addr = NULL;
		}
	}
	f->callee_save_offset = f->total_allocated;

	if (stackprotectflag) {
		f->total_allocated += 4;
		/*
		 * 08/03/11: The save_ret_addr stack block was cached here,
		 * which caused the (later introduced) zone allocator to
		 * trash the "frame pointer" flag while resetting memory
		 */
		saved_ret_addr
			= make_stack_block(f->total_allocated, 4);
	}

	/* Allocate storage for temporarily saving GPRs & patch offsets */
	for (sb = f->regs_head; sb != NULL; sb = sb->next) {
		stack_align(f, sb->nbytes);
		f->total_allocated += sb->nbytes;
		sb->offset = f->total_allocated;
	}
	/*
	 * Allocate storage for saving alloca() pointers, and initialize
	 * it to zero
	 */
	stack_align(f, 8);
	for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
		f->total_allocated += sb->nbytes;
		alloca_bytes += sb->nbytes;
		sb->offset = f->total_allocated;
	}

	/*
	 * Allocate storage for saving VLA data, and initialize
	 * it to zero
	 */
	for (sb = f->vla_head; sb != NULL; sb = sb->next) {
		f->total_allocated += sb->nbytes;
		vla_bytes += sb->nbytes;
		sb->offset = f->total_allocated;
	}
	if (f->alloca_head != NULL || f->vla_head != NULL) {
		/*
		 * Get stack for saving return value register (rax)
		 * before performing free() on alloca()ted blocks
		 */
		f->alloca_regs = make_stack_block(0, 8);
		f->total_allocated += 8;
		f->alloca_regs->offset = f->total_allocated;
	}

	if (f->total_allocated > 0) {
		stack_align(f, 16);
		emit->allocstack(f, f->total_allocated);
		if (f->callee_save_used & CSAVE_EBX) {
			backend_vreg_map_preg(&csave_rbx, &amd64_x86_gprs[1]);
			emit->store(&csave_rbx, &csave_rbx);
			backend_vreg_unmap_preg(&amd64_x86_gprs[1]);
			x86_gprs[1].used = 0;
			/*
			 * NOTE(review): amd64_gprs[1] lies in the
			 * uninitialized low slots of the r8-r15 table;
			 * possibly amd64_x86_gprs[1] (rbx) was meant -
			 * confirm
			 */
			amd64_gprs[1].used = 0;
		}
		for (i = 0; i < 4; ++i) {
			if (saved_gprs[i].stack_addr != NULL) {
				backend_vreg_map_preg(&saved_gprs[i],
					&amd64_gprs[12+i]);
				emit->store(&saved_gprs[i], &saved_gprs[i]);
				backend_vreg_unmap_preg(
					&amd64_gprs[12+i]);
				amd64_gprs[12+i].used = 0;
			}
		}
		if (f->hidden_pointer) {
			backend_vreg_map_preg(f->hidden_pointer, &amd64_x86_gprs[5]);
			emit->store(f->hidden_pointer, f->hidden_pointer);
			backend_vreg_unmap_preg(&amd64_x86_gprs[5]);
		}
		/* Spill register-passed parameters to their home slots */
		se = proto->scope->slist;
		for (i = 0; i < proto->nargs; ++i, se = se->next) {
			if (se->dec->stack_addr->from_reg != NULL) {
				static struct vreg	tempvr;

				tempvr.var_backed = se->dec;
				tempvr.size = backend->get_sizeof_type(
					se->dec->dtype, NULL);
				tempvr.type = se->dec->dtype;

				backend_vreg_map_preg(&tempvr,
					se->dec->stack_addr->from_reg);
				emit->store(&tempvr, &tempvr);
				backend_vreg_unmap_preg(
					se->dec->stack_addr->from_reg);
			}
		}
		/* Fill the varargs register save area (GPRs, then SSE regs) */
		if (f->fty->variadic
			&& f->fty->lastarg->stack_addr->from_reg != NULL) {
			struct reg	**r;
			size_t		saved_offset =
				f->fty->lastarg->stack_addr->offset;

			r = (struct reg **)f->fty->
				lastarg->stack_addr->from_reg; /* XXX */
			f->fty->lastarg->stack_addr->offset -=
				(r - amd64_argregs) * 8;
			for (i = r - amd64_argregs; i < N_ARGREGS; ++i) {
				store_preg_to_var(f->fty->lastarg, 8,
					amd64_argregs[i]);
				f->fty->lastarg->stack_addr->offset -= 8;
			}

			/* XXX ... */
			for (i = 0; i < 8; ++i) {
				f->fty->lastarg->dtype =
					make_basic_type(TY_DOUBLE);
				store_preg_to_var(f->fty->lastarg, 8,
					&x86_sse_regs[i]);
				f->fty->lastarg->stack_addr->offset -= 8;
			}

			f->fty->lastarg->stack_addr->offset = saved_offset;
		}
	}
	if (stackprotectflag) {
		emit->save_ret_addr(f, saved_ret_addr);
	}
	if (curfunc->alloca_head != NULL) {
		emit->zerostack(curfunc->alloca_tail, alloca_bytes);
	}
	if (curfunc->vla_head != NULL) {
		emit->zerostack(curfunc->vla_tail, vla_bytes);
	}

	if (xlate_icode(f, f->icode, &lastret) != 0) {
		return -1;
	}
	emit->outro(f);
	return 0;
}
983 
984 
985 #if XLATE_IMMEDIATELY
986 
/*
 * Emit the declarations that must precede any function code when
 * translating immediately: dwarf2 file names (with -g) and emitter
 * support declarations. Always returns 0.
 */
static int
gen_prepare_output(void) {
	if (gflag) {
		/* Print file names */
		emit->dwarf2_files();
	}
	if (emit->support_decls) {
		emit->support_decls();
	}
	return 0;
}
998 
/*
 * Emit everything that comes after the function bodies in immediate
 * translation mode: static/extern variable definitions, support
 * buffers and the final program trailer, then flush the output
 * stream. Always returns 0.
 */
static int
gen_finish_output(void) {
	emit->static_init_vars(static_init_vars);
	emit->static_init_thread_vars(static_init_thread_vars);

	emit->static_uninit_vars(static_uninit_vars);
	emit->static_uninit_thread_vars(static_uninit_thread_vars);
	emit->global_extern_decls(global_scope.extern_decls.data,
		global_scope.extern_decls.ndecls);
	if (emit->extern_decls) {
		emit->extern_decls();
	}
	emit->support_buffers();
	if (emit->finish_program) {
		emit->finish_program();
	}
	x_fflush(out);
	return 0;
}
1018 
1019 #else
1020 
/*
 * Whole-program code generation (non-immediate mode): emit global and
 * static data, string/FP constants and support buffers, then generate
 * every function in the translation unit. Returns 0 on success, -1 if
 * any function fails to translate.
 */
static int
gen_program(void) {
	struct function		*func;

	if (gflag) {
		/* Print file names */
		emit->dwarf2_files();
	}

	if (emit->support_decls) {
		emit->support_decls();
	}
	if (emit->extern_decls) {
		emit->extern_decls();
	}

#if 0
	emit->global_decls();
#endif
	emit->global_extern_decls(global_scope.extern_decls.data,
			global_scope.extern_decls.ndecls);
	emit->global_static_decls(global_scope.static_decls.data,
			global_scope.static_decls.ndecls);
#if 0
	emit->static_decls();
#endif
	emit->static_init_vars(static_init_vars);
	emit->static_uninit_vars(static_uninit_vars);
	emit->static_init_thread_vars(static_init_thread_vars);
	emit->static_uninit_thread_vars(static_uninit_thread_vars);

	emit->struct_inits(init_list_head);

	emit->empty();
	emit->strings(str_const);
	emit->fp_constants(float_const);
	emit->support_buffers();
	emit->empty();

	if (emit->struct_defs) {
		emit->struct_defs();
	}

	emit->setsection(SECTION_TEXT);

	/* Translate all functions, one at a time */
	for (func = funclist; func != NULL; func = func->next) {
		curfunc = func;
		if (gen_function(func) != 0) {
			return -1;
		}
		emit->empty();
		emit->empty();
	}
	x_fflush(out);

	return 0;
}
1078 
1079 #endif
1080 
1081 
1082 /*
1083  * 10/30/07: This stuff was quite wrong because it did
1084  * not align correctly and did not count the long double
1085  * size properly (it may still not be right, but seems
1086  * better now)
1087  */
1088 static void
pass_ldouble_stack(struct vreg * vr,unsigned long * allpushed,struct icode_list * il)1089 pass_ldouble_stack(
1090 	struct vreg *vr,
1091 	unsigned long *allpushed,
1092 	struct icode_list *il) {
1093 	struct vreg	*dest;
1094 
1095 	/* We will use at least 16 bytes to pass the long double itself */
1096 	*allpushed += 16;
1097 
1098 	dest = vreg_alloc(NULL, NULL, NULL, vr->type);
1099 	dest->stack_addr = make_stack_block(vr->addr_offset, 16);
1100 	dest->stack_addr->use_frame_pointer = 0;
1101 
1102 	vreg_faultin_x87(NULL, NULL, vr, il, 0);
1103 	vreg_map_preg(dest, vr->pregs[0]);
1104 	icode_make_store(curfunc, dest, dest, il);
1105 }
1106 
1107 static unsigned long
pass_args_stack(struct vreg ** vrs,int nvrs,unsigned long preceding_allpushed,unsigned long precalc,struct icode_list * il)1108 pass_args_stack(struct vreg **vrs, int nvrs,
1109 		unsigned long preceding_allpushed,
1110 		unsigned long precalc,
1111 		struct icode_list *il) {
1112 	int		j;
1113 	/*
1114 	 * 07/26/12: The stack usage calculation already allocated 8 bytes of
1115 	 * storage if necessary in order to ensure 16-byte-alignment for the
1116 	 * callee. We failed to take that alignment into account by assuming
1117 	 * we're starting at 0, thereby messing up alignment decisions for
1118 	 * long double
1119 	 */
1120 	unsigned long	allpushed = preceding_allpushed;  /*0;*/
1121 	int		ignore_integral = 0;
1122 	int		ignore_floating = 0;
1123 
1124 	/*
1125 	 * 07/26/12: A bunch of clutter and highly dubious decisions have
1126 	 * been removed. We now take the results of offset and alignment
1127 	 * calculations that are performed prior to calling this function
1128 	 * rather than duplication them here (error-prone)
1129 	 */
1130 
1131 	/*
1132 	 * 07/26/12: The argument placement is done from right to left because
1133 	 * we allocate storage as we go, with the stack growing "downward" and
1134 	 * ending up to the leftmost argument.
1135 	 * Because of this, the code used to handle padding improperly. Given
1136 	 * for example a long double argument followed by a double, a traversal
1137 	 * from left to right in the stack size calculation loop prior to
1138 	 * this function will decide, correcly:
1139 	 *
1140 	 *                stack is 16-byte aligned, no changes necessary
1141 	 * slot 1-2       place long double   (leftmost arg)
1142 	 * slot 3         place double
1143 	 * slot 4         final padding
1144 	 *
1145 	 * This right to left iteration here incorrectly did it like this
1146 	 * instead:
1147 	 *
1148 	 * slot 4     place double    (alignment is uninteresting, will be 8 at least)
1149 	 * slot 3     stack is not 16-byte aligned, allocate 8 bytes padding!!!
1150 	 * slot 1-2   place long double
1151 	 *
1152 	 * This was incompatible with map_parameters(), which also decides
1153 	 * the left to right way.
1154 	 *
1155 	 * So now, as a
1156 	 *      XXX TEMPORARY UGLY KLUDGE XXX
1157 	 * we keep passing over the arguments from right to left, but check
1158 	 * whether the preceding would, if we were to store the current item
1159 	 * at this location, require another 8 bytes of padding between itself
1160 	 * and this item. If that's the case, we allocate 8 bytes of padding
1161 	 * at the current slot instead and just move the current item 8 bytes
1162 	 * ahead.
1163 	 *
1164 	 * A better solution would be to:
1165 	 *
1166 	 *    - Set all offset and alignment allocations in stone in the
1167 	 * stack size calculation iteration that is performed prior to calling
1168 	 * this function in order to determine alignment
1169 	 *    - Allocate all storage in one block
1170 	 *    - Use the precalcuated values here instead of reproducing them
1171 	 * (which very error-prone anyway)
1172 	 *    - Store items to their corresponding locations in that storage
1173 	 * block
1174 	 */
1175 
1176 
1177 	if (precalc > 0) {
1178 		/* Allocate all storage used for passing arguments (inc alignment) */
1179 		icode_make_allocstack(NULL, precalc, il);
1180 	}
1181 
1182 	for (j = nvrs - 1; j >= 0; --j) {
1183 		int		remaining = 0;
1184 		struct vreg	*dest;
1185 		size_t		tysize;
1186 		size_t		align;
1187 		int		is_struct = 0;
1188 		int		is_ldouble = 0;
1189 
1190 		if (vrs[j]->addr_offset == -1) {
1191 			/*
1192 			 * 07/26/12: This argument is not passed on the stack
1193 			 */
1194 			continue;
1195 		}
1196 
1197 		if ((IS_CHAR(vrs[j]->type->code)
1198 			|| IS_SHORT(vrs[j]->type->code))
1199 			&& vrs[j]->type->tlist == NULL) {
1200 			vrs[j] = backend->
1201 				icode_make_cast(vrs[j],
1202 					make_basic_type(TY_INT), il);
1203 		} else {
1204 			if (vrs[j]->type->code == TY_LDOUBLE
1205 				&& vrs[j]->type->tlist == NULL) {
1206 				is_ldouble = 1;
1207 			} else {
1208 				if (!is_basic_agg_type(vrs[j]->type)) {
1209 					vreg_faultin_x87(NULL, NULL, vrs[j], il, 0);
1210 				} else {
1211 					is_struct = 1;
1212 				}
1213 			}
1214 		}
1215 
1216 
1217 		if (is_ldouble) {
1218 			/*
1219 			 * 07/23/08: Do long double here as well instead of
1220 			 * separately, since offsets were wrong
1221 			 */
1222 			pass_ldouble_stack(vrs[j], &allpushed, il);
1223 		} else {
1224 			dest = vreg_alloc(NULL, NULL, NULL, vrs[j]->type);
1225 			dest->stack_addr = make_stack_block(vrs[j]->addr_offset, backend->get_sizeof_type(vrs[j]->type, NULL));
1226 			dest->stack_addr->use_frame_pointer = 0;
1227 			allpushed += dest->size;
1228 		}
1229 
1230 		if (is_struct) {
1231 			/*
1232 			 * 07/22/08: Invalidation was missing. There were
1233 			 * no visible known bugs, but pass_struct_union()
1234 			 * also called invalidate_gprs(), and it really
1235 			 * should be done for copystruct
1236 			 */
1237 			backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
1238 			vreg_faultin_ptr(vrs[j], il);
1239 
1240 			/* 04/06/08: This was missing! */
1241 			icode_make_copystruct(dest, vrs[j], il);
1242 		} else if (is_ldouble) {
1243 			; /* Already passed above */
1244 		} else {
1245 			/*
1246 			 * 04/06/08: Note that the store frees the x87 reg, if used!
1247 			 */
1248 			vreg_map_preg(dest, vrs[j]->pregs[0]);
1249 			icode_make_store(curfunc, dest, dest, il);
1250 		}
1251 	}
1252 
1253 	return precalc;
1254 }
1255 
1256 
1257 
/*
 * Generate icode for a function call on AMD64.
 *
 * Arguments are distributed per the SysV AMD64 convention as
 * implemented here: integral/pointer args go into amd64_argregs (up
 * to N_ARGREGS), float/double into the first 8 SSE registers, and
 * everything else - overflow args, long double, struct/union by
 * value - onto the stack. Struct/union returns receive a hidden
 * pointer in the first argument register.
 *
 * Returns a vreg describing the call's result value (mapped to the
 * ABI return register), or an unmapped vreg for void results.
 */
static struct vreg *
icode_make_fcall(struct fcall_data *fcall, struct vreg **vrs, int nvrs,
struct icode_list *il)
{
	size_t			allpushed = 0;
	size_t			would_use_stack_bytes = 0;
	struct vreg		*tmpvr;
	struct vreg		*ret = NULL;
	struct type		*ty;
	struct icode_instr	*ii;
	struct type_node	*tn;
	struct vreg		*struct_lvalue;
	struct reg 		*fptr_reg = NULL;
	int			i;
	int			need_dap = 0;
	int			regs_used = 0;
	int			fp_regs_used = 0;
	int			ret_is_anon_struct = 0;
	int			saved_regs_used;
	int			saved_fp_regs_used;

	ty = fcall->calltovr->type;
	tmpvr = fcall->calltovr;

	tn = ty->tlist;
	if (tn->type == TN_POINTER_TO) {
		/* Called thru function pointer */
		tn = tn->next;
	}

	struct_lvalue = fcall->lvalue;

	/* Struct/union return value: set up the hidden return pointer */
	if ((ty->code == TY_STRUCT
		|| ty->code == TY_UNION)
		&& tn->next == NULL) {
		if (struct_lvalue == NULL || fcall->need_anon) {
			struct type_node	*tnsav;
			/*
			 * Result of function is not assigned so we need to
			 * allocate storage for the callee to store its
			 * result into
			 */

#if 1 /* XXX: This should go, use rettype! */
			tnsav = ty->tlist;
			ty->tlist = NULL;
#endif
			/*
			 * 08/05/08: Don't allocate anonymous struct return
			 * storage right here, but when creating the stack
			 * frame. This has already been done on MIPS, PPC
			 * and SPARC, but not on x86/AMD64. The reason is
			 * that it broke something that is long forgotten
			 * now. So we'll re-enable this and fix any bugs
			 * that may come up.
			 *
			 * The reason I ran into this again is that if we
			 * don't allocate the struct on the stack frame,
			 * then in
			 *
			 *     struct foo otherfunc() { return ...}
			 *     struct foo func() { return otherfunc(); }
			 *
			 * ... the anonymous storage is reclaimed before
			 * it can be copied as a return value, hence
			 * trashing it
			 */
			struct_lvalue = vreg_stack_alloc(ty, il, 1 /*0*/, NULL);

#if 1 /* XXX: This should go, use rettype! */
			ty->tlist = tnsav;
#endif
			/*
			 * 08/05/08: Don't add to allpushed since struct is
			 * created on frame
			 */
			/* allpushed += struct_lvalue->size;*/
			ret_is_anon_struct = 1;
		}

		/* Hidden pointer is passed in first GPR! */
#if 0
		ii = icode_make_addrof(NULL, struct_lvalue, il);
		append_icode_list(il, ii);
#endif
		{
			struct reg	*r;
			/*ii*/ r = make_addrof_structret(struct_lvalue, il);

			free_preg(amd64_argregs[0], il, 1, 1);
			icode_make_copyreg(amd64_argregs[0], r /*ii->dat*/, NULL, NULL, il);
			++regs_used;
		}
	}

	/*
	 * 07/20/08: This wrongly took an implicit return type into account
	 * to determine whether default argument promotions are needed!
	 */
	if (fcall->functype->nargs == -1
		/*|| ty->implicit*/) {
		/* Need default argument promotions */
		need_dap = 1;
	}


	/*
	 * 07/24/08: Now we make three passes over all arguments; The first
	 * part determines which integral and non-long-double arguments need
	 * to be passed on the stack (struct-by-value and long double always
	 * go there), the second pass performs the passing of the stack
	 * arguments, and the third pass passes all register arguments.
	 *
	 * By doing stack arguments first, we can minimize register saving
	 * problems (since struct-by-value may need to call memcpy(), which
	 * invalidates most GPRs)
	 */
	saved_regs_used = regs_used;
	saved_fp_regs_used = fp_regs_used;

	/*
	 * First determine the amount of stack usage
	 */
	for (i = 0; i < nvrs; ++i) {
		/* First mark the argument as not being passed on stack (may change later) */
		vrs[i]->addr_offset = -1;

		if (vrs[i]->type->tlist != NULL
			|| is_integral_type(vrs[i]->type)) {
			if (regs_used < N_ARGREGS) {
				++regs_used;
			} else {
				vrs[i]->addr_offset = would_use_stack_bytes;

				/*
				 * An integral or scalar type is always
				 * rounded up to 8 bytes if necessary
				 */
				would_use_stack_bytes += 8;
			}
		} else if (IS_FLOATING(vrs[i]->type->code)) {
			if (vrs[i]->type->code == TY_LDOUBLE) {
				/*
				 * long double is always passed on stack and
				 * takes up two quad-word argument slots
				 * 07/26/12: It might also require a slot of
				 * padding in order to ensure 16-byte
				 * alignment
				 */
				if (would_use_stack_bytes % 16) {
					would_use_stack_bytes += 8;
				}
				vrs[i]->addr_offset = would_use_stack_bytes;
				would_use_stack_bytes += 16;
			} else {
				/* float or double */
				if (fp_regs_used < 8) {
					++fp_regs_used;
				} else {
					vrs[i]->addr_offset = would_use_stack_bytes;
					/*
					 * A floating point type is always
					 * padded to 8 bytes if necessary
					 */
					would_use_stack_bytes += 8;
				}
			}
		} else if ((vrs[i]->type->code == TY_STRUCT
			|| vrs[i]->type->code == TY_UNION)
			&& vrs[i]->type->tlist == NULL) {
			/* Struct/union by value: always on the stack, 8-byte padded */
			int	size = backend->get_sizeof_type(vrs[i]->type, NULL);
			int	align = backend->get_align_type(vrs[i]->type);

			if (size % 8) {
				size += 8 - size % 8;
			}


			/*
			 * 07/26/12: Account for possibility of 16-byte alignment
			 * (long double members)
			 */
			if (would_use_stack_bytes % align) {
				would_use_stack_bytes += 8;
			}
			vrs[i]->addr_offset = would_use_stack_bytes;
			would_use_stack_bytes += size;
		}
	}

	/*
	 * Reset register counters (we have to use the saved vars since the
	 * values may not have started out as 0, e.g. if the function returns
	 * a struct, regs_used begins counting at 1)
	 */
	regs_used = saved_regs_used;
	fp_regs_used = saved_fp_regs_used;

	/*
	 * 07/27/08: As required by the ABI, ensure that the stack ends
	 * up being 16-byte-aligned eventually
	 */
	if (would_use_stack_bytes % 16) {
		size_t	align = 16 - would_use_stack_bytes % 16;

		allpushed += align;
		would_use_stack_bytes += align;
	}


	/*
	 * 07/23/08: Pass all struct args in one go here!
	 */
	allpushed = pass_args_stack(vrs, /*i*/ nvrs, allpushed, would_use_stack_bytes, il);

	/* Third pass: pass the register arguments */
	for (i = 0; i < nvrs; ++i) {
		struct reg		*curreg;

		if (fcall->functype->variadic
			&& i >= fcall->functype->nargs) {
			need_dap = 1;
		}

		if (vrs[i]->type->tlist != NULL
			|| is_integral_type(vrs[i]->type)) {
			if (regs_used < N_ARGREGS) {
				curreg = amd64_argregs[regs_used];
			} else {
				curreg = NULL;
			}

			/*
			 * 07/23/08: Don't fault-in if we pass on the stack
			 * later
			 */
			if (curreg != NULL) {
				if ((IS_CHAR(vrs[i]->type->code)
					|| IS_SHORT(vrs[i]->type->code))
					&& vrs[i]->type->tlist == NULL) {
					vrs[i] = backend->
						icode_make_cast(vrs[i],
								make_basic_type(TY_INT), il);
				} else {
					vreg_faultin(NULL, NULL, vrs[i], il, 0);
				}
			}

			if (curreg != NULL) {
				struct reg	*topcurreg = curreg;

				if (curreg->size > vrs[i]->size) {
					if (vrs[i]->type != NULL
						&& vrs[i]->type->tlist != NULL
						&& vrs[i]->type->tlist->type == TN_VARARRAY_OF) {
						/*
						 * 02/23/09: The vreg size was 0 because
						 * we are passing a VLA - don't cut off
					 	 * the upper word! XXX Note that the real
						 * question is why we are not doing VLA
						 * array type to pointer decay when passing
						 * it to a function - maybe that would be
						 * the correct fix in expr_to_icode()?
						 */
						;
					} else {
						curreg = curreg->composed_of[0];
					}
				}
				if (vrs[i]->pregs[0] != curreg) {
					free_preg(topcurreg, il, 1, 1);
					icode_make_copyreg(curreg,
						vrs[i]->pregs[0],
						vrs[i]->type,
						vrs[i]->type, il);
				}
				reg_set_unallocatable(curreg);
				amd64_argregs[regs_used]->used = 0;
				++regs_used;
			} else {
				/* Pass remaining args on stack */
				/*
				 * 07/23/08: Don't pass now, and don't break,
				 * since there may be remaining FP args which
				 * can go into registers! Do all stack args
				 * in one go later
				 */
			}
		} else if (IS_FLOATING(vrs[i]->type->code)) {
			if (vrs[i]->type->code == TY_LDOUBLE) {
				/* long double is always passed on stack */
				;
			} else {
				/* float or double */
				struct reg	*curfpreg;

				/* Default argument promotion: float -> double */
				if (vrs[i]->type->code == TY_FLOAT
					&& need_dap) {
					struct type	*ty =
						make_basic_type(TY_DOUBLE);
					vrs[i] = backend->icode_make_cast(
						vrs[i], ty, il);
				}

				if (fp_regs_used < 8) {
					curfpreg = &x86_sse_regs
						[fp_regs_used];
					if (vrs[i]->pregs[0] != curfpreg
						|| vrs[i]->pregs[0]->vreg
						!= vrs[i]) {
						free_preg(curfpreg,
							il, 1, 1);
					}
					vreg_faultin(curfpreg, NULL,
						vrs[i], il, 0);
					++fp_regs_used;
				} else {
					; /* Passed on stack */
				}
			}
		} else if (vrs[i]->type->code == TY_STRUCT
				|| vrs[i]->type->code == TY_UNION) {
			; /* Already passed on the stack by pass_args_stack() */
		} else {
			unimpl();
		}
	}

	/*
	 * In the x86 ABI, the caller is responsible for saving
	 * eax/ecx/edx (but not ebx, esi, edi), so that's what we
	 * do here
	 */
	if (ty->tlist->type == TN_POINTER_TO) {
		/*
		 * Need to indirect thru function pointer.
		 * 07/10/15: This stuff used to come after the invalidate
		 * below. Thus it trashed an argument register
		 */
		vreg_faultin(NULL, NULL, tmpvr, il, 0);
		fptr_reg = tmpvr->pregs[0];
		tmpvr->pregs[0]->used = 0;
	}

	backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
	if (fcall->functype->variadic || need_dap) {
		/* rax = number of sse registers used for call */
		ii = icode_make_setreg(&amd64_x86_gprs[0], fp_regs_used);
		append_icode_list(il, ii);
		reg_set_unallocatable(&amd64_x86_gprs[0]);
		amd64_x86_gprs[0].used = 0;
	}


	if (ty->tlist->type == TN_POINTER_TO) {
		/* Need to indirect thru function pointer */
		ii = icode_make_call_indir(fptr_reg);
	} else {
		ii = icode_make_call(ty->name);
		if (IS_ASM_RENAMED(ty->flags)) {
			/*
			 * 02/21/09: Pass renaming as icode instr kludge
			 * to OSX AMD64 emitter
			 */
			ii->hints |= HINT_INSTR_RENAMED;
		}
	}
	append_icode_list(il, ii);
	/* Release the argument stack storage allocated above */
	ii = icode_make_freestack(allpushed);
	append_icode_list(il, ii);

	for (i = 0; i < N_ARGREGS; ++i) {
		reg_set_allocatable(amd64_argregs[i]);
	}
	reg_set_allocatable(&amd64_x86_gprs[0]);

	/* Construct the result vreg and map it to the ABI return register */
	ret = vreg_alloc(NULL, NULL, NULL, NULL);
	ret->type = ty;

	/* XXX man, this pointer stuff is painful and error prone */
	if ((ty->tlist->type == TN_POINTER_TO
		&& ty->tlist->next->next != NULL)
		|| (ty->tlist->type == TN_FUNCTION
		&& ty->tlist->next != NULL)) {
		/* Must be pointer */
		ret->pregs[0] = &amd64_x86_gprs[0];
	} else {
		if (IS_CHAR(ty->code)) {
			ret->pregs[0] = x86_gprs[0].composed_of[0]->
				composed_of[1];
		} else if (IS_SHORT(ty->code)) {
			ret->pregs[0] = x86_gprs[0].composed_of[0];
		} else if (IS_INT(ty->code)
			|| ty->code == TY_ENUM) { /* XXX */
			ret->pregs[0] = &x86_gprs[0];
		} else if (IS_LONG(ty->code) || IS_LLONG(ty->code)) {
			ret->pregs[0] = &amd64_x86_gprs[0];
		} else if (ty->code == TY_FLOAT
			|| ty->code == TY_DOUBLE) {
			ret->pregs[0] = &x86_sse_regs[0];
		} else if (ty->code == TY_LDOUBLE) {
			ret->pregs[0] = &x86_fprs[0];
		} else if (ty->code == TY_STRUCT
			|| ty->code == TY_UNION) {
			if (ret_is_anon_struct) {
				/*
				 * 08/16/07: Added this
				 */
				ret = struct_lvalue;
			}
			ret->struct_ret = 1;
		} else if (ty->code == TY_VOID) {
			; /* Nothing! */
		}
	}

	/* Strip one level of the type list: the result of calling a
	 * function (pointer) has the function's return type */
	ret->type = n_xmemdup(ret->type, sizeof *ret->type);
	if (ret->type->tlist->type == TN_POINTER_TO) {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next->next);
	} else {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next);
	}
	if (ret->type->code != TY_VOID || ret->type->tlist) {
		ret->size = backend->get_sizeof_type(ret->type, NULL);
	}

	if (ret->pregs[0] != NULL) {
		vreg_map_preg(ret, ret->pregs[0]);
	}

	if (is_x87_trash(ret)) {
		/*
		 * Don't keep stuff in x87 registers, ever!!
		 */
		free_preg(ret->pregs[0], il, 1, 1);
	}
	return ret;
}
1695 
1696 static int
icode_make_return(struct vreg * vr,struct icode_list * il)1697 icode_make_return(struct vreg *vr, struct icode_list *il) {
1698 	struct icode_instr	*ii;
1699 	struct type		*rtype = curfunc->rettype;  /*proto->dtype;*/
1700 
1701 	/* 06/17/08: Use rettype instead of (wrongly) changing function type! */
1702 #if 0
1703 	oldtn = rtype->tlist;
1704 	rtype->tlist = rtype->tlist->next;
1705 #endif
1706 
1707 	if (vr != NULL) {
1708 		if (IS_CHAR(rtype->code)
1709 			|| IS_SHORT(rtype->code)
1710 			|| IS_INT(rtype->code)
1711 			|| IS_LONG(rtype->code)
1712 			|| IS_LLONG(rtype->code)
1713 			|| rtype->code == TY_ENUM /* 06/15/09: Was missing?!? */
1714 			|| rtype->tlist != NULL) {
1715 			struct reg	*r = &amd64_x86_gprs[0];
1716 			if (r->size > vr->size) {
1717 				r = get_smaller_reg(r, vr->size);
1718 			}
1719 			vreg_faultin(r, NULL, vr, il, 0);
1720 		} else if (rtype->code == TY_FLOAT
1721 			|| rtype->code == TY_DOUBLE) {
1722 			/* Return in xmm0 */
1723 			vreg_faultin(&x86_sse_regs[0], NULL, vr, il, 0);
1724 		} else if (rtype->code == TY_LDOUBLE) {
1725 			/* Return in st0 */
1726 			vreg_faultin_x87(NULL, NULL, vr, il, 0);
1727 		} else if (rtype->code == TY_STRUCT
1728 			|| rtype->code == TY_UNION) {
1729 
1730 			/* vr may come from pointer */
1731 			vreg_faultin_ptr(vr, il);
1732 			icode_make_copystruct(/*dest*/NULL, vr, il);
1733 		}
1734 	}
1735 	ii = icode_make_ret(vr);
1736 	append_icode_list(il, ii);
1737 
1738 #if 0
1739 	rtype->tlist = oldtn;
1740 #endif
1741 
1742 	return 0;
1743 }
1744 
1745 /*
1746  * Deal with preparations necessary to make things work with the terrible
1747  * x86 design
1748  */
1749 static void
icode_prepare_op(struct vreg ** dest0,struct vreg ** src0,int op,struct icode_list * il)1750 icode_prepare_op(
1751 	struct vreg **dest0,
1752 	struct vreg **src0,
1753 	int op,
1754 	struct icode_list *il) {
1755 
1756 	x86_backend.icode_prepare_op(dest0, src0, op, il);
1757 }
1758 
1759 
1760 
1761 /*
1762  * Most of the time, instructions give meaning to data. This function
1763  * generates code required to convert virtual register ``src'' to type
1764  * ``to'' where necessary
1765  */
1766 static struct vreg *
icode_make_cast(struct vreg * src,struct type * to,struct icode_list * il)1767 icode_make_cast(struct vreg *src, struct type *to, struct icode_list *il) {
1768 	return x86_backend.icode_make_cast(src, to, il);
1769 }
1770 
1771 static void
do_print_gpr(struct reg * r)1772 do_print_gpr(struct reg *r) {
1773 	printf("%s=%d ", r->name, r->used);
1774 	if (r->vreg && r->vreg->pregs[0] == r) {
1775 		printf("<-> %p", r->vreg);
1776 	}
1777 }
1778 
1779 static void
debug_print_gprs(void)1780 debug_print_gprs(void) {
1781 	int	i;
1782 
1783 	for (i = 0; i < 6; ++i) {
1784 		printf("\t");
1785 		do_print_gpr(&amd64_x86_gprs[i]);
1786 		printf("\t");
1787 		do_print_gpr(&x86_gprs[i]);
1788 		putchar('\t');
1789 		do_print_gpr(x86_gprs[i].composed_of[0]);
1790 		if (i < 4) {
1791 			putchar('\t');
1792 			do_print_gpr(x86_gprs[i].composed_of[0]->
1793 				composed_of[0]);
1794 			putchar('\t');
1795 			do_print_gpr(x86_gprs[i].composed_of[0]->
1796 				composed_of[1]);
1797 		}
1798 		putchar('\n');
1799 	}
1800 	for (i = 8; i < 16; i += 4) {
1801 		printf("\t");
1802 		do_print_gpr(&amd64_gprs[i]);
1803 		printf("\t");
1804 		do_print_gpr(&amd64_gprs[i+1]);
1805 		printf("\t");
1806 		do_print_gpr(&amd64_gprs[i+2]);
1807 		printf("\t");
1808 		do_print_gpr(&amd64_gprs[i+3]);
1809 	}
1810 }
1811 
/*
 * This backend never spreads an object across multiple registers
 * (unlike 32bit x86, where e.g. long long takes two GPRs), so the
 * answer is always "no" regardless of the type.
 */
static int
is_multi_reg_obj(struct type *t) {
	(void) t;
	return 0;
}
1817 
1818 
/*
 * Backend description for the AMD64 target. This is a positional
 * initializer of ``struct backend'' (declared in backend.h); the
 * inline comments annotate the non-function-pointer fields and the
 * unused (NULL) slots. Many entries reuse the 32bit x86 helpers
 * directly (e.g. x86_have_immediate_op, x86_free_preg).
 */
struct backend amd64_backend = {
	ARCH_AMD64,
	0, /* ABI */
	0, /* multi_gpr_object */
	4, /* structure alignment */
	0, /* need pic initialization? */
	0, /* emulate long double? */
	0, /* relax alloc gpr order */
	0, /* max displacement */
	0, /* min displacement */
	x86_have_immediate_op,
	init,
	is_multi_reg_obj,
	get_ptr_size,
	get_size_t,
	get_uintptr_t,
	get_wchar_t,
	get_sizeof_basic,
	get_sizeof_type,
	get_sizeof_elem_type,
	get_sizeof_decl,
	get_sizeof_const,
	get_sizeof_vla_type,
	get_align_type,
	gen_function,
#if XLATE_IMMEDIATELY
	gen_prepare_output,
	gen_finish_output,
#else
	gen_program,
#endif
	NULL,
	NULL,
	invalidate_gprs,
	/*invalidate_except*/NULL,
	alloc_gpr,
	/*alloc_16_or_32bit_noesiedi*/NULL,
	alloc_fpr,
	x86_free_preg,
	icode_make_fcall,
	icode_make_return,
	NULL,
	icode_prepare_op,
	NULL, /* prepare_load_addrlabel */
	icode_make_cast,
	NULL, /* icode_make_structreloc */
	NULL, /* icode_initialize_pic */
	NULL, /* icode_complete_func */
	make_null_block,
	make_init_name,
	debug_print_gprs,
	/*name_to_reg XXX */ NULL,
	/*asmvreg_to_reg*/ NULL,
	/*get_inlineasm_label*/NULL,
	do_ret,
	get_abi_reg,
	get_abi_ret_reg,
	generic_same_representation
};
1878 
1879