/*
 * Copyright (c) 2005 - 2010, Nils R. Weller
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * x86 backend
 * (XXX much of this stuff can probably be adapted to different
 * architectures)
 */
#include "backend.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include "scope.h"
#include "decl.h"
#include "type.h"
#include "icode.h"
#include "functions.h"
#include "control.h"
#include "typemap.h"
#include "debug.h"
#include "token.h"
#include "error.h"
#include "symlist.h"
#include "stack.h"
#include "reg.h"
#include "subexpr.h"
#include "expr.h"
#include "features.h"
#include "x87_nonsense.h"
/* #include "x86_emit_gas.h" */
#include "inlineasm.h"
#include "x86_emit_nasm.h"
#include "x86_emit_gas.h"
#include "amd64_gen.h"
#include "amd64_emit_gas.h"  /* XXX for SSE */
#include "cc1_main.h"
#include "n_libc.h"

static FILE			*out;
static struct scope		*tunit;
static int			use_nasm = 1; /* XXX */
struct emitter_x86		*emit_x86;

#if ! REMOVE_FLOATBUF
struct vreg			floatbuf;
#endif

struct vreg			x87cw_new;
struct vreg			x87cw_old;

static int			ebx_saved;
static int			esi_saved;
static int			edi_saved;
struct vreg			csave_ebx;
struct vreg			csave_esi;
struct vreg			csave_edi;
struct stack_block		*saved_ret_addr;


#define N_GPRS	6

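/*
 * Note: x86_gprs has N_GPRS + 1 slots; the extra entry serves as a
 * NULL-name sentinel (see the "x86_gprs[6].name = NULL;" assignment in
 * init_regs()), so loops can terminate on a NULL name.
 */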
struct reg		x86_gprs[7];
static struct reg	x86_16bit_gprs[6];
static struct reg	x86_8bit_gprs[8];
static struct reg	x86_esp;
static struct reg	x86_ebp;
static struct reg	x86_esp_16bit;
static struct reg	x86_ebp_16bit;

struct reg		x86_fprs[8];

/* 02/09/08: Moved to x86 backend from AMD64 (for OSX) */
struct reg	x86_sse_regs[8];

int	sse_csave_map[] = {
	0, 0, 0, 0, 0, 0, 0, 0
};
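/*
 * As the all-zero map handed to generic_alloc_gpr() in alloc_sse_fpr()
 * below suggests, none of the SSE registers are treated as callee-save
 * here (a reading of the code, not a statement from the original).
 */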



static void
init_regs(void) {
	static struct reg	nullreg;
	int			i;
	static const struct {
		struct reg	*regs;
		char		*names[9];
	} rps[] = {
		{ x86_gprs,
			{"eax","ebx","ecx","edx","esi","edi",0,0,0}},
		{ x86_16bit_gprs,
			{"ax","bx","cx","dx","si","di",0,0,0 }},
		{ x86_8bit_gprs,
			{"ah","al","bh","bl","ch","cl","dh","dl",NULL}},
		{ x86_fprs,
			{ "st0", "st1", "st2", "st3", "st4", "st5",
			"st6", "st7", NULL}},
		{ NULL, {0,0,0,0,0,0,0,0,0} }
	};

	for (i = 0; rps[i].regs != NULL; ++i) {
		int	j;
		int	size = i == 0? 4: i == 1? 2: 1;
		int	type;

		if (rps[i].regs == x86_fprs) {
			type = REG_FPR;
			/*
			 * size was for some reason set to 8, with a comment
			 * saying it should be 10, which is factually correct
			 * because those really are 10 bytes big.. but we use
			 * 12 bytes for alignment, and that seems to work
			 */
			size = 12;
		} else {
			type = REG_GPR;
		}

		nullreg.type = type;
		nullreg.allocatable = 1;
		for (j = 0; rps[i].names[j] != NULL; ++j) {
			rps[i].regs[j] = nullreg;
			rps[i].regs[j].size = size;
			rps[i].regs[j].name = rps[i].names[j];
		}
	}

	x86_gprs[6].name = NULL;

	for (i = 0; i < 8; ++i) {
		static char     *names[] = {
			"xmm0", "xmm1", "xmm2", "xmm3",
			"xmm4", "xmm5", "xmm6", "xmm7",
		};
		x86_sse_regs[i].name = names[i];
		x86_sse_regs[i].type = REG_FPR;
		x86_sse_regs[i].size = 8; /* XXX */
		x86_sse_regs[i].allocatable = 1;
	}

}


static int
calc_total_refs(struct reg *r) {
	(void) r;
	return 0;
}

static void
do_invalidate(struct reg *r, struct icode_list *il, int save) {
#if FEAT_DEBUG_DUMP_BOGUS_STORES
	struct icode_instr	*tail = il? il->tail: NULL;
#endif
	if (curfunc->pic_initialized
		&& r == &x86_gprs[1]) {
		/* ebx is used for PIC access */
		return;
	}

	free_preg(r, il, 1, save);
#if FEAT_DEBUG_DUMP_BOGUS_STORES
	if (backend_warn_inv && tail != NULL && tail != il->tail) {
		icode_make_debug(il, "previous save(s) may be unneeded");
	}
#endif
}

/*
 * XXX this shouldn't be saving esi/edi/ebx when we're invalidating
 * because of a function call
 */
static void
invalidate_gprs(struct icode_list *il, int saveregs, int for_fcall) {
	int	i;

	(void) for_fcall;
	for (i = 0; i < N_GPRS; ++i) {
		do_invalidate(&x86_gprs[i], il, saveregs);
	}

	/*
	 * 07/26/12: Dropped incomplete SSE usage check (could yield compiler
	 * crashes)
	 */
	for (i = 0; i < 8; ++i) {
		do_invalidate(&x86_sse_regs[i], il, saveregs);
	}
}

/*
 * AMD64 & x86
 */
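/*
 * Illustrative usage (not from the original source): the exception list
 * is a NULL-terminated vararg list, e.g.
 *
 *     invalidate_except(il, 1, 0, &x86_gprs[0], (struct reg *)NULL);
 *
 * ... which invalidates every GPR except eax; the is_member_of_reg()
 * check below also spares registers sharing the same physical register
 * as a listed exception.
 */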
static void
invalidate_except(struct icode_list *il, int save, int for_fcall, ...) {
	int			i;
	struct reg		*except[8];
	static struct reg	*gprset;
	struct reg		*arg;
	va_list			va;

	if (gprset == NULL) {
		if (backend->arch == ARCH_X86) {
			gprset = x86_gprs;
		} else {
			/* AMD64 */
			gprset = amd64_x86_gprs;
		}
	}

	va_start(va, for_fcall);
	for (i = 0; (arg = va_arg(va, struct reg *)) != NULL; ++i) {
		except[i] = arg;
	}
	va_end(va);
	except[i] = NULL;

	for (i = 0; i < N_GPRS; ++i) {
		int	j;

		for (j = 0; except[j] != NULL; ++j) {
			if (is_member_of_reg(&gprset[i], except[j])) {
				/*
				 * XXX perhaps we would want to save
				 * part of a GPR in some cases.
				 */
				break;
			}
		}
		if (except[j] != NULL) {
			continue;
		}
		do_invalidate(&gprset[i], il, save);
	}

	if (backend->abi == ARCH_AMD64) {
		for (i = 1; i < 16; ++i) {
			int	j;

			for (j = 0; except[j] != NULL; ++j) {
				if (&amd64_gprs[i] == except[j]) {
					break;
				}
			}
			if (except[j] == NULL) {
				do_invalidate(&amd64_gprs[i], il, save);
			}
		}
	}
}

static int is_noesiedi;


static struct reg *
alloc_16_or_32bit_reg(
	struct function *f,
	int size,
	struct icode_list *il,
	struct reg *dontwipe) {

	int			i;
	int			save = 0;
	int			least = INT_MAX;
	int			least_idx = -1;
	static int		last_alloc;
	struct reg		*ret = NULL;
	struct reg		*aset;
	struct reg		*topreg = NULL;
	int			old_relax = backend->relax_alloc_gpr_order;

	/*
	 * 05/31/09: Now we always relax the GPR order when allocating
	 * non-ESI/non-EDI registers! This means that we allow this call
	 * to allocate the same register as the last successful call.
	 * This is probably a necessity when generating PIC code, because
	 * that limits us to only 3 registers that are usable for 8bit
	 * and 16bit allocations (ebx is taken as PIC pointer, so only
	 * eax, ecx, edx are allowed).
	 *
	 * If we then have a construct such as
	 *
	 *    ptr->member |= ptr2->value;
	 *
	 * ... then that will easily cause two registers to become
	 * unallocatable to hold the pointers, and with PIC ebx is
	 * taken anyway, so there is only one potential register left
	 * which we don't want to filter through the allocation ordering
	 * constraint.
	 *
	 * Relaxing the constraint may work if we perform a sequence of:
	 *
	 *     - allocating register N
	 *     - looking to allocate register M, but finding that it is
	 *       already loaded with our desired value, so it can be
	 *       skipped
	 *     - allocating register N
	 *
	 * In this case, it will seem like we are allocating register N
	 * twice in a row, but there was effectively another allocation
	 * in between. If the first allocation is not needed anymore and
	 * we will work with M and the second N, then it will work.
	 *
	 * It will generally fail if we really only have one register
	 * available but need two at once for an operation.
	 *
	 * XXX Can this happen? Are there any implicit register alloc
	 * ordering assumptions left?
	 */
	if (is_noesiedi) {
		backend->relax_alloc_gpr_order = 1;
	}

	if (backend->arch == ARCH_AMD64) {
		aset = amd64_x86_gprs;
	} else {
		aset = x86_gprs;
	}
	(void) f;
	for (i = 0; x86_gprs[i].name != NULL; ++i) {
		if (reg_unused( /*&x86_gprs[i]*/  &aset[i])
			&& reg_allocatable(/*&x86_gprs*/ &aset[i])) {
			ret = &x86_gprs[i];
			last_alloc = i;
			break;
		} else {
			int	total;

			if (!optimizing /* || !reg_allocatable(...)*/) {
				continue;
			}
			total = calc_total_refs(&x86_gprs[i]);
			if (total < least) {
				least = total;
				least_idx = i;
			}
		}
	}
	if (ret == NULL) {
		/*
	 	 * Save and hand out register with least
	 	 * references
	 	 */
		save = 1;
		if (!optimizing) {
			static int	cur;
			int		iterations = 0;

			do {
				if (cur == N_GPRS) cur = 0;
				if (cur == last_alloc) {
					/*
					 * Ensure two successive allocs always
					 * use different registers
					 */
					if (backend->relax_alloc_gpr_order
						&& iterations != 0) {
						/*
						 * 02/09/09: Lift the constraint
						 * that successive uses of the
						 * same GPR aren't allowed, but
						 * only do so in the second
						 * iteration (i.e. try other regs
						 * first and fall back if all
						 * fails)
						 */
						;
					} else {
						cur = (cur + 1) % N_GPRS;
					}
				}

				ret = &x86_gprs[cur /*++*/];
				topreg = &aset[cur++];
				/*
				 * 02/09/09: N_GPRS + 1 to allow for an extra
				 * iteration in case relax_alloc_gpr_order is
				 * set
				 */
				if (++iterations >= N_GPRS + 1) {
					/*
					 * Ouch, no register can be allocated.
					 * This will probably only ever happen
					 * with inline asm statements using too
					 * many registers .... HOPEFULLY!!
					 */
					if (is_noesiedi) {
						backend->relax_alloc_gpr_order = old_relax;
					}
					return NULL;
				}
			} while ((dontwipe != NULL && ret == dontwipe)
				|| !reg_allocatable(/*ret*/topreg));
			last_alloc = cur - 1;
		} else {
			int	idx;

unimpl(); /* XXX doesn't work with amd64 */
			idx = least_idx == -1? 0: least_idx;
			if (idx == last_alloc) {
				idx = (idx + 1) % N_GPRS;
			}
			ret = &x86_gprs[idx];
			last_alloc = idx;
		}
	}

	if (ret == &x86_gprs[1]) {
		f->callee_save_used |= CSAVE_EBX;
	} else if (ret == &x86_gprs[4]) {
		f->callee_save_used |= CSAVE_ESI;
	} else if (ret == &x86_gprs[5]) {
		f->callee_save_used |= CSAVE_EDI;
	}

	if (save) {
		struct reg	*freeme = ret;

		/*
		 * IMPORTANT: It is assumed that an allocatable register
		 * has a vreg, hence no ret->vreg != NULL check here.
		 * Reusing a preg without a vreg is obviously a bug
		 * because without a vreg, it cannot be saved anywhere.
		 * See reg_set_unallocatable()/vreg_faultin_protected()
		 */
		if (backend->arch == ARCH_AMD64) {
			/*
			 * 05/20/11: This ALWAYS attempted to free the
			 * surrounding register, so if we're allocating
			 * eax, it always tried to free rax. Instead we
			 * have to check whether the outer one is
			 * actually used!
			 */
			freeme = topreg;
		}
		free_preg(freeme, il, 1, 1);
	}
	if (size == 2) {
		/* 16bit reg */
		ret = ret->composed_of[0];
		if (ret->composed_of) {
			ret->composed_of[0]->vreg = NULL;
			if (ret == x86_gprs[4].composed_of[0]
				|| ret == x86_gprs[5].composed_of[0]) {
				/*
				 * This means we're allocating si or di. It
				 * follows that there can only be one sub-
				 * register, namely sil or dil (on AMD64!)
				 * Hence composed_of[1] does not exist
				 */
				;
			} else {
				ret->composed_of[1]->vreg = NULL;
			}
		}
	} else {
		/* 32bit */
		ret->composed_of[0]->vreg = NULL;
		if (ret->composed_of[0]->composed_of) {
			/* eax - edx */
			ret->composed_of[0]->composed_of[0]->vreg = NULL;
			if (ret->composed_of[0]->composed_of[1]) {
				ret->composed_of[0]->composed_of[1]->vreg
					= NULL;
			}
		}
	}

	if (is_noesiedi) {
		backend->relax_alloc_gpr_order = old_relax;
	}

	ret->used = ret->allocatable = 1;
	return ret;
}

static struct reg *
alloc_16_or_32bit_noesiedi(struct function *f, size_t size,
struct icode_list *il, struct reg *dontwipe) {
	int		esi_allocatable /*= x86_gprs[4].allocatable*/;
	int		edi_allocatable /* = x86_gprs[5].allocatable */;
	struct reg	*ret;
	struct reg	*esi_reg;
	struct reg	*edi_reg;

	esi_reg = backend->arch == ARCH_AMD64? &amd64_x86_gprs[4]: &x86_gprs[4];
	edi_reg = backend->arch == ARCH_AMD64? &amd64_x86_gprs[5]: &x86_gprs[5];

	esi_allocatable = reg_allocatable(esi_reg);
	edi_allocatable = reg_allocatable(edi_reg);

	reg_set_unallocatable(esi_reg);
	reg_set_unallocatable(edi_reg);

	is_noesiedi = 1;
	ret = ALLOC_GPR(f, size, il, dontwipe);
	is_noesiedi = 0;

	if (esi_allocatable) {
		reg_set_allocatable(/*&x86_gprs[4]*/esi_reg);
	}
	if (edi_allocatable) {
		reg_set_allocatable(/*&x86_gprs[5]*/edi_reg);
	}
	return ret;
}

static struct reg *
alloc_8bit_reg(struct function *f, struct icode_list *il,
struct reg *dontwipe) {
	int		i;
	int		least8 = INT_MAX;
	int		least32 = INT_MAX;
	int		total;
	struct reg	*ret = NULL;
	struct reg	*aset;

	if (backend->arch == ARCH_AMD64) {
		aset = amd64_x86_gprs;
	} else {
		aset = x86_gprs;
	}

	(void) dontwipe;

	for (i = 0; i < 4; ++i) {
		int		j;
		struct reg	**r16bit;

		if (!reg_allocatable(&aset[i])) {
			continue;
		}
		if (!aset[i].used
			&& !x86_gprs[i].used
			&& !((r16bit = x86_gprs[i].composed_of)[0])->used) {
			struct reg	**r8bit;

			r8bit = r16bit[0]->composed_of;

			/*
			 * Beware - mov ah, byte [r8] doesn't work,
			 * but does with al on amd64! So never use
			 * ah on amd64. Oh, and also, composed_of[0]
			 * of ax is actually ah, not al. I'm afraid
			 * of reversing this because I think it will
			 * break other stuff that depends on it
			 */
			if (backend->arch == ARCH_AMD64) {
				j = 1;
			} else {
				j = 0;
			}
			for (; j < 2; ++j) {
				if (!r8bit[j]->used) {
					ret = r8bit[j];
					break;
				} else {
					total = calc_total_refs(r8bit[j]);
					if (total < least8) {
						least8 = total;
					}
				}
			}
		} else {
			/* in use */
			total = calc_total_refs(/*&x86_gprs*/&aset[i]);
			if (total < least32) {
				least32 = total;
			}
		}
		if (ret != NULL) {
			break;
		}
	}

	if (ret == NULL) {
		ret = alloc_16_or_32bit_noesiedi(f, 2, il, NULL);

		if (ret == NULL) {
			return NULL;
		}

		ret->used = 0;
		ret->allocatable = 1;
		if (backend->arch == ARCH_AMD64) {
			ret = ret->composed_of[1];
		} else {
			ret = ret->composed_of[0];
		}
		ret->used = 1;
	}
	return ret;
}


static struct reg *
alloc_gpr(struct function *f, int size, struct icode_list *il,
struct reg *dontwipe, int line) {
	struct reg	*ret;

	(void) line;
	if (f == NULL) abort(); /* using invalidate_gprs() now */

	if (size == 0) {
		/* 0 means GPR */
		size = 4;
	}

	if (backend->multi_gpr_object) {
		/* Previous gpr allocation request remains to be finished */
		ret = alloc_16_or_32bit_reg(f, 4, il, dontwipe);
		backend->multi_gpr_object = 0;
	} else if (size == 8) {
		/* long long ... ouch */
		ret = alloc_16_or_32bit_reg(f, 4, il, dontwipe);
		backend->multi_gpr_object = 1;
	} else if (size == 4 || size == 2) {
		ret = alloc_16_or_32bit_reg(f, size, il, dontwipe);
	} else if (size == 1) {
		ret = alloc_8bit_reg(f, il, dontwipe);
	} else {
		printf("REGISTER LOAD WITH BAD SIZE %d\n", size);
		abort();
	}

	if (ret == NULL) {
		debug_log_regstuff(ret, NULL, DEBUG_LOG_FAILEDALLOC);
#ifdef DEBUG6
		printf("(alloc size was %d)\n", size);
#endif
	} else {
		debug_log_regstuff(ret, NULL, DEBUG_LOG_ALLOCGPR);
	}
#ifdef DEBUG6
	if (ret != NULL) {
		ret->line = line;
		++ret->nallocs;
	}
#endif

	return ret;
}
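
/*
 * Note (assumption, not from this file): callers appear to go through
 * the ALLOC_GPR() wrapper macro (see reg.h), which presumably supplies
 * the trailing line argument with __LINE__ so the DEBUG6 bookkeeping
 * above can record the allocation site.
 */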

#if 0
static int	fpr_bos = 0; /* fpr bottom of stack */
#endif

struct reg *
alloc_sse_fpr(struct function *f, int size, struct icode_list *il,
struct reg *dontwipe) {

	/*
	 * 06/18/09: Is this size trickery for long double still needed?
	 */
	if (size == 10 || size == 12 || size == 16) {
		if (backend->arch == ARCH_AMD64) {
			return x86_backend.alloc_fpr(f, size, il, dontwipe);
		} else {
			return backend->alloc_fpr(f, size, il, dontwipe);
		}
	} else {
		return generic_alloc_gpr(f, size, il, dontwipe,
			x86_sse_regs, 8, sse_csave_map, 0);
	}
}



static struct reg *
alloc_fpr(struct function *f, int size, struct icode_list *il,
struct reg *dontwipe) {
	(void) f; (void) size; (void) il; (void) dontwipe;

	if (sysflag == OS_OSX && (size == 4 || size == 8)) {
		return alloc_sse_fpr(f, size, il, dontwipe);
	}
	x86_fprs[0].used = 1;
	return &x86_fprs[0];
#if 0
	(void) f; (void) size; (void) il; (void) dontwipe;

	if (fpr_bos == 7) {
		(void) fprintf(stderr,
			"x87 register stack overflow\n");
		abort();
	}

	/*
	 * Allocated register is top of stack - relocate all allocated
	 * registers by one
	 */
	for (i = 6; i >= 0; --i) {

		if (x86_fprs[i].used) {
			vreg_map_preg(x86_fprs[i].vreg, &x86_fprs[i+1]);
		}
	}

	x86_fprs[0].used = 1;
	return &x86_fprs[0];
#endif
}

static void
x86_free_preg(struct reg *r, struct icode_list *il) {
	(void) r; (void) il;

	return;
#if 0
	if (r->type == REG_FPR) {
		assert(r == &x86_fprs[0]);
	}
	r->vreg = NULL;
	r->used = 0;
#endif
#if 0
	if (r->type != REG_FPR || !STUPID_X87(r)) {
		return;
	}
	--fpr_bos; /* :-( */

#if 0
	if (r != &x86_fprs[0]) {
		/*
		 * XXX st0 will be popped if the result is assigned to
		 * something ... but this cannot be relied on when the
		 * result is not used: (void) f1 * f2;
		 * perhaps icode.c should generate an additional
		 * ffree if needed
		 */
		icode_make_x86_ffree(r, il);
	}
#endif
	if (r != &x86_fprs[0]) {
		icode_make_x86_ffree(r, il);
	}
	r->vreg = NULL;
	r->used = 0;
#endif
}

static int
init(FILE *fd, struct scope *s) {
	int	i;
	int	j;

	out = fd;
	tunit = s;

	(void) use_nasm;

	if (sysflag == OS_OSX) {
		/*
		 * 02/09/09: Make AMD64 emitter available so that we
		 * can emit SSE instructions (required by OSX even on
		 * x86).
		 *
		 * XXX These instructions should be moved to the x86
		 * backend, like all other SSE things already have
		 * (GPR allocator, etc)
		 */
		emit_amd64 = &emit_amd64_gas;
		/*
		 * Also initialize FILE handle
		 */
		amd64_emit_gas.init(out, s);
	}

	/*
	 * Initialize registers and function pointer tables.
	 * It is important not to trash ``emit'' if this is
	 * called from AMD64 init()!!!
	 */
	if (asmflag == NULL) {
		/* Default is gas (the old nasm default is disabled below) */
		if (backend->arch != ARCH_AMD64) {
#if 0
			emit = &x86_emit_nasm;
#endif
			emit = &x86_emit_gas;
		}
		emit_x86 = &x86_emit_x86_gas;
	} else if (strcmp(asmname, "nasm") == 0
		|| strcmp(asmname, "nwasm") == 0
		|| strcmp(asmname, "yasm") == 0) {
		if (backend->arch != ARCH_AMD64) {
			emit = &x86_emit_nasm;
		}
		emit_x86 = &x86_emit_x86_nasm;
	} else if (strcmp(asmname, "as") == 0
		|| strcmp(asmname, "gas") == 0) {
		if (backend->arch != ARCH_AMD64) {
			emit = &x86_emit_gas;
		}
		emit_x86 = &x86_emit_x86_gas;
	} else {
		(void) fprintf(stderr, "Unknown x86 assembler `%s'\n",
			asmflag);
		exit(EXIT_FAILURE);
	}

#if 0
	if (use_nasm) {
		emit = &x86_emit_nasm;
	} else {
		emit = &x86_emit_gas;
	}
	emit = &x86_emit_nasm;
	emit_x86 = &x86_emit_x86_nasm;
#endif

	init_regs();
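	/*
	 * Wire up the register hierarchy. Illustratively, after this loop:
	 *
	 *     x86_gprs[0]                  ("eax")
	 *       ->composed_of[0]           ("ax")
	 *         ->composed_of[0], [1]    ("ah", "al")
	 *
	 * Note that composed_of[0] of ax is ah, not al (see the warning in
	 * alloc_8bit_reg() before relying on the order).
	 */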
	for (i = 0, j = 0; i < N_GPRS; ++i) {
		struct reg	*r16bit;

		x86_gprs[i].composed_of = n_xmalloc(2 * sizeof(struct reg *));
		r16bit = &x86_16bit_gprs[i];
		x86_gprs[i].composed_of[0] = r16bit;
		x86_gprs[i].composed_of[1] = NULL;
		if (i < 4) {
			r16bit->composed_of =
				n_xmalloc(3 * sizeof(struct reg *));
			r16bit->composed_of[0] = &x86_8bit_gprs[j++];
			r16bit->composed_of[1] = &x86_8bit_gprs[j++];
			r16bit->composed_of[2] = NULL;
		}
	}

	x86_esp.type = REG_SP;
	x86_esp.size = 4;
	x86_esp.name = "esp";
	x86_esp.composed_of = n_xmalloc(2 * sizeof(struct reg *));
	*x86_esp.composed_of = &x86_esp_16bit;
	x86_esp.composed_of[1] = NULL;
	x86_esp_16bit.size = 2;
	x86_esp_16bit.name = "sp";
	x86_ebp.type = REG_BP;
	x86_ebp.size = 4;
	x86_ebp.name = "ebp";
	x86_ebp.composed_of = n_xmalloc(2 * sizeof(struct reg *));
	*x86_ebp.composed_of = &x86_ebp_16bit;
	x86_ebp.composed_of[1] = NULL;
	x86_ebp_16bit.size = 2;
	x86_ebp_16bit.name = "bp";

	if (backend->arch != ARCH_AMD64) {
		backend->emit = emit;
		return emit->init(out, tunit);
	}
	return 0;
}

static int
get_ptr_size(void) {
	return 4;
}

static struct type *
get_size_t(void) {
	return make_basic_type(TY_UINT);
}

static struct type *
get_uintptr_t(void) {
	return make_basic_type(TY_ULONG);
}

static struct type *
get_wchar_t(void) {
	return make_basic_type(TY_INT);
}

static size_t
get_sizeof_basic(int type) {
	switch (type) {
	case TY_ENUM:
		return 4; /* XXX */

	case TY_INT:
	case TY_UINT:
	case TY_LONG:
	case TY_ULONG:
		return 4;

	case TY_LLONG:
	case TY_ULLONG:
		return 8;

	case TY_CHAR:
	case TY_UCHAR:
	case TY_SCHAR:
	case TY_BOOL:
		return 1;

	case TY_SHORT:
	case TY_USHORT:
		return 2;

	case TY_FLOAT:
		return 4;

	case TY_DOUBLE:
		return 8;
	case TY_LDOUBLE:
		if (sysflag == OS_OSX) {
			return 16;
		} else {
			if (backend->arch == ARCH_AMD64) {
				return /*10 XXX */10;
			} else {
				return 10;
			}
		}
	default:
		printf("err sizeof cannot cope w/ it, wuz %d\n", type);
		abort();
		return 1; /* XXX */
	}
}


static void
do_ret(struct function *f, struct icode_instr *ip) {
	if (f->callee_save_used & CSAVE_EBX) {
		emit->load(&x86_gprs[1], &csave_ebx);
	}
	if (f->callee_save_used & CSAVE_ESI) {
		emit->load(&x86_gprs[4], &csave_esi);
	}
	if (f->callee_save_used & CSAVE_EDI) {
		emit->load(&x86_gprs[5], &csave_edi);
	}
	if (saved_ret_addr) {
		emit->check_ret_addr(f, saved_ret_addr);
	}
	if (f->alloca_head != NULL) {
		struct stack_block	*sb;
		static struct vreg	rvr;

		rvr.stack_addr = f->alloca_regs;
		rvr.size = 4;
		backend_vreg_map_preg(&rvr, &x86_gprs[0]);
		emit->store(&rvr, &rvr);
		backend_vreg_unmap_preg(&x86_gprs[0]);
		if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
			rvr.stack_addr = f->alloca_regs->next;
			backend_vreg_map_preg(&rvr, &x86_gprs[3]);
			emit->store(&rvr, &rvr);
			backend_vreg_unmap_preg(&x86_gprs[3]);
		}

		for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
			emit->dealloca(sb, NULL);
		}

		rvr.stack_addr = f->alloca_regs;
		backend_vreg_map_preg(&rvr, &x86_gprs[0]);
		emit->load(&x86_gprs[0], &rvr);
		backend_vreg_unmap_preg(&x86_gprs[0]);
		if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
			rvr.stack_addr = f->alloca_regs->next;
			backend_vreg_map_preg(&rvr, &x86_gprs[3]);
			emit->load(&x86_gprs[3], &rvr);
			backend_vreg_unmap_preg(&x86_gprs[3]);
		}
	}
	if (f->vla_head != NULL) {
		struct stack_block	*sb;
		static struct vreg	rvr;

		rvr.stack_addr = f->alloca_regs;
		rvr.size = 4;
		backend_vreg_map_preg(&rvr, &x86_gprs[0]);
		emit->store(&rvr, &rvr);
		backend_vreg_unmap_preg(&x86_gprs[0]);
		if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
			rvr.stack_addr = f->alloca_regs->next;
			backend_vreg_map_preg(&rvr, &x86_gprs[3]);
			emit->store(&rvr, &rvr);
			backend_vreg_unmap_preg(&x86_gprs[3]);
		}

		for (sb = f->vla_head; sb != NULL; sb = sb->next) {
			emit->dealloc_vla(sb, NULL);
		}

		rvr.stack_addr = f->alloca_regs;
		backend_vreg_map_preg(&rvr, &x86_gprs[0]);
		emit->load(&x86_gprs[0], &rvr);
		backend_vreg_unmap_preg(&x86_gprs[0]);
		if (ip && ip->src_vreg && ip->src_vreg->is_multi_reg_obj) {
			rvr.stack_addr = f->alloca_regs->next;
			backend_vreg_map_preg(&rvr, &x86_gprs[3]);
			emit->load(&x86_gprs[3], &rvr);
			backend_vreg_unmap_preg(&x86_gprs[3]);
		}
	}
	emit->freestack(f, NULL);
	emit->ret(ip);
}

static struct reg *
get_abi_reg(int index, struct type *ty) {
	(void) index; (void) ty;
	/* x86 passes all stuff on the stack */
	return NULL;
}

static struct reg *
get_abi_ret_reg(struct type *ty) {
	if (is_integral_type(ty) || ty->tlist != NULL) {
		return &x86_gprs[0];
	} else {
		unimpl();
	}
	/* NOTREACHED */
	return NULL;
}

static int
gen_function(struct function *f) {
	struct ty_func		*proto;
	struct scope		*scope;
	struct icode_instr	*lastret = NULL;
	struct stack_block	*sb;
	size_t			size;
	size_t			alloca_bytes = 0;
	size_t			vla_bytes = 0;
	int			i;
	struct stupidtrace_entry	*traceentry = NULL;

	emit->setsection(SECTION_TEXT);
	proto = f->proto->dtype->tlist->tfunc;

	emit->func_header(f); /* XXX */
	emit->label(f->proto->dtype->name, 1);
	emit->intro(f);

	if (proto->nargs > 0) {
		struct sym_entry	*se = proto->scope->slist;
		int			i;
		long			offset = 8; /* ebp, ret, was 0 */

		if (f->proto->dtype->tlist->next == NULL
			&& (f->proto->dtype->code == TY_STRUCT
			|| f->proto->dtype->code == TY_UNION)) {
			/*
			 * Function returns struct/union - accommodate the
			 * hidden pointer (passed as first argument)
			 */
			offset += 4;
		}

		for (i = 0; i < proto->nargs; ++i, se = se->next) {
			size_t		size;

			size = backend->get_sizeof_type(se->dec->dtype, NULL);
			if (size < 4) {
				/*
				 * 07/21/08: Ouch, this was missing! char and
				 * short are passed as dwords, so make sure the
				 * corresponding stack block is also 4 bytes
				 *
				 * Otherwise emit_addrof() will skip the wrong
				 * byte count to get to the start of the
				 * ellipsis in variadic functions
				 */
				/*
				 * 05/22/11: Account for empty structs (a GNU
				 * C silliness) being passed
				 */
				if (size > 0) {
					size = 4;
				}
			} else if ((size % 4) != 0) {
				/*
				 * 08/09/08: Pad to boundary of 4! This was
				 * already done for long double below, but not
				 * for structs and unions
				 */
				size += 4 - size % 4;
			}

			sb = make_stack_block(offset, size);
			offset += size; /* was before makestackblock */

			sb->is_func_arg = 1;
			se->dec->stack_addr = sb;
		}
	}
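
	/*
	 * Illustrative example of the resulting argument layout (not from
	 * the original source): for int f(char c, long long ll), c lives
	 * in the dword at [ebp + 8] (sub-dword arguments are widened to 4
	 * bytes above) and ll occupies [ebp + 12] through [ebp + 19]. A
	 * struct return value would shift everything up by another 4 bytes
	 * for the hidden pointer.
	 */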

	/* Make local variables */
	for (scope = f->scope; scope != NULL; scope = scope->next) {
		struct stack_block	*sb;
		struct scope		*tmp;
		struct decl		**dec;
		size_t			align;

		for (tmp = scope; tmp != NULL; tmp = tmp->parent) {
			if (tmp == f->scope) {
				break;
			}
		}

		if (tmp == NULL) {
			/* End of function reached */
			break;
		}
		if (scope->type != SCOPE_CODE) continue;

		dec = scope->automatic_decls.data;
		for (i = 0; i < scope->automatic_decls.ndecls; ++i) {
			struct decl	*alignfor;

			if (dec[i]->stack_addr != NULL) { /* XXX sucks */
				continue;
			} else if (IS_VLA(dec[i]->dtype->flags)) {
				/*
				 * 05/22/11: Handle pointers to VLAs properly;
				 * We have to create a metadata block to
				 * record dimension sizes, but we allocate
				 * the pointers themselves on the stack
				 *
				 *   char (*p)[N];
				 *
				 * ... "p" on stack, N in metadata block
				 */
				if (dec[i]->dtype->tlist->type == TN_POINTER_TO) {
					;
				} else {
					continue;
				}
			}

#if 0
			if (i+1 < scope->automatic_decls.ndecls
				&& !IS_VLA(dec[i+1]->dtype->flags)) {
				align = calc_align_bytes(f->total_allocated,
					dec[i]->dtype,
					dec[i+1]->dtype);
			} else {
				align = 0;
			}
#endif

			alignfor = get_next_auto_decl_in_scope(scope, i);
			if (alignfor != NULL) {
				align = calc_align_bytes(f->total_allocated,
					dec[i]->dtype,
					alignfor->dtype, 0);
			} else {
				align = 0;
			}

			size = backend->
				get_sizeof_decl(dec[i], NULL);
			sb = stack_malloc(f, size+align);
			sb->nbytes = size;
			dec[i]->stack_addr = sb;
		}
		stack_align(f, 4);
	}

	/*
	 * Allocate storage for saving callee-saved registers (ebx/esi/edi)
	 * (but defer saving them until esp has been updated)
	 *
	 * 11/26/07: This unconditionally allocated storage for all regs
	 * regardless of whether they were saved! Bad.
	 */
#if 0
	f->total_allocated += 12;
	f->callee_save_offset = f->total_allocated;
#endif

	if (f->callee_save_used & CSAVE_EBX) {
		ebx_saved = 1;
		f->total_allocated += 4;
		csave_ebx.stack_addr
			= make_stack_block(f->total_allocated /*callee_save_offset*/, 4);
	}
	if (f->callee_save_used & CSAVE_ESI) {
		esi_saved = 1;
		f->total_allocated += 4;
		csave_esi.stack_addr
			= make_stack_block(f->total_allocated /*callee_save_offset  - 4 */, 4);
	}
	if (f->callee_save_used & CSAVE_EDI) {
		edi_saved = 1;
		f->total_allocated += 4;
		csave_edi.stack_addr
			= make_stack_block(f->total_allocated /*callee_save_offset  - 8*/, 4);
	}
	f->callee_save_offset = f->total_allocated;
	if (stackprotectflag) {
		f->total_allocated += 4;
		/*
		 * 08/03/11: The save_ret_addr stack block was cached here,
		 * which caused the (later introduced) zone allocator to
		 * trash the "frame pointer" flag while resetting memory
		 */
		saved_ret_addr
			= make_stack_block(f->total_allocated, 4);
	}

	/* Allocate storage for temporarily saving GPRs & patch offsets */
	for (sb = f->regs_head; sb != NULL; sb = sb->next) {
		stack_align(f, sb->nbytes);
		f->total_allocated += sb->nbytes;
		sb->offset = f->total_allocated;
	}
	/*
	 * Allocate storage for saving alloca() pointers, and initialize
	 * it to zero
	 */
	for (sb = f->alloca_head; sb != NULL; sb = sb->next) {
		f->total_allocated += sb->nbytes;
		alloca_bytes += sb->nbytes;
		sb->offset = f->total_allocated;
	}
	if (f->alloca_head != NULL || f->vla_head != NULL) {
		/*
		 * Get stack for saving return value registers before
		 * freeing the alloca()ted blocks
		 */
		f->alloca_regs = make_stack_block(0, 4);
		f->total_allocated += 4;
		f->alloca_regs->offset = f->total_allocated;
		f->alloca_regs->next = make_stack_block(0, 4);
		f->total_allocated += 4;
		f->alloca_regs->next->offset = f->total_allocated;
	}

	/*
	 * Allocate storage for saving VLA data, and initialize
	 * it to zero
	 */
	for (sb = f->vla_head; sb != NULL; sb = sb->next) {
		f->total_allocated += sb->nbytes;
		vla_bytes += sb->nbytes;
		sb->offset = f->total_allocated;
	}

	if (sysflag == OS_OSX) {
		stack_align(f, 16);
	} else {
		stack_align(f, 4);
	}
	if (f->total_allocated > 0) {
		emit->allocstack(f, f->total_allocated);
		if (f->callee_save_used & CSAVE_EBX) {
			backend_vreg_map_preg(&csave_ebx, &x86_gprs[1]);
			emit->store(&csave_ebx, &csave_ebx);
			backend_vreg_unmap_preg(&x86_gprs[1]);
			x86_gprs[1].used = 0; /* unneeded now?!?! */
		}
		if (f->callee_save_used & CSAVE_ESI) {
			backend_vreg_map_preg(&csave_esi, &x86_gprs[4]);
			emit->store(&csave_esi, &csave_esi);
			backend_vreg_unmap_preg(&x86_gprs[4]);
			x86_gprs[4].used = 0; /* unneeded now!?!? */
		}
		if (f->callee_save_used & CSAVE_EDI) {
			backend_vreg_map_preg(&csave_edi, &x86_gprs[5]);
			emit->store(&csave_edi, &csave_edi);
			backend_vreg_unmap_preg(&x86_gprs[5]);
			x86_gprs[5].used = 0; /* unneeded now?!?! */
		}
	}
	if (stackprotectflag) {
		emit->save_ret_addr(f, saved_ret_addr);
	}
	if (f->alloca_head) {
		/* 08/19/07: This wrongly used alloca_head! */
		emit->zerostack(f->alloca_tail, alloca_bytes);
	}
	if (f->vla_head) {
		/* 08/19/07: This wrongly used vla_head! */
		emit->zerostack(f->vla_tail, vla_bytes);
	}

	if (stupidtraceflag && emit->stupidtrace != NULL) {
		traceentry = put_stupidtrace_list(f);
		emit->stupidtrace(traceentry);
	}

	if (xlate_icode(f, f->icode, &lastret) != 0) {
		return -1;
	}
	if (lastret != NULL) {
		struct icode_instr	*tmp;

		for (tmp = lastret->next; tmp != NULL; tmp = tmp->next) {
			if (tmp->type != INSTR_SETITEM) {
				lastret = NULL;
				break;
			}
		}
	}

	emit->outro(f);

	if (traceentry != NULL) {
		emit->finish_stupidtrace(traceentry);
	}

	return 0;
}


#if XLATE_IMMEDIATELY

static int
gen_prepare_output(void) {
	if (gflag) {
		/* Print file names */
		emit->dwarf2_files();
	}
	if (emit->support_decls) {
		emit->support_decls();
	}
	return 0;
}

static int
gen_finish_output(void) {
	/*
	 * Emit remaining static initialized variables. Currently this
	 * should only handle function name identifiers (__func__).
	 */
	if (sysflag == OS_OSX) {
		emit->global_static_decls(global_scope.static_decls.data,
				global_scope.static_decls.ndecls);
	}
	emit->static_init_vars(static_init_vars);
	emit->static_init_thread_vars(static_init_thread_vars);

	emit->static_uninit_vars(static_uninit_vars);
	emit->static_uninit_thread_vars(static_uninit_thread_vars);
	emit->global_extern_decls(global_scope.extern_decls.data,
			global_scope.extern_decls.ndecls);
	if (emit->extern_decls) {
		emit->extern_decls();
	}

	/*
	 * Support buffers at end because we may only now know
	 * whether they are needed (used at all)
	 */
	emit->support_buffers();

	if (emit->finish_program) {
		emit->finish_program();
	}
	x_fflush(out);
	return 0;
}

#else

static int
gen_program(void) {
	struct function		*func;

	if (gflag) {
		/* Print file names */
		emit->dwarf2_files();
	}

	if (emit->support_decls) {
		emit->support_decls();
	}
	if (emit->extern_decls) {
		emit->extern_decls();
	}
#if 0
	emit->global_decls();
#endif
	emit->global_extern_decls(global_scope.extern_decls.data,
			global_scope.extern_decls.ndecls);
	emit->global_static_decls(global_scope.static_decls.data,
			global_scope.static_decls.ndecls);

#if 0
	emit->static_decls();
#endif
	emit->static_init_vars(static_init_vars);
	emit->static_uninit_vars(static_uninit_vars);
	emit->static_init_thread_vars(static_init_thread_vars);
	emit->static_uninit_thread_vars(static_uninit_thread_vars);

	emit->struct_inits(init_list_head);
	emit->empty();
	emit->strings(str_const);
	emit->fp_constants(float_const);
#if 0
	if (emit->llong_constants) {
		emit->llong_constants();
	}
#endif
	emit->support_buffers();
#if 0
	if (emit->pic_support) {
		emit->pic_support();
	}
#endif
	emit->empty();


#if 0
	if (emit->struct_defs) {
		emit->struct_defs();
	}
#endif

	emit->setsection(SECTION_TEXT);

	for (func = funclist; func != NULL; func = func->next) {
		curfunc = func;
		if (gen_function(func) != 0) {
			return -1;
		}
		emit->empty();
		emit->empty();
	}

	x_fflush(out);

	return 0;
}

#endif

static int
calc_x86_stack_bytes(struct fcall_data *fcall,
	struct vreg **vrs, int nvrs, int start_value) {
	int	bytes = start_value + 8; /* ebp and return address */
	int	i;
	int	need_dap = 0;


	if (fcall->functype->nargs == -1
		/*|| ty->implicit*/) {
		/* Need default argument promotions */
		need_dap = 1;
	}
	for (i = nvrs - 1; i >= 0; --i) {
		if (fcall->functype->variadic
			&& i >= fcall->functype->nargs) {
			need_dap = 1;
		}
		if (vrs[i]->type->tlist != NULL
			|| is_integral_type(vrs[i]->type)) {
			bytes += vrs[i]->size < 4? 4: vrs[i]->size;
		} else if (is_floating_type(vrs[i]->type)) {
			if (vrs[i]->type->code == TY_FLOAT) {
				if (need_dap) {
					bytes += 8;
				} else {
					bytes += 4;
				}
			} else if (vrs[i]->type->code == TY_LDOUBLE) {
				bytes += 16; /* XXXXXXXXXXXXX 16 */
			} else {
				bytes += vrs[i]->size;
			}
		} else if (vrs[i]->type->code == TY_STRUCT
			|| vrs[i]->type->code == TY_UNION) {
			/* 07/21/08: (left-)Align to boundary of 4 */
			if (vrs[i]->size & 3) {
				bytes += 4 - (vrs[i]->size % 4);
			}
			bytes += vrs[i]->size;
		}
	}
	return bytes;
}
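
/*
 * Worked example (illustrative, not from the original source): for a
 * variadic call f(c, fl, s) with char c, float fl and a 6-byte struct s,
 * starting from start_value = 0 the function yields 8 (ebp/return
 * address) + 4 (char widened to a dword) + 8 (float promoted to double
 * for the ellipsis) + 2 (struct padded to a multiple of 4) + 6 = 28.
 */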


static struct vreg *
icode_make_fcall(struct fcall_data *fcall, struct vreg **vrs, int nvrs,
struct icode_list *il)
{
	unsigned long		allpushed = 0;
	struct vreg		*tmpvr;
	struct vreg		*ret = NULL;
	struct vreg		*vr2;
	struct type		*ty;
	struct icode_instr	*ii;
	struct type_node	*tn;
	struct vreg		*struct_lvalue;
	int			i;
	int			need_dap = 0;
	int			was_struct;
	int			was_float;
	int			was_llong;
	int			struct_return = 0;
	int			ret_is_anon_struct = 0;

	ty = fcall->calltovr->type;
	tmpvr = fcall->calltovr;

	tn = ty->tlist;
	if (tn->type == TN_POINTER_TO) {
		/* Called thru function pointer */
		tn = tn->next;
	}

	struct_lvalue = fcall->lvalue;

	if ((ty->code == TY_STRUCT
		|| ty->code == TY_UNION)
		&& tn->next == NULL) {
		struct_return = 1;
		if (struct_lvalue == NULL || fcall->need_anon) {
			struct type_node	*tnsav;
			/*
			 * Result of function is not assigned so we need to
			 * allocate storage for the callee to store its
			 * result into
			 */

			tnsav = ty->tlist;
			ty->tlist = NULL;

			/*
			 * 08/05/08: Don't allocate anonymous struct return
			 * storage right here, but when creating the stack
			 * frame. This has already been done on MIPS, PPC
			 * and SPARC, but not on x86/AMD64. The reason is
			 * that it broke something that is long forgotten
			 * now. So we'll re-enable this and fix any bugs
			 * that may come up.
			 *
			 * The reason I ran into this again is that if we
			 * don't allocate the struct on the stack frame,
			 * then in
			 *
			 *     struct foo otherfunc() { return ...}
			 *     struct foo func() { return otherfunc(); }
			 *
			 * ... the anonymous storage is reclaimed before
			 * it can be copied as a return value, hence
			 * trashing it
			 */
			struct_lvalue = vreg_stack_alloc(ty, il, 1 /*0*/, NULL);

			ty->tlist = tnsav;
			/*
			 * 08/05/08: Don't add to allpushed since struct is
			 * created on frame
			 */
	/*		allpushed += struct_lvalue->size;*/
			ret_is_anon_struct = 1;
		}
	}

	if (sysflag == OS_OSX) {
		int	count;

		count = calc_x86_stack_bytes(fcall, vrs, nvrs, struct_return? 4: 0);
		if (count % 16 != 0) {
			unsigned long align = 16 - count % 16;
#if 0
			printf("aligning %lu\n", align);
#endif
			icode_make_allocstack(NULL, align, il);
			allpushed += align;
		}
	}
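
	/*
	 * Illustrative arithmetic (not from the original source): with
	 * count = 28 as in the calc_x86_stack_bytes() example above,
	 * align = 16 - 28 % 16 = 4, so 4 dummy bytes are allocated to
	 * bring the call frame to the 16-byte boundary OSX expects.
	 */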

	/*
	 * 07/20/08: This wrongly took an implicit return type into account
	 * to determine whether default argument promotions are needed!
	 */
	if (fcall->functype->nargs == -1
		/*|| ty->implicit*/) {
		/* Need default argument promotions */
		need_dap = 1;
	}

	for (i = nvrs - 1; i >= 0; --i) {
		struct vreg		*dest;

		if (fcall->functype->variadic
			&& i >= fcall->functype->nargs) {
			need_dap = 1;
		}

		/*
		 * May have to be moved into
		 * register if we're dealing with
		 * pointer stuff, otherwise we may
		 * push with memory operand
		 */

		was_struct = was_float = was_llong = 0;
		if (vrs[i]->parent) {
			vr2 = get_parent_struct(vrs[i]);
		} else {
			vr2 = NULL;
		}

		if (vrs[i]->type->tlist != NULL) {
			vreg_faultin(NULL, NULL, vrs[i], il, 0);
		} else {
			if (vrs[i]->from_ptr) {
				/* XXX not needed?! */
				vreg_faultin(NULL, NULL,
					vrs[i]->from_ptr, il, 0);
			}
			if (IS_CHAR(vrs[i]->type->code)
				|| IS_SHORT(vrs[i]->type->code)) {
				struct type	*ty
					= make_basic_type(TY_INT);

				/*
				 * Bytes and halfwords are pushed as words
				 */
				vrs[i] = backend->
					icode_make_cast(vrs[i], ty, il);
			} else if (IS_LLONG(vrs[i]->type->code)) {
				vreg_faultin(NULL, NULL, vrs[i], il, 0);
				allpushed += 8;
				ii = icode_make_push(vrs[i], il);
				append_icode_list(il, ii);
				was_llong = 1;
			} else if (vrs[i]->type->code == TY_STRUCT
				|| vrs[i]->type->code == TY_UNION) {
				/*
				 * struct/union - memcpy() it onto stack,
				 * allocate storage manually (no push!)
				 */
				/*
				 * 05/22/11: Account for empty structs (a GNU
				 * C silliness) being passed
				 */
				if (vrs[i]->size > 0) {
					/* 07/21/08: (left-)Align to boundary of 4 */
					if (vrs[i]->size & 3) {
						icode_make_allocstack(NULL, 4 - (vrs[i]->size % 4), il);
						allpushed += 4 - (vrs[i]->size % 4);
					}

					dest = vreg_stack_alloc(vrs[i]->type, il, 0, NULL);
					allpushed += dest->size;
					backend->invalidate_gprs(il, 1, INV_FOR_FCALL);
					vreg_faultin_ptr(vrs[i], il);
					icode_make_copystruct(dest, vrs[i], il);
				}
				was_struct = 1;
			} else {
				vreg_faultin_x87(NULL, NULL, vrs[i], il, 0);
				if (IS_FLOATING(vrs[i]->type->code)) {
					was_float = 1;
					if (need_dap
						&& vrs[i]->type->code
						== TY_FLOAT) {
#if 0
						struct type	*ty
						= make_basic_type(TY_DOUBLE);

						vrs[i] = backend->
						icode_make_cast(vrs[i],ty,il);
#endif
						if (sysflag == OS_OSX
							&& vrs[i]->type->code == TY_FLOAT) {
							struct type	*ty
								= make_basic_type(TY_DOUBLE);

							vrs[i] = backend->
								icode_make_cast(vrs[i],ty,il);
						} else {
							vrs[i] = n_xmemdup(vrs[i],
								sizeof *vrs[i]);
							vrs[i]->type = make_basic_type(
								TY_DOUBLE);
							vrs[i]->size = backend->
								get_sizeof_type(vrs[i]->
									type, NULL);
						}
					}
					if (vrs[i]->type->code == TY_LDOUBLE) {
						if (sysflag == OS_OSX) {
#if 0
							/* 6 bytes of padding */
							icode_make_allocstack(NULL, 6, il);
							allpushed += 6;
#endif
						} else {
#if 0
							/* 2 bytes of padding */
							icode_make_allocstack(NULL, 2, il);
							allpushed += 2;
#endif
						}
					}
					dest = vreg_stack_alloc(vrs[i]->type,
						il, 0, NULL);
					vreg_map_preg(dest, vrs[i]->pregs[0]);
					icode_make_store(NULL, dest, dest, il);

					if (vrs[i]->type->code == TY_DOUBLE) {
						allpushed += 8;
					} else if (vrs[i]->type->code == TY_FLOAT) {
						allpushed += 4;
					} else {
						allpushed += vrs[i]->size;
					/*	allpushed += 10;*/
					}
				}
			}
		}

		if (!was_struct && !was_float && !was_llong) {
			ii = icode_make_push(vrs[i], il);
			if (vrs[i]->size < 4) {
				/* bytes and shorts are passed as words */
				allpushed += 4;
			} else if (vrs[i]->type->tlist != NULL
				&& vrs[i]->type->tlist->type == TN_ARRAY_OF) {
				allpushed += 4;
			} else {
				allpushed += vrs[i]->size;
			}
			append_icode_list(il, ii);
		}

		free_pregs_vreg(vrs[i], il, 0, 0);
		if (vr2 && vr2->from_ptr && vr2->from_ptr->pregs[0]
			&& vr2->from_ptr->pregs[0]->vreg == vr2->from_ptr) {
			free_preg(vr2->from_ptr->pregs[0], il, 0, 0);
		}
	}

	if (struct_return) {
		struct vreg	*addr = vreg_alloc(NULL, NULL, NULL, NULL);

		/*
		 * 06/15/09: icode_make_addrof() apparently happily used stale
		 * registers for parent struct pointers. Such invalid
		 * registers can happen if memcpy() is used to pass a struct
		 * by value. alloc_gpr() used by icode_make_addrof() requires
		 * a struct type rather than ``function returning struct'', so
		 * we temporarily set the type list to NULL.
		 * XXX Can this break in the backend?
		 */
		{
			struct reg	*r;
			/*ii*/ r = make_addrof_structret(struct_lvalue, il);

			addr->pregs[0] = r /*ii->dat*/;
			addr->size = 4;
		}

		ii = icode_make_push(addr, il);
		append_icode_list(il, ii);

		/*
		 * Adjust amount of bytes allocated; the push above adds
		 * 4 to it but it's the callee that cleans up the hidden
		 * pointer, so the count needs to be fixed manually (as
		 * opposed to having emit_freestack do it.)
		 * XXX this is very ugly
		 */
		ii = icode_make_adj_allocated(-4);
		append_icode_list(il, ii);
		free_preg(addr->pregs[0], il, 0, 0);
	}


	/*
	 * In the x86 ABI, the caller is responsible for saving
	 * eax/ecx/edx (but not ebx, esi, edi), so that's what we
	 * do here
	 */
	backend->invalidate_gprs(il, 1, INV_FOR_FCALL);

	if (ty->tlist->type == TN_POINTER_TO) {
		/* Need to indirect thru function pointer */
		vreg_faultin(NULL, NULL, tmpvr, il, 0);
		ii = icode_make_call_indir(tmpvr->pregs[0]);
		tmpvr->pregs[0]->used = 0;
		tmpvr->pregs[0]->vreg = NULL;
	} else {
		ii = icode_make_call(ty->name);
		if (IS_ASM_RENAMED(ty->flags)) {
			ii->hints |= HINT_INSTR_RENAMED;
		}
	}
	append_icode_list(il, ii);
	ii = icode_make_freestack(allpushed);
	append_icode_list(il, ii);

	ret = vreg_alloc(NULL, NULL, NULL, NULL);
	ret->type = ty;

	/*
	 * 07/06/2007 What the HELL!??!?! This stuff still did
	 *    if (ty->tlist->next != NULL) {
	 * to check if the function returns a pointer, not
	 * taking into account that this could be a call thru
	 * a function pointer. I thought I had this fixed
	 * everywhere but apparently it was only done in AMD64
	 * and all other backends were broken :-(
	 */
#if 0
	if (ty->tlist->next != NULL) {
#endif

	if ((ty->tlist->type == TN_POINTER_TO
		&& ty->tlist->next->next != NULL)
		|| (ty->tlist->type == TN_FUNCTION
		&& ty->tlist->next != NULL)) {
		/* Must be pointer */
		ret->pregs[0] = &x86_gprs[0];
	} else {
		if (IS_CHAR(ty->code)) {
			ret->pregs[0] = x86_gprs[0].composed_of[0]->
				composed_of[1];
		} else if (IS_SHORT(ty->code)) {
			ret->pregs[0] = x86_gprs[0].composed_of[0];
		} else if (IS_INT(ty->code)
			|| IS_LONG(ty->code)
			|| ty->code == TY_ENUM) { /* XXX */
			ret->pregs[0] = &x86_gprs[0];
		} else if (IS_LLONG(ty->code)) {
			ret->pregs[0] = &x86_gprs[0];
			ret->is_multi_reg_obj = 2;
		} else if (ty->code == TY_FLOAT
			|| ty->code == TY_DOUBLE
			|| ty->code == TY_LDOUBLE) {
			if (sysflag == OS_OSX
				&& ty->code != TY_LDOUBLE) {
				ret->pregs[0] = &x86_sse_regs[0];
			} else {
				ret->pregs[0] = &x86_fprs[0];
			}
		} else if (ty->code == TY_STRUCT
			|| ty->code == TY_UNION) {
			/*
			 * 08/16/07: Added this
			 */
			if (ret_is_anon_struct) {
				ret = struct_lvalue;
			}
			ret->struct_ret = 1;
		} else if (ty->code == TY_VOID) {
			; /* Nothing! */
		}
	}

	if (ret->pregs[0] != NULL) {
		vreg_map_preg(ret, ret->pregs[0]);
		if (ret->is_multi_reg_obj) {
			vreg_map_preg2(ret, &x86_gprs[3]);
		}
	}

	ret->type = n_xmemdup(ret->type, sizeof *ret->type);
	if (ret->type->tlist->type == TN_POINTER_TO) {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next->next);
	} else {
		copy_tlist(&ret->type->tlist, ret->type->tlist->next);
	}
	if (ret->type->code != TY_VOID || ret->type->tlist) {
		ret->size = backend->get_sizeof_type(ret->type, NULL);
	}

	if (is_x87_trash(ret)) {
		/*
		 * Don't keep stuff in x87 registers, ever!!!
		 */
		free_preg(ret->pregs[0], il, 1, 1);
	}
	return ret;
}
1910 
1911 static int
1912 icode_make_return(struct vreg *vr, struct icode_list *il) {
1913 	struct icode_instr	*ii;
1914 #if 0
1915 	struct type		*rtype = curfunc->proto->dtype;
1916 
1917 #endif
1918 	struct type		*rtype = curfunc->rettype;
1919 
1920 #if 0
1921 	oldtn = curfunc->proto->dtype->tlist;
1922 	rtype->tlist = rtype->tlist->next;
1923 #endif
1924 	/*
1925 	 * 08/06/17: We were removing the first typenode, then performed
1926 	 * the return, then restored the typenode. This is wrong because
1927 	 * the generated icode may rightly depend on the type being stable
1928 	 * instead of having it changed behind its back!
1929 	 */
1930 #if 0
1931 	rtype = func_to_return_type(rtype);
1932 #endif
1933 
1934 	if (vr != NULL) {
1935 		if (IS_CHAR(rtype->code)
1936 			|| IS_SHORT(rtype->code)
1937 			|| IS_INT(rtype->code)
1938 			|| IS_LONG(rtype->code)
1939 			|| rtype->code == TY_ENUM /* 06/15/09: Was missing?!? */
1940 			|| rtype->tlist != NULL) {
1941 			struct reg	*r = &x86_gprs[0];
1942 			int		size = backend->get_sizeof_type(rtype,0);
1943 
1944 			if (r->size > (unsigned long)size) {
1945 				r = get_smaller_reg(r, size);
1946 			}
1947 			vreg_faultin(r, NULL, vr, il, 0);
1948 		} else if (IS_LLONG(rtype->code)) {
1949 			vreg_faultin(&x86_gprs[0], &x86_gprs[3],
1950 				vr, il, 0);
1951 		} else if (rtype->code == TY_FLOAT
1952 			|| rtype->code == TY_DOUBLE
1953 			|| rtype->code == TY_LDOUBLE) {
1954 			/* Return in st0 */
1955 			vreg_faultin_x87(NULL, NULL, vr, il, 0);
1956 		} else if (rtype->code == TY_STRUCT
1957 			|| rtype->code == TY_UNION) {
1958 			struct stack_block	*sb;
1959 			struct vreg		*dest;
1960 			struct vreg		*from_ptr;
1961 			static struct decl	dec;
1962 			struct decl		*decp;
1963 			unsigned long		offset;
1964 			static struct type_node	tn;
1965 
1966 			/* Get hidden struct pointer for storing return */
1967 			offset = 8; /* Move past ebp,eip */
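			/*
			 * Illustrative frame layout at this point (x86):
			 *
			 *    [ebp+0]  saved ebp
			 *    [ebp+4]  return address
			 *    [ebp+8]  hidden pointer to the return buffer
			 *    [ebp+12] first visible argument
			 */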
1968 			sb = make_stack_block(offset, 4);
1969 			sb->is_func_arg = 1;
1970 			dec.stack_addr = sb;
1971 			dec.dtype = n_xmemdup(rtype, sizeof *rtype);
1972 			decp = n_xmemdup(&dec, sizeof dec);
1973 			from_ptr = vreg_alloc(decp, NULL, NULL, NULL);
1974 
1975 			tn.type = TN_POINTER_TO;
1976 			from_ptr->type->tlist = &tn;
1977 			from_ptr->size = 4;
1978 			vreg_faultin(NULL, NULL, from_ptr, il, 0);
1979 
1980 			dest = vreg_alloc(NULL, NULL, NULL, NULL);
1981 			dest->from_ptr = from_ptr;
1982 
1983 			/* vr may come from pointer */
1984 			vreg_faultin_ptr(vr, il);
1985 			icode_make_copystruct(dest, vr, il);
1986 		}
1987 	}
1988 	ii = icode_make_ret(vr);
1989 	append_icode_list(il, ii);
1990 
1991 #if 0
1992 	rtype->tlist = oldtn;
1993 #endif
1994 
1995 	return 0;
1996 }
1997 
1998 /*
1999  * Deal with preparations necessary to make things work with the terrible
2000  * x86 design
2001  */
2002 static void
2003 icode_prepare_op(
2004 	struct vreg **dest0,
2005 	struct vreg **src0,
2006 	int op,
2007 	struct icode_list *il) {
2008 
2009 	struct vreg	*dest = *dest0;
2010 	struct vreg	*src = *src0;
2011 
2012 	/*
2013 	 * 05/30/11: This was missing! This function implicitly assumed both
2014 	 * operands to be register-resident already (e.g. see the eax checks
	 * below, which do not verify that eax is really mapped to the vreg) -
2016 	 * which was true in most but not all cases. This broke compound
2017 	 * assignment operators for VLAs, and may have caused bad code
2018 	 * generation in other cases as well
2019 	 */
2020 	if (!is_floating_type(dest->type)) {
2021 		vreg_faultin_protected(dest, NULL, NULL, src, il, 0);
2022 		vreg_faultin_protected(src, NULL, NULL, dest, il, 0);
2023 	}
2024 
2025 	/*
2026 	 * For long long, the preparations below only apply to shifting
2027 	 */
2028 	if (dest->is_multi_reg_obj && op != TOK_OP_BSHL && op != TOK_OP_BSHR) {
2029 		return;
2030 	}
2031 	if (is_floating_type(dest->type)) {
2032 		if (backend->arch == ARCH_X86
2033 			|| dest->type->code == TY_LDOUBLE) {
2034 #if 0
2035 			/*
2036 		 	 * As we can only write to memory from st0, it is
2037 		 	 * desirable to store all results there
2038 		 	 */
2039 			if (dest->pregs[0] != &x86_fprs[/*0*/ 1]) {
2040 				icode_make_x86_fxch(dest->pregs[0],
2041 					&x86_fprs[0], il);
2042 			}
2043 #endif
2044 			return;
2045 		} else {
2046 			/* Has to be SSE (AMD64) */
2047 			return;
2048 		}
2049 	}
2050 
2051 	if (op == TOK_OP_DIVIDE || op == TOK_OP_MOD || op == TOK_OP_MULTI) {
2052 		/* Destination must be in eax, or rax */
2053 
2054 		if (backend->arch == ARCH_AMD64 && dest->size == 8) {
2055 			if (dest->pregs[0] != &amd64_x86_gprs[0]) {
2056 				free_preg(&amd64_x86_gprs[0], il, 1, 1);
2057 				vreg_faultin(&amd64_x86_gprs[0], NULL, dest, il,
2058 					0);
2059 			}
2060 			reg_set_unallocatable(&amd64_x86_gprs[3]);
2061 			vreg_faultin_protected(dest, /*NULL*/
2062 				NULL, NULL, src, il, 0);
2063 			reg_set_allocatable(&amd64_x86_gprs[3]);
2064 			return;
2065 		}
2066 		if (dest->pregs[0] != &x86_gprs[0]) {
2067 			/*
2068 			 * 05/20/11: This unconditionally freed eax for AMD64
2069 			 * too, such that if rax had been in use, it was not
2070 			 * saved but still marked as in use - which could lead
2071 			 * to problems later on when both rax and eax were
2072 			 * in use
2073 			 */
2074 			if (backend->arch == ARCH_AMD64) {
2075 				free_preg(&amd64_x86_gprs[0], il, 1, 1);
2076 			} else {
2077 				free_preg(&x86_gprs[0], il, 1, 1);
2078 			}
2079 			vreg_faultin(&x86_gprs[0], NULL, dest, il, 0);
2080 		}
2081 
2082 		/*
2083 		 * 04/13/08: Only load immediate value if there is no
2084 		 * immediate instruction available!
2085 		 */
2086 		if (src->from_const == NULL
2087 			|| !backend->have_immediate_op(dest->type, op)) {
2088 			/* may not be edx for div */
2089 			struct reg	*srcreg = NULL;
2090 
2091 			if (src->pregs[0] && src->pregs[0]->vreg == src) {
2092 				if (src->pregs[0] == &x86_gprs[3]) {
2093 					/* Have to move it elsewhere */
2094 
2095 					/*
2096 					 * 10/31/07: Pass size instead of 0.
2097 					 * We want a 4 byte reg, but 0 gives
2098 					 * us a full GPR. That breaks on
2099 					 * AMD64, where we'll be getting an
2100 					 * 8 byte reg instead
2101 					 *
2102 					 * 08/10/08: This was missing the
2103 					 * possibility that the source could
2104 					 * be loaded to eax, which is
2105 					 * obviously wrong because that's
2106 					 * the target location! Thus set eax
2107 					 * unallocatable
2108 					 * XXX what if it was unallocatable
2109 					 * before?
2110 				 	 */
2111 					reg_set_unallocatable(&x86_gprs[0]);
2112 					srcreg = ALLOC_GPR(curfunc, /*0*/4,
2113 						il, NULL);
2114 					reg_set_allocatable(&x86_gprs[0]);
2115 				}
2116 			}
2117 			reg_set_unallocatable(&x86_gprs[3]);
2118 			vreg_faultin_protected(dest, /*NULL*/
2119 				srcreg, NULL, src, il, 0);
2120 			reg_set_allocatable(&x86_gprs[3]);
2121 		}
2122 
2123 		/* edx is trashed in any case - save it */
2124 		/*
2125 		 * 05/20/11: This unconditionally freed edx for AMD64
2126 		 * too, such that if rdx had been in use, it was not
2127 		 * saved but still marked as in use - which could lead
2128 		 * to problems later on when both rdx and edx were
2129 		 * in use
2130 		 */
2131 		if (backend->arch == ARCH_AMD64) {
2132 			free_preg(&amd64_x86_gprs[3], il, 1, 1);
2133 		} else {
2134 			free_preg(&x86_gprs[3], il, 1, 1);
2135 		}
2136 	} else if ((op == TOK_OP_BSHL || op == TOK_OP_BSHR)
2137 		&& (src->from_const == NULL
2138 		|| !backend->have_immediate_op(dest->type, op))) {
2139 		/*
2140 		 * Source must be in cl
2141 		 *
2142 		 * 04/13/08: Only load immediate value if there is no
2143 		 * immediate instruction available!
2144 		 */
2145 		struct reg	*reg_cl;
2146 
2147 		reg_cl = x86_gprs[2]
2148 			.composed_of[0]
2149 			->composed_of[1];
2150 		if (src->pregs[0] != reg_cl
2151 			|| reg_cl->vreg != src) {
2152 		 	/*
2153 			 * 05/20/11: This unconditionally freed ecx for
2154 			 * AMD64
2155 			 */
2156 			if (backend->arch == ARCH_AMD64) {
2157 				free_preg(&amd64_x86_gprs[2], il, 1, 1);
2158 			} else {
2159 				free_preg(&x86_gprs[2], il, 1, 1);
2160 			}
2161 
2162 			if (src->is_multi_reg_obj) {
2163 				reg_set_unallocatable(&x86_gprs[2]);
2164 				src = backend->icode_make_cast(src,
2165 					make_basic_type(TY_CHAR), il);
2166 				*src0 = src;
2167 				reg_set_allocatable(&x86_gprs[2]);
2168 			}
2169 
2170 			/*
2171 			 * Need to ensure that the operand is loaded
2172 			 * correctly regardless of its size.
			 * XXX this is really nasty, perhaps we should
2174 			 * demand that callers guarantee a byte-sized
2175 			 * or word-sized vreg?!
2176 			 */
2177 			if (src->size == 1) {
2178 				vreg_faultin(reg_cl, NULL, src, il, 0);
2179 			} else if (src->size == 2) {
2180 				vreg_faultin(x86_gprs[2].composed_of[0],
2181 					NULL, src, il, 0);
2182 			} else {
2183 				/*
2184 				 * 05/20/11: This did not distinguish
2185 				 * between x86 and AMD64, such that
2186 				 * ecx was always used even for 64bit
2187 				 * integers (resulting in assembler
2188 				 * errors)
2189 				 */
2190 				if (backend->arch == ARCH_AMD64
2191 					&& src->size == 8) {
2192 					vreg_faultin(&amd64_x86_gprs[2],
2193 						NULL, src, il, 0);
2194 				} else {
2195 					vreg_faultin(&x86_gprs[2],
2196 						NULL, src, il, 0);
2197 				}
2198 			}
2199 			vreg_faultin_protected(src, NULL, NULL,
2200 				dest, il, 0);
2201 		}
2202 	}
2203 }
2204 
2205 
2206 static void
2207 change_preg_size(
2208 	struct vreg *vr,
2209 	struct icode_list *il,
2210 	struct type *to,
2211 	struct type *from);
2212 
2213 /*
2214  * ,==x87==x87====x87=======x87===============x87===========,
2215  * |~~*,.,*~{ 80x87 FLOATING POINT KLUDGERY DELUXE }~*,.,*~~|
2216  * `======87=87=87===========87=======87==87==========87===='
2217  *
 * ``struct vreg floatbuf'' is used as a buffer to convert between integers
 * and floats because:
 *    - fild cannot take a GPR or immediate operand
 *    - fstp can likewise only write to memory
 *
 * ``struct vreg x87cw_new'' and ``struct vreg x87cw_old'' are used as buffers
 * for storing the x87 control word. Converting a floating point value to an
 * integer with x87 by default rounds to the nearest integer. In C, however,
 * ``(int)fp_value'' is required to *truncate* the fractional part. The x87
 * control word therefore has to be changed before, and restored after, the
 * fp-to-integer conversion in order to make the thing behave as desired.
 * (I couldn't believe it when I first saw the code gcc generated for this
 * exercise.)
2231  */
2232 #if ! REMOVE_FLOATBUF
2233 static void
2234 #else
2235 static struct vreg *
2236 #endif
2237 load_floatbuf(struct vreg *data,
2238 #if REMOVE_FLOATBUF
2239 	struct type *from,
2240 #endif
2241 	struct icode_list *il
2242 #if REMOVE_FLOATBUF
2243 	, int is_int
2244 #endif
2245 	) {
2246 
2247 
2248 #if ! REMOVE_FLOATBUF
2249 	if (floatbuf.var_backed == NULL) {
2250 		/* Not allocated yet */
2251 		static struct decl	dec;
2252 		static struct type	ty;
2253 
2254 		ty = *make_basic_type(/*TY_INT*/TY_LLONG);
2255 		dec.dtype = &ty;
2256 		ty.name = "_Floatbuf";
2257 		floatbuf.var_backed = &dec;
2258 		floatbuf.type = &ty;
2259 		if (backend->arch == ARCH_AMD64) {
2260 			/*
2261 			 * Wow this used data->pregs[0]->size, which for an
2262 			 * x87 fpr was 12... So stores to it did
2263 			 *    movt val, _Floatbuf
2264 			 * I guess 8 is invalid too? So use 4 always
2265 			 */
			floatbuf.size = 4;   /*data->pregs[0]->size;*/
2267 		} else {
2268 			floatbuf.size = 4; /* XXX long long :( */
2269 		}
2270 	}
2271 	vreg_map_preg(&floatbuf, data->pregs[0]);
2273 	icode_make_store(curfunc, &floatbuf, &floatbuf, il);
2274 #else
2275 	/* REMOVE_FLOATBUF is set */
2276 	static struct vreg	vr;
2277 	struct vreg		*resvr;
2278 	int			res_type_changed_to_64bit = 0; /* 06/15/08: Was 1!! */
2279 
2280 	if (from->code < /*TY_INT*/ TY_LLONG) {
2281 		/*
2282 		 * Smaller than int isn't possible - must have been
2283 		 * promoted
2284 		 */
2285 		from = make_basic_type(  /*TY_INT*/ TY_LLONG);
2286 	}
2287 
2288 	vr.type = from;
2289 	vr.size = backend->get_sizeof_type(from, NULL);
2290 
2291 	vr.is_multi_reg_obj = data->is_multi_reg_obj;
2292 	vr.pregs[0] = data->pregs[0];
2293 	vr.pregs[1] = data->pregs[1];
2294 
2295 	resvr = vreg_alloc(NULL,NULL,NULL,NULL);
2296 	*resvr = vr;
2297 
2298 	if ((IS_INT(data->type->code) || IS_LONG(data->type->code))
2299 		&& data->type->sign == TOK_KEY_UNSIGNED) {
2300 		/*
2301 		 * 06/08/08: Unsigned integers require storing as 64bit
2302 		 */
2303 		vreg_set_new_type(resvr, make_basic_type(TY_LLONG));
2304 		res_type_changed_to_64bit = 1;
2305 	}
2306 
2307 	vreg_map_preg(resvr, data->pregs[0]);
2308 
2309 	/*
2310 	 * 06/15/08: Multi-register mapping was incorrectly done for
2311 	 * fp-to-int conversion, but is only correct the other way
2312 	 * around!
2313 	 */
2314 	if (resvr->is_multi_reg_obj
2315 		&& !res_type_changed_to_64bit
2316 		&& is_int) {
2317 		vreg_map_preg2(resvr, data->pregs[1]);
2318 	}
2319 
2320 	if (IS_FLOATING(from->code)) {
2321 		/* Save and convert */
2322 /*
2323 	vreg_stack_alloc() doesn't work because it doesn't
2324 	immediately give us a stack_block which can be
2325 	assigned to other vregs too
2326 	resvr = vreg_stack_alloc(from, il, 1, NULL);*/
2327 resvr->stack_addr = icode_alloc_reg_stack_block(curfunc, resvr->size);
2328 		/*
2329 		 * 06/15/08: Always use data->type instead of resvr->type!
2330 		 * resvr->type is the source type...?
2331 		 */
2332 
		icode_make_x86_fist(resvr->pregs[0], resvr,
			(res_type_changed_to_64bit
			/*&& is_integral_type(resvr->type)*/)?
			resvr->type: data->type, il);
2338 	} else {
2339 /*	free_preg(resvr->pregs[0], il, 1, 1);*/
2340 		icode_make_store(curfunc, resvr, resvr, il);
2341 	}
2342 
	/*
	 * Yawn, another duplication to ensure the multi GPR flag is
2344 	 * preserved for the stores above
2345 	 *
2346 	 * 06/08/08: This is now actually beneficial because if we stored
2347 	 * an unsigned 32bit integer to a 64bit storage block (which is
2348 	 * necessary to convert large values such as UINT_MAX correctly),
2349 	 * then we can now set the type of that block to ``unsigned int'',
2350 	 * thus ensuring that the subsequent load only looks at the lower
2351 	 * double-word
2352 	 */
2353 /*	return dup_vreg(resvr);*/
2354 	resvr = dup_vreg(resvr);
2355 	vreg_set_new_type(resvr, data->type);
2356 	return resvr;
2357 #endif /* REMOVE_FLOATBUF */
2358 }
2359 
2360 #if REMOVE_FLOATBUF
2361 
2362 static struct vreg *
2363 load_integer_floatbuf(struct vreg *data, struct type *from,
2364 	struct icode_list *il) {
2365 
2366 	return load_floatbuf(data, from, il, 1);
2367 }
2368 
2369 
2370 static struct vreg *
2371 load_floatval_floatbuf(struct vreg *data, struct type *from,
2372 	struct icode_list *il) {
2373 
2374 	return load_floatbuf(data, from, il, 0);
2375 }
2376 
2377 #endif
2378 
2379 
2380 /* Save FPU CW to memory */
2381 static void
2382 store_x87cw(struct icode_list *il) {
2383 	if (x87cw_old.var_backed == NULL) {
2384 		/* Not allocated yet */
2385 		static struct decl	dec_old;
2386 		static struct decl	dec_new;
2387 		static struct type	ty_old;
2388 		static struct type	ty_new;
2389 
2390 		ty_old = *make_basic_type(TY_SHORT);
2391 		ty_old.name = "_X87CW_old";
2392 		dec_old.dtype = &ty_old;
2393 		x87cw_old.var_backed = &dec_old;
2394 		x87cw_old.type = &ty_old;
2395 		x87cw_old.size = 2;
2396 
2397 		ty_new = *make_basic_type(TY_SHORT);
2398 		ty_new.name = "_X87CW_new";
2399 		dec_new.dtype = &ty_new;
2400 		x87cw_new.var_backed = &dec_new;
2401 		x87cw_new.type = &ty_new;
2402 		x87cw_new.size = 2;
2403 	}
2404 	icode_make_x86_store_x87cw(&x87cw_old, il);
2405 }
2406 
2407 /* Create modified copy of in-memory CW */
2408 static void
2409 modify_x87cw(struct icode_list *il) {
2410 	struct reg		*r;
2411 	struct icode_instr	*ii;
2412 
2413 	r = alloc_16_or_32bit_noesiedi(curfunc, 2, il, NULL);
2414 	vreg_faultin(r, NULL, &x87cw_old, il, 0);
2415 	vreg_map_preg(&x87cw_new, r);
2416 	ii = icode_make_setreg(r->composed_of[0], 12);
2417 	append_icode_list(il, ii);
2418 	icode_make_store(curfunc, &x87cw_new, &x87cw_new, il);
2419 	r->used = 0;
2420 }
2421 
2422 /* Load CW from memory */
2423 static void
2424 load_x87cw(struct vreg *which, struct icode_list *il) {
2425 	icode_make_x86_load_x87cw(which, il);
2426 }
2427 
2428 
2429 
2430 #define AMD64_OR_X86_REG(idx) \
2431 	(backend->arch == ARCH_AMD64? &amd64_x86_gprs[idx]: &x86_gprs[idx])
2432 
2433 static void
2434 change_preg_size(
2435 	struct vreg *vr,
2436 	struct icode_list *il,
2437 	struct type *to,
2438 	struct type *from) {
2439 
2440 	int			i;
2441 	struct reg		*extreg = NULL;
2442 	struct icode_instr	*ii;
2443 	size_t			from_size;
2444 	int			amd64_reg = 0;
2445 
2446 	from_size = backend->get_sizeof_type(from, NULL);
2447 
2448 	for (i = 0; i < N_GPRS; ++i) {
2449 		if (is_member_of_reg(AMD64_OR_X86_REG(i), vr->pregs[0])) {
2450 			break;
2451 		}
2452 	}
2453 	if (i == N_GPRS) {
2454 		if (backend->arch == ARCH_AMD64) {
2455 			amd64_reg = 1;
2456 		} else {
2457 			printf("FATAL ERROR: %s is not member of any gpr\n",
2458 				vr->pregs[0]->name);
2459 			abort();
2460 		}
2461 	}
2462 
2463 
2464 	if (vr->size > from_size
2465 		&& (!IS_LLONG(to->code) || from_size != 4)) {
2466 		/*
2467 		 * A sub register is extended to a bigger register
2468 		 */
2469 		vr->pregs[0]->used = 0;
2470 		if (i < N_GPRS && reg_unused(AMD64_OR_X86_REG(i))) {
2471 			/* Use parent reg, e.g. movsx ax, al */
2472 			if (backend->arch == ARCH_AMD64
2473 				&& vr->size == 8) {
2474 				;
2475 			} else {
				extreg = vr->size == 4 || vr->size == 8?
					&x86_gprs[i]:
					x86_gprs[i].composed_of[0];
2478 			}
2479 		} else {
2480 			/* Use unrelated reg */
2481 			size_t	size;
2482 
2483 			if (vr->size == 8 && backend->arch != ARCH_AMD64) {
2484 				size = 4;
2485 			} else {
2486 				size = vr->size;
2487 			}
2488 			if (from_size == 1) {
2489 				extreg = backend->alloc_16_or_32bit_noesiedi
2490 					(curfunc, size, il, NULL);
2491 			} else {
2492 				extreg = ALLOC_GPR(curfunc, size, il, NULL);
2493 			}
2494 		}
2495 	}
2496 
2497 	if (vr->size == 2) {
2498 		if (from_size == 1) {
2499 			free_preg(vr->pregs[0], il, 1, 0);
2500 			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
2501 			vreg_map_preg(vr, extreg);
2502 		} else if (from_size == 4) {
2503 			/* 4 - truncate */
2504 			free_preg(vr->pregs[0], il, 1, 0);
2505 			vreg_map_preg(vr, vr->pregs[0]->composed_of[0]);
2506 		} else if (from_size == 8) {
2507 			/* long long or long on amd64 */
2508 			if (backend->arch == ARCH_X86) {
2509 				free_preg(vr->pregs[0], il, 1, 0);
2510 				free_preg(vr->pregs[1], il, 1, 0);
2511 				vreg_map_preg(vr, vr->pregs[0]->composed_of[0]);
2512 			} else {
2513 				free_preg(vr->pregs[0], il, 1, 0);
2514 				vreg_map_preg(vr,
2515 					vr->pregs[0]
2516 						->composed_of[0]
2517 						->composed_of[0]);
2518 			}
2519 		}
2520 	} else if (vr->size == 4) {
2521 		if (from_size == 8) {
2522 			/*
2523 			 * long long! Truncate - low-order 32bits are in
2524 			 * first preg, on x86
2525 			 */
2526 			if (backend->arch == ARCH_X86) {
2527 				free_preg(vr->pregs[1], il, 1, 0);
2528 				vreg_map_preg(vr, vr->pregs[0]);
2529 			} else {
2530 				free_preg(vr->pregs[0], il, 1, 0);
2531 				vreg_map_preg(vr, vr->pregs[0]->
2532 					composed_of[0]);
2533 			}
2534 		} else {
2535 			/* extend */
2536 			/*
2537 			 * Is this sub register the only used one? If not,
2538 			 * the other one must be saved
2539 			 */
2540 			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
2541 			free_preg(vr->pregs[0], il, 1, 0);
2542 			vreg_map_preg(vr, extreg);
2543 		}
2544 	} else if (vr->size == 8) {
2545 		/* long long! */
2546 		if (backend->arch == ARCH_AMD64) {
2547 			if (extreg == NULL) {
2548 				extreg = ALLOC_GPR(curfunc, 0, il, NULL);
2549 			}
2550 			free_preg(vr->pregs[0], il, 1, 0);
2551 			icode_make_copyreg(extreg, vr->pregs[0], to, from, il);
2552 			vreg_map_preg(vr, extreg);
2553 			return;
2554 		}
2555 
2556 #if 0
2557 		if (to->code == TY_ULLONG) {
2558 			if (extreg != NULL) {
2559 				icode_make_copyreg(extreg, vr->pregs[0],
2560 					to, from, il);
2561 				vreg_map_preg(vr, extreg);
2562 			} else {
2563 				/*
2564 				 * dword being converted to long long -
2565 				 * keep mapping
2566 				 */
2567 				vreg_map_preg(vr, vr->pregs[0]);
2568 			}
2569 			reg_set_unallocatable(vr->pregs[0]);
2570 			r = ALLOC_GPR(curfunc, 4, il, NULL);
2571 			reg_set_allocatable(vr->pregs[0]);
2572 			vreg_map_preg2(vr, r);
2573 			ii = icode_make_setreg(r, 0);
2574 			append_icode_list(il, ii);
2575 		} else {
2576 #endif
2577 			/* signed long long */
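			/*
			 * (E.g. int -> long long with the value in eax:
			 * cdq sign-extends eax into edx, yielding the
			 * 64bit result in the edx:eax pair; for unsigned
			 * sources the upper half is simply zeroed.)
			 */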
2578 			if (vr->pregs[0] != &x86_gprs[0]) {
2579 				if (!reg_unused(&x86_gprs[0])) {
2580 					free_preg(&x86_gprs[0], il, 1, 1);
2581 				}
2582 
2583 				/* Source may be associated with a variable?! */
2584 				free_preg(vr->pregs[0], il, 1, 0);
2585 				icode_make_copyreg(&x86_gprs[0], vr->pregs[0],
2586 					from, from, il);
2587 			}
2588 			if (!reg_unused(&x86_gprs[3])) {
2589 				free_preg(&x86_gprs[3], il, 1, 1);
2590 			}
2591 			if (from->sign == TOK_KEY_SIGNED) {
2592 				icode_make_x86_cdq(il);
2593 			} else {
2594 				ii = icode_make_setreg(&x86_gprs[3], 0);
2595 				append_icode_list(il, ii);
2596 			}
2597 			vreg_map_preg(vr, &x86_gprs[0]);
2598 			vreg_map_preg2(vr, &x86_gprs[3]);
2599 #if 0
2600 		}
2601 #endif
2602 	} else {
2603 		/* Must be 1 - truncate */
2604 		struct reg	*r;
2605 
2606 
2607 		if (backend->arch == ARCH_AMD64
2608 			&& from_size == 8) {
2609 			free_preg(vr->pregs[0], il, 1, 0);
2610 			vreg_map_preg(vr, vr->pregs[0]->
2611 				composed_of[0]-> /* 32bit */
2612 				composed_of[0]-> /* 16bit */
2613 				composed_of[amd64_reg? 0: 1]);
2614 			return;
2615 		}
2616 
2617 		/*
2618 		 * 08/18/08: This was missing the check for amd64_reg, so it
2619 		 * would fail for i = 6
2620 		 */
2621 		if (i >= 4 && !amd64_reg) {
2622 			/*
2623 			 * Whoops - source resides in esi/edi, which
2624 			 * do not have 8bit sub registers
2625 			 */
2626 			free_preg(&x86_gprs[i], il, 1, 0);
2627 			/*
2628 			 * 06/20/08: This used to pass the source size, i.e.
2629 			 * possibly 8 for long long! This was wrong because
2630 			 * it ended up setting multi-reg state in the backend
2631 			 * and expecting a second alloc_*() for the second
2632 			 * dword. This is wrong because we only want a single
2633 			 * 32bit part register
2634 			 */
2635 			r = alloc_16_or_32bit_noesiedi(curfunc,
2636 				/*from_size*/4, il, NULL);
2637 			icode_make_copyreg(r, &x86_gprs[i], from, from, il);
2638 			free_preg(r, il, 1, 0);
2639 		} else if (!amd64_reg) {
2640 			r = &x86_gprs[i];
2641 			free_preg(r, il, 1, 0);
2642 		} else {
2643 			/*
2644 			 * 08/18/08: This was missing?!!?!?!??
2645 			 */
2646 			r = vr->pregs[0];
2647 		}
2648 #if 0
2649 		free_pregs_vreg(vr, il, 1, 0);
2650 #endif
2651 
2652 		/*
2653 		 * 09/30/07: Wow, this unconditionally assumed that
2654 		 * r is a 32bit register! That broke short-to-char
2655 		 * conversion, but apparently only in some cases
2656 		 *
2657 		 * XXX we have to use r->size instead of from_size
2658 		 * here... otherwise e.g. on AMD64 a
2659 		 *
2660 		 *     *charp++ = *ushortp;
2661 		 *
2662 		 * ... assignment gives the short source value in
2663 		 * eax, which may be a conversion/promotion issue
2664 		 */
2665 		if (r->size >= 4) {
2666 			if (r->size == 8 && from_size == 8) {
2667 				/* AMD64 */
2668 				vreg_map_preg(vr,
2669 					r->
2670 					composed_of[0]->
2671 					composed_of[0]->
2672 					/* was missing amd64_reg case */
2673 					composed_of[amd64_reg? 0: 1]);
2674 			} else {
2675 				vreg_map_preg(vr,
2676 					r->
2677 					composed_of[0]->
2678 					/* was missing amd64_reg case */
2679 					composed_of[amd64_reg? 0: 1]);
2680 			}
2681 		} else {
2682 			/* was missing amd64_reg case */
2683 			vreg_map_preg(vr,
2684 				r->composed_of[amd64_reg? 0: 1]);
2685 		}
2686 	}
2687 }
2688 
2689 
2690 static int
2691 convert_amd64_fp(
2692 	struct type *to,
	struct type *from,
2694 	struct vreg *ret,
2695 	struct icode_list *il) {
2696 
2697 	struct vreg	*fbvr;
2698 	int		rc = 0;
2699 
2700 	if (from->code == to->code) {
2701 		/*
2702 		 * 07/29/08: This didn't return the ``is long double''
2703 		 * indicator, so the x87 register was not freed and the
2704 		 * register stack filled up
2705 		 *
2706 		 * XXX This raises the question of why can a conversion
2707 		 * of type T to itself (no-op) get this far and doesn't
2708 		 * cause a very early return in icode_make_cast()?
2709 		 */
2710 		if (to->code == TY_LDOUBLE) {
2711 			return 1;
2712 		}
2713 		return 0;
2714 	}
2715 
2716 	if (from->code == TY_LDOUBLE) {
2717 		/*
2718 		 * long double, resident in an x87 register, to
2719 		 * float or double
2720 		 */
2721 
2722 
2723 #if ! REMOVE_FLOATBUF
2724 		fbvr = n_xmemdup(&floatbuf, sizeof floatbuf);
2725 #else
2726 		fbvr = vreg_alloc(NULL,NULL,NULL,NULL);
2727 #endif
2728 
2729 		if (to->code == TY_DOUBLE) {
2730 			fbvr->size = 8;
2731 			fbvr->type = make_basic_type(TY_DOUBLE);
2732 		} else {
2733 			/* float */
2734 			fbvr->size = 4;
2735 			fbvr->type = make_basic_type(TY_FLOAT);
2736 		}
2737 		vreg_map_preg(fbvr, ret->pregs[0]);
2738 #if ! REMOVE_FLOATBUF
2739 		icode_make_store(curfunc, fbvr, fbvr, il);
2740 		free_preg(fbvr->pregs[0], il, 1, 0);
2741 #else
2742 		free_preg(fbvr->pregs[0], il, 1, 1);
2743 #endif
2744 
2745 		/* Now into SSE register */
2746 		vreg_faultin(NULL, NULL, fbvr, il, 0);
2747 		vreg_map_preg(ret, fbvr->pregs[0]);
2748 	} else if (to->code == TY_LDOUBLE) {
2749 		/*
2750 		 * float or double, resident in an SSE register, to
2751 		 * long double
2752 		 */
2753 		struct reg	*r;
2754 		struct vreg	*tmp;
2755 
2756 #if 0
2757 		r = backend->alloc_fpr(curfunc, 12, il, NULL);
2758 #endif
2759 		r = &x86_fprs[0];
2760 
2761 		/*
2762 		 * 04/12/08: Fixed this
2763 		 */
2764 		tmp = dup_vreg(ret);
2765 		tmp->type = from;
2766 		tmp->size = backend->get_sizeof_type(from, NULL);
2767 		vreg_map_preg(tmp, ret->pregs[0]);
2768 
2769 		free_preg(/*ret->pregs[0]*/tmp->pregs[0], il, 1, 1); /* causes store */
2770 		/*
		 * XXX hmm, another ad-hoc temp var :(
		 * There are lots of problems because we always
		 * work with ``ret'', which already has the target
		 * type set. We should use the source vreg more,
		 * which has the correct type for loading
2776 		 */
2777 		tmp = dup_vreg(tmp /*, sizeof *ret*/);
2778 		tmp->type = from;
2779 		tmp->size = backend->get_sizeof_type(from, NULL);
2780 
2781 		vreg_faultin_x87(r, NULL, tmp, il, 0);
2782 		vreg_map_preg(ret, r);
2783 		rc = 1;
2784 	} else if (to->code == TY_DOUBLE) {
2785 		icode_make_amd64_cvtss2sd(ret->pregs[0], il);
2786 	} else { /* double to float */
2787 		icode_make_amd64_cvtsd2ss(ret->pregs[0], il);
2788 	}
2789 	return rc;
2790 }
2791 
2792 /*
2793  * Most of the time, instructions give meaning to data. This function
2794  * generates code required to convert virtual register ``src'' to type
2795  * ``to'' where necessary
2796  */
2797 static struct vreg *
2798 icode_make_cast(struct vreg *src, struct type *to, struct icode_list *il) {
2799 	struct reg		*r;
2800 	struct reg		*r2;
2801 	struct vreg		*ret;
2802 	struct type		*from = src->type;
2803 	struct type		*orig_to = to;
2804 	size_t			size;
2805 	int			res_is_x87_reg = 0;
2806 
2807 	ret = src;
2808 	if (ret->pregs[0] != NULL
2809 		&& ret->pregs[0]->vreg == ret) {
2810 		/* Item is already resident in a register */
2811 		r = NULL;
2812 	} else {
2813 		/*
2814 		 * Item is not resident yet so we get to choose
2815 		 * a suitable register
2816 		 */
2817 #if 0
2818 		if (IS_FLOATING(to->code)) {
2819 			r = backend->alloc_fpr(curfunc, 0, il, NULL);
2820 		} else {
2821 			size = backend->get_sizeof_type(to, NULL);
2822 			r = backend->alloc_gpr(curfunc, size, il, NULL);
2823 		}
2824 #endif
		r = NULL;
2826 	}
2827 
2828 	if (is_x87_trash(ret)) {
2829 		ret = x87_anonymify(ret, il);
2830 		if (ret == src) {
2831 			ret = n_xmemdup(ret, sizeof *ret);
2832 		}
2833 	} else {
2834 		if (ret->type->tlist != NULL
2835 			|| (ret->type->code != TY_STRUCT
2836 			&& ret->type->code != TY_UNION)) {
2837 			vreg_anonymify(&ret, NULL, NULL /*r*/, il);
2838 		}
2839 
2840 		if (ret == src) {
2841 			/* XXX anonymify is broken */
2842 			ret = vreg_disconnect(src);
2843 		}
2844 	}
2845 
2846 	ret->type = to;
2847 
2848 	if (to->code == TY_VOID) {
2849 		if (to->tlist == NULL) {
2850 			ret->size = 0;
2851 			free_pregs_vreg(ret, il, 0, 0);
2852 			return ret;
2853 		}
2854 	} else {
2855 		ret->is_nullptr_const = 0;
2856 	}
2857 
2858 	ret->size = backend->get_sizeof_type(to, NULL);
2859 
2860 	if (from->tlist != NULL && to->tlist != NULL) {
2861 		/*
		 * Pointers are always of the same size
		 * and use the same registers
2864 		 */
2865 		return ret;
	} else if (to->tlist != NULL) {
2867 		/*
2868 		 * Integral type to pointer type - cast to
2869 		 * uintptr_t to get it to the same size
2870 		 */
2871 		to = backend->get_uintptr_t();
2872 	}
2873 
2874 
2875 	/*
2876 	 * We may have to move the item to a different
2877 	 * register as a result of the conversion
2878 	 */
2879 	if (is_floating_type(to)) {
2880 		if (!is_floating_type(from)) {
2881 			int	from_size;
2882 
2883 			from_size = backend->get_sizeof_type(from, NULL);
2884 			/*
2885 			 * 04/17/08: Convert to 64bit integer, so that
2886 			 * 64bit fildq is used instead of 32bit fild!
2887 			 * This is necessary for large (unsigned) 32bit
2888 			 * values that are otherwise not converted
2889 			 * properly
2890 			 */
2891 			if (from_size < 8) {
2892 				/* Need to sign-extend first*/
2893 				struct vreg	*tmp =
2894 					n_xmemdup(ret, sizeof *ret);
2895 				tmp->size = 8;
2896 				change_preg_size(tmp, il, /*to*/
2897 					make_basic_type(TY_LLONG), from);
2898 				ret = n_xmemdup(ret, sizeof *ret);
2899 				vreg_map_preg(ret, tmp->pregs[0]);
2900 				if (backend->arch == ARCH_X86) {
2901 					vreg_map_preg2(ret, tmp->pregs[1]);
2902 				}
2903 				ret->type = make_basic_type(TY_LLONG);
2904 				ret->size = 8;
2905 
2906 				/*
2907 				 * 07/24/08: This wrongly set the multi-reg
2908 				 * flag for AMD64 as well
2909 				 */
2910 				if (backend->arch != ARCH_AMD64) {
2911 					ret->is_multi_reg_obj = 2;
2912 				}
2913 				from = ret->type;
2914 			}
2915 
2916 			/*
2917 			 * 08/04/08: Don't perform x86-like u-integer to long
2918 			 * double conversion for 64bit integers on AMD64
2919 			 * anymore
2920 			 */
2921 			if (backend->arch == ARCH_X86
2922 				&& from->code == TY_ULLONG) {
2923 				/*
2924 				 * 08/05/09: Request 4 bytes instead of 8.
2925 				 * 8 byte requests are always treated as
2926 				 * multi-register requests, but we only
2927 				 * want to allocate a single register
2928 				 * (since we already have ret->pregs[0]).
2929 				 * So the next ALLOC_GPR() - which may be
2930 				 * for a 16bit or 8bit item - would
2931 				 * wrongly return a 32bit GPR
2932 				 */
2933 				struct reg	*temp =
2934 					ALLOC_GPR(curfunc, /*8*/4, il, NULL);
2935 				struct vreg	*tempfb;
2936 
2937 				r = backend->alloc_fpr(curfunc,
2938 					0, il, NULL);
2939 
2940 				tempfb = dup_vreg(ret);
2941 				vreg_map_preg(tempfb, ret->pregs[0]);
2942 				vreg_map_preg2(tempfb, ret->pregs[1]);
2943 				vreg_set_new_type(tempfb, from);
2944 				free_preg(ret->pregs[0], il, 1, 1);
2945 				icode_make_x86_fild(r, tempfb, il);
2946 
2947 				icode_make_amd64_ulong_to_float(
2948 					ret->pregs[1], /* pass upper dword as source reg */
2949 					temp,
2950 					r,
2951 					to->code, /* is float */
2952 					il);
2953 				free_preg(temp, il, 1, 0);
2954 				free_preg(ret->pregs[0], il, 1, 0);
2955 				vreg_map_preg(ret, r);
2956 				res_is_x87_reg = 1;
2957 			} else if (backend->arch == ARCH_X86
2958 				|| (to->code == TY_LDOUBLE
2959 					&& (ret->pregs[0]->size <= 4
2960 					|| from->sign != TOK_KEY_UNSIGNED))) {
2961 				/* x87 kludgery */
2962 #if ! REMOVE_FLOATBUF
2963 				load_floatbuf(ret, il);
2964 				free_preg(ret->pregs[0], il, 1, 0);
2965 				if (ret->is_multi_reg_obj) {
2966 					free_preg(ret->pregs[1], il, 1, 0);
2967 				}
2968 #else
2969 				struct vreg	*tempfb =
2970 					load_integer_floatbuf(ret,from,il);
2971 
2972 				free_preg(tempfb->pregs[0], il, 1, 0);
2973 				if (tempfb->is_multi_reg_obj) {
2974 					free_preg(tempfb->pregs[1], il, 1, 0);
2975 				}
2976 
				tempfb->is_multi_reg_obj = 0;
2978 #endif
2979 #if 0
2980 				r = backend->alloc_fpr(curfunc, 0, il, NULL);
2981 #endif
2982 				r = &x86_fprs[0];
2983 #if ! REMOVE_FLOATBUF
2984 				floatbuf.pregs[0] = NULL;
2985 				vreg_faultin_x87(r, NULL, &floatbuf, il, 0);
2986 				free_preg(ret->pregs[0], il, 1, 0);
2987 #else
2988 				tempfb->pregs[0] = NULL;
2989 /*				vreg_faultin_x87(r, NULL, tempfb, il, 0);*/
				tempfb = dup_vreg(tempfb);
				vreg_set_new_type(tempfb, from);
2992 				icode_make_x86_fild(r, tempfb, il);
2993 
2994 #endif
				ret = dup_vreg(ret);
2996 				vreg_map_preg(ret, r);
2997 				ret->size = backend->get_sizeof_type(to, NULL);
2998 				res_is_x87_reg = 1;
				ret->stack_addr = NULL;
3000 			} else {
3001 				/*
3002 				 * SSE (AMD64) integer to floating point
3003 				 * conversion
3004 				 */
3005 				if (ret->pregs[0]->size > 4) {
3006 					/*
3007 					 * 64bit int to fp conversion.
3008 					 *
3009 					 * 04/11/08: Use qword SSE
3010 					 * instructions instead of the
3011 					 * utter x87 nonsense. There was
3012 					 * a comment here that said 64bit
3013 					 * conv instructions don't exist,
3014 					 * maybe they were overlooked?
3015 					 */
3016 					if (to->code == TY_LDOUBLE) {
3017 						struct reg	*temp =
3018 							ALLOC_GPR(curfunc, 8, il, NULL);
3019 						struct vreg *tempfb;
3020 
3021 						/*
3022 						 * Note that we can only get
3023 						 * here for unsigned 64bit
3024 						 * integers
3025 						 */
3026 						r = backend->alloc_fpr(curfunc,
3027 							16, il, NULL);
3028 						/*
3029 						 * 08/02/08: Unsigned long to
3030 						 * float is a bit more
3031 						 * complicated than we made it
3032 						 * out to be
3033 						 */
3034 /*						free_preg(ret->pregs[0], il, 1, 1);*/
3035 						tempfb = dup_vreg(ret);
3036 						vreg_map_preg(tempfb, ret->pregs[0]);
3037 						vreg_set_new_type(tempfb, from);
3038 						free_preg(ret->pregs[0], il, 1, 1);
3039 						icode_make_x86_fild(r, tempfb, il);
3040 
3041 						icode_make_amd64_ulong_to_float(
3042 							ret->pregs[0],
3043 							temp,
3044 							r,
3045 							TY_LDOUBLE, /* is double */
3046 							il);
3047 						free_preg(temp, il, 1, 0);
3048 						vreg_map_preg(ret, r);
3049 						ret->size = backend->get_sizeof_type(to, NULL);
3050 						res_is_x87_reg = 1;
3051 					} else if (to->code == TY_DOUBLE) {
3052 						r = backend->alloc_fpr(curfunc,
3053 							8, il, NULL);
3054 						if (from->sign == TOK_KEY_UNSIGNED) {
3055 							/*
3056 							 * 08/02/08: Unsigned long to
3057 							 * float is a bit more
3058 							 * complicated than we made it
3059 							 * out to be
3060 							 */
3061 							struct reg	*temp =
3062 								ALLOC_GPR(curfunc, 8, il, NULL);
3063 							icode_make_amd64_ulong_to_float(
3064 								ret->pregs[0],
3065 								temp,
3066 								r,
3067 								TY_DOUBLE, /* is double */
3068 								il);
3069 							free_preg(temp, il, 1, 0);
3070 						} else {
3071 							icode_make_amd64_cvtsi2sdq(
3072 								r, ret, il);
3073 						}
3074 					} else {
3075 						/* Has to be float */
3076 						/*
3077 						 * 08/02/08: Unsigned long to
3078 						 * float is a bit more
3079 						 * complicated than we made it
3080 						 * out to be
3081 						 */
3082 						r = backend->alloc_fpr(curfunc,
3083 							4, il, NULL);
3084 
3085 
3086 						if (from->sign == TOK_KEY_UNSIGNED) {
3087 							struct reg	*temp =
3088 								ALLOC_GPR(curfunc, 8, il, NULL);
3089 							icode_make_amd64_ulong_to_float(
3090 								ret->pregs[0],
3091 								temp,
3092 								r,
3093 								TY_FLOAT, /* is float */
3094 								il);
3095 							free_preg(temp, il, 1, 0);
3096 						} else {
3097 							icode_make_amd64_cvtsi2ssq(
3098 								r, ret, il);
3099 						}
3100 					}
3101 					free_preg(ret->pregs[0], il, 1, 0);
3102 					vreg_map_preg(ret, r);
3103 				} else {
3104 					if (to->code == TY_DOUBLE) {
3105 						r = backend->alloc_fpr(curfunc,
3106 							backend->get_sizeof_type
3107 							(to, NULL), il, NULL);
3108 						icode_make_amd64_cvtsi2sd(
3109 							r, ret, il);
3110 						res_is_x87_reg = 1;
3111 					} else {
3112 						/* Has to be float */
3113 						r = backend->alloc_fpr(curfunc,
							ret->size, il, NULL);
3115 						icode_make_amd64_cvtsi2ss(
3116 							r, ret, il);
3117 					}
3118 					free_preg(ret->pregs[0], il, 1, 0);
3119 					vreg_map_preg(ret, r);
3120 				}
3121 			}
3122 		} else if (backend->arch == ARCH_AMD64
3123 			|| sysflag == OS_OSX) {
3124 			/*
3125 			 * On AMD64, the item may be in an x87 or
3126 			 * SSE register, and has to be moved into
3127 			 * SSE or x87, respectively
3128 			 */
3129 			if (is_x87_trash(src)) {
3130 				vreg_faultin_x87(NULL, NULL, src, il, 0);
3131 				vreg_map_preg(ret, src->pregs[0]);
3132 #if 0
3133 				free_preg(vrtmp->pregs[0], il, 1, 1);
3134 #endif
3135 			}
3136 			res_is_x87_reg = convert_amd64_fp(to, from, ret, il);
3137 		} else {
3138 			/*
3139 			 * x87 to x87... this is not a no-op anymore! Because:
3140 			 * the source fp value is stored on the stack, so we
3141 			 * have to load it to a register and create a new
3142 			 * stack buffer of different size to store it
3143 			 * (remember we never want to keep stuff in x87 regs)
3144 			 */
3145 			struct vreg	*vrtmp;
3146 
3147 			vreg_faultin_x87(NULL, NULL, src, il, 0);
3148 			vrtmp = vreg_alloc(NULL,NULL,NULL,NULL);
3149 			vrtmp->type = to;
3150 			vrtmp->size = backend->get_sizeof_type(to, NULL);
3151 			vreg_map_preg(vrtmp, src->pregs[0]);
3152 			free_preg(vrtmp->pregs[0], il, 1, 1);
3153 			ret = vrtmp;
3154 		}
3155 	} else if (is_floating_type(from)) {
3156 		if (!is_floating_type(to)) {
3157 			if ((backend->arch == ARCH_X86 && sysflag != OS_OSX)
3158 				|| from->code == TY_LDOUBLE) {
3159 				/*
3160 			 	 * We have to change the status control word,
3161 			 	 * perform the conversion by writing the value
3162 			 	 * to the float buffer, then save it in a GPR,
3163 			 	 * then reset the CW
3164 				  */
#if REMOVE_FLOATBUF
				struct vreg		*tempfb;
				struct stack_block	*sb;
#else
				struct vreg		*fbvr;
#endif
3169 				store_x87cw(il);
3170 				modify_x87cw(il);
3171 				load_x87cw(&x87cw_new, il);
3172 				size = backend->get_sizeof_type(to, NULL);
3173 
3174 				vreg_faultin_x87(NULL, NULL, src, il, 0);
3175 				vreg_map_preg(ret, src->pregs[0]);
3176 				src->pregs[0] = NULL;
3177 #if ! REMOVE_FLOATBUF
3178 				load_floatbuf(ret, il);
3179 				free_preg(floatbuf.pregs[0], il, 1, 0);
3180 				if (ret->is_multi_reg_obj) {
3181 					free_preg(floatbuf.pregs[1], il, 1, 0);
3182 				}
3183 #else
				ret = dup_vreg(ret);
				vreg_set_new_type(ret, to);
3186 				tempfb = load_floatval_floatbuf(ret, from, il);
3187 #if 0
3188 				free_preg(tempfb->pregs[0], il, 1, 0);
3189 				if (ret->is_multi_reg_obj) {
3190 					free_preg(tempfb->pregs[1], il, 1, 0);
3191 				}
3192 #endif
3193 #endif
3194 
3195 #if 0
3196 				floatbuf.pregs[0] = NULL;
3197 #endif
3198 
3199 				if (size < 4) {
3200 					/*
3201 					 * fistp cannot output shorts or chars -
3202 					 * so get an int and convert it
3203 				 	 */
3204 
3205 					r = alloc_16_or_32bit_noesiedi(curfunc,
3206 						4, il, NULL);
3207 				} else {
3208 					r = ALLOC_GPR(curfunc, size, il, NULL);
3209 				}
3210 
3211 				if (backend->arch == ARCH_X86
3212 					&& IS_LLONG(to->code)
3213 					&& to->tlist == NULL) {
3214 					r2 = ALLOC_GPR(curfunc, size, il, NULL);
3215 				} else {
3216 					r2 = NULL;
3217 				}
3218 #if ! REMOVE_FLOATBUF
3219 				fbvr = n_xmemdup(&floatbuf, sizeof floatbuf);
3220 				if (size > 4 && backend->arch == ARCH_AMD64) {
3221 					fbvr->size = 8;
3222 					fbvr->type = make_basic_type(TY_LONG);
3223 				} else {
3224 					fbvr->size = 4;
3225 					fbvr->type = make_basic_type(TY_INT);
3226 				}
3227 
3228 				vreg_faultin(r, r2, fbvr, il, 0);
3229 #else
3230 				sb = tempfb->stack_addr;
3231 
3232 				tempfb = vreg_alloc(NULL,NULL,NULL,NULL);
3233 				if (size > 4 && backend->arch == ARCH_AMD64) {
3234 					tempfb->size = 8;
3235 					tempfb->type = make_basic_type(TY_LONG);
3236 				} else if (size == 8) {
3237 					/*
3238 					 * 06/04/08: This was missing - why? It
3239 					 * broke double to long long conversion
3240 					 * since the long was treated as two
3241 					 * individual ints instead of one llong
3242 					 */
3243 					tempfb->type = make_basic_type(TY_LLONG);
3244 					tempfb->size = 8;
3245 					tempfb->is_multi_reg_obj = 2;
3246 				} else {
3247 					tempfb->size = 4;
3248 					tempfb->type = make_basic_type(TY_INT);
3249 				}
3250 				tempfb->stack_addr = sb;
3251 
3252 				vreg_faultin(r, r2, tempfb, il, 0);
3253 #endif
3254 
3255 				if (size < 4) {
3256 					if (size == 1) {
3257 						free_preg(r, il, 0, 0);
3258 						r = r->composed_of[0]
3259 							->composed_of[/*0*/1];
3260 					} else {
3261 						/* 2 */
3262 						free_preg(r, il, 0, 0);
3263 						r = r->composed_of[0];
3264 					}
3265 				}
3266 				vreg_map_preg(ret, r);
3267 				if (r2 != NULL) {
3268 					vreg_map_preg2(ret, r2);
3269 				}
3270 				load_x87cw(&x87cw_old, il);
3271 			} else {
3272 				/*
3273 				 * SSE (AMD64) floating point to integer
3274 				 * conversion
3275 				 */
3276 				int	siz;
3277 				int	to_quad = 0;
3278 				int	is_64bit = 0;
3279 
3280 
3281 				/*
3282 				 * 08/01/08: When converting to unsigned
3283 				 * 32bit integers, we first have to convert
3284 				 * to a 64bit integer, then chop off the
3285 				 * desired part!
3286 				 */
3287 				if (backend->arch == ARCH_X86) {
3288 					/*
3289 					 * 02/15/09: SSE on x86 (for OSX) cannot use
3290 					 * 64bit GPRs, so for now we just always use
3291 					 * 32bit results
3292 					 */
3293 					is_64bit = 0;
3294 					to_quad = 0;
3295 				} else {
3296 					if (!IS_LONG(to->code) && !IS_LLONG(to->code)) {
3297 						is_64bit = 0;
3298 						if (to->sign == TOK_KEY_UNSIGNED) {
3299 							to_quad = 1;
3300 						}
3301 					} else {
3302 						is_64bit = 1;
3303 						to_quad = 1;
3304 					}
3305 				}
3306 
3307 				r = ALLOC_GPR(curfunc, to_quad? 8: 4, il, NULL);
3308 				if (from->code == TY_DOUBLE) {
3309 					if (to_quad) {
3310 						icode_make_amd64_cvttsd2siq(
3311 							r, ret->pregs[0], il);
3312 					} else {
3313 						icode_make_amd64_cvttsd2si(
3314 							r, ret->pregs[0], il);
3315 					}
3316 				} else {
3317 					if (to_quad) {
3318 						icode_make_amd64_cvttss2siq(
3319 							r, ret->pregs[0], il);
3320 					} else {
3321 						icode_make_amd64_cvttss2si(
3322 							r, ret->pregs[0], il);
3323 					}
3324 				}
3325 				siz = backend->get_sizeof_type(to, NULL);
3326 				/*
3327 				 * 08/01/08: < 4 instead of == 4
3328 				 */
3329 				if (siz < 4 || (to_quad && !is_64bit)) {
3330 					struct reg	*r2;
3331 
3332 					r2 = ALLOC_GPR(curfunc, siz, il, NULL);
3333 					icode_make_copyreg(r2, r, to,
3334 						to->sign !=
3335 							TOK_KEY_UNSIGNED?
3336 							make_basic_type(TY_INT)
3337 							: make_basic_type(TY_UINT),
3338 						il);
3339 					free_preg(r, il, 0, 0);
3340 					r = r2;
3341 				}
3342 
3343 				vreg_map_preg(ret, r);
3344 				if (backend->arch == ARCH_X86 && IS_LLONG(to->code)) {
3345 					/*
3346 					 * The result is 32bit, so sign- or zero-extend
3347 					 * it if we are converting to long long
3348 					 */
3349 					change_preg_size(ret, il, to, make_basic_type(TY_INT));
3350 				}
3351 			}
3352 		} else if (backend->arch == ARCH_AMD64) {
3353 			/*
3354 			 * x87 vs SSE maybe?
3355 			 */
3356 			res_is_x87_reg = convert_amd64_fp(to, from, ret, il);
3357 		}
3358 	} else if (ret->pregs[0]->size != ret->size && to->code != from->code) {
3359 		/*
3360 		 * XXX change_preg_size() was being called for ``long long''
3361 		 * versus ``unsigned long long'' because the preg size check
3362 		 * above yields 4 for those types!
3363 		 * Thus only call the function if one or both types are not
3364 		 * llong
3365 		 */
3366 		if ( (!IS_LLONG(from->code) || from->tlist != NULL)
3367 			|| (!IS_LLONG(to->code) || to->tlist != NULL)  ) {
3368 			change_preg_size(ret, il, to, from);
3369 		}
3370 	}
3371 
3372 	to = orig_to; /* because of uintptr_t stuff */
3373 	ret->type = to;
3374 	ret->size = backend->get_sizeof_type(to, NULL);
3375 
3376 	if (res_is_x87_reg) {
3377 		/*
		 * Save to stack so that the god-awful x87 regs are
3379 		 * all free
3380 		 */
3381 		ret->is_multi_reg_obj = 0;
3382 		vreg_map_preg(ret, ret->pregs[0]);
3383 		free_preg(ret->pregs[0], il, 1, 1);
3384 		ret->pregs[0] = NULL;
3385 	} else if (ret->pregs[0] != NULL) {
3386 		/*
3387 		 * The non-null check is to avoid mapping to a null
3388 		 * pointer register, which can happen if source and
3389 		 * target type are x87 fp types, such that no
3390 		 * conversion is actually performed and no register
3391 		 * is ever loaded
3392 		 */
3393 		vreg_map_preg(ret, ret->pregs[0]);
3394 	}
3395 
3396 	/* Update multi-register information */
3397 	if (backend->arch == ARCH_X86
3398 		 && IS_LLONG(to->code)
3399 		 && to->tlist == NULL) {
3400 		ret->is_multi_reg_obj = 2;
3401 		vreg_map_preg2(ret, ret->pregs[1]);
3402 	} else {
3403 		ret->is_multi_reg_obj = 0;
3404 		ret->pregs[1] = NULL;
3405 	}
3406 	if (ret->type->code == TY_BOOL && ret->type->tlist == NULL) {
3407 		boolify_result(ret, il);
3408 	}
3409 
3410 	return ret;
3411 }
3412 
3413 static void
3414 icode_initialize_pic(struct function *f, struct icode_list *il) {
3415 	/*
3416 	 * We only have to do the first initialization, because ebx is
3417 	 * callee-save, and so even after function calls it remains
3418 	 * loaded with the GOT address
3419 	 */
3420 	if (!f->pic_initialized) {
3421 		free_preg(&x86_gprs[1], il, 1, 1);
3422 		reg_set_unallocatable(&x86_gprs[1]);
3423 		f->callee_save_used |= CSAVE_EBX;
3424 		icode_make_initialize_pic(f, il);
3425 	}
3426 }
3427 
3428 static void
3429 icode_complete_func(struct function *f, struct icode_list *il) {
3430 	(void) il;
3431 
3432 	if (f->pic_initialized) {
3433 		/* PIC register ebx was used - free it again */
3434 		reg_set_allocatable(&x86_gprs[1]);
3435 		x86_gprs[1].used = 0;
3436 	}
3437 }
3438 
3439 static void
3440 do_print_gpr(struct reg *r) {
3441 	printf("%s=%d(%d) ", r->name, r->used, reg_allocatable(r));
3442 	if (r->vreg && r->vreg->pregs[0] == r) {
3443 		printf("<-> %p", r->vreg);
3444 	}
3445 }
3446 
3447 static void
3448 debug_print_gprs(void) {
3449 	int	i;
3450 
3451 	for (i = 0; i < 6; ++i) {
3452 		printf("\t\t");
3453 		do_print_gpr(&x86_gprs[i]);
3454 		putchar('\t');
3455 		do_print_gpr(x86_gprs[i].composed_of[0]);
3456 		if (i < 4) {
3457 			putchar('\t');
3458 			do_print_gpr(x86_gprs[i].composed_of[0]->
3459 				composed_of[0]);
3460 			putchar('\t');
3461 			do_print_gpr(x86_gprs[i].composed_of[0]->
3462 				composed_of[1]);
3463 		}
3464 		putchar('\n');
3465 	}
3466 }
3467 
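/*
 * On x86, a (non-pointer) long long occupies two GPRs (eax:edx style),
 * hence the return value 2; everything else fits into a single register.
 */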
3468 static int
3469 is_multi_reg_obj(struct type *t) {
3470 	return (t->tlist == NULL && IS_LLONG(t->code))? 2: 0;
3471 }
3472 
3473 static struct reg *
3474 name_to_reg(const char *name) {
3475 	int		i;
3476 	size_t		len;
3477 
3478 	if (*name == '%') ++name;
3479 
3480 	if (strncmp(name, "st", 2) == 0) {
3481 		/* Floating point registers */
3482 		if (name[2] == 0) {
3483 			/* st = st(0) */
3484 			return &x86_fprs[0];
3485 		} else if (name[2] != '(' || name[4] != ')'
3486 			|| name[5] != 0 || !isdigit((unsigned char)name[3])
3487 			|| name[3] > '7') {
3488 			return NULL;
3489 		} else {
3490 			return &x86_fprs[name[3] - '0'];
3491 		}
3492 	} else if ((len = strlen(name)) == 2) {
		if (name[1] == 'i') {
			/* si/di - the 16bit halves of esi/edi */
			if (strcmp(x86_gprs[4].composed_of[0]->name,
				name) == 0) {
				return x86_gprs[4].composed_of[0];
			} else if (strcmp(x86_gprs[5].composed_of[0]->name,
				name) == 0) {
				return x86_gprs[5].composed_of[0];
			}
		}
3500 		for (i = 0; i < 4; ++i) {
3501 			if (name[1] == 'x') {
3502 				/* Must be 16bit */
3503 				if (strcmp(x86_gprs[i].composed_of[0]->name,
3504 					name) == 0) {
3505 					return x86_gprs[i].composed_of[0];
3506 				}
3507 			} else {
3508 				/* Must be 8bit */
3509 				if (strcmp(x86_gprs[i].composed_of[0]->
3510 					composed_of[0]->name, name) == 0) {
3511 					return x86_gprs[i].composed_of[0]
3512 						->composed_of[0];
3513 				}
				if (strcmp(x86_gprs[i].composed_of[0]->
					composed_of[1]->name, name) == 0) {
					return x86_gprs[i].composed_of[0]
						->composed_of[1];
				}
3519 			}
3520 		}
3521 		if (strcmp(x86_esp.composed_of[0]->name, name) == 0) {
3522 			return x86_esp.composed_of[0];
3523 		}
3524 		if (strcmp(x86_ebp.composed_of[0]->name, name) == 0) {
			return x86_ebp.composed_of[0];
3526 		}
3527 	} else if (len == 3) {
3528 		for (i = 0; i < N_GPRS; ++i) {
3529 			if (strcmp(x86_gprs[i].name, name) == 0) {
3530 				return &x86_gprs[i];
3531 			}
3532 		}
3533 		if (strcmp(x86_esp.name, name) == 0) {
3534 			return &x86_esp;
3535 		} else if (strcmp(x86_ebp.name, name) == 0) {
3536 			return &x86_ebp;
3537 		}
3538 		if (backend->arch == ARCH_AMD64) {
3539 			for (i = 0; i < N_GPRS; ++i) {
3540 				if (strcmp(amd64_x86_gprs[i].name, name) == 0) {
3541 					return &amd64_x86_gprs[i];
3542 				}
3543 			}
3544 			for (i = 0; i < 8; ++i) {
3545 				if (strcmp(amd64_gprs[i].name, name) == 0) {
3546 					return &amd64_gprs[i];
3547 				}
3548 			}
3549 		}
3550 	}
3551 	return NULL;
3552 }
3553 
3554 /*
3555  * Get suitably sized register for storing item vr, where ch dictates which
3556  * 32bit register to choose from. For use with inline asm constraints
3557  *
 * XXX this does handle AMD64, but not completely
3559  */
3560 static struct reg *
3561 asmvreg_to_reg(
3562 	struct vreg **vr0,
3563 	int ch,
3564 	struct inline_asm_io *io,
3565 	struct icode_list *il,
3566 	int faultin) {
3567 
3568 	struct reg      *r = NULL;
3569 	struct vreg	*vr = *vr0;
3570 	size_t          size = vr->size;
3571 	struct vreg	*newvr;
3572 
3573 	if ((vr->type->code == TY_STRUCT || vr->type->code == TY_UNION)
3574 		&& vr->type->tlist == NULL) {
3575 		errorfl(io->expr->tok,
3576 			"Cannot load struct/union into register");
3577 		return NULL;
3578 	} else if (IS_LLONG(vr->type->code)
3579 		&& vr->type->tlist == NULL
3580 		&& backend->arch == ARCH_X86) {
3581 		errorfl(io->expr->tok,
3582 			"Cannot load long long into register");
3583 		return NULL;
3584 	} else if (vr->type->tlist != NULL) {
3585 		size = backend->arch == ARCH_AMD64? 8: 4;
3586 	}
3587 
3588 	/*
	 * For a/b/c/d/S/D, the input must be moved to a specific register.
	 * For q more or less as well, and for r to any GPR
3591 	 */
3592 	if (ch == 'b') {
3593 		curfunc->callee_save_used |= CSAVE_EBX;
3594 	} else if (ch == 'S') {
3595 		curfunc->callee_save_used |= CSAVE_ESI;
3596 	} else if (ch == 'D') {
3597 		curfunc->callee_save_used |= CSAVE_EDI;
3598 	}
3599 	switch (ch) {
3600 	case 'a': /* eax */
3601 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[0];
3602 		else r = &x86_gprs[0];
3603 		break;
3604 	case 'b': /* ebx */
3605 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[1];
3606 		else r = &x86_gprs[1];
3607 		break;
3608 	case 'c': /* ecx */
3609 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[2];
3610 		else r = &x86_gprs[2];
3611 		break;
3612 	case 'd': /* edx */
3613 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[3];
3614 		else r = &x86_gprs[3];
3615 		break;
3616 	case 'S': /* esi */
3617 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[4];
3618 		else r = &x86_gprs[4];
3619 		break;
3620 	case 'D': /* edi */
3621 		if (backend->arch == ARCH_AMD64) r = &amd64_x86_gprs[5];
3622 		else r = &x86_gprs[5];
3623 		break;
3624 	case 'q':
3625 	case 'Q':
3626 		/* XXX amd64 */
3627 		/* Must be any of eax/ebx/ecx/edx - exclude esi/edi */
3628 		if (backend->arch == ARCH_X86) {
3629 			r = alloc_16_or_32bit_noesiedi(curfunc, 0, il, NULL);
3630 		} else {
3631 			/* XXX maybe need 64bit x86 allocator :-( */
3632 			r = x86_backend.alloc_gpr(curfunc, 0, il, NULL, 0);
3633 			if (is_member_of_reg(&amd64_x86_gprs[0], r)) {
3634 				r = &amd64_x86_gprs[0];
3635 			} else if (is_member_of_reg(&amd64_x86_gprs[1], r)) {
3636 				r = &amd64_x86_gprs[1];
3637 			} else if (is_member_of_reg(&amd64_x86_gprs[2], r)) {
3638 				r = &amd64_x86_gprs[2];
3639 			} else if (is_member_of_reg(&amd64_x86_gprs[3], r)) {
3640 				r = &amd64_x86_gprs[3];
3641 			}
3642 		}
3643 		break;
3644 	case 'r':
3645 		if (size == 1) {
3646 			if (backend->arch == ARCH_X86) {
3647 				/* esi/edi have no 1byte sub registers ... */
3648 				r = alloc_16_or_32bit_noesiedi(curfunc, 1,
3649 					il, NULL);
3650 			} else {
3651 				/* amd64 */
3652 				r = ALLOC_GPR(curfunc, 1, il, NULL);
3653 			}
3654 		} else {
3655 			if (backend->arch == ARCH_X86) {
3656 				r = alloc_16_or_32bit_reg(curfunc, size,
3657 					il, NULL);
3658 			} else {
3659 				r = ALLOC_GPR(curfunc, size, il, NULL);
3660 			}
3661 		}
3662 		break;
3663 	default:
3664 		printf("BAD CHAR FOR asmvreg_to_reg(): %c(%d)\n", ch, ch);
3665 		abort();
3666 	}
3667 
3668 	if (r == NULL) {
3669 		errorfl(io->expr->tok, "Too many inline asm operands - "
3670 			"cannot allocate register");
3671 		return NULL;
3672 	} else if (faultin && !reg_allocatable(r)) {
3673 		/*
3674 		 * XXX this isn't quite correct... use of ``faultin'' above
3675 		 * causes output registers to be assigned even if those are
3676 		 * used for input, which is good. Problem is that clobbered
3677 		 * registers should not be used for output.
3678 		 */
3679 		errorfl(io->expr->tok, "Cannot allocate %s (in clobber list?)",
3680 			r->name);
3681 		return NULL;
3682 	}
3683 	free_preg(r, il, 1, 1);
3684 	if (size == 1 && (ch == 'S' || ch == 'D')
3685 		&& backend->arch == ARCH_X86) {
3686 		errorfl(io->expr->tok,
3687 			"Cannot store 1-byte item to "
3688 			"%s", r->name);
3689 		return NULL;
3690 	} else if (size != r->size) {
3691 		if (r->size == 8) {
3692 			/* amd64 */
3693 			r = r->composed_of[0];
3694 		}
3695 
3696 		if (size == 1) {
3697 			r = r->composed_of[0]->composed_of[0];
3698 		} else if (size == 2) {
3699 			r = r->composed_of[0];
3700 		} else if (size == 4) {
3701 			/* amd64 - 64 to 32 bit, already done above */
3702 			;
3703 		}
3704 	}
3705 
3706 	newvr = vreg_disconnect(vr);
3707 
3708 	if (faultin) {
3709 		vreg_faultin(r, NULL, newvr, il, 0);
3710 		reg_set_unallocatable(r);
3711 	}
3712 	*vr0 = newvr;
3713 	return r;
3714 }
3715 
3716 static char *
3717 get_inlineasm_label(const char *tmpl) {
3718 	char	*ret = n_xmalloc(strlen(tmpl) + sizeof "inlasm");
3719 	sprintf(ret, "inlasm%s", tmpl);
3720 	return ret;
3721 }
3722 
3723 /*
3724  * Print inline asm instruction operand
3725  */
3726 void
3727 print_asmitem_x86(FILE *out, void *item, int item_type, int postfix, int a) {
3728 	char			*p = NULL;
3729 	struct reg		*r = NULL;
3730 	struct gas_token	*gt;
3731 	struct inline_asm_io	*io;
3732 	int			idx;
3733 	int			applied_constraint = 0;
3734 
3735 	switch (item_type) {
3736 	case ITEM_NUMBER:
3737 		if (a == TO_GAS) x_fputc('$', out);
3738 		gt = item;
3739 		p = gt->data;
3740 		break;
3741 	case ITEM_REG:
3742 		if (a == TO_GAS) x_fputc('%', out);
3743 		gt = item;
3744 		p = gt->data;
3745 		break;
3746 	case ITEM_SUBREG_B:
3747 	case ITEM_SUBREG_H:
3748 	case ITEM_SUBREG_W:
3749 		io = item;
3750 		if (io->outreg) {
3751 			r = io->outreg;
3752 		} else if (io->inreg) {
3753 			r = io->inreg;
3754 		} else {
3755 			r = io->vreg->pregs[0];
3756 		}
3757 		if (r == NULL/* || r->vreg != io->vreg*/) { /* XXX!!! */
3758 			errorfl(io->expr->tok,
3759 			"Operand not in register but used with %h or %b");
3760 			return;
3761 		}
3762 
3763 		if (backend->arch == ARCH_X86) {
3764 			if (!is_member_of_reg(&x86_gprs[0], r)
3765 				&& !is_member_of_reg(&x86_gprs[1], r)
3766 				&& !is_member_of_reg(&x86_gprs[2], r)
3767 				&& !is_member_of_reg(&x86_gprs[3], r)) {
3768 				errorfl(io->expr->tok,
3769 		"`%s' does not have a 8bit register for use with %%h or %%b",
3770 					r->name);
3771 				return;
3772 			}
3773 		} else {
3774 			/* AMD64 */
3775 			int	i;
3776 
3777 			for (i = 0; i < 4; ++i) {
3778 				if (is_member_of_reg(&amd64_x86_gprs[i], r)) {
3779 					break;
3780 				}
3781 			}
3782 			if (i == 4) {
3783 				for (i = 8; i < 16; ++i) {
3784 					if (is_member_of_reg(&amd64_gprs[i],
3785 						r)) {
3786 						errorfl(io->expr->tok,
3787 				"`%s' doesn't make sense with %%h or %%b",
3788 					r->name);
3789 						return;
3790 					}
3791 				}
3792 				if (i == 16) {
3793 					errorfl(io->expr->tok,
3794 		"`%s' does not have a 8bit register for use with %h or %b",
3795 						r->name);
3796 					return;
3797 				}
3798 			}
3799 		}
3800 		if (item_type == ITEM_SUBREG_B) {
3801 			idx = 1;
3802 		} else {
3803 			idx = 0;
3804 		}
3805 		if (r->size == 2) {
3806 			if (item_type == ITEM_SUBREG_W) {
3807 				; /* OK - already 16bit */
3808 			} else {
3809 				r = r->composed_of[idx];
3810 			}
3811 		} else if (r->size == 1) {
3812 			/*
3813 			 * XXX this unimpl() was probably here because
3814 			 * I didn't know what this means if used with
3815 			 * 8 bit regs!
3816 			 */
3817 #if 0
3818 			unimpl();
3819 #endif
3820 		} else {
3821 			/* Must be 4 */
3822 			if (item_type == ITEM_SUBREG_W) {
3823 				r = r->composed_of[0];
3824 			} else {
3825 				r = r->composed_of[0]->composed_of[idx];
3826 			}
3827 		}
3828 		if (a == TO_GAS) x_fputc('%', out);
3829 		x_fprintf(out, "%s", r->name);
3830 		break;
	case ITEM_VARIABLE:
		gt = item;
		if (a == TO_NASM) {
			x_fputc('$', out);
		}
		p = gt->data;
		break;
	case ITEM_LABEL:
		x_fprintf(out, ".%s", item);
		break;
	case ITEM_INPUT:
	case ITEM_OUTPUT:
		io = item;
		for (p = io->constraints; *p != 0; ++p) {
			struct vreg	*vr = io->vreg;

			r = NULL;
			/*
			 * If this constraint uses a register (even with
			 * "m" we may have a register holding a pointer
			 * value), map it to the vreg
			 */
			if (strchr("qrabcdSDm", *p) != 0) {
				if (item_type == ITEM_INPUT) {
					r = io->inreg;
				} else {
					/* Output */
					r = io->outreg;
				}
				if (vr->from_ptr != NULL) {
					/*
					 * Register is pointer value
					 */
					backend_vreg_map_preg(vr->from_ptr, r);
				} else {
					/* Register references vreg */
					backend_vreg_map_preg(vr, r);
				}
			}

			if (*p == '+' || *p == '=' || *p == '&') {
				continue;
			} else if (applied_constraint) {
				/*
				 * 05/17/09: For things like "rm", after
				 * having chosen r, we do not want to print
				 * an m item as well, because it's just one
				 * operand.
				 * XXX Here we always use the first one,
				 * i.e. r in "rm" and m in "mr". Probably
				 * should pick it depending on the other
				 * instruction operands
				 */
				continue;
			} else if (strchr("qrabcdSD", *p) != 0) {
				if (a == TO_GAS) x_fputc('%', out);
				if (item_type == ITEM_INPUT) {
					r = io->inreg;
				} else {
					/* Output */
					r = io->outreg;
				}
				x_fprintf(out, "%s", r->name);
			} else if (*p == 'm') {
				if (postfix != 0 && a == TO_NASM) {
					char	*size_str = NULL;

					switch (postfix) {
					/*
					 * XXX what about floating point
					 * l and t :(
					 */
					case 'b': size_str = "byte"; break;
					case 'w': size_str = "word"; break;
					case 'l': size_str = "dword"; break;
					case 'q': size_str = "qword"; break;
					default:
						  unimpl();
					}
					x_fprintf(out, "%s ", size_str);
				}
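				/*
				 * Illustrative: in GAS syntax the "l" in
				 * e.g. "incl -8(%ebp)" already conveys the
				 * operand size, whereas NASM has no size
				 * suffixes, so the explicit prefix printed
				 * above is needed, e.g. "inc dword [ebp-8]".
				 */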
				emit->print_mem_operand(io->vreg, NULL);
			} else if (*p == 'i') {
				if (eval_const_expr(io->expr, 0, NULL) != 0) {
					return;
				}
				if (io->vreg->type->sign != TOK_KEY_UNSIGNED) {
#if 0
					x_fprintf(out, "%ld",
						*(long *)io->expr->const_value
						->value);
#endif
					cross_print_value_by_type(out,
						io->expr->const_value->value,
						TY_LONG, 'd');
				} else {
#if 0
					x_fprintf(out, "%lu",
						*(long *)io->expr->const_value
						->value);
#endif
					cross_print_value_by_type(out,
						io->expr->const_value->value,
						TY_ULONG, 'd');
				}
			} else if (*p == 'o') {
				unimpl();
			} else if (*p == 'v') {
				unimpl();
			} else {
				printf("BUG: unhandled inline asm "
					"constraint `%c'\n", *p);
				unimpl();
			}
			applied_constraint = 1;

			if (r != NULL) {
				backend_vreg_unmap_preg(r);
			}
		}
		p = NULL;
	}

	if (p != NULL) {
		x_fprintf(out, "%s", p);
	}
}

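/*
 * Report whether `op' applied to an object of type `ty' can take its
 * second operand as an immediate encoded in the instruction itself
 * (e.g. "and $0xff, %eax") instead of having it loaded into a
 * register first. Bitwise and shift operators qualify; long long on
 * x86 does not, presumably because such an object is spread across
 * two GPRs and the operation has to be synthesized.
 */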
int
x86_have_immediate_op(struct type *ty, int op) {
	if (Oflag == -1) { /* XXX really want this here? */
		return 0;
	}
	if (op == TOK_OP_BSHL
		|| op == TOK_OP_BSHR
		|| op == TOK_OP_BAND
		|| op == TOK_OP_BOR
		|| op == TOK_OP_BXOR
		|| op == TOK_OP_COBSHL
		|| op == TOK_OP_COBSHR
		|| op == TOK_OP_COBOR
		|| op == TOK_OP_COBAND
		|| op == TOK_OP_COBXOR) {
		if (backend->arch == ARCH_X86
			&& IS_LLONG(ty->code)) {
			return 0;
		}
		return 1;
	}
	return 0;
}
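
/*
 * Illustrative sketch: for "x <<= 3" with unsigned int x, the backend
 * can emit "shl $3, %eax" directly, so the function above returns 1.
 * For an unsigned long long on 32-bit x86 the value lives in two GPRs
 * and the shift must be synthesized across both, so it returns 0 and
 * the operand presumably goes through the regular register path
 * instead.
 */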

struct backend x86_backend = {
	ARCH_X86,
	0, /* ABI */
	0, /* multi_gpr_object */
	4, /* structure alignment */
	1, /* need pic initialization (ebx) */
	0, /* emulate long double */
	0, /* relax alloc gpr order */
	0, /* max displacement */
	0, /* min displacement */
	x86_have_immediate_op,
	init,
	is_multi_reg_obj,
	get_ptr_size,
	get_size_t,
	get_uintptr_t,
	get_wchar_t,
	get_sizeof_basic,
	get_sizeof_type,
	get_sizeof_elem_type,
	get_sizeof_decl,
	get_sizeof_const,
	get_sizeof_vla_type,
	get_align_type,
	gen_function,
#if XLATE_IMMEDIATELY
	gen_prepare_output,
	gen_finish_output,
#else
	gen_program,
#endif
	NULL,
	&x86_esp,
	invalidate_gprs,
	invalidate_except,
	alloc_gpr,
	alloc_16_or_32bit_noesiedi,
	alloc_fpr,
	x86_free_preg,
	icode_make_fcall,
	icode_make_return,
	NULL,
	icode_prepare_op,
	NULL, /* prepare_load_addrlabel */
	icode_make_cast,
	NULL, /* icode_make_structreloc */
	icode_initialize_pic,
	icode_complete_func,
	make_null_block,
	make_init_name,
	debug_print_gprs,
	name_to_reg,
	asmvreg_to_reg,
	get_inlineasm_label,
	do_ret,
	get_abi_reg,
	get_abi_ret_reg,
	generic_same_representation
};