xref: /qemu/tcg/ppc/tcg-target.c.inc (revision 86d063fa)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27#include "../tcg-ldst.c.inc"
28
29/*
30 * Standardize on the _CALL_FOO symbols used by GCC:
31 * Apple XCode does not define _CALL_DARWIN.
32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
33 */
34#if !defined(_CALL_SYSV) && \
35    !defined(_CALL_DARWIN) && \
36    !defined(_CALL_AIX) && \
37    !defined(_CALL_ELF)
38# if defined(__APPLE__)
39#  define _CALL_DARWIN
40# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
41#  define _CALL_SYSV
42# else
43#  error "Unknown ABI"
44# endif
45#endif
46
47#if TCG_TARGET_REG_BITS == 64
48# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
49# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
50#else
51# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
52# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
53#endif
54#ifdef _CALL_SYSV
55# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
56# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
57#else
58# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
59# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
60#endif
61
62/* For some memory operations, we need a scratch that isn't R0.  For the AIX
63   calling convention, we can re-use the TOC register since we'll be reloading
64   it at every call.  Otherwise R12 will do nicely as neither a call-saved
65   register nor a parameter register.  */
66#ifdef _CALL_AIX
67# define TCG_REG_TMP1   TCG_REG_R2
68#else
69# define TCG_REG_TMP1   TCG_REG_R12
70#endif
71
72#define TCG_VEC_TMP1    TCG_REG_V0
73#define TCG_VEC_TMP2    TCG_REG_V1
74
75#define TCG_REG_TB     TCG_REG_R31
76#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
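/*
 * TCG_REG_TB (r31) holds the address of the start of the current
 * translation block on 64-bit hosts, so that nearby code addresses and
 * constant pool entries can be formed TB-relative with short
 * ADDI/ADDIS sequences (see tcg_out_movi_int and tcg_out_dupi_vec).
 */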
77
78/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
79#define SZP  ((int)sizeof(void *))
80
81/* Shorthand for size of a register.  */
82#define SZR  (TCG_TARGET_REG_BITS / 8)
83
84#define TCG_CT_CONST_S16  0x100
85#define TCG_CT_CONST_U16  0x200
86#define TCG_CT_CONST_S32  0x400
87#define TCG_CT_CONST_U32  0x800
88#define TCG_CT_CONST_ZERO 0x1000
89#define TCG_CT_CONST_MONE 0x2000
90#define TCG_CT_CONST_WSZ  0x4000
91
92#define ALL_GENERAL_REGS  0xffffffffu
93#define ALL_VECTOR_REGS   0xffffffff00000000ull
94
95#ifdef CONFIG_SOFTMMU
96#define ALL_QLOAD_REGS \
97    (ALL_GENERAL_REGS & \
98     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
99#define ALL_QSTORE_REGS \
100    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
101                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
102#else
103#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
104#define ALL_QSTORE_REGS ALL_QLOAD_REGS
105#endif
106
107TCGPowerISA have_isa;
108static bool have_isel;
109bool have_altivec;
110bool have_vsx;
111
112#ifndef CONFIG_SOFTMMU
113#define TCG_GUEST_BASE_REG 30
114#endif
115
116#ifdef CONFIG_DEBUG_TCG
117static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
118    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
119    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
120    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
121    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
122    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
123    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
124    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
125    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
126};
127#endif
128
129static const int tcg_target_reg_alloc_order[] = {
130    TCG_REG_R14,  /* call saved registers */
131    TCG_REG_R15,
132    TCG_REG_R16,
133    TCG_REG_R17,
134    TCG_REG_R18,
135    TCG_REG_R19,
136    TCG_REG_R20,
137    TCG_REG_R21,
138    TCG_REG_R22,
139    TCG_REG_R23,
140    TCG_REG_R24,
141    TCG_REG_R25,
142    TCG_REG_R26,
143    TCG_REG_R27,
144    TCG_REG_R28,
145    TCG_REG_R29,
146    TCG_REG_R30,
147    TCG_REG_R31,
148    TCG_REG_R12,  /* call clobbered, non-arguments */
149    TCG_REG_R11,
150    TCG_REG_R2,
151    TCG_REG_R13,
152    TCG_REG_R10,  /* call clobbered, arguments */
153    TCG_REG_R9,
154    TCG_REG_R8,
155    TCG_REG_R7,
156    TCG_REG_R6,
157    TCG_REG_R5,
158    TCG_REG_R4,
159    TCG_REG_R3,
160
161    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
162    TCG_REG_V2,   /* call clobbered, vectors */
163    TCG_REG_V3,
164    TCG_REG_V4,
165    TCG_REG_V5,
166    TCG_REG_V6,
167    TCG_REG_V7,
168    TCG_REG_V8,
169    TCG_REG_V9,
170    TCG_REG_V10,
171    TCG_REG_V11,
172    TCG_REG_V12,
173    TCG_REG_V13,
174    TCG_REG_V14,
175    TCG_REG_V15,
176    TCG_REG_V16,
177    TCG_REG_V17,
178    TCG_REG_V18,
179    TCG_REG_V19,
180};
181
182static const int tcg_target_call_iarg_regs[] = {
183    TCG_REG_R3,
184    TCG_REG_R4,
185    TCG_REG_R5,
186    TCG_REG_R6,
187    TCG_REG_R7,
188    TCG_REG_R8,
189    TCG_REG_R9,
190    TCG_REG_R10
191};
192
193static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
194{
195    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
196    tcg_debug_assert(slot >= 0 && slot <= 1);
197    return TCG_REG_R3 + slot;
198}
199
200static const int tcg_target_callee_save_regs[] = {
201#ifdef _CALL_DARWIN
202    TCG_REG_R11,
203#endif
204    TCG_REG_R14,
205    TCG_REG_R15,
206    TCG_REG_R16,
207    TCG_REG_R17,
208    TCG_REG_R18,
209    TCG_REG_R19,
210    TCG_REG_R20,
211    TCG_REG_R21,
212    TCG_REG_R22,
213    TCG_REG_R23,
214    TCG_REG_R24,
215    TCG_REG_R25,
216    TCG_REG_R26,
217    TCG_REG_R27, /* currently used for the global env */
218    TCG_REG_R28,
219    TCG_REG_R29,
220    TCG_REG_R30,
221    TCG_REG_R31
222};
223
224static inline bool in_range_b(tcg_target_long target)
225{
226    return target == sextract64(target, 0, 26);
227}
228
229static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
230                               const tcg_insn_unit *target)
231{
232    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
233    tcg_debug_assert(in_range_b(disp));
234    return disp & 0x3fffffc;
235}
236
237static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
238{
239    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
240    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
241
242    if (in_range_b(disp)) {
243        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
244        return true;
245    }
246    return false;
247}
248
249static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
250                               const tcg_insn_unit *target)
251{
252    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
253    tcg_debug_assert(disp == (int16_t) disp);
254    return disp & 0xfffc;
255}
256
257static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
258{
259    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
260    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
261
262    if (disp == (int16_t) disp) {
263        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
264        return true;
265    }
266    return false;
267}
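/*
 * Example: the I-form branch ("b") holds a 26-bit signed byte
 * displacement in the low bits of the insn, with bits 1:0 always zero
 * since insns are word aligned; a branch forward by 0x1234 bytes
 * patches (0x1234 & 0x3fffffc) = 0x1234 into the insn, and in_range_b
 * accepts anything within +/- 32 MiB.  Conditional (B-form) branches
 * use the same scheme with a 16-bit displacement, handled above by
 * reloc_pc14.
 */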
268
269/* test if a constant matches the constraint */
270static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
271{
272    if (ct & TCG_CT_CONST) {
273        return 1;
274    }
275
276    /* The only 32-bit constraint we use aside from
277       TCG_CT_CONST is TCG_CT_CONST_S16.  */
278    if (type == TCG_TYPE_I32) {
279        val = (int32_t)val;
280    }
281
282    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
283        return 1;
284    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
285        return 1;
286    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
287        return 1;
288    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
289        return 1;
290    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
291        return 1;
292    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
293        return 1;
294    } else if ((ct & TCG_CT_CONST_WSZ)
295               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
296        return 1;
297    }
298    return 0;
299}
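/*
 * For example, an operand constrained by TCG_CT_CONST_S16 accepts
 * -32768..32767, the range of the signed D-form immediate, while
 * TCG_CT_CONST_WSZ accepts only the operand width itself (32 or 64),
 * matching the clz/ctz "input was zero" case handled in tcg_out_cntxz.
 */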
300
301#define OPCD(opc) ((opc)<<26)
302#define XO19(opc) (OPCD(19)|((opc)<<1))
303#define MD30(opc) (OPCD(30)|((opc)<<2))
304#define MDS30(opc) (OPCD(30)|((opc)<<1))
305#define XO31(opc) (OPCD(31)|((opc)<<1))
306#define XO58(opc) (OPCD(58)|(opc))
307#define XO62(opc) (OPCD(62)|(opc))
308#define VX4(opc)  (OPCD(4)|(opc))
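/*
 * Example: "add" is primary opcode 31 with extended opcode 266, so
 * XO31(266) = (31 << 26) | (266 << 1) = 0x7c000214; the RT/RA/RB
 * fields defined further down are then OR-ed into the insn word.
 */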
309
310#define B      OPCD( 18)
311#define BC     OPCD( 16)
312#define LBZ    OPCD( 34)
313#define LHZ    OPCD( 40)
314#define LHA    OPCD( 42)
315#define LWZ    OPCD( 32)
316#define LWZUX  XO31( 55)
317#define STB    OPCD( 38)
318#define STH    OPCD( 44)
319#define STW    OPCD( 36)
320
321#define STD    XO62(  0)
322#define STDU   XO62(  1)
323#define STDX   XO31(149)
324
325#define LD     XO58(  0)
326#define LDX    XO31( 21)
327#define LDU    XO58(  1)
328#define LDUX   XO31( 53)
329#define LWA    XO58(  2)
330#define LWAX   XO31(341)
331
332#define ADDIC  OPCD( 12)
333#define ADDI   OPCD( 14)
334#define ADDIS  OPCD( 15)
335#define ORI    OPCD( 24)
336#define ORIS   OPCD( 25)
337#define XORI   OPCD( 26)
338#define XORIS  OPCD( 27)
339#define ANDI   OPCD( 28)
340#define ANDIS  OPCD( 29)
341#define MULLI  OPCD(  7)
342#define CMPLI  OPCD( 10)
343#define CMPI   OPCD( 11)
344#define SUBFIC OPCD( 8)
345
346#define LWZU   OPCD( 33)
347#define STWU   OPCD( 37)
348
349#define RLWIMI OPCD( 20)
350#define RLWINM OPCD( 21)
351#define RLWNM  OPCD( 23)
352
353#define RLDICL MD30(  0)
354#define RLDICR MD30(  1)
355#define RLDIMI MD30(  3)
356#define RLDCL  MDS30( 8)
357
358#define BCLR   XO19( 16)
359#define BCCTR  XO19(528)
360#define CRAND  XO19(257)
361#define CRANDC XO19(129)
362#define CRNAND XO19(225)
363#define CROR   XO19(449)
364#define CRNOR  XO19( 33)
365
366#define EXTSB  XO31(954)
367#define EXTSH  XO31(922)
368#define EXTSW  XO31(986)
369#define ADD    XO31(266)
370#define ADDE   XO31(138)
371#define ADDME  XO31(234)
372#define ADDZE  XO31(202)
373#define ADDC   XO31( 10)
374#define AND    XO31( 28)
375#define SUBF   XO31( 40)
376#define SUBFC  XO31(  8)
377#define SUBFE  XO31(136)
378#define SUBFME XO31(232)
379#define SUBFZE XO31(200)
380#define OR     XO31(444)
381#define XOR    XO31(316)
382#define MULLW  XO31(235)
383#define MULHW  XO31( 75)
384#define MULHWU XO31( 11)
385#define DIVW   XO31(491)
386#define DIVWU  XO31(459)
387#define MODSW  XO31(779)
388#define MODUW  XO31(267)
389#define CMP    XO31(  0)
390#define CMPL   XO31( 32)
391#define LHBRX  XO31(790)
392#define LWBRX  XO31(534)
393#define LDBRX  XO31(532)
394#define STHBRX XO31(918)
395#define STWBRX XO31(662)
396#define STDBRX XO31(660)
397#define MFSPR  XO31(339)
398#define MTSPR  XO31(467)
399#define SRAWI  XO31(824)
400#define NEG    XO31(104)
401#define MFCR   XO31( 19)
402#define MFOCRF (MFCR | (1u << 20))
403#define NOR    XO31(124)
404#define CNTLZW XO31( 26)
405#define CNTLZD XO31( 58)
406#define CNTTZW XO31(538)
407#define CNTTZD XO31(570)
408#define CNTPOPW XO31(378)
409#define CNTPOPD XO31(506)
410#define ANDC   XO31( 60)
411#define ORC    XO31(412)
412#define EQV    XO31(284)
413#define NAND   XO31(476)
414#define ISEL   XO31( 15)
415
416#define MULLD  XO31(233)
417#define MULHD  XO31( 73)
418#define MULHDU XO31(  9)
419#define DIVD   XO31(489)
420#define DIVDU  XO31(457)
421#define MODSD  XO31(777)
422#define MODUD  XO31(265)
423
424#define LBZX   XO31( 87)
425#define LHZX   XO31(279)
426#define LHAX   XO31(343)
427#define LWZX   XO31( 23)
428#define STBX   XO31(215)
429#define STHX   XO31(407)
430#define STWX   XO31(151)
431
432#define EIEIO  XO31(854)
433#define HWSYNC XO31(598)
434#define LWSYNC (HWSYNC | (1u << 21))
435
436#define SPR(a, b) ((((a)<<5)|(b))<<11)
437#define LR     SPR(8, 0)
438#define CTR    SPR(9, 0)
439
440#define SLW    XO31( 24)
441#define SRW    XO31(536)
442#define SRAW   XO31(792)
443
444#define SLD    XO31( 27)
445#define SRD    XO31(539)
446#define SRAD   XO31(794)
447#define SRADI  XO31(413<<1)
448
449#define BRH    XO31(219)
450#define BRW    XO31(155)
451#define BRD    XO31(187)
452
453#define TW     XO31( 4)
454#define TRAP   (TW | TO(31))
455
456#define NOP    ORI  /* ori 0,0,0 */
457
458#define LVX        XO31(103)
459#define LVEBX      XO31(7)
460#define LVEHX      XO31(39)
461#define LVEWX      XO31(71)
462#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
463#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
464#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
465#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
466#define LXSD       (OPCD(57) | 2)   /* v3.00 */
467#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
468
469#define STVX       XO31(231)
470#define STVEWX     XO31(199)
471#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
472#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
473#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
474#define STXSD      (OPCD(61) | 2)   /* v3.00 */
475
476#define VADDSBS    VX4(768)
477#define VADDUBS    VX4(512)
478#define VADDUBM    VX4(0)
479#define VADDSHS    VX4(832)
480#define VADDUHS    VX4(576)
481#define VADDUHM    VX4(64)
482#define VADDSWS    VX4(896)
483#define VADDUWS    VX4(640)
484#define VADDUWM    VX4(128)
485#define VADDUDM    VX4(192)       /* v2.07 */
486
487#define VSUBSBS    VX4(1792)
488#define VSUBUBS    VX4(1536)
489#define VSUBUBM    VX4(1024)
490#define VSUBSHS    VX4(1856)
491#define VSUBUHS    VX4(1600)
492#define VSUBUHM    VX4(1088)
493#define VSUBSWS    VX4(1920)
494#define VSUBUWS    VX4(1664)
495#define VSUBUWM    VX4(1152)
496#define VSUBUDM    VX4(1216)      /* v2.07 */
497
498#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
499#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
500
501#define VMAXSB     VX4(258)
502#define VMAXSH     VX4(322)
503#define VMAXSW     VX4(386)
504#define VMAXSD     VX4(450)       /* v2.07 */
505#define VMAXUB     VX4(2)
506#define VMAXUH     VX4(66)
507#define VMAXUW     VX4(130)
508#define VMAXUD     VX4(194)       /* v2.07 */
509#define VMINSB     VX4(770)
510#define VMINSH     VX4(834)
511#define VMINSW     VX4(898)
512#define VMINSD     VX4(962)       /* v2.07 */
513#define VMINUB     VX4(514)
514#define VMINUH     VX4(578)
515#define VMINUW     VX4(642)
516#define VMINUD     VX4(706)       /* v2.07 */
517
518#define VCMPEQUB   VX4(6)
519#define VCMPEQUH   VX4(70)
520#define VCMPEQUW   VX4(134)
521#define VCMPEQUD   VX4(199)       /* v2.07 */
522#define VCMPGTSB   VX4(774)
523#define VCMPGTSH   VX4(838)
524#define VCMPGTSW   VX4(902)
525#define VCMPGTSD   VX4(967)       /* v2.07 */
526#define VCMPGTUB   VX4(518)
527#define VCMPGTUH   VX4(582)
528#define VCMPGTUW   VX4(646)
529#define VCMPGTUD   VX4(711)       /* v2.07 */
530#define VCMPNEB    VX4(7)         /* v3.00 */
531#define VCMPNEH    VX4(71)        /* v3.00 */
532#define VCMPNEW    VX4(135)       /* v3.00 */
533
534#define VSLB       VX4(260)
535#define VSLH       VX4(324)
536#define VSLW       VX4(388)
537#define VSLD       VX4(1476)      /* v2.07 */
538#define VSRB       VX4(516)
539#define VSRH       VX4(580)
540#define VSRW       VX4(644)
541#define VSRD       VX4(1732)      /* v2.07 */
542#define VSRAB      VX4(772)
543#define VSRAH      VX4(836)
544#define VSRAW      VX4(900)
545#define VSRAD      VX4(964)       /* v2.07 */
546#define VRLB       VX4(4)
547#define VRLH       VX4(68)
548#define VRLW       VX4(132)
549#define VRLD       VX4(196)       /* v2.07 */
550
551#define VMULEUB    VX4(520)
552#define VMULEUH    VX4(584)
553#define VMULEUW    VX4(648)       /* v2.07 */
554#define VMULOUB    VX4(8)
555#define VMULOUH    VX4(72)
556#define VMULOUW    VX4(136)       /* v2.07 */
557#define VMULUWM    VX4(137)       /* v2.07 */
558#define VMULLD     VX4(457)       /* v3.10 */
559#define VMSUMUHM   VX4(38)
560
561#define VMRGHB     VX4(12)
562#define VMRGHH     VX4(76)
563#define VMRGHW     VX4(140)
564#define VMRGLB     VX4(268)
565#define VMRGLH     VX4(332)
566#define VMRGLW     VX4(396)
567
568#define VPKUHUM    VX4(14)
569#define VPKUWUM    VX4(78)
570
571#define VAND       VX4(1028)
572#define VANDC      VX4(1092)
573#define VNOR       VX4(1284)
574#define VOR        VX4(1156)
575#define VXOR       VX4(1220)
576#define VEQV       VX4(1668)      /* v2.07 */
577#define VNAND      VX4(1412)      /* v2.07 */
578#define VORC       VX4(1348)      /* v2.07 */
579
580#define VSPLTB     VX4(524)
581#define VSPLTH     VX4(588)
582#define VSPLTW     VX4(652)
583#define VSPLTISB   VX4(780)
584#define VSPLTISH   VX4(844)
585#define VSPLTISW   VX4(908)
586
587#define VSLDOI     VX4(44)
588
589#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
590#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
591#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
592
593#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
594#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
595#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
596#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
597#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
598#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
599
600#define RT(r) ((r)<<21)
601#define RS(r) ((r)<<21)
602#define RA(r) ((r)<<16)
603#define RB(r) ((r)<<11)
604#define TO(t) ((t)<<21)
605#define SH(s) ((s)<<11)
606#define MB(b) ((b)<<6)
607#define ME(e) ((e)<<1)
608#define BO(o) ((o)<<21)
609#define MB64(b) ((b)<<5)
610#define FXM(b) (1 << (19 - (b)))
611
612#define VRT(r)  (((r) & 31) << 21)
613#define VRA(r)  (((r) & 31) << 16)
614#define VRB(r)  (((r) & 31) << 11)
615#define VRC(r)  (((r) & 31) <<  6)
616
617#define LK    1
618
619#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
620#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
621#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
622#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
623
624#define BF(n)    ((n)<<23)
625#define BI(n, c) (((c)+((n)*4))<<16)
626#define BT(n, c) (((c)+((n)*4))<<21)
627#define BA(n, c) (((c)+((n)*4))<<16)
628#define BB(n, c) (((c)+((n)*4))<<11)
629#define BC_(n, c) (((c)+((n)*4))<<6)
630
631#define BO_COND_TRUE  BO(12)
632#define BO_COND_FALSE BO( 4)
633#define BO_ALWAYS     BO(20)
634
635enum {
636    CR_LT,
637    CR_GT,
638    CR_EQ,
639    CR_SO
640};
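/*
 * BO selects the branch behaviour: BO(12) branches if the tested CR bit
 * is set, BO(4) if it is clear, BO(20) unconditionally.  BI selects the
 * bit: BI(7, CR_EQ) is CR bit 4 * 7 + 2 = 30, the EQ bit of CR field 7,
 * which is the field tcg_out_cmp below normally targets.
 */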
641
642static const uint32_t tcg_to_bc[] = {
643    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
644    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
645    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
646    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
647    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
648    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
649    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
650    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
651    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
652    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
653};
654
655/* The low bit here is set if the RA and RB fields must be inverted.  */
656static const uint32_t tcg_to_isel[] = {
657    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
658    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
659    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
660    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
661    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
662    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
663    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
664    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
665    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
666    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
667};
668
669static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
670                        intptr_t value, intptr_t addend)
671{
672    const tcg_insn_unit *target;
673    int16_t lo;
674    int32_t hi;
675
676    value += addend;
677    target = (const tcg_insn_unit *)value;
678
679    switch (type) {
680    case R_PPC_REL14:
681        return reloc_pc14(code_ptr, target);
682    case R_PPC_REL24:
683        return reloc_pc24(code_ptr, target);
684    case R_PPC_ADDR16:
685        /*
686         * We are (slightly) abusing this relocation type.  In particular,
687         * assert that the low 2 bits are zero, and do not modify them.
688         * That way we can use this with LD et al that have opcode bits
689         * in the low 2 bits of the insn.
690         */
691        if ((value & 3) || value != (int16_t)value) {
692            return false;
693        }
694        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
695        break;
696    case R_PPC_ADDR32:
697        /*
698         * We are abusing this relocation type.  Again, this points to
699         * a pair of insns, lis + load.  This is an absolute address
700         * relocation for PPC32 so the lis cannot be removed.
701         */
702        lo = value;
703        hi = value - lo;
704        if (hi + lo != value) {
705            return false;
706        }
707        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
708        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
709        break;
710    default:
711        g_assert_not_reached();
712    }
713    return true;
714}
715
716static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
717                             TCGReg base, tcg_target_long offset);
718
719static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
720{
721    if (ret == arg) {
722        return true;
723    }
724    switch (type) {
725    case TCG_TYPE_I64:
726        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
727        /* fallthru */
728    case TCG_TYPE_I32:
729        if (ret < TCG_REG_V0) {
730            if (arg < TCG_REG_V0) {
731                tcg_out32(s, OR | SAB(arg, ret, arg));
732                break;
733            } else if (have_isa_2_07) {
734                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
735                          | VRT(arg) | RA(ret));
736                break;
737            } else {
738                /* Altivec does not support vector->integer moves.  */
739                return false;
740            }
741        } else if (arg < TCG_REG_V0) {
742            if (have_isa_2_07) {
743                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
744                          | VRT(ret) | RA(arg));
745                break;
746            } else {
747                /* Altivec does not support integer->vector moves.  */
748                return false;
749            }
750        }
751        /* fallthru */
752    case TCG_TYPE_V64:
753    case TCG_TYPE_V128:
754        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
755        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
756        break;
757    default:
758        g_assert_not_reached();
759    }
760    return true;
761}
762
763static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
764                               int sh, int mb)
765{
766    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
767    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
768    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
769    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
770}
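/*
 * MD-form insns split their 6-bit SH and MB/ME operands: SH[4:0] goes
 * in the usual SH position and SH[5] in insn bit 1, while the mask
 * value is stored with its top bit in the low bit of the 6-bit field.
 * E.g. tcg_out_ext32u below is RLDICL with sh = 0, mb = 32, i.e.
 * "rldicl rd,rs,0,32" -- the canonical clrldi zero-extension.
 */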
771
772static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
773                               int sh, int mb, int me)
774{
775    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
776}
777
778static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
779{
780    tcg_out32(s, EXTSB | RA(dst) | RS(src));
781}
782
783static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
784{
785    tcg_out32(s, EXTSH | RA(dst) | RS(src));
786}
787
788static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
789{
790    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
791}
792
793static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
794{
795    tcg_out32(s, EXTSW | RA(dst) | RS(src));
796}
797
798static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
799{
800    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
801}
802
803static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
804{
805    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
806}
807
808static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
809{
810    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
811}
812
813static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
814{
815    /* Limit immediate shift count lest we create an illegal insn.  */
816    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
817}
818
819static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
820{
821    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
822}
823
824static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
825{
826    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
827}
828
829static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
830{
831    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
832}
833
834static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
835{
836    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
837
838    if (have_isa_3_10) {
839        tcg_out32(s, BRH | RA(dst) | RS(src));
840        if (flags & TCG_BSWAP_OS) {
841            tcg_out_ext16s(s, dst, dst);
842        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
843            tcg_out_ext16u(s, dst, dst);
844        }
845        return;
846    }
847
848    /*
849     * In the following,
850     *   dep(a, b, m) -> (a & ~m) | (b & m)
851     *
852     * Begin with:                              src = xxxxabcd
853     */
854    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
855    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
856    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
857    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
858
859    if (flags & TCG_BSWAP_OS) {
860        tcg_out_ext16s(s, dst, tmp);
861    } else {
862        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
863    }
864}
865
866static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
867{
868    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
869
870    if (have_isa_3_10) {
871        tcg_out32(s, BRW | RA(dst) | RS(src));
872        if (flags & TCG_BSWAP_OS) {
873            tcg_out_ext32s(s, dst, dst);
874        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
875            tcg_out_ext32u(s, dst, dst);
876        }
877        return;
878    }
879
880    /*
881     * Stolen from gcc's builtin_bswap32.
882     * In the following,
883     *   dep(a, b, m) -> (a & ~m) | (b & m)
884     *
885     * Begin with:                              src = xxxxabcd
886     */
887    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
888    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
889    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
890    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
891    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
892    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
893
894    if (flags & TCG_BSWAP_OS) {
895        tcg_out_ext32s(s, dst, tmp);
896    } else {
897        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
898    }
899}
900
901static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
902{
903    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
904    TCGReg t1 = dst == src ? dst : TCG_REG_R0;
905
906    if (have_isa_3_10) {
907        tcg_out32(s, BRD | RA(dst) | RS(src));
908        return;
909    }
910
911    /*
912     * In the following,
913     *   dep(a, b, m) -> (a & ~m) | (b & m)
914     *
915     * Begin with:                              src = abcdefgh
916     */
917    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
918    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
919    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
920    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
921    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
922    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
923
924    /* t0 = rol64(t0, 32)                           = hgfe0000 */
925    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
926    /* t1 = rol64(src, 32)                          = efghabcd */
927    tcg_out_rld(s, RLDICL, t1, src, 32, 0);
928
929    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
930    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
931    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
932    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
933    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
934    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
935
936    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
937}
938
939/* Emit a move of arg into ret, if it can be done in one insn.  */
940static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
941{
942    if (arg == (int16_t)arg) {
943        tcg_out32(s, ADDI | TAI(ret, 0, arg));
944        return true;
945    }
946    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
947        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
948        return true;
949    }
950    return false;
951}
952
953static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
954                             tcg_target_long arg, bool in_prologue)
955{
956    intptr_t tb_diff;
957    tcg_target_long tmp;
958    int shift;
959
960    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
961
962    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
963        arg = (int32_t)arg;
964    }
965
966    /* Load 16-bit immediates with one insn.  */
967    if (tcg_out_movi_one(s, ret, arg)) {
968        return;
969    }
970
971    /* Load addresses within the TB with one insn.  */
972    tb_diff = tcg_tbrel_diff(s, (void *)arg);
973    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
974        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
975        return;
976    }
977
978    /* Load 32-bit immediates with two insns.  Note that we've already
979       eliminated bare ADDIS, so we know both insns are required.  */
980    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
981        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
982        tcg_out32(s, ORI | SAI(ret, ret, arg));
983        return;
984    }
985    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
986        tcg_out32(s, ADDI | TAI(ret, 0, arg));
987        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
988        return;
989    }
990
991    /* Load masked 16-bit value.  */
992    if (arg > 0 && (arg & 0x8000)) {
993        tmp = arg | 0x7fff;
994        if ((tmp & (tmp + 1)) == 0) {
995            int mb = clz64(tmp + 1) + 1;
996            tcg_out32(s, ADDI | TAI(ret, 0, arg));
997            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
998            return;
999        }
1000    }
1001
1002    /* Load common masks with 2 insns.  */
1003    shift = ctz64(arg);
1004    tmp = arg >> shift;
1005    if (tmp == (int16_t)tmp) {
1006        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
1007        tcg_out_shli64(s, ret, ret, shift);
1008        return;
1009    }
1010    shift = clz64(arg);
1011    if (tcg_out_movi_one(s, ret, arg << shift)) {
1012        tcg_out_shri64(s, ret, ret, shift);
1013        return;
1014    }
1015
1016    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
1017    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
1018        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
1019        return;
1020    }
1021
1022    /* Use the constant pool, if possible.  */
1023    if (!in_prologue && USE_REG_TB) {
1024        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
1025                       tcg_tbrel_diff(s, NULL));
1026        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
1027        return;
1028    }
1029
1030    tmp = arg >> 31 >> 1;
1031    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
1032    if (tmp) {
1033        tcg_out_shli64(s, ret, ret, 32);
1034    }
1035    if (arg & 0xffff0000) {
1036        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
1037    }
1038    if (arg & 0xffff) {
1039        tcg_out32(s, ORI | SAI(ret, ret, arg));
1040    }
1041}
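/*
 * Worked examples of the paths above:
 *   0x000000000000ff00: masked 16-bit path -- addi ret,0,-0x100 sign
 *     extends to 0xffffffffffffff00, then rldicl ret,ret,0,48 clears
 *     the upper 48 bits.
 *   0x00007fff00000000: common mask path -- addi ret,0,0x7fff then
 *     sldi ret,ret,32.
 * Anything harder on a 64-bit host is normally fetched from the
 * constant pool via TCG_REG_TB; the final open-coded 5-insn sequence
 * is only reached from the prologue or when USE_REG_TB is false.
 */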
1042
1043static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1044                             TCGReg ret, int64_t val)
1045{
1046    uint32_t load_insn;
1047    int rel, low;
1048    intptr_t add;
1049
1050    switch (vece) {
1051    case MO_8:
1052        low = (int8_t)val;
1053        if (low >= -16 && low < 16) {
1054            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
1055            return;
1056        }
1057        if (have_isa_3_00) {
1058            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
1059            return;
1060        }
1061        break;
1062
1063    case MO_16:
1064        low = (int16_t)val;
1065        if (low >= -16 && low < 16) {
1066            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
1067            return;
1068        }
1069        break;
1070
1071    case MO_32:
1072        low = (int32_t)val;
1073        if (low >= -16 && low < 16) {
1074            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
1075            return;
1076        }
1077        break;
1078    }
1079
1080    /*
1081     * Otherwise we must load the value from the constant pool.
1082     */
1083    if (USE_REG_TB) {
1084        rel = R_PPC_ADDR16;
1085        add = tcg_tbrel_diff(s, NULL);
1086    } else {
1087        rel = R_PPC_ADDR32;
1088        add = 0;
1089    }
1090
1091    if (have_vsx) {
1092        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
1093        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
1094        if (TCG_TARGET_REG_BITS == 64) {
1095            new_pool_label(s, val, rel, s->code_ptr, add);
1096        } else {
1097            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
1098        }
1099    } else {
1100        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
1101        if (TCG_TARGET_REG_BITS == 64) {
1102            new_pool_l2(s, rel, s->code_ptr, add, val, val);
1103        } else {
1104            new_pool_l4(s, rel, s->code_ptr, add,
1105                        val >> 32, val, val >> 32, val);
1106        }
1107    }
1108
1109    if (USE_REG_TB) {
1110        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
1111        load_insn |= RA(TCG_REG_TB);
1112    } else {
1113        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
1114        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
1115    }
1116    tcg_out32(s, load_insn);
1117}
1118
1119static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1120                         tcg_target_long arg)
1121{
1122    switch (type) {
1123    case TCG_TYPE_I32:
1124    case TCG_TYPE_I64:
1125        tcg_debug_assert(ret < TCG_REG_V0);
1126        tcg_out_movi_int(s, type, ret, arg, false);
1127        break;
1128
1129    default:
1130        g_assert_not_reached();
1131    }
1132}
1133
1134static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1135                             tcg_target_long imm)
1136{
1137    /* This function is only used for passing structs by reference. */
1138    g_assert_not_reached();
1139}
1140
1141static bool mask_operand(uint32_t c, int *mb, int *me)
1142{
1143    uint32_t lsb, test;
1144
1145    /* Accept a bit pattern like:
1146           0....01....1
1147           1....10....0
1148           0..01..10..0
1149       Keep track of the transitions.  */
1150    if (c == 0 || c == -1) {
1151        return false;
1152    }
1153    test = c;
1154    lsb = test & -test;
1155    test += lsb;
1156    if (test & (test - 1)) {
1157        return false;
1158    }
1159
1160    *me = clz32(lsb);
1161    *mb = test ? clz32(test & -test) + 1 : 0;
1162    return true;
1163}
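/*
 * Example: c = 0x0ff0.  lsb = 0x10 and test = c + lsb = 0x1000 is a
 * power of two, so this is a single run of ones.  *me = clz32(0x10) =
 * 27 and *mb = clz32(0x1000) + 1 = 20, i.e. the rlwinm mask MB=20,
 * ME=27 (IBM bit numbering, bit 0 = MSB), which is exactly 0x0ff0.
 */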
1164
1165static bool mask64_operand(uint64_t c, int *mb, int *me)
1166{
1167    uint64_t lsb;
1168
1169    if (c == 0) {
1170        return false;
1171    }
1172
1173    lsb = c & -c;
1174    /* Accept 1..10..0.  */
1175    if (c == -lsb) {
1176        *mb = 0;
1177        *me = clz64(lsb);
1178        return true;
1179    }
1180    /* Accept 0..01..1.  */
1181    if (lsb == 1 && (c & (c + 1)) == 0) {
1182        *mb = clz64(c + 1) + 1;
1183        *me = 63;
1184        return true;
1185    }
1186    return false;
1187}
1188
1189static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1190{
1191    int mb, me;
1192
1193    if (mask_operand(c, &mb, &me)) {
1194        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1195    } else if ((c & 0xffff) == c) {
1196        tcg_out32(s, ANDI | SAI(src, dst, c));
1197        return;
1198    } else if ((c & 0xffff0000) == c) {
1199        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1200        return;
1201    } else {
1202        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1203        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1204    }
1205}
1206
1207static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1208{
1209    int mb, me;
1210
1211    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1212    if (mask64_operand(c, &mb, &me)) {
1213        if (mb == 0) {
1214            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1215        } else {
1216            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1217        }
1218    } else if ((c & 0xffff) == c) {
1219        tcg_out32(s, ANDI | SAI(src, dst, c));
1220        return;
1221    } else if ((c & 0xffff0000) == c) {
1222        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1223        return;
1224    } else {
1225        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1226        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1227    }
1228}
1229
1230static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1231                           int op_lo, int op_hi)
1232{
1233    if (c >> 16) {
1234        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1235        src = dst;
1236    }
1237    if (c & 0xffff) {
1238        tcg_out32(s, op_lo | SAI(src, dst, c));
1239        src = dst;
1240    }
1241}
1242
1243static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1244{
1245    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
1246}
1247
1248static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1249{
1250    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
1251}
1252
1253static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1254{
1255    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1256    if (in_range_b(disp)) {
1257        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1258    } else {
1259        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1260        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1261        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1262    }
1263}
1264
1265static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1266                             TCGReg base, tcg_target_long offset)
1267{
1268    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1269    bool is_int_store = false;
1270    TCGReg rs = TCG_REG_TMP1;
1271
1272    switch (opi) {
1273    case LD: case LWA:
1274        align = 3;
1275        /* FALLTHRU */
1276    default:
1277        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1278            rs = rt;
1279            break;
1280        }
1281        break;
1282    case LXSD:
1283    case STXSD:
1284        align = 3;
1285        break;
1286    case LXV:
1287    case STXV:
1288        align = 15;
1289        break;
1290    case STD:
1291        align = 3;
1292        /* FALLTHRU */
1293    case STB: case STH: case STW:
1294        is_int_store = true;
1295        break;
1296    }
1297
1298    /* For unaligned, or very large offsets, use the indexed form.  */
1299    if (offset & align || offset != (int32_t)offset || opi == 0) {
1300        if (rs == base) {
1301            rs = TCG_REG_R0;
1302        }
1303        tcg_debug_assert(!is_int_store || rs != rt);
1304        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1305        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1306        return;
1307    }
1308
1309    l0 = (int16_t)offset;
1310    offset = (offset - l0) >> 16;
1311    l1 = (int16_t)offset;
1312
1313    if (l1 < 0 && orig >= 0) {
1314        extra = 0x4000;
1315        l1 = (int16_t)(offset - 0x4000);
1316    }
1317    if (l1) {
1318        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1319        base = rs;
1320    }
1321    if (extra) {
1322        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1323        base = rs;
1324    }
1325    if (opi != ADDI || base != rt || l0 != 0) {
1326        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1327    }
1328}
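/*
 * Example: a load at base + 0x12345678 does not fit the 16-bit D field,
 * so it is emitted as addis tmp,base,0x1234 followed by the D-form insn
 * with displacement 0x5678 (0x1234 << 16 plus 0x5678 recreates the
 * offset).  The extra 0x4000 ADDIS above compensates when the low half
 * would sign-extend negatively for a non-negative offset.
 */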
1329
1330static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
1331                           TCGReg va, TCGReg vb, int shb)
1332{
1333    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
1334}
1335
1336static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1337                       TCGReg base, intptr_t offset)
1338{
1339    int shift;
1340
1341    switch (type) {
1342    case TCG_TYPE_I32:
1343        if (ret < TCG_REG_V0) {
1344            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1345            break;
1346        }
1347        if (have_isa_2_07 && have_vsx) {
1348            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1349            break;
1350        }
1351        tcg_debug_assert((offset & 3) == 0);
1352        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1353        shift = (offset - 4) & 0xc;
1354        if (shift) {
1355            tcg_out_vsldoi(s, ret, ret, ret, shift);
1356        }
1357        break;
1358    case TCG_TYPE_I64:
1359        if (ret < TCG_REG_V0) {
1360            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1361            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1362            break;
1363        }
1364        /* fallthru */
1365    case TCG_TYPE_V64:
1366        tcg_debug_assert(ret >= TCG_REG_V0);
1367        if (have_vsx) {
1368            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1369                             ret, base, offset);
1370            break;
1371        }
1372        tcg_debug_assert((offset & 7) == 0);
1373        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1374        if (offset & 8) {
1375            tcg_out_vsldoi(s, ret, ret, ret, 8);
1376        }
1377        break;
1378    case TCG_TYPE_V128:
1379        tcg_debug_assert(ret >= TCG_REG_V0);
1380        tcg_debug_assert((offset & 15) == 0);
1381        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1382                         LVX, ret, base, offset);
1383        break;
1384    default:
1385        g_assert_not_reached();
1386    }
1387}
1388
1389static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1390                              TCGReg base, intptr_t offset)
1391{
1392    int shift;
1393
1394    switch (type) {
1395    case TCG_TYPE_I32:
1396        if (arg < TCG_REG_V0) {
1397            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1398            break;
1399        }
1400        if (have_isa_2_07 && have_vsx) {
1401            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1402            break;
1403        }
1405        tcg_debug_assert((offset & 3) == 0);
1406        shift = (offset - 4) & 0xc;
1407        if (shift) {
1408            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1409            arg = TCG_VEC_TMP1;
1410        }
1411        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1412        break;
1413    case TCG_TYPE_I64:
1414        if (arg < TCG_REG_V0) {
1415            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1416            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1417            break;
1418        }
1419        /* fallthru */
1420    case TCG_TYPE_V64:
1421        tcg_debug_assert(arg >= TCG_REG_V0);
1422        if (have_vsx) {
1423            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1424                             STXSDX, arg, base, offset);
1425            break;
1426        }
1427        tcg_debug_assert((offset & 7) == 0);
1428        if (offset & 8) {
1429            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1430            arg = TCG_VEC_TMP1;
1431        }
1432        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1433        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1434        break;
1435    case TCG_TYPE_V128:
1436        tcg_debug_assert(arg >= TCG_REG_V0);
1437        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1438                         STVX, arg, base, offset);
1439        break;
1440    default:
1441        g_assert_not_reached();
1442    }
1443}
1444
1445static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1446                               TCGReg base, intptr_t ofs)
1447{
1448    return false;
1449}
1450
1451static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1452                        int const_arg2, int cr, TCGType type)
1453{
1454    int imm;
1455    uint32_t op;
1456
1457    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1458
1459    /* Simplify the comparisons below wrt CMPI.  */
1460    if (type == TCG_TYPE_I32) {
1461        arg2 = (int32_t)arg2;
1462    }
1463
1464    switch (cond) {
1465    case TCG_COND_EQ:
1466    case TCG_COND_NE:
1467        if (const_arg2) {
1468            if ((int16_t) arg2 == arg2) {
1469                op = CMPI;
1470                imm = 1;
1471                break;
1472            } else if ((uint16_t) arg2 == arg2) {
1473                op = CMPLI;
1474                imm = 1;
1475                break;
1476            }
1477        }
1478        op = CMPL;
1479        imm = 0;
1480        break;
1481
1482    case TCG_COND_LT:
1483    case TCG_COND_GE:
1484    case TCG_COND_LE:
1485    case TCG_COND_GT:
1486        if (const_arg2) {
1487            if ((int16_t) arg2 == arg2) {
1488                op = CMPI;
1489                imm = 1;
1490                break;
1491            }
1492        }
1493        op = CMP;
1494        imm = 0;
1495        break;
1496
1497    case TCG_COND_LTU:
1498    case TCG_COND_GEU:
1499    case TCG_COND_LEU:
1500    case TCG_COND_GTU:
1501        if (const_arg2) {
1502            if ((uint16_t) arg2 == arg2) {
1503                op = CMPLI;
1504                imm = 1;
1505                break;
1506            }
1507        }
1508        op = CMPL;
1509        imm = 0;
1510        break;
1511
1512    default:
1513        tcg_abort();
1514    }
1515    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1516
1517    if (imm) {
1518        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1519    } else {
1520        if (const_arg2) {
1521            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1522            arg2 = TCG_REG_R0;
1523        }
1524        tcg_out32(s, op | RA(arg1) | RB(arg2));
1525    }
1526}
1527
1528static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1529                                TCGReg dst, TCGReg src)
1530{
1531    if (type == TCG_TYPE_I32) {
1532        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1533        tcg_out_shri32(s, dst, dst, 5);
1534    } else {
1535        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1536        tcg_out_shri64(s, dst, dst, 6);
1537    }
1538}
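/*
 * cntlzw yields 32 only when the source is zero (cntlzd yields 64), so
 * shifting the count right by 5 (or 6) produces exactly the 1/0 result
 * of "X == 0".
 */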
1539
1540static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1541{
1542    /* X != 0 implies X + -1 generates a carry.  The subfe then
1543       computes R = ~(X-1) + X + CA = -X + X + CA = CA.  */
1544    if (dst != src) {
1545        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1546        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1547    } else {
1548        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1549        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1550    }
1551}
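/*
 * Concretely: for X = 5, addic sets dst = 4 with CA = 1 and subfe
 * computes ~4 + 5 + 1 = 1; for X = 0, addic sets dst = -1 with CA = 0
 * and subfe computes ~(-1) + 0 + 0 = 0.
 */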
1552
1553static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1554                                  bool const_arg2)
1555{
1556    if (const_arg2) {
1557        if ((uint32_t)arg2 == arg2) {
1558            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1559        } else {
1560            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1561            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1562        }
1563    } else {
1564        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1565    }
1566    return TCG_REG_R0;
1567}
1568
1569static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1570                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1571                            int const_arg2)
1572{
1573    int crop, sh;
1574
1575    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1576
1577    /* Ignore high bits of a potential constant arg2.  */
1578    if (type == TCG_TYPE_I32) {
1579        arg2 = (uint32_t)arg2;
1580    }
1581
1582    /* Handle common and trivial cases before handling anything else.  */
1583    if (arg2 == 0) {
1584        switch (cond) {
1585        case TCG_COND_EQ:
1586            tcg_out_setcond_eq0(s, type, arg0, arg1);
1587            return;
1588        case TCG_COND_NE:
1589            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1590                tcg_out_ext32u(s, TCG_REG_R0, arg1);
1591                arg1 = TCG_REG_R0;
1592            }
1593            tcg_out_setcond_ne0(s, arg0, arg1);
1594            return;
1595        case TCG_COND_GE:
1596            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1597            arg1 = arg0;
1598            /* FALLTHRU */
1599        case TCG_COND_LT:
1600            /* Extract the sign bit.  */
1601            if (type == TCG_TYPE_I32) {
1602                tcg_out_shri32(s, arg0, arg1, 31);
1603            } else {
1604                tcg_out_shri64(s, arg0, arg1, 63);
1605            }
1606            return;
1607        default:
1608            break;
1609        }
1610    }
1611
1612    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1613       All other cases below are also at least 3 insns, so speed up the
1614       code generator by not considering them and always using ISEL.  */
1615    if (have_isel) {
1616        int isel, tab;
1617
1618        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1619
1620        isel = tcg_to_isel[cond];
1621
1622        tcg_out_movi(s, type, arg0, 1);
1623        if (isel & 1) {
1624            /* arg0 = (bc ? 0 : 1) */
1625            tab = TAB(arg0, 0, arg0);
1626            isel &= ~1;
1627        } else {
1628            /* arg0 = (bc ? 1 : 0) */
1629            tcg_out_movi(s, type, TCG_REG_R0, 0);
1630            tab = TAB(arg0, arg0, TCG_REG_R0);
1631        }
1632        tcg_out32(s, isel | tab);
1633        return;
1634    }
1635
1636    switch (cond) {
1637    case TCG_COND_EQ:
1638        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1639        tcg_out_setcond_eq0(s, type, arg0, arg1);
1640        return;
1641
1642    case TCG_COND_NE:
1643        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1644        /* Discard the high bits only once, rather than both inputs.  */
1645        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1646            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1647            arg1 = TCG_REG_R0;
1648        }
1649        tcg_out_setcond_ne0(s, arg0, arg1);
1650        return;
1651
1652    case TCG_COND_GT:
1653    case TCG_COND_GTU:
1654        sh = 30;
1655        crop = 0;
1656        goto crtest;
1657
1658    case TCG_COND_LT:
1659    case TCG_COND_LTU:
1660        sh = 29;
1661        crop = 0;
1662        goto crtest;
1663
1664    case TCG_COND_GE:
1665    case TCG_COND_GEU:
1666        sh = 31;
1667        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1668        goto crtest;
1669
1670    case TCG_COND_LE:
1671    case TCG_COND_LEU:
1672        sh = 31;
1673        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1674    crtest:
1675        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1676        if (crop) {
1677            tcg_out32(s, crop);
1678        }
1679        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1680        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1681        break;
1682
1683    default:
1684        tcg_abort();
1685    }
1686}
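/*
 * In the crtest path above, mfocrf with FXM(7) copies CR field 7 into
 * the low nibble of R0 as {LT,GT,EQ,SO}, and the final rlwinm rotates
 * the wanted bit down to bit 0: sh = 29 selects LT, 30 selects GT, and
 * 31 selects EQ (after crnor has folded GE/LE into the EQ bit).
 */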
1687
1688static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1689{
1690    if (l->has_value) {
1691        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1692    } else {
1693        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1694    }
1695    tcg_out32(s, bc);
1696}
1697
1698static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1699                           TCGArg arg1, TCGArg arg2, int const_arg2,
1700                           TCGLabel *l, TCGType type)
1701{
1702    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1703    tcg_out_bc(s, tcg_to_bc[cond], l);
1704}
1705
1706static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1707                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1708                            TCGArg v2, bool const_c2)
1709{
1710    /* If for some reason both inputs are zero, don't produce bad code.  */
1711    if (v1 == 0 && v2 == 0) {
1712        tcg_out_movi(s, type, dest, 0);
1713        return;
1714    }
1715
1716    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1717
1718    if (have_isel) {
1719        int isel = tcg_to_isel[cond];
1720
1721        /* Swap the V operands if the operation indicates inversion.  */
1722        if (isel & 1) {
1723            int t = v1;
1724            v1 = v2;
1725            v2 = t;
1726            isel &= ~1;
1727        }
1728        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1729        if (v2 == 0) {
1730            tcg_out_movi(s, type, TCG_REG_R0, 0);
1731        }
1732        tcg_out32(s, isel | TAB(dest, v1, v2));
1733    } else {
1734        if (dest == v2) {
1735            cond = tcg_invert_cond(cond);
1736            v2 = v1;
1737        } else if (dest != v1) {
1738            if (v1 == 0) {
1739                tcg_out_movi(s, type, dest, 0);
1740            } else {
1741                tcg_out_mov(s, type, dest, v1);
1742            }
1743        }
1744        /* Branch forward over one insn */
1745        tcg_out32(s, tcg_to_bc[cond] | 8);
1746        if (v2 == 0) {
1747            tcg_out_movi(s, type, dest, 0);
1748        } else {
1749            tcg_out_mov(s, type, dest, v2);
1750        }
1751    }
1752}
1753
1754static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1755                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1756{
1757    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1758        tcg_out32(s, opc | RA(a0) | RS(a1));
1759    } else {
1760        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1761        /* Note that the only other valid constant for a2 is 0.  */
1762        if (have_isel) {
1763            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1764            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1765        } else if (!const_a2 && a0 == a2) {
1766            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1767            tcg_out32(s, opc | RA(a0) | RS(a1));
1768        } else {
1769            tcg_out32(s, opc | RA(a0) | RS(a1));
1770            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1771            if (const_a2) {
1772                tcg_out_movi(s, type, a0, 0);
1773            } else {
1774                tcg_out_mov(s, type, a0, a2);
1775            }
1776        }
1777    }
1778}
1779
1780static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1781                         const int *const_args)
1782{
1783    static const struct { uint8_t bit1, bit2; } bits[] = {
1784        [TCG_COND_LT ] = { CR_LT, CR_LT },
1785        [TCG_COND_LE ] = { CR_LT, CR_GT },
1786        [TCG_COND_GT ] = { CR_GT, CR_GT },
1787        [TCG_COND_GE ] = { CR_GT, CR_LT },
1788        [TCG_COND_LTU] = { CR_LT, CR_LT },
1789        [TCG_COND_LEU] = { CR_LT, CR_GT },
1790        [TCG_COND_GTU] = { CR_GT, CR_GT },
1791        [TCG_COND_GEU] = { CR_GT, CR_LT },
1792    };
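    /*
     * Illustration (added comment): the high parts are compared with the
     * given condition into CR6 and the low parts with its unsigned variant
     * into CR7; the result is CR6[bit1] OR (CR6[EQ] op CR7[bit2]).
     * For example, LE uses bit1 = LT, bit2 = GT, op = CRANDC, giving
     * (ah < bh) || (ah == bh && !(al >u bl)).
     */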
1793
1794    TCGCond cond = args[4], cond2;
1795    TCGArg al, ah, bl, bh;
1796    int blconst, bhconst;
1797    int op, bit1, bit2;
1798
1799    al = args[0];
1800    ah = args[1];
1801    bl = args[2];
1802    bh = args[3];
1803    blconst = const_args[2];
1804    bhconst = const_args[3];
1805
1806    switch (cond) {
1807    case TCG_COND_EQ:
1808        op = CRAND;
1809        goto do_equality;
1810    case TCG_COND_NE:
1811        op = CRNAND;
1812    do_equality:
1813        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1814        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1815        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1816        break;
1817
1818    case TCG_COND_LT:
1819    case TCG_COND_LE:
1820    case TCG_COND_GT:
1821    case TCG_COND_GE:
1822    case TCG_COND_LTU:
1823    case TCG_COND_LEU:
1824    case TCG_COND_GTU:
1825    case TCG_COND_GEU:
1826        bit1 = bits[cond].bit1;
1827        bit2 = bits[cond].bit2;
1828        op = (bit1 != bit2 ? CRANDC : CRAND);
1829        cond2 = tcg_unsigned_cond(cond);
1830
1831        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1832        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1833        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1834        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1835        break;
1836
1837    default:
1838        tcg_abort();
1839    }
1840}
1841
1842static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1843                             const int *const_args)
1844{
1845    tcg_out_cmp2(s, args + 1, const_args + 1);
1846    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1847    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1848}
1849
1850static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
1851                            const int *const_args)
1852{
1853    tcg_out_cmp2(s, args, const_args);
1854    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1855}
1856
1857static void tcg_out_mb(TCGContext *s, TCGArg a0)
1858{
1859    uint32_t insn;
1860
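    /* Only hwsync orders prior stores before later loads; lwsync covers
       every other combination, so it suffices unless TCG_MO_ST_LD is
       requested. */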
1861    if (a0 & TCG_MO_ST_LD) {
1862        insn = HWSYNC;
1863    } else {
1864        insn = LWSYNC;
1865    }
1866
1867    tcg_out32(s, insn);
1868}
1869
1870static void tcg_out_call_int(TCGContext *s, int lk,
1871                             const tcg_insn_unit *target)
1872{
1873#ifdef _CALL_AIX
1874    /* Look through the function descriptor.  If the branch is in range and
1875       the TOC value is cheap to materialize, branch to the target directly.  */
1876    const void *tgt = ((const void * const *)target)[0];
1877    uintptr_t toc = ((const uintptr_t *)target)[1];
1878    intptr_t diff = tcg_pcrel_diff(s, tgt);
1879
1880    if (in_range_b(diff) && toc == (uint32_t)toc) {
1881        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1882        tcg_out_b(s, lk, tgt);
1883    } else {
1884        /* Fold the low bits of the constant into the addresses below.  */
1885        intptr_t arg = (intptr_t)target;
1886        int ofs = (int16_t)arg;
1887
1888        if (ofs + 8 < 0x8000) {
1889            arg -= ofs;
1890        } else {
1891            ofs = 0;
1892        }
1893        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1894        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1895        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1896        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1897        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1898    }
1899#elif defined(_CALL_ELF) && _CALL_ELF == 2
1900    intptr_t diff;
1901
1902    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1903       address, which the callee uses to compute its TOC address.  */
1904    /* FIXME: when the branch is in range, we could avoid r12 load if we
1905       knew that the destination uses the same TOC, and what its local
1906       entry point offset is.  */
1907    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1908
1909    diff = tcg_pcrel_diff(s, target);
1910    if (in_range_b(diff)) {
1911        tcg_out_b(s, lk, target);
1912    } else {
1913        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1914        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1915    }
1916#else
1917    tcg_out_b(s, lk, target);
1918#endif
1919}
1920
1921static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1922                         const TCGHelperInfo *info)
1923{
1924    tcg_out_call_int(s, LK, target);
1925}
1926
1927static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
1928    [MO_UB] = LBZX,
1929    [MO_UW] = LHZX,
1930    [MO_UL] = LWZX,
1931    [MO_UQ] = LDX,
1932    [MO_SW] = LHAX,
1933    [MO_SL] = LWAX,
1934    [MO_BSWAP | MO_UB] = LBZX,
1935    [MO_BSWAP | MO_UW] = LHBRX,
1936    [MO_BSWAP | MO_UL] = LWBRX,
1937    [MO_BSWAP | MO_UQ] = LDBRX,
1938};
1939
1940static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
1941    [MO_UB] = STBX,
1942    [MO_UW] = STHX,
1943    [MO_UL] = STWX,
1944    [MO_UQ] = STDX,
1945    [MO_BSWAP | MO_UB] = STBX,
1946    [MO_BSWAP | MO_UW] = STHBRX,
1947    [MO_BSWAP | MO_UL] = STWBRX,
1948    [MO_BSWAP | MO_UQ] = STDBRX,
1949};
1950
1951static const uint32_t qemu_exts_opc[4] = {
1952    EXTSB, EXTSH, EXTSW, 0
1953};
1954
1955#if defined (CONFIG_SOFTMMU)
1956/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
1957 *                                 int mmu_idx, uintptr_t ra)
1958 */
1959static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1960    [MO_UB]   = helper_ret_ldub_mmu,
1961    [MO_LEUW] = helper_le_lduw_mmu,
1962    [MO_LEUL] = helper_le_ldul_mmu,
1963    [MO_LEUQ] = helper_le_ldq_mmu,
1964    [MO_BEUW] = helper_be_lduw_mmu,
1965    [MO_BEUL] = helper_be_ldul_mmu,
1966    [MO_BEUQ] = helper_be_ldq_mmu,
1967};
1968
1969/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
1970 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
1971 */
1972static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1973    [MO_UB]   = helper_ret_stb_mmu,
1974    [MO_LEUW] = helper_le_stw_mmu,
1975    [MO_LEUL] = helper_le_stl_mmu,
1976    [MO_LEUQ] = helper_le_stq_mmu,
1977    [MO_BEUW] = helper_be_stw_mmu,
1978    [MO_BEUL] = helper_be_stl_mmu,
1979    [MO_BEUQ] = helper_be_stq_mmu,
1980};
1981
1982/* We expect to use a 16-bit negative offset from ENV.  */
1983QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1984QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1985
1986/* Perform the TLB load and compare.  Places the result of the comparison
1987   in CR7, loads the addend of the TLB into R3, and returns the register
1988   containing the guest address (zero-extended into R4).  Clobbers R0 and TMP1. */
1989
1990static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1991                               TCGReg addrlo, TCGReg addrhi,
1992                               int mem_index, bool is_read)
1993{
1994    int cmp_off
1995        = (is_read
1996           ? offsetof(CPUTLBEntry, addr_read)
1997           : offsetof(CPUTLBEntry, addr_write));
1998    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1999    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2000    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2001    unsigned s_bits = opc & MO_SIZE;
2002    unsigned a_bits = get_alignment_bits(opc);
2003
2004    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2005    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
2006    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
2007
2008    /* Extract the page index, shifted into place for tlb index.  */
2009    if (TCG_TARGET_REG_BITS == 32) {
2010        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
2011                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2012    } else {
2013        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
2014                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2015    }
2016    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
2017
2018    /* Load the TLB comparator.  */
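    /* When the comparator is at offset 0, a load-with-update fetches it and
       leaves R3 pointing at the TLB entry in a single insn; otherwise add
       the table base first and load the comparator at cmp_off. */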
2019    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
2020        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
2021                        ? LWZUX : LDUX);
2022        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
2023    } else {
2024        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
2025        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2026            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
2027            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
2028        } else {
2029            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
2030        }
2031    }
2032
2033    /* Load the TLB addend for use on the fast path.  Do this as early
2034       as possible to minimize the load-use delay.  */
2035    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
2036               offsetof(CPUTLBEntry, addend));
2037
2038    /* Clear the non-page, non-alignment bits from the address */
2039    if (TCG_TARGET_REG_BITS == 32) {
2040        /* We don't support unaligned accesses on 32-bit hosts.
2041         * Preserve the bottom bits so that an unaligned access
2042         * triggers a comparison failure.
2043         */
2044        if (a_bits < s_bits) {
2045            a_bits = s_bits;
2046        }
2047        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2048                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2049    } else {
2050        TCGReg t = addrlo;
2051
2052        /* If the access is unaligned, we need to make sure we fail if we
2053         * cross a page boundary.  The trick is to add the access size-1
2054         * to the address before masking the low bits.  That will make the
2055         * address overflow to the next page if we cross a page boundary,
2056         * which will then force a mismatch of the TLB compare.
2057         */
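        /*
         * Worked example (added, assuming TARGET_PAGE_BITS == 12): an
         * unaligned 4-byte load at 0x0fff has s_mask - a_mask = 3, so the
         * masked value below becomes page 0x1000 while the TLB entry was
         * selected for page 0x0000, guaranteeing a compare failure and a
         * trip through the slow path.
         */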
2058        if (a_bits < s_bits) {
2059            unsigned a_mask = (1 << a_bits) - 1;
2060            unsigned s_mask = (1 << s_bits) - 1;
2061            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2062            t = TCG_REG_R0;
2063        }
2064
2065        /* Mask the address for the requested alignment.  */
2066        if (TARGET_LONG_BITS == 32) {
2067            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2068                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2069            /* Zero-extend the address for use in the final address.  */
2070            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
2071            addrlo = TCG_REG_R4;
2072        } else if (a_bits == 0) {
2073            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
2074        } else {
2075            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2076                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
2077            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
2078        }
2079    }
2080
2081    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2082        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2083                    0, 7, TCG_TYPE_I32);
2084        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
2085        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2086    } else {
2087        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2088                    0, 7, TCG_TYPE_TL);
2089    }
2090
2091    return addrlo;
2092}
2093
2094/* Record the context of a call to the out of line helper code for the slow
2095   path for a load or store, so that we can later generate the correct
2096   helper code.  */
2097static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
2098                                TCGReg datalo_reg, TCGReg datahi_reg,
2099                                TCGReg addrlo_reg, TCGReg addrhi_reg,
2100                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
2101{
2102    TCGLabelQemuLdst *label = new_ldst_label(s);
2103
2104    label->is_ld = is_ld;
2105    label->oi = oi;
2106    label->datalo_reg = datalo_reg;
2107    label->datahi_reg = datahi_reg;
2108    label->addrlo_reg = addrlo_reg;
2109    label->addrhi_reg = addrhi_reg;
2110    label->raddr = tcg_splitwx_to_rx(raddr);
2111    label->label_ptr[0] = lptr;
2112}
2113
2114static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2115{
2116    MemOpIdx oi = lb->oi;
2117    MemOp opc = get_memop(oi);
2118    TCGReg hi, lo, arg = TCG_REG_R3;
2119
2120    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2121        return false;
2122    }
2123
2124    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2125
2126    lo = lb->addrlo_reg;
2127    hi = lb->addrhi_reg;
2128    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2129        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2130        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2131        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2132    } else {
2133        /* If the address needed to be zero-extended, we'll have already
2134           placed it in R4.  The only remaining case is 64-bit guest.  */
2135        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2136    }
2137
2138    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2139    tcg_out32(s, MFSPR | RT(arg) | LR);
2140
2141    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2142
2143    lo = lb->datalo_reg;
2144    hi = lb->datahi_reg;
2145    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2146        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2147        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2148    } else if (opc & MO_SIGN) {
2149        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2150        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2151    } else {
2152        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2153    }
2154
2155    tcg_out_b(s, 0, lb->raddr);
2156    return true;
2157}
2158
2159static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2160{
2161    MemOpIdx oi = lb->oi;
2162    MemOp opc = get_memop(oi);
2163    MemOp s_bits = opc & MO_SIZE;
2164    TCGReg hi, lo, arg = TCG_REG_R3;
2165
2166    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2167        return false;
2168    }
2169
2170    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2171
2172    lo = lb->addrlo_reg;
2173    hi = lb->addrhi_reg;
2174    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2175        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2176        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2177        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2178    } else {
2179        /* If the address needed to be zero-extended, we'll have already
2180           placed it in R4.  The only remaining case is 64-bit guest.  */
2181        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2182    }
2183
2184    lo = lb->datalo_reg;
2185    hi = lb->datahi_reg;
2186    if (TCG_TARGET_REG_BITS == 32) {
2187        switch (s_bits) {
2188        case MO_64:
2189            arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2190            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2191            /* FALLTHRU */
2192        case MO_32:
2193            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2194            break;
2195        default:
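            /* rlwinm with mb = 32 - (8 << s_bits), me = 31 keeps only the
               low 8 or 16 bits, zero-extending the value for the call. */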
2196            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2197            break;
2198        }
2199    } else {
2200        if (s_bits == MO_64) {
2201            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2202        } else {
2203            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2204        }
2205    }
2206
2207    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2208    tcg_out32(s, MFSPR | RT(arg) | LR);
2209
2210    tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2211
2212    tcg_out_b(s, 0, lb->raddr);
2213    return true;
2214}
2215#else
2216
2217static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
2218                                   TCGReg addrhi, unsigned a_bits)
2219{
2220    unsigned a_mask = (1 << a_bits) - 1;
2221    TCGLabelQemuLdst *label = new_ldst_label(s);
2222
2223    label->is_ld = is_ld;
2224    label->addrlo_reg = addrlo;
2225    label->addrhi_reg = addrhi;
2226
2227    /* We expect a_bits to max out at 7, so a_mask always fits within ANDI's 16-bit unsigned immediate. */
2228    tcg_debug_assert(a_bits < 16);
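    /* andi. also sets CR0; the conditional branch-and-link below is taken
       when any low bit is set (CR0[EQ] clear), i.e. on a misaligned access. */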
2229    tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
2230
2231    label->label_ptr[0] = s->code_ptr;
2232    tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2233
2234    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
2235}
2236
2237static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
2238{
2239    if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2240        return false;
2241    }
2242
2243    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2244        TCGReg arg = TCG_REG_R4;
2245
2246        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2247        if (l->addrlo_reg != arg) {
2248            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2249            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2250        } else if (l->addrhi_reg != arg + 1) {
2251            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2252            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2253        } else {
2254            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
2255            tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
2256            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
2257        }
2258    } else {
2259        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
2260    }
2261    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
2262
2263    /* "Tail call" to the helper, with the return address back inline. */
2264    tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
2265                                          : helper_unaligned_st));
2266    return true;
2267}
2268
2269static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2270{
2271    return tcg_out_fail_alignment(s, l);
2272}
2273
2274static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2275{
2276    return tcg_out_fail_alignment(s, l);
2277}
2278
2279#endif /* SOFTMMU */
2280
2281static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2282{
2283    TCGReg datalo, datahi, addrlo, rbase;
2284    TCGReg addrhi __attribute__((unused));
2285    MemOpIdx oi;
2286    MemOp opc, s_bits;
2287#ifdef CONFIG_SOFTMMU
2288    int mem_index;
2289    tcg_insn_unit *label_ptr;
2290#else
2291    unsigned a_bits;
2292#endif
2293
2294    datalo = *args++;
2295    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2296    addrlo = *args++;
2297    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2298    oi = *args++;
2299    opc = get_memop(oi);
2300    s_bits = opc & MO_SIZE;
2301
2302#ifdef CONFIG_SOFTMMU
2303    mem_index = get_mmuidx(oi);
2304    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2305
2306    /* On TLB miss, branch-and-link to the slow path; LK leaves the address of the current opcode in LR for the helper. */
2307    label_ptr = s->code_ptr;
2308    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2309
2310    rbase = TCG_REG_R3;
2311#else  /* !CONFIG_SOFTMMU */
2312    a_bits = get_alignment_bits(opc);
2313    if (a_bits) {
2314        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
2315    }
2316    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2317    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2318        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2319        addrlo = TCG_REG_TMP1;
2320    }
2321#endif
2322
2323    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2324        if (opc & MO_BSWAP) {
2325            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2326            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2327            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2328        } else if (rbase != 0) {
2329            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2330            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2331            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2332        } else if (addrlo == datahi) {
2333            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2334            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2335        } else {
2336            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2337            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2338        }
2339    } else {
2340        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2341        if (!have_isa_2_06 && insn == LDBRX) {
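            /* Prior to ISA 2.06 there is no ldbrx; emulate the byte-reversed
               64-bit load with two lwbrx and merge the halves with rldimi. */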
2342            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2343            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2344            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2345            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2346        } else if (insn) {
2347            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2348        } else {
2349            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2350            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2351            insn = qemu_exts_opc[s_bits];
2352            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2353        }
2354    }
2355
2356#ifdef CONFIG_SOFTMMU
2357    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2358                        s->code_ptr, label_ptr);
2359#endif
2360}
2361
2362static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2363{
2364    TCGReg datalo, datahi, addrlo, rbase;
2365    TCGReg addrhi __attribute__((unused));
2366    MemOpIdx oi;
2367    MemOp opc, s_bits;
2368#ifdef CONFIG_SOFTMMU
2369    int mem_index;
2370    tcg_insn_unit *label_ptr;
2371#else
2372    unsigned a_bits;
2373#endif
2374
2375    datalo = *args++;
2376    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2377    addrlo = *args++;
2378    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2379    oi = *args++;
2380    opc = get_memop(oi);
2381    s_bits = opc & MO_SIZE;
2382
2383#ifdef CONFIG_SOFTMMU
2384    mem_index = get_mmuidx(oi);
2385    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2386
2387    /* On TLB miss, branch-and-link to the slow path; LK leaves the address of the current opcode in LR for the helper. */
2388    label_ptr = s->code_ptr;
2389    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2390
2391    rbase = TCG_REG_R3;
2392#else  /* !CONFIG_SOFTMMU */
2393    a_bits = get_alignment_bits(opc);
2394    if (a_bits) {
2395        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
2396    }
2397    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2398    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2399        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2400        addrlo = TCG_REG_TMP1;
2401    }
2402#endif
2403
2404    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2405        if (opc & MO_BSWAP) {
2406            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2407            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2408            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2409        } else if (rbase != 0) {
2410            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2411            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2412            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2413        } else {
2414            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2415            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2416        }
2417    } else {
2418        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2419        if (!have_isa_2_06 && insn == STDBRX) {
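            /* Prior to ISA 2.06 there is no stdbrx; store the low word
               byte-reversed, then shift the high word down and store it
               byte-reversed at addr + 4. */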
2420            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2421            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2422            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2423            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2424        } else {
2425            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2426        }
2427    }
2428
2429#ifdef CONFIG_SOFTMMU
2430    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2431                        s->code_ptr, label_ptr);
2432#endif
2433}
2434
2435static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2436{
2437    int i;
2438    for (i = 0; i < count; ++i) {
2439        p[i] = NOP;
2440    }
2441}
2442
2443/* Parameters for function call generation, used in tcg.c.  */
2444#define TCG_TARGET_STACK_ALIGN       16
2445
2446#ifdef _CALL_AIX
2447# define LINK_AREA_SIZE                (6 * SZR)
2448# define LR_OFFSET                     (1 * SZR)
2449# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2450#elif defined(_CALL_DARWIN)
2451# define LINK_AREA_SIZE                (6 * SZR)
2452# define LR_OFFSET                     (2 * SZR)
2453#elif TCG_TARGET_REG_BITS == 64
2454# if defined(_CALL_ELF) && _CALL_ELF == 2
2455#  define LINK_AREA_SIZE               (4 * SZR)
2456#  define LR_OFFSET                    (1 * SZR)
2457# endif
2458#else /* TCG_TARGET_REG_BITS == 32 */
2459# if defined(_CALL_SYSV)
2460#  define LINK_AREA_SIZE               (2 * SZR)
2461#  define LR_OFFSET                    (1 * SZR)
2462# endif
2463#endif
2464#ifndef LR_OFFSET
2465# error "Unhandled abi"
2466#endif
2467#ifndef TCG_TARGET_CALL_STACK_OFFSET
2468# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2469#endif
2470
2471#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2472#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2473
2474#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2475                     + TCG_STATIC_CALL_ARGS_SIZE    \
2476                     + CPU_TEMP_BUF_SIZE            \
2477                     + REG_SAVE_SIZE                \
2478                     + TCG_TARGET_STACK_ALIGN - 1)  \
2479                    & -TCG_TARGET_STACK_ALIGN)
2480
2481#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
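/*
 * Resulting frame layout, from the new stack pointer upward (added comment):
 *   [ TCG_TARGET_CALL_STACK_OFFSET: ABI link area (plus, for AIX, the
 *     parameter save area) ]
 *   [ TCG_STATIC_CALL_ARGS_SIZE bytes of outgoing helper-call arguments ]
 *   [ CPU_TEMP_BUF_SIZE bytes of TCG temporary spill space ]
 *   [ REG_SAVE_SIZE bytes of callee-saved GPRs, ending at FRAME_SIZE ]
 * with any alignment padding falling between the argument area and the
 * temp buffer, and the total rounded up to TCG_TARGET_STACK_ALIGN.
 */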
2482
2483static void tcg_target_qemu_prologue(TCGContext *s)
2484{
2485    int i;
2486
2487#ifdef _CALL_AIX
2488    const void **desc = (const void **)s->code_ptr;
2489    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2490    desc[1] = 0;                            /* environment pointer */
2491    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2492#endif
2493
2494    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2495                  CPU_TEMP_BUF_SIZE);
2496
2497    /* Prologue */
2498    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2499    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2500              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2501
2502    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2503        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2504                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2505    }
2506    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2507
2508#ifndef CONFIG_SOFTMMU
2509    if (guest_base) {
2510        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2511        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2512    }
2513#endif
2514
2515    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2516    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2517    if (USE_REG_TB) {
2518        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2519    }
2520    tcg_out32(s, BCCTR | BO_ALWAYS);
2521
2522    /* Epilogue */
2523    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2524
2525    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2526    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2527        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2528                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2529    }
2530    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2531    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2532    tcg_out32(s, BCLR | BO_ALWAYS);
2533}
2534
2535static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2536{
2537    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2538    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2539}
2540
2541static void tcg_out_goto_tb(TCGContext *s, int which)
2542{
2543    uintptr_t ptr = get_jmp_target_addr(s, which);
2544
2545    if (USE_REG_TB) {
2546        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
2547        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
2548
2549        /* Direct branch will be patched by tb_target_set_jmp_target. */
2550        set_jmp_insn_offset(s, which);
2551        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2552
2553        /* When branch is out of range, fall through to indirect. */
2554        tcg_out32(s, BCCTR | BO_ALWAYS);
2555
2556        /* For the unlinked case, need to reset TCG_REG_TB.  */
2557        set_jmp_reset_offset(s, which);
2558        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2559                         -tcg_current_code_size(s));
2560    } else {
2561        /* Direct branch will be patched by tb_target_set_jmp_target. */
2562        set_jmp_insn_offset(s, which);
2563        tcg_out32(s, NOP);
2564
2565        /* When branch is out of range, fall through to indirect. */
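        /* Split the absolute address: movi materializes ptr minus its
           sign-extended low 16 bits, which then serve as the displacement
           of the load, so the sum is exactly ptr. */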
2566        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
2567        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
2568        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2569        tcg_out32(s, BCCTR | BO_ALWAYS);
2570        set_jmp_reset_offset(s, which);
2571    }
2572}
2573
2574void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2575                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2576{
2577    uintptr_t addr = tb->jmp_target_addr[n];
2578    intptr_t diff = addr - jmp_rx;
2579    tcg_insn_unit insn;
2580
2581    if (in_range_b(diff)) {
2582        insn = B | (diff & 0x3fffffc);
2583    } else if (USE_REG_TB) {
2584        insn = MTSPR | RS(TCG_REG_TB) | CTR;
2585    } else {
2586        insn = NOP;
2587    }
2588
2589    qatomic_set((uint32_t *)jmp_rw, insn);
2590    flush_idcache_range(jmp_rx, jmp_rw, 4);
2591}
2592
2593static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2594                       const TCGArg args[TCG_MAX_OP_ARGS],
2595                       const int const_args[TCG_MAX_OP_ARGS])
2596{
2597    TCGArg a0, a1, a2;
2598
2599    switch (opc) {
2600    case INDEX_op_goto_ptr:
2601        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2602        if (USE_REG_TB) {
2603            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2604        }
2605        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2606        tcg_out32(s, BCCTR | BO_ALWAYS);
2607        break;
2608    case INDEX_op_br:
2609        {
2610            TCGLabel *l = arg_label(args[0]);
2611            uint32_t insn = B;
2612
2613            if (l->has_value) {
2614                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2615                                       l->u.value_ptr);
2616            } else {
2617                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2618            }
2619            tcg_out32(s, insn);
2620        }
2621        break;
2622    case INDEX_op_ld8u_i32:
2623    case INDEX_op_ld8u_i64:
2624        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2625        break;
2626    case INDEX_op_ld8s_i32:
2627    case INDEX_op_ld8s_i64:
2628        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2629        tcg_out_ext8s(s, args[0], args[0]);
2630        break;
2631    case INDEX_op_ld16u_i32:
2632    case INDEX_op_ld16u_i64:
2633        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2634        break;
2635    case INDEX_op_ld16s_i32:
2636    case INDEX_op_ld16s_i64:
2637        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2638        break;
2639    case INDEX_op_ld_i32:
2640    case INDEX_op_ld32u_i64:
2641        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2642        break;
2643    case INDEX_op_ld32s_i64:
2644        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2645        break;
2646    case INDEX_op_ld_i64:
2647        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2648        break;
2649    case INDEX_op_st8_i32:
2650    case INDEX_op_st8_i64:
2651        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2652        break;
2653    case INDEX_op_st16_i32:
2654    case INDEX_op_st16_i64:
2655        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2656        break;
2657    case INDEX_op_st_i32:
2658    case INDEX_op_st32_i64:
2659        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2660        break;
2661    case INDEX_op_st_i64:
2662        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2663        break;
2664
2665    case INDEX_op_add_i32:
2666        a0 = args[0], a1 = args[1], a2 = args[2];
2667        if (const_args[2]) {
2668        do_addi_32:
2669            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2670        } else {
2671            tcg_out32(s, ADD | TAB(a0, a1, a2));
2672        }
2673        break;
2674    case INDEX_op_sub_i32:
2675        a0 = args[0], a1 = args[1], a2 = args[2];
2676        if (const_args[1]) {
2677            if (const_args[2]) {
2678                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2679            } else {
2680                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2681            }
2682        } else if (const_args[2]) {
2683            a2 = -a2;
2684            goto do_addi_32;
2685        } else {
2686            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2687        }
2688        break;
2689
2690    case INDEX_op_and_i32:
2691        a0 = args[0], a1 = args[1], a2 = args[2];
2692        if (const_args[2]) {
2693            tcg_out_andi32(s, a0, a1, a2);
2694        } else {
2695            tcg_out32(s, AND | SAB(a1, a0, a2));
2696        }
2697        break;
2698    case INDEX_op_and_i64:
2699        a0 = args[0], a1 = args[1], a2 = args[2];
2700        if (const_args[2]) {
2701            tcg_out_andi64(s, a0, a1, a2);
2702        } else {
2703            tcg_out32(s, AND | SAB(a1, a0, a2));
2704        }
2705        break;
2706    case INDEX_op_or_i64:
2707    case INDEX_op_or_i32:
2708        a0 = args[0], a1 = args[1], a2 = args[2];
2709        if (const_args[2]) {
2710            tcg_out_ori32(s, a0, a1, a2);
2711        } else {
2712            tcg_out32(s, OR | SAB(a1, a0, a2));
2713        }
2714        break;
2715    case INDEX_op_xor_i64:
2716    case INDEX_op_xor_i32:
2717        a0 = args[0], a1 = args[1], a2 = args[2];
2718        if (const_args[2]) {
2719            tcg_out_xori32(s, a0, a1, a2);
2720        } else {
2721            tcg_out32(s, XOR | SAB(a1, a0, a2));
2722        }
2723        break;
2724    case INDEX_op_andc_i32:
2725        a0 = args[0], a1 = args[1], a2 = args[2];
2726        if (const_args[2]) {
2727            tcg_out_andi32(s, a0, a1, ~a2);
2728        } else {
2729            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2730        }
2731        break;
2732    case INDEX_op_andc_i64:
2733        a0 = args[0], a1 = args[1], a2 = args[2];
2734        if (const_args[2]) {
2735            tcg_out_andi64(s, a0, a1, ~a2);
2736        } else {
2737            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2738        }
2739        break;
2740    case INDEX_op_orc_i32:
2741        if (const_args[2]) {
2742            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2743            break;
2744        }
2745        /* FALLTHRU */
2746    case INDEX_op_orc_i64:
2747        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2748        break;
2749    case INDEX_op_eqv_i32:
2750        if (const_args[2]) {
2751            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2752            break;
2753        }
2754        /* FALLTHRU */
2755    case INDEX_op_eqv_i64:
2756        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2757        break;
2758    case INDEX_op_nand_i32:
2759    case INDEX_op_nand_i64:
2760        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2761        break;
2762    case INDEX_op_nor_i32:
2763    case INDEX_op_nor_i64:
2764        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2765        break;
2766
2767    case INDEX_op_clz_i32:
2768        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2769                      args[2], const_args[2]);
2770        break;
2771    case INDEX_op_ctz_i32:
2772        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2773                      args[2], const_args[2]);
2774        break;
2775    case INDEX_op_ctpop_i32:
2776        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2777        break;
2778
2779    case INDEX_op_clz_i64:
2780        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2781                      args[2], const_args[2]);
2782        break;
2783    case INDEX_op_ctz_i64:
2784        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2785                      args[2], const_args[2]);
2786        break;
2787    case INDEX_op_ctpop_i64:
2788        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2789        break;
2790
2791    case INDEX_op_mul_i32:
2792        a0 = args[0], a1 = args[1], a2 = args[2];
2793        if (const_args[2]) {
2794            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2795        } else {
2796            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2797        }
2798        break;
2799
2800    case INDEX_op_div_i32:
2801        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2802        break;
2803
2804    case INDEX_op_divu_i32:
2805        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2806        break;
2807
2808    case INDEX_op_rem_i32:
2809        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
2810        break;
2811
2812    case INDEX_op_remu_i32:
2813        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
2814        break;
2815
2816    case INDEX_op_shl_i32:
2817        if (const_args[2]) {
2818            /* Limit immediate shift count lest we create an illegal insn.  */
2819            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2820        } else {
2821            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2822        }
2823        break;
2824    case INDEX_op_shr_i32:
2825        if (const_args[2]) {
2826            /* Limit immediate shift count lest we create an illegal insn.  */
2827            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2828        } else {
2829            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2830        }
2831        break;
2832    case INDEX_op_sar_i32:
2833        if (const_args[2]) {
2834            tcg_out_sari32(s, args[0], args[1], args[2]);
2835        } else {
2836            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2837        }
2838        break;
2839    case INDEX_op_rotl_i32:
2840        if (const_args[2]) {
2841            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2842        } else {
2843            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2844                         | MB(0) | ME(31));
2845        }
2846        break;
2847    case INDEX_op_rotr_i32:
2848        if (const_args[2]) {
2849            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2850        } else {
2851            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2852            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2853                         | MB(0) | ME(31));
2854        }
2855        break;
2856
2857    case INDEX_op_brcond_i32:
2858        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2859                       arg_label(args[3]), TCG_TYPE_I32);
2860        break;
2861    case INDEX_op_brcond_i64:
2862        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2863                       arg_label(args[3]), TCG_TYPE_I64);
2864        break;
2865    case INDEX_op_brcond2_i32:
2866        tcg_out_brcond2(s, args, const_args);
2867        break;
2868
2869    case INDEX_op_neg_i32:
2870    case INDEX_op_neg_i64:
2871        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2872        break;
2873
2874    case INDEX_op_not_i32:
2875    case INDEX_op_not_i64:
2876        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2877        break;
2878
2879    case INDEX_op_add_i64:
2880        a0 = args[0], a1 = args[1], a2 = args[2];
2881        if (const_args[2]) {
2882        do_addi_64:
2883            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2884        } else {
2885            tcg_out32(s, ADD | TAB(a0, a1, a2));
2886        }
2887        break;
2888    case INDEX_op_sub_i64:
2889        a0 = args[0], a1 = args[1], a2 = args[2];
2890        if (const_args[1]) {
2891            if (const_args[2]) {
2892                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2893            } else {
2894                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2895            }
2896        } else if (const_args[2]) {
2897            a2 = -a2;
2898            goto do_addi_64;
2899        } else {
2900            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2901        }
2902        break;
2903
2904    case INDEX_op_shl_i64:
2905        if (const_args[2]) {
2906            /* Limit immediate shift count lest we create an illegal insn.  */
2907            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2908        } else {
2909            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2910        }
2911        break;
2912    case INDEX_op_shr_i64:
2913        if (const_args[2]) {
2914            /* Limit immediate shift count lest we create an illegal insn.  */
2915            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2916        } else {
2917            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2918        }
2919        break;
2920    case INDEX_op_sar_i64:
2921        if (const_args[2]) {
2922            tcg_out_sari64(s, args[0], args[1], args[2]);
2923        } else {
2924            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2925        }
2926        break;
2927    case INDEX_op_rotl_i64:
2928        if (const_args[2]) {
2929            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2930        } else {
2931            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2932        }
2933        break;
2934    case INDEX_op_rotr_i64:
2935        if (const_args[2]) {
2936            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2937        } else {
2938            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2939            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2940        }
2941        break;
2942
2943    case INDEX_op_mul_i64:
2944        a0 = args[0], a1 = args[1], a2 = args[2];
2945        if (const_args[2]) {
2946            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2947        } else {
2948            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2949        }
2950        break;
2951    case INDEX_op_div_i64:
2952        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2953        break;
2954    case INDEX_op_divu_i64:
2955        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2956        break;
2957    case INDEX_op_rem_i64:
2958        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
2959        break;
2960    case INDEX_op_remu_i64:
2961        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
2962        break;
2963
2964    case INDEX_op_qemu_ld_i32:
2965        tcg_out_qemu_ld(s, args, false);
2966        break;
2967    case INDEX_op_qemu_ld_i64:
2968        tcg_out_qemu_ld(s, args, true);
2969        break;
2970    case INDEX_op_qemu_st_i32:
2971        tcg_out_qemu_st(s, args, false);
2972        break;
2973    case INDEX_op_qemu_st_i64:
2974        tcg_out_qemu_st(s, args, true);
2975        break;
2976
2977    case INDEX_op_ext8s_i32:
2978    case INDEX_op_ext8s_i64:
2979        tcg_out_ext8s(s, args[0], args[1]);
2980        break;
2981    case INDEX_op_ext16s_i32:
2982    case INDEX_op_ext16s_i64:
2983        tcg_out_ext16s(s, args[0], args[1]);
2984        break;
2985    case INDEX_op_ext_i32_i64:
2986    case INDEX_op_ext32s_i64:
2987        tcg_out_ext32s(s, args[0], args[1]);
2988        break;
2989    case INDEX_op_extu_i32_i64:
2990        tcg_out_ext32u(s, args[0], args[1]);
2991        break;
2992
2993    case INDEX_op_setcond_i32:
2994        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2995                        const_args[2]);
2996        break;
2997    case INDEX_op_setcond_i64:
2998        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2999                        const_args[2]);
3000        break;
3001    case INDEX_op_setcond2_i32:
3002        tcg_out_setcond2(s, args, const_args);
3003        break;
3004
3005    case INDEX_op_bswap16_i32:
3006    case INDEX_op_bswap16_i64:
3007        tcg_out_bswap16(s, args[0], args[1], args[2]);
3008        break;
3009    case INDEX_op_bswap32_i32:
3010        tcg_out_bswap32(s, args[0], args[1], 0);
3011        break;
3012    case INDEX_op_bswap32_i64:
3013        tcg_out_bswap32(s, args[0], args[1], args[2]);
3014        break;
3015    case INDEX_op_bswap64_i64:
3016        tcg_out_bswap64(s, args[0], args[1]);
3017        break;
3018
3019    case INDEX_op_deposit_i32:
3020        if (const_args[2]) {
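            /* Only a constant zero reaches here (the code relies on it), so
               the deposit reduces to clearing the field; e.g. len 8 at
               pos 16 yields mask 0x00ff0000. */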
3021            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3022            tcg_out_andi32(s, args[0], args[0], ~mask);
3023        } else {
3024            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3025                        32 - args[3] - args[4], 31 - args[3]);
3026        }
3027        break;
3028    case INDEX_op_deposit_i64:
3029        if (const_args[2]) {
3030            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3031            tcg_out_andi64(s, args[0], args[0], ~mask);
3032        } else {
3033            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3034                        64 - args[3] - args[4]);
3035        }
3036        break;
3037
3038    case INDEX_op_extract_i32:
3039        tcg_out_rlw(s, RLWINM, args[0], args[1],
3040                    32 - args[2], 32 - args[3], 31);
3041        break;
3042    case INDEX_op_extract_i64:
3043        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3044        break;
3045
3046    case INDEX_op_movcond_i32:
3047        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3048                        args[3], args[4], const_args[2]);
3049        break;
3050    case INDEX_op_movcond_i64:
3051        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3052                        args[3], args[4], const_args[2]);
3053        break;
3054
3055#if TCG_TARGET_REG_BITS == 64
3056    case INDEX_op_add2_i64:
3057#else
3058    case INDEX_op_add2_i32:
3059#endif
3060        /* Note that the CA bit is defined based on the word size of the
3061           environment.  So in 64-bit mode it's always carry-out of bit 63.
3062           The fallback code using deposit works just as well for 32-bit.  */
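        /* addc/addic produce the low word and set CA; adde folds CA into
           the high word.  A constant high input can only be 0 or -1, handled
           by addze (rA + CA) and addme (rA + CA - 1) respectively. */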
3063        a0 = args[0], a1 = args[1];
3064        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3065            a0 = TCG_REG_R0;
3066        }
3067        if (const_args[4]) {
3068            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3069        } else {
3070            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3071        }
3072        if (const_args[5]) {
3073            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3074        } else {
3075            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3076        }
3077        if (a0 != args[0]) {
3078            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3079        }
3080        break;
3081
3082#if TCG_TARGET_REG_BITS == 64
3083    case INDEX_op_sub2_i64:
3084#else
3085    case INDEX_op_sub2_i32:
3086#endif
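        /* subfc/subfic compute args[2] - args[4] and set CA to not-borrow;
           subfe folds it into the high word.  A constant high minuend can
           only be 0 or -1, handled by subfze and subfme respectively. */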
3087        a0 = args[0], a1 = args[1];
3088        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3089            a0 = TCG_REG_R0;
3090        }
3091        if (const_args[2]) {
3092            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3093        } else {
3094            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3095        }
3096        if (const_args[3]) {
3097            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3098        } else {
3099            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3100        }
3101        if (a0 != args[0]) {
3102            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3103        }
3104        break;
3105
3106    case INDEX_op_muluh_i32:
3107        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3108        break;
3109    case INDEX_op_mulsh_i32:
3110        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3111        break;
3112    case INDEX_op_muluh_i64:
3113        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3114        break;
3115    case INDEX_op_mulsh_i64:
3116        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3117        break;
3118
3119    case INDEX_op_mb:
3120        tcg_out_mb(s, args[0]);
3121        break;
3122
3123    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3124    case INDEX_op_mov_i64:
3125    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3126    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3127    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3128    default:
3129        tcg_abort();
3130    }
3131}
3132
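/*
 * Return 1 if the operation is supported natively for this element size,
 * -1 if it can be implemented by expansion in tcg_expand_vec_op, and
 * 0 if it is not supported at all.
 */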
3133int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3134{
3135    switch (opc) {
3136    case INDEX_op_and_vec:
3137    case INDEX_op_or_vec:
3138    case INDEX_op_xor_vec:
3139    case INDEX_op_andc_vec:
3140    case INDEX_op_not_vec:
3141    case INDEX_op_nor_vec:
3142    case INDEX_op_eqv_vec:
3143    case INDEX_op_nand_vec:
3144        return 1;
3145    case INDEX_op_orc_vec:
3146        return have_isa_2_07;
3147    case INDEX_op_add_vec:
3148    case INDEX_op_sub_vec:
3149    case INDEX_op_smax_vec:
3150    case INDEX_op_smin_vec:
3151    case INDEX_op_umax_vec:
3152    case INDEX_op_umin_vec:
3153    case INDEX_op_shlv_vec:
3154    case INDEX_op_shrv_vec:
3155    case INDEX_op_sarv_vec:
3156    case INDEX_op_rotlv_vec:
3157        return vece <= MO_32 || have_isa_2_07;
3158    case INDEX_op_ssadd_vec:
3159    case INDEX_op_sssub_vec:
3160    case INDEX_op_usadd_vec:
3161    case INDEX_op_ussub_vec:
3162        return vece <= MO_32;
3163    case INDEX_op_cmp_vec:
3164    case INDEX_op_shli_vec:
3165    case INDEX_op_shri_vec:
3166    case INDEX_op_sari_vec:
3167    case INDEX_op_rotli_vec:
3168        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3169    case INDEX_op_neg_vec:
3170        return vece >= MO_32 && have_isa_3_00;
3171    case INDEX_op_mul_vec:
3172        switch (vece) {
3173        case MO_8:
3174        case MO_16:
3175            return -1;
3176        case MO_32:
3177            return have_isa_2_07 ? 1 : -1;
3178        case MO_64:
3179            return have_isa_3_10;
3180        }
3181        return 0;
3182    case INDEX_op_bitsel_vec:
3183        return have_vsx;
3184    case INDEX_op_rotrv_vec:
3185        return -1;
3186    default:
3187        return 0;
3188    }
3189}
3190
3191static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3192                            TCGReg dst, TCGReg src)
3193{
3194    tcg_debug_assert(dst >= TCG_REG_V0);
3195
3196    /* Splat from integer reg allowed via constraints for v3.00.  */
3197    if (src < TCG_REG_V0) {
3198        tcg_debug_assert(have_isa_3_00);
3199        switch (vece) {
3200        case MO_64:
3201            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3202            return true;
3203        case MO_32:
3204            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3205            return true;
3206        default:
3207            /* Fail, so that we fall back on either dupm or mov+dup.  */
3208            return false;
3209        }
3210    }
3211
3212    /*
3213     * Recall we use (or emulate) VSX integer loads, so the integer is
3214     * right justified within the left (zero-index) double-word.
3215     */
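    /* Hence the splat element numbers below: the value sits in byte 7,
       halfword 3, or word 1 of that doubleword (big-endian numbering). */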
3216    switch (vece) {
3217    case MO_8:
3218        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3219        break;
3220    case MO_16:
3221        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3222        break;
3223    case MO_32:
3224        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3225        break;
3226    case MO_64:
3227        if (have_vsx) {
3228            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3229            break;
3230        }
3231        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3232        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3233        break;
3234    default:
3235        g_assert_not_reached();
3236    }
3237    return true;
3238}
3239
3240static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3241                             TCGReg out, TCGReg base, intptr_t offset)
3242{
3243    int elt;
3244
3245    tcg_debug_assert(out >= TCG_REG_V0);
3246    switch (vece) {
3247    case MO_8:
3248        if (have_isa_3_00) {
3249            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3250        } else {
3251            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3252        }
3253        elt = extract32(offset, 0, 4);
3254#if !HOST_BIG_ENDIAN
3255        elt ^= 15;
3256#endif
3257        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3258        break;
3259    case MO_16:
3260        tcg_debug_assert((offset & 1) == 0);
3261        if (have_isa_3_00) {
3262            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3263        } else {
3264            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3265        }
3266        elt = extract32(offset, 1, 3);
3267#if !HOST_BIG_ENDIAN
3268        elt ^= 7;
3269#endif
3270        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3271        break;
3272    case MO_32:
3273        if (have_isa_3_00) {
3274            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3275            break;
3276        }
3277        tcg_debug_assert((offset & 3) == 0);
3278        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3279        elt = extract32(offset, 2, 2);
3280#if !HOST_BIG_ENDIAN
3281        elt ^= 3;
3282#endif
3283        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3284        break;
3285    case MO_64:
3286        if (have_vsx) {
3287            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3288            break;
3289        }
3290        tcg_debug_assert((offset & 7) == 0);
3291        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3292        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3293        elt = extract32(offset, 3, 1);
3294#if !HOST_BIG_ENDIAN
3295        elt = !elt;
3296#endif
3297        if (elt) {
3298            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3299        } else {
3300            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3301        }
3302        break;
3303    default:
3304        g_assert_not_reached();
3305    }
3306    return true;
3307}
3308
3309static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3310                           unsigned vecl, unsigned vece,
3311                           const TCGArg args[TCG_MAX_OP_ARGS],
3312                           const int const_args[TCG_MAX_OP_ARGS])
3313{
3314    static const uint32_t
3315        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3316        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3317        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3318        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3319        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3320        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3321        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3322        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3323        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3324        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3325        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3326        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3327        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3328        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3329        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3330        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3331        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3332        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3333        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3334        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3335        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3336        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3337        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3338        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3339        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3340
3341    TCGType type = vecl + TCG_TYPE_V64;
3342    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3343    uint32_t insn;
3344
3345    switch (opc) {
3346    case INDEX_op_ld_vec:
3347        tcg_out_ld(s, type, a0, a1, a2);
3348        return;
3349    case INDEX_op_st_vec:
3350        tcg_out_st(s, type, a0, a1, a2);
3351        return;
3352    case INDEX_op_dupm_vec:
3353        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3354        return;
3355
3356    case INDEX_op_add_vec:
3357        insn = add_op[vece];
3358        break;
3359    case INDEX_op_sub_vec:
3360        insn = sub_op[vece];
3361        break;
3362    case INDEX_op_neg_vec:
3363        insn = neg_op[vece];
3364        a2 = a1;
3365        a1 = 0;
3366        break;
3367    case INDEX_op_mul_vec:
3368        insn = mul_op[vece];
3369        break;
3370    case INDEX_op_ssadd_vec:
3371        insn = ssadd_op[vece];
3372        break;
3373    case INDEX_op_sssub_vec:
3374        insn = sssub_op[vece];
3375        break;
3376    case INDEX_op_usadd_vec:
3377        insn = usadd_op[vece];
3378        break;
3379    case INDEX_op_ussub_vec:
3380        insn = ussub_op[vece];
3381        break;
3382    case INDEX_op_smin_vec:
3383        insn = smin_op[vece];
3384        break;
3385    case INDEX_op_umin_vec:
3386        insn = umin_op[vece];
3387        break;
3388    case INDEX_op_smax_vec:
3389        insn = smax_op[vece];
3390        break;
3391    case INDEX_op_umax_vec:
3392        insn = umax_op[vece];
3393        break;
3394    case INDEX_op_shlv_vec:
3395        insn = shlv_op[vece];
3396        break;
3397    case INDEX_op_shrv_vec:
3398        insn = shrv_op[vece];
3399        break;
3400    case INDEX_op_sarv_vec:
3401        insn = sarv_op[vece];
3402        break;
3403    case INDEX_op_and_vec:
3404        insn = VAND;
3405        break;
3406    case INDEX_op_or_vec:
3407        insn = VOR;
3408        break;
3409    case INDEX_op_xor_vec:
3410        insn = VXOR;
3411        break;
3412    case INDEX_op_andc_vec:
3413        insn = VANDC;
3414        break;
3415    case INDEX_op_not_vec:
3416        insn = VNOR;
3417        a2 = a1;
3418        break;
3419    case INDEX_op_orc_vec:
3420        insn = VORC;
3421        break;
3422    case INDEX_op_nand_vec:
3423        insn = VNAND;
3424        break;
3425    case INDEX_op_nor_vec:
3426        insn = VNOR;
3427        break;
3428    case INDEX_op_eqv_vec:
3429        insn = VEQV;
3430        break;
3431
3432    case INDEX_op_cmp_vec:
3433        switch (args[3]) {
3434        case TCG_COND_EQ:
3435            insn = eq_op[vece];
3436            break;
3437        case TCG_COND_NE:
3438            insn = ne_op[vece];
3439            break;
3440        case TCG_COND_GT:
3441            insn = gts_op[vece];
3442            break;
3443        case TCG_COND_GTU:
3444            insn = gtu_op[vece];
3445            break;
3446        default:
3447            g_assert_not_reached();
3448        }
3449        break;
3450
3451    case INDEX_op_bitsel_vec:
3452        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3453        return;
3454
3455    case INDEX_op_dup2_vec:
3456        assert(TCG_TARGET_REG_BITS == 32);
3457        /* With inputs a1 = xLxx, a2 = xHxx  */
3458        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3459        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3460        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3461        return;
3462
3463    case INDEX_op_ppc_mrgh_vec:
3464        insn = mrgh_op[vece];
3465        break;
3466    case INDEX_op_ppc_mrgl_vec:
3467        insn = mrgl_op[vece];
3468        break;
3469    case INDEX_op_ppc_muleu_vec:
3470        insn = muleu_op[vece];
3471        break;
3472    case INDEX_op_ppc_mulou_vec:
3473        insn = mulou_op[vece];
3474        break;
3475    case INDEX_op_ppc_pkum_vec:
3476        insn = pkum_op[vece];
3477        break;
3478    case INDEX_op_rotlv_vec:
3479        insn = rotl_op[vece];
3480        break;
3481    case INDEX_op_ppc_msum_vec:
3482        tcg_debug_assert(vece == MO_16);
3483        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3484        return;
3485
3486    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3487    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3488    default:
3489        g_assert_not_reached();
3490    }
3491
3492    tcg_debug_assert(insn != 0);
3493    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3494}
3495
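/*
 * Expand an immediate shift or rotate into the variable-count form, with
 * the count splatted into a vector constant.  Only the low
 * log2(element bits) bits of each count element are used by the hardware,
 * which is why e.g. a MO_32 count of 24 may be encoded as the vspltisb
 * immediate -8: -8 and 24 share the same low five bits.
 */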
3496static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3497                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3498{
3499    TCGv_vec t1;
3500
3501    if (vece == MO_32) {
3502        /*
3503         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3504         * So using negative numbers gets us the 4th bit easily.
3505         */
3506        imm = sextract32(imm, 0, 5);
3507    } else {
3508        imm &= (8 << vece) - 1;
3509    }
3510
3511    /* Splat the count as bytes (cf. xxspltib); only the low bits of each element matter, even for MO_64 once 2.07 allows it. */
3512    t1 = tcg_constant_vec(type, MO_8, imm);
3513    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3514              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3515}
3516
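/*
 * Expand a vector comparison.  Only EQ, GT and GTU exist directly in
 * hardware (plus NE for element sizes up to MO_32 on ISA 3.00); the other
 * conditions are derived by swapping the operands and/or inverting the
 * result, as recorded by need_swap/need_inv below.
 */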
3517static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3518                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3519{
3520    bool need_swap = false, need_inv = false;
3521
3522    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3523
3524    switch (cond) {
3525    case TCG_COND_EQ:
3526    case TCG_COND_GT:
3527    case TCG_COND_GTU:
3528        break;
3529    case TCG_COND_NE:
3530        if (have_isa_3_00 && vece <= MO_32) {
3531            break;
3532        }
3533        /* fall through */
3534    case TCG_COND_LE:
3535    case TCG_COND_LEU:
3536        need_inv = true;
3537        break;
3538    case TCG_COND_LT:
3539    case TCG_COND_LTU:
3540        need_swap = true;
3541        break;
3542    case TCG_COND_GE:
3543    case TCG_COND_GEU:
3544        need_swap = need_inv = true;
3545        break;
3546    default:
3547        g_assert_not_reached();
3548    }
3549
3550    if (need_inv) {
3551        cond = tcg_invert_cond(cond);
3552    }
3553    if (need_swap) {
3554        TCGv_vec t1;
3555        t1 = v1, v1 = v2, v2 = t1;
3556        cond = tcg_swap_cond(cond);
3557    }
3558
3559    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3560              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3561
3562    if (need_inv) {
3563        tcg_gen_not_vec(vece, v0, v0);
3564    }
3565}
3566
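/*
 * Expand a vector multiply that has no single instruction.  For MO_8 and
 * MO_16, multiply even and odd elements into double-width products, then
 * merge and pack the low halves back to the original element size.  For
 * MO_32 (pre-2.07, so no vmuluwm), with each element split as
 * v1 = a_hi:a_lo and v2 = b_hi:b_lo, compute
 *     v0 = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 16)
 * using vmulouh for the low product and vmsumuhm on v2 rotated by 16 bits
 * for the summed cross products.
 */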
3567static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3568                           TCGv_vec v1, TCGv_vec v2)
3569{
3570    TCGv_vec t1 = tcg_temp_new_vec(type);
3571    TCGv_vec t2 = tcg_temp_new_vec(type);
3572    TCGv_vec c0, c16;
3573
3574    switch (vece) {
3575    case MO_8:
3576    case MO_16:
3577        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3578                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3579        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3580                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3581        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3582                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3583        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3584                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3585        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3586                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3587	break;
3588        break;
3589    case MO_32:
3590        tcg_debug_assert(!have_isa_2_07);
3591        /*
3592         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3593         * So using -16 is a quick way to represent 16.
3594         */
3595        c16 = tcg_constant_vec(type, MO_8, -16);
3596        c0 = tcg_constant_vec(type, MO_8, 0);
3597
3598        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3599                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3600        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3601                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3602        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3603                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3604        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3605                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3606        tcg_gen_add_vec(MO_32, v0, t1, t2);
3607        break;
3608
3609    default:
3610        g_assert_not_reached();
3611    }
3612    tcg_temp_free_vec(t1);
3613    tcg_temp_free_vec(t2);
3614}
3615
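/*
 * Expand the vector opcodes that the backend advertises but does not emit
 * directly: immediate shifts and rotates become their variable-count
 * forms, comparisons and multiplies go through the helpers above, and
 * rotate-right becomes rotate-left of the negated count.
 */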
3616void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3617                       TCGArg a0, ...)
3618{
3619    va_list va;
3620    TCGv_vec v0, v1, v2, t0;
3621    TCGArg a2;
3622
3623    va_start(va, a0);
3624    v0 = temp_tcgv_vec(arg_temp(a0));
3625    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3626    a2 = va_arg(va, TCGArg);
3627
3628    switch (opc) {
3629    case INDEX_op_shli_vec:
3630        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3631        break;
3632    case INDEX_op_shri_vec:
3633        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3634        break;
3635    case INDEX_op_sari_vec:
3636        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3637        break;
3638    case INDEX_op_rotli_vec:
3639        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3640        break;
3641    case INDEX_op_cmp_vec:
3642        v2 = temp_tcgv_vec(arg_temp(a2));
3643        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3644        break;
3645    case INDEX_op_mul_vec:
3646        v2 = temp_tcgv_vec(arg_temp(a2));
3647        expand_vec_mul(type, vece, v0, v1, v2);
3648        break;
3649    case INDEX_op_rotrv_vec:
3650        v2 = temp_tcgv_vec(arg_temp(a2));
3651        t0 = tcg_temp_new_vec(type);
3652        tcg_gen_neg_vec(vece, t0, v2);
3653        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3654        tcg_temp_free_vec(t0);
3655        break;
3656    default:
3657        g_assert_not_reached();
3658    }
3659    va_end(va);
3660}
3661
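/*
 * Return the constraint set for each opcode.  The constraint letters are
 * defined in tcg-target-con-str.h for this backend: 'r' is any general
 * register, 'v' any vector register, lowercase 'i' any constant, 'L'/'S'
 * the restricted register sets for qemu_ld/qemu_st, and the remaining
 * uppercase letters select the TCG_CT_CONST_* constant classes declared
 * near the top of this file (zero, minus one, 16/32-bit immediates,
 * word size).
 */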
3662static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3663{
3664    switch (op) {
3665    case INDEX_op_goto_ptr:
3666        return C_O0_I1(r);
3667
3668    case INDEX_op_ld8u_i32:
3669    case INDEX_op_ld8s_i32:
3670    case INDEX_op_ld16u_i32:
3671    case INDEX_op_ld16s_i32:
3672    case INDEX_op_ld_i32:
3673    case INDEX_op_ctpop_i32:
3674    case INDEX_op_neg_i32:
3675    case INDEX_op_not_i32:
3676    case INDEX_op_ext8s_i32:
3677    case INDEX_op_ext16s_i32:
3678    case INDEX_op_bswap16_i32:
3679    case INDEX_op_bswap32_i32:
3680    case INDEX_op_extract_i32:
3681    case INDEX_op_ld8u_i64:
3682    case INDEX_op_ld8s_i64:
3683    case INDEX_op_ld16u_i64:
3684    case INDEX_op_ld16s_i64:
3685    case INDEX_op_ld32u_i64:
3686    case INDEX_op_ld32s_i64:
3687    case INDEX_op_ld_i64:
3688    case INDEX_op_ctpop_i64:
3689    case INDEX_op_neg_i64:
3690    case INDEX_op_not_i64:
3691    case INDEX_op_ext8s_i64:
3692    case INDEX_op_ext16s_i64:
3693    case INDEX_op_ext32s_i64:
3694    case INDEX_op_ext_i32_i64:
3695    case INDEX_op_extu_i32_i64:
3696    case INDEX_op_bswap16_i64:
3697    case INDEX_op_bswap32_i64:
3698    case INDEX_op_bswap64_i64:
3699    case INDEX_op_extract_i64:
3700        return C_O1_I1(r, r);
3701
3702    case INDEX_op_st8_i32:
3703    case INDEX_op_st16_i32:
3704    case INDEX_op_st_i32:
3705    case INDEX_op_st8_i64:
3706    case INDEX_op_st16_i64:
3707    case INDEX_op_st32_i64:
3708    case INDEX_op_st_i64:
3709        return C_O0_I2(r, r);
3710
3711    case INDEX_op_add_i32:
3712    case INDEX_op_and_i32:
3713    case INDEX_op_or_i32:
3714    case INDEX_op_xor_i32:
3715    case INDEX_op_andc_i32:
3716    case INDEX_op_orc_i32:
3717    case INDEX_op_eqv_i32:
3718    case INDEX_op_shl_i32:
3719    case INDEX_op_shr_i32:
3720    case INDEX_op_sar_i32:
3721    case INDEX_op_rotl_i32:
3722    case INDEX_op_rotr_i32:
3723    case INDEX_op_setcond_i32:
3724    case INDEX_op_and_i64:
3725    case INDEX_op_andc_i64:
3726    case INDEX_op_shl_i64:
3727    case INDEX_op_shr_i64:
3728    case INDEX_op_sar_i64:
3729    case INDEX_op_rotl_i64:
3730    case INDEX_op_rotr_i64:
3731    case INDEX_op_setcond_i64:
3732        return C_O1_I2(r, r, ri);
3733
3734    case INDEX_op_mul_i32:
3735    case INDEX_op_mul_i64:
3736        return C_O1_I2(r, r, rI);
3737
3738    case INDEX_op_div_i32:
3739    case INDEX_op_divu_i32:
3740    case INDEX_op_rem_i32:
3741    case INDEX_op_remu_i32:
3742    case INDEX_op_nand_i32:
3743    case INDEX_op_nor_i32:
3744    case INDEX_op_muluh_i32:
3745    case INDEX_op_mulsh_i32:
3746    case INDEX_op_orc_i64:
3747    case INDEX_op_eqv_i64:
3748    case INDEX_op_nand_i64:
3749    case INDEX_op_nor_i64:
3750    case INDEX_op_div_i64:
3751    case INDEX_op_divu_i64:
3752    case INDEX_op_rem_i64:
3753    case INDEX_op_remu_i64:
3754    case INDEX_op_mulsh_i64:
3755    case INDEX_op_muluh_i64:
3756        return C_O1_I2(r, r, r);
3757
3758    case INDEX_op_sub_i32:
3759        return C_O1_I2(r, rI, ri);
3760    case INDEX_op_add_i64:
3761        return C_O1_I2(r, r, rT);
3762    case INDEX_op_or_i64:
3763    case INDEX_op_xor_i64:
3764        return C_O1_I2(r, r, rU);
3765    case INDEX_op_sub_i64:
3766        return C_O1_I2(r, rI, rT);
3767    case INDEX_op_clz_i32:
3768    case INDEX_op_ctz_i32:
3769    case INDEX_op_clz_i64:
3770    case INDEX_op_ctz_i64:
3771        return C_O1_I2(r, r, rZW);
3772
3773    case INDEX_op_brcond_i32:
3774    case INDEX_op_brcond_i64:
3775        return C_O0_I2(r, ri);
3776
3777    case INDEX_op_movcond_i32:
3778    case INDEX_op_movcond_i64:
3779        return C_O1_I4(r, r, ri, rZ, rZ);
3780    case INDEX_op_deposit_i32:
3781    case INDEX_op_deposit_i64:
3782        return C_O1_I2(r, 0, rZ);
3783    case INDEX_op_brcond2_i32:
3784        return C_O0_I4(r, r, ri, ri);
3785    case INDEX_op_setcond2_i32:
3786        return C_O1_I4(r, r, r, ri, ri);
3787    case INDEX_op_add2_i64:
3788    case INDEX_op_add2_i32:
3789        return C_O2_I4(r, r, r, r, rI, rZM);
3790    case INDEX_op_sub2_i64:
3791    case INDEX_op_sub2_i32:
3792        return C_O2_I4(r, r, rI, rZM, r, r);
3793
3794    case INDEX_op_qemu_ld_i32:
3795        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3796                ? C_O1_I1(r, L)
3797                : C_O1_I2(r, L, L));
3798
3799    case INDEX_op_qemu_st_i32:
3800        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3801                ? C_O0_I2(S, S)
3802                : C_O0_I3(S, S, S));
3803
3804    case INDEX_op_qemu_ld_i64:
3805        return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
3806                : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
3807                : C_O2_I2(L, L, L, L));
3808
3809    case INDEX_op_qemu_st_i64:
3810        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
3811                : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
3812                : C_O0_I4(S, S, S, S));
3813
3814    case INDEX_op_add_vec:
3815    case INDEX_op_sub_vec:
3816    case INDEX_op_mul_vec:
3817    case INDEX_op_and_vec:
3818    case INDEX_op_or_vec:
3819    case INDEX_op_xor_vec:
3820    case INDEX_op_andc_vec:
3821    case INDEX_op_orc_vec:
3822    case INDEX_op_nor_vec:
3823    case INDEX_op_eqv_vec:
3824    case INDEX_op_nand_vec:
3825    case INDEX_op_cmp_vec:
3826    case INDEX_op_ssadd_vec:
3827    case INDEX_op_sssub_vec:
3828    case INDEX_op_usadd_vec:
3829    case INDEX_op_ussub_vec:
3830    case INDEX_op_smax_vec:
3831    case INDEX_op_smin_vec:
3832    case INDEX_op_umax_vec:
3833    case INDEX_op_umin_vec:
3834    case INDEX_op_shlv_vec:
3835    case INDEX_op_shrv_vec:
3836    case INDEX_op_sarv_vec:
3837    case INDEX_op_rotlv_vec:
3838    case INDEX_op_rotrv_vec:
3839    case INDEX_op_ppc_mrgh_vec:
3840    case INDEX_op_ppc_mrgl_vec:
3841    case INDEX_op_ppc_muleu_vec:
3842    case INDEX_op_ppc_mulou_vec:
3843    case INDEX_op_ppc_pkum_vec:
3844    case INDEX_op_dup2_vec:
3845        return C_O1_I2(v, v, v);
3846
3847    case INDEX_op_not_vec:
3848    case INDEX_op_neg_vec:
3849        return C_O1_I1(v, v);
3850
3851    case INDEX_op_dup_vec:
3852        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3853
3854    case INDEX_op_ld_vec:
3855    case INDEX_op_dupm_vec:
3856        return C_O1_I1(v, r);
3857
3858    case INDEX_op_st_vec:
3859        return C_O0_I2(v, r);
3860
3861    case INDEX_op_bitsel_vec:
3862    case INDEX_op_ppc_msum_vec:
3863        return C_O1_I3(v, v, v, v);
3864
3865    default:
3866        g_assert_not_reached();
3867    }
3868}
3869
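/*
 * Probe AT_HWCAP/AT_HWCAP2 for the highest supported ISA level and for
 * the Altivec/VSX and isel features, then describe the register
 * conventions: which registers the allocator may use, which are
 * clobbered across calls, and which are reserved outright.
 */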
3870static void tcg_target_init(TCGContext *s)
3871{
3872    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3873    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3874
3875    have_isa = tcg_isa_base;
3876    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3877        have_isa = tcg_isa_2_06;
3878    }
3879#ifdef PPC_FEATURE2_ARCH_2_07
3880    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3881        have_isa = tcg_isa_2_07;
3882    }
3883#endif
3884#ifdef PPC_FEATURE2_ARCH_3_00
3885    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3886        have_isa = tcg_isa_3_00;
3887    }
3888#endif
3889#ifdef PPC_FEATURE2_ARCH_3_10
3890    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3891        have_isa = tcg_isa_3_10;
3892    }
3893#endif
3894
3895#ifdef PPC_FEATURE2_HAS_ISEL
3896    /* Prefer explicit instruction from the kernel. */
3897    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3898#else
3899    /* Fall back to knowing Power7 (2.06) has ISEL. */
3900    have_isel = have_isa_2_06;
3901#endif
3902
3903    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3904        have_altivec = true;
3905        /* We only care about the portion of VSX that overlaps Altivec. */
3906        if (hwcap & PPC_FEATURE_HAS_VSX) {
3907            have_vsx = true;
3908        }
3909    }
3910
3911    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3912    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3913    if (have_altivec) {
3914        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3915        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3916    }
3917
3918    tcg_target_call_clobber_regs = 0;
3919    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3920    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3921    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3922    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3923    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3924    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3925    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3926    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3927    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3928    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3929    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3930    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3931
3932    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3933    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3934    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3935    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3936    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3937    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3938    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3939    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3940    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3941    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3942    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3943    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3944    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3945    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3946    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3947    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3948    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3949    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3950    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3951    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3952
3953    s->reserved_regs = 0;
3954    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3955    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3956#if defined(_CALL_SYSV)
3957    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3958#endif
3959#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3960    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3961#endif
3962    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3963    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3964    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3965    if (USE_REG_TB) {
3966        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3967    }
3968}
3969
3970#ifdef __ELF__
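/*
 * Minimal DWARF unwind information for the generated code, registered
 * with the debugger through tcg_register_jit_int().  The CIE/FDE layout
 * is fixed here; tcg_register_jit() below fills in the per-register save
 * slots (each a DW_CFA_offset: 0x80 + regno, then the slot offset scaled
 * by the data alignment factor) and the code bounds.
 */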
3971typedef struct {
3972    DebugFrameCIE cie;
3973    DebugFrameFDEHeader fde;
3974    uint8_t fde_def_cfa[4];
3975    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3976} DebugFrame;
3977
3978/* We're expecting a 2 byte uleb128 encoded value.  */
3979QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3980
3981#if TCG_TARGET_REG_BITS == 64
3982# define ELF_HOST_MACHINE EM_PPC64
3983#else
3984# define ELF_HOST_MACHINE EM_PPC
3985#endif
3986
3987static DebugFrame debug_frame = {
3988    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3989    .cie.id = -1,
3990    .cie.version = 1,
3991    .cie.code_align = 1,
3992    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3993    .cie.return_column = 65,
3994
3995    /* Total FDE size does not include the "len" member.  */
3996    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3997
3998    .fde_def_cfa = {
3999        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
4000        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4001        (FRAME_SIZE >> 7)
4002    },
4003    .fde_reg_ofs = {
4004        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4005        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
4006    }
4007};
4008
4009void tcg_register_jit(const void *buf, size_t buf_size)
4010{
4011    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4012    int i;
4013
4014    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4015        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4016        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4017    }
4018
4019    debug_frame.fde.func_start = (uintptr_t)buf;
4020    debug_frame.fde.func_len = buf_size;
4021
4022    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4023}
4024#endif /* __ELF__ */
4032