/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
 */
#if !defined(_CALL_SYSV) && \
    !defined(_CALL_DARWIN) && \
    !defined(_CALL_AIX) && \
    !defined(_CALL_ELF)
# if defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLOAD_REGS \
    (ALL_GENERAL_REGS & \
     ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
#define ALL_QSTORE_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
                          (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
#else
#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
#define ALL_QSTORE_REGS ALL_QLOAD_REGS
#endif

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

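/*
 * Worked example (illustrative, added commentary): ADD below is
 * XO31(266) = (31 << 26) | (266 << 1) = 0x7c000214, so "add r3,r4,r5"
 * assembles as 0x7c000214 with the register fields 3, 4 and 5 placed
 * at bits 21, 16 and 11, i.e. 0x7c642a14 -- primary opcode 31,
 * extended opcode 266, matching the ISA encoding.
 */
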
#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
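
/*
 * Worked example (illustrative, added commentary): the TCG_COND_EQ
 * entry above is BC | BI(7, CR_EQ) | BO_COND_TRUE
 *   = (16 << 26) | ((2 + 7 * 4) << 16) | (12 << 21) = 0x41be0000,
 * i.e. "bc 12,30,target": branch if CR bit 30 (the EQ bit of cr7) is
 * set, which is "beq cr7,target".  The 14-bit displacement is filled
 * in later by reloc_pc14.
 */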

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
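        /*
         * Worked example (illustrative, added commentary): for
         * value = 0x12348765, lo = (int16_t)0x8765 = -30875 and
         * hi = value - lo = 0x12350000.  The lis receives 0x1235 and
         * the load uses displacement -30875; 0x12350000 - 30875
         * = 0x12348765, compensating for the sign-extension of the
         * 16-bit displacement.
         */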
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
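    /*
     * Illustrative case (added commentary): arg = 0x100000000 has
     * ctz64(arg) = 32 and arg >> 32 = 1, so it is emitted as
     * "addi ret,0,1" followed by a left shift by 32 via tcg_out_shli64.
     */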
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
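    /*
     * Worked example (illustrative, added commentary): c = 0x00ffff00
     * gives lsb = 0x100 and test = 0x01000000, a power of two, so the
     * mask is accepted with *mb = 8 and *me = 23 (PowerPC numbers mask
     * bits from the most-significant end).
     */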
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
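    /*
     * Worked example (illustrative, added commentary): offset =
     * 0x7fff8000 splits into l0 = -0x8000 and l1 = (int16_t)0x8000 < 0,
     * so extra = 0x4000 and l1 becomes 0x4000; two ADDIS by 0x4000 plus
     * the -0x8000 displacement rebuild 0x40000000 + 0x40000000 - 0x8000.
     */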
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  Extra addition
       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
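    /*
     * Concrete check (added commentary): for X = 3, ADDIC yields 2
     * with CA = 1 and SUBFE computes ~2 + 3 + 1 = 1; for X = 0, ADDIC
     * yields -1 with CA = 0 and SUBFE computes ~(-1) + 0 + 0 = 0.
     */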
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
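    /*
     * For example (added commentary), setcond dst = (a == b) becomes:
     *     cmp{w,d}  cr7,a,b
     *     li        dst,1
     *     li        r0,0
     *     isel      dst,dst,r0,eq(cr7)
     */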
1602    if (have_isel) {
1603        int isel, tab;
1604
1605        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1606
1607        isel = tcg_to_isel[cond];
1608
1609        tcg_out_movi(s, type, arg0, 1);
1610        if (isel & 1) {
1611            /* arg0 = (bc ? 0 : 1) */
1612            tab = TAB(arg0, 0, arg0);
1613            isel &= ~1;
1614        } else {
1615            /* arg0 = (bc ? 1 : 0) */
1616            tcg_out_movi(s, type, TCG_REG_R0, 0);
1617            tab = TAB(arg0, arg0, TCG_REG_R0);
1618        }
1619        tcg_out32(s, isel | tab);
1620        return;
1621    }
1622
1623    switch (cond) {
1624    case TCG_COND_EQ:
1625        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1626        tcg_out_setcond_eq0(s, type, arg0, arg1);
1627        return;
1628
1629    case TCG_COND_NE:
1630        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1631        /* Discard the high bits only once, rather than both inputs.  */
1632        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
1633            tcg_out_ext32u(s, TCG_REG_R0, arg1);
1634            arg1 = TCG_REG_R0;
1635        }
1636        tcg_out_setcond_ne0(s, arg0, arg1);
1637        return;
1638
1639    case TCG_COND_GT:
1640    case TCG_COND_GTU:
1641        sh = 30;
1642        crop = 0;
1643        goto crtest;
1644
1645    case TCG_COND_LT:
1646    case TCG_COND_LTU:
1647        sh = 29;
1648        crop = 0;
1649        goto crtest;
1650
1651    case TCG_COND_GE:
1652    case TCG_COND_GEU:
1653        sh = 31;
1654        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
1655        goto crtest;
1656
1657    case TCG_COND_LE:
1658    case TCG_COND_LEU:
1659        sh = 31;
1660        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
1661    crtest:
1662        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1663        if (crop) {
1664            tcg_out32(s, crop);
1665        }
1666        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1667        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1668        break;
1669
1670    default:
1671        tcg_abort();
1672    }
1673}
1674
1675static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1676{
1677    if (l->has_value) {
1678        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1679    } else {
1680        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1681    }
1682    tcg_out32(s, bc);
1683}
1684
1685static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1686                           TCGArg arg1, TCGArg arg2, int const_arg2,
1687                           TCGLabel *l, TCGType type)
1688{
1689    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1690    tcg_out_bc(s, tcg_to_bc[cond], l);
1691}
1692
1693static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1694                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1695                            TCGArg v2, bool const_c2)
1696{
1697    /* If for some reason both inputs are zero, don't produce bad code.  */
1698    if (v1 == 0 && v2 == 0) {
1699        tcg_out_movi(s, type, dest, 0);
1700        return;
1701    }
1702
1703    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1704
1705    if (have_isel) {
1706        int isel = tcg_to_isel[cond];
1707
1708        /* Swap the V operands if the operation indicates inversion.  */
1709        if (isel & 1) {
1710            int t = v1;
1711            v1 = v2;
1712            v2 = t;
1713            isel &= ~1;
1714        }
1715        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1716        if (v2 == 0) {
1717            tcg_out_movi(s, type, TCG_REG_R0, 0);
1718        }
1719        tcg_out32(s, isel | TAB(dest, v1, v2));
1720    } else {
1721        if (dest == v2) {
1722            cond = tcg_invert_cond(cond);
1723            v2 = v1;
1724        } else if (dest != v1) {
1725            if (v1 == 0) {
1726                tcg_out_movi(s, type, dest, 0);
1727            } else {
1728                tcg_out_mov(s, type, dest, v1);
1729            }
1730        }
1731        /* Branch forward over one insn */
1732        tcg_out32(s, tcg_to_bc[cond] | 8);
1733        if (v2 == 0) {
1734            tcg_out_movi(s, type, dest, 0);
1735        } else {
1736            tcg_out_mov(s, type, dest, v2);
1737        }
1738    }
1739}
1740
1741static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1742                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1743{
1744    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1745        tcg_out32(s, opc | RA(a0) | RS(a1));
1746    } else {
1747        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1748        /* Note that the only other valid constant for a2 is 0.  */
1749        if (have_isel) {
1750            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1751            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1752        } else if (!const_a2 && a0 == a2) {
1753            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1754            tcg_out32(s, opc | RA(a0) | RS(a1));
1755        } else {
1756            tcg_out32(s, opc | RA(a0) | RS(a1));
1757            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1758            if (const_a2) {
1759                tcg_out_movi(s, type, a0, 0);
1760            } else {
1761                tcg_out_mov(s, type, a0, a2);
1762            }
1763        }
1764    }
1765}
1766
1767static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1768                         const int *const_args)
1769{
1770    static const struct { uint8_t bit1, bit2; } bits[] = {
1771        [TCG_COND_LT ] = { CR_LT, CR_LT },
1772        [TCG_COND_LE ] = { CR_LT, CR_GT },
1773        [TCG_COND_GT ] = { CR_GT, CR_GT },
1774        [TCG_COND_GE ] = { CR_GT, CR_LT },
1775        [TCG_COND_LTU] = { CR_LT, CR_LT },
1776        [TCG_COND_LEU] = { CR_LT, CR_GT },
1777        [TCG_COND_GTU] = { CR_GT, CR_GT },
1778        [TCG_COND_GEU] = { CR_GT, CR_LT },
1779    };
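    /*
     * A double-word comparison is composed of a comparison on the high
     * parts plus an unsigned comparison on the low parts, e.g. for LT:
     * (ah < bh) || (ah == bh && al <u bl).  The bit pair above selects
     * which CR bits of the two compares feed the combining ops below.
     */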
1780
1781    TCGCond cond = args[4], cond2;
1782    TCGArg al, ah, bl, bh;
1783    int blconst, bhconst;
1784    int op, bit1, bit2;
1785
1786    al = args[0];
1787    ah = args[1];
1788    bl = args[2];
1789    bh = args[3];
1790    blconst = const_args[2];
1791    bhconst = const_args[3];
1792
1793    switch (cond) {
1794    case TCG_COND_EQ:
1795        op = CRAND;
1796        goto do_equality;
1797    case TCG_COND_NE:
1798        op = CRNAND;
1799    do_equality:
1800        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1801        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1802        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1803        break;
1804
1805    case TCG_COND_LT:
1806    case TCG_COND_LE:
1807    case TCG_COND_GT:
1808    case TCG_COND_GE:
1809    case TCG_COND_LTU:
1810    case TCG_COND_LEU:
1811    case TCG_COND_GTU:
1812    case TCG_COND_GEU:
1813        bit1 = bits[cond].bit1;
1814        bit2 = bits[cond].bit2;
1815        op = (bit1 != bit2 ? CRANDC : CRAND);
1816        cond2 = tcg_unsigned_cond(cond);
1817
1818        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1819        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1820        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1821        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1822        break;
1823
1824    default:
1825        tcg_abort();
1826    }
1827}
1828
1829static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1830                             const int *const_args)
1831{
1832    tcg_out_cmp2(s, args + 1, const_args + 1);
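    /*
     * tcg_out_cmp2 leaves the boolean in CR7[EQ].  mfocrf copies CR
     * field 7 into its natural position, the low nibble of R0; the
     * rlwinm then rotates the EQ flag (LT,GT,EQ,SO ordering) down to
     * the least-significant bit and masks away the rest.
     */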
1833    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1834    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
1835}
1836
1837static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
1838                            const int *const_args)
1839{
1840    tcg_out_cmp2(s, args, const_args);
1841    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1842}
1843
1844static void tcg_out_mb(TCGContext *s, TCGArg a0)
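/*
 * Only a store-before-load ordering requires the full hwsync; the
 * cheaper lwsync already orders load-load, load-store and store-store.
 */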
1845{
1846    uint32_t insn;
1847
1848    if (a0 & TCG_MO_ST_LD) {
1849        insn = HWSYNC;
1850    } else {
1851        insn = LWSYNC;
1852    }
1853
1854    tcg_out32(s, insn);
1855}
1856
1857static void tcg_out_call_int(TCGContext *s, int lk,
1858                             const tcg_insn_unit *target)
1859{
1860#ifdef _CALL_AIX
1861    /* Look through the descriptor.  If the branch is in range and the
1862       TOC is cheap to build, load the TOC and branch directly.  */
1863    const void *tgt = ((const void * const *)target)[0];
1864    uintptr_t toc = ((const uintptr_t *)target)[1];
1865    intptr_t diff = tcg_pcrel_diff(s, tgt);
1866
1867    if (in_range_b(diff) && toc == (uint32_t)toc) {
1868        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1869        tcg_out_b(s, lk, tgt);
1870    } else {
1871        /* Fold the low bits of the constant into the addresses below.  */
1872        intptr_t arg = (intptr_t)target;
1873        int ofs = (int16_t)arg;
1874
1875        if (ofs + 8 < 0x8000) {
1876            arg -= ofs;
1877        } else {
1878            ofs = 0;
1879        }
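        /* Both ofs and ofs + SZP must fit within the signed 16-bit
           displacement of the two loads below; otherwise keep the full
           descriptor address and use displacement 0.  */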
1880        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1881        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1882        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1883        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1884        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1885    }
1886#elif defined(_CALL_ELF) && _CALL_ELF == 2
1887    intptr_t diff;
1888
1889    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1890       address, which the callee uses to compute its TOC address.  */
1891    /* FIXME: when the branch is in range, we could avoid the r12 load
1892       if we knew that the destination uses the same TOC, and what its
1893       local entry point offset is.  */
1894    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1895
1896    diff = tcg_pcrel_diff(s, target);
1897    if (in_range_b(diff)) {
1898        tcg_out_b(s, lk, target);
1899    } else {
1900        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1901        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
1902    }
1903#else
1904    tcg_out_b(s, lk, target);
1905#endif
1906}
1907
1908static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1909                         const TCGHelperInfo *info)
1910{
1911    tcg_out_call_int(s, LK, target);
1912}
1913
1914static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
1915    [MO_UB] = LBZX,
1916    [MO_UW] = LHZX,
1917    [MO_UL] = LWZX,
1918    [MO_UQ] = LDX,
1919    [MO_SW] = LHAX,
1920    [MO_SL] = LWAX,
1921    [MO_BSWAP | MO_UB] = LBZX,
1922    [MO_BSWAP | MO_UW] = LHBRX,
1923    [MO_BSWAP | MO_UL] = LWBRX,
1924    [MO_BSWAP | MO_UQ] = LDBRX,
1925};
1926
1927static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
1928    [MO_UB] = STBX,
1929    [MO_UW] = STHX,
1930    [MO_UL] = STWX,
1931    [MO_UQ] = STDX,
1932    [MO_BSWAP | MO_UB] = STBX,
1933    [MO_BSWAP | MO_UW] = STHBRX,
1934    [MO_BSWAP | MO_UL] = STWBRX,
1935    [MO_BSWAP | MO_UQ] = STDBRX,
1936};
1937
1938static const uint32_t qemu_exts_opc[4] = {
1939    EXTSB, EXTSH, EXTSW, 0
1940};
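/*
 * Sign-extending loads with no single insn (a signed byte load, or any
 * byte-reversed signed load) are absent from qemu_ldx_opc; the code
 * below falls back to the zero-extending form plus the matching EXTS*.
 */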
1941
1942#if defined(CONFIG_SOFTMMU)
1943/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
1944 *                                 int mmu_idx, uintptr_t ra)
1945 */
1946static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1947    [MO_UB]   = helper_ret_ldub_mmu,
1948    [MO_LEUW] = helper_le_lduw_mmu,
1949    [MO_LEUL] = helper_le_ldul_mmu,
1950    [MO_LEUQ] = helper_le_ldq_mmu,
1951    [MO_BEUW] = helper_be_lduw_mmu,
1952    [MO_BEUL] = helper_be_ldul_mmu,
1953    [MO_BEUQ] = helper_be_ldq_mmu,
1954};
1955
1956/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
1957 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
1958 */
1959static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1960    [MO_UB]   = helper_ret_stb_mmu,
1961    [MO_LEUW] = helper_le_stw_mmu,
1962    [MO_LEUL] = helper_le_stl_mmu,
1963    [MO_LEUQ] = helper_le_stq_mmu,
1964    [MO_BEUW] = helper_be_stw_mmu,
1965    [MO_BEUL] = helper_be_stl_mmu,
1966    [MO_BEUQ] = helper_be_stq_mmu,
1967};
1968
1969/* We expect to use a 16-bit negative offset from ENV.  */
1970QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1971QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1972
1973/* Perform the TLB load and compare.  Places the result of the comparison
1974   in CR7, loads the addend of the TLB into R3, and returns the register
1975   holding the guest address (zero-extended into R4).  Clobbers R0, TMP1. */
1976
1977static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1978                               TCGReg addrlo, TCGReg addrhi,
1979                               int mem_index, bool is_read)
1980{
1981    int cmp_off
1982        = (is_read
1983           ? offsetof(CPUTLBEntry, addr_read)
1984           : offsetof(CPUTLBEntry, addr_write));
1985    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1986    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1987    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1988    unsigned s_bits = opc & MO_SIZE;
1989    unsigned a_bits = get_alignment_bits(opc);
1990
1991    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
1992    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
1993    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
1994
1995    /* Extract the page index, shifted into place for tlb index.  */
1996    if (TCG_TARGET_REG_BITS == 32) {
1997        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
1998                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1999    } else {
2000        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
2001                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
2002    }
2003    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
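    /*
     * TCG_REG_R3 now holds the byte offset of the TLB entry: the page
     * index bits, pre-scaled by the entry size thanks to the reduced
     * shift count above, and bounded by tlb_mask[mmu_idx].
     */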
2004
2005    /* Load the TLB comparator.  */
2006    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
2007        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
2008                        ? LWZUX : LDUX);
2009        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
2010    } else {
2011        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
2012        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2013            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
2014            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
2015        } else {
2016            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
2017        }
2018    }
2019
2020    /* Load the TLB addend for use on the fast path.  Do this as early
2021       as possible to minimize the load-use delay.  */
2022    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
2023               offsetof(CPUTLBEntry, addend));
2024
2025    /* Clear the non-page, non-alignment bits from the address */
2026    if (TCG_TARGET_REG_BITS == 32) {
2027        /* We don't support unaligned accesses on 32-bit hosts.
2028         * Preserve the bottom bits and thus trigger a TLB comparison
2029         * failure on unaligned accesses.
2030         */
2031        if (a_bits < s_bits) {
2032            a_bits = s_bits;
2033        }
2034        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2035                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2036    } else {
2037        TCGReg t = addrlo;
2038
2039        /* If the access is unaligned, we need to make sure we fail if we
2040         * cross a page boundary.  The trick is to add the access size-1
2041         * to the address before masking the low bits.  That will make the
2042         * address overflow to the next page if we cross a page boundary,
2043         * which will then force a mismatch of the TLB compare.
2044         */
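        /*
         * E.g. an 8-byte access with a_bits == 0 at address 0x...fff9:
         * adding s_mask - a_mask == 7 yields 0x...10000, so the masked
         * page address no longer matches the TLB tag and we take the
         * slow path.
         */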
2045        if (a_bits < s_bits) {
2046            unsigned a_mask = (1 << a_bits) - 1;
2047            unsigned s_mask = (1 << s_bits) - 1;
2048            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2049            t = TCG_REG_R0;
2050        }
2051
2052        /* Mask the address for the requested alignment.  */
2053        if (TARGET_LONG_BITS == 32) {
2054            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2055                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
2056            /* Zero-extend the address for use in the final host address.  */
2057            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
2058            addrlo = TCG_REG_R4;
2059        } else if (a_bits == 0) {
2060            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
2061        } else {
2062            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2063                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
2064            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
2065        }
2066    }
2067
2068    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2069        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2070                    0, 7, TCG_TYPE_I32);
2071        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
2072        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2073    } else {
2074        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
2075                    0, 7, TCG_TYPE_TL);
2076    }
2077
2078    return addrlo;
2079}
2080
2081/* Record the context of a call to the out-of-line helper code for the
2082   slow path of a load or store, so that we can later generate the
2083   correct helper code.  */
2084static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
2085                                TCGReg datalo_reg, TCGReg datahi_reg,
2086                                TCGReg addrlo_reg, TCGReg addrhi_reg,
2087                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
2088{
2089    TCGLabelQemuLdst *label = new_ldst_label(s);
2090
2091    label->is_ld = is_ld;
2092    label->oi = oi;
2093    label->datalo_reg = datalo_reg;
2094    label->datahi_reg = datahi_reg;
2095    label->addrlo_reg = addrlo_reg;
2096    label->addrhi_reg = addrhi_reg;
2097    label->raddr = tcg_splitwx_to_rx(raddr);
2098    label->label_ptr[0] = lptr;
2099}
2100
2101static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2102{
2103    MemOpIdx oi = lb->oi;
2104    MemOp opc = get_memop(oi);
2105    TCGReg hi, lo, arg = TCG_REG_R3;
2106
2107    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2108        return false;
2109    }
2110
2111    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2112
2113    lo = lb->addrlo_reg;
2114    hi = lb->addrhi_reg;
2115    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2116        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2117        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2118        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2119    } else {
2120        /* If the address needed to be zero-extended, we'll have already
2121           placed it in R4.  The only remaining case is 64-bit guest.  */
2122        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2123    }
2124
2125    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2126    tcg_out32(s, MFSPR | RT(arg) | LR);
2127
2128    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2129
2130    lo = lb->datalo_reg;
2131    hi = lb->datahi_reg;
2132    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2133        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2134        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2135    } else if (opc & MO_SIGN) {
2136        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2137        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2138    } else {
2139        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2140    }
2141
2142    tcg_out_b(s, 0, lb->raddr);
2143    return true;
2144}
2145
2146static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2147{
2148    MemOpIdx oi = lb->oi;
2149    MemOp opc = get_memop(oi);
2150    MemOp s_bits = opc & MO_SIZE;
2151    TCGReg hi, lo, arg = TCG_REG_R3;
2152
2153    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2154        return false;
2155    }
2156
2157    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2158
2159    lo = lb->addrlo_reg;
2160    hi = lb->addrhi_reg;
2161    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2162        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2163        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2164        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2165    } else {
2166        /* If the address needed to be zero-extended, we'll have already
2167           placed it in R4.  The only remaining case is 64-bit guest.  */
2168        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2169    }
2170
2171    lo = lb->datalo_reg;
2172    hi = lb->datahi_reg;
2173    if (TCG_TARGET_REG_BITS == 32) {
2174        switch (s_bits) {
2175        case MO_64:
2176            arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2177            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2178            /* FALLTHRU */
2179        case MO_32:
2180            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2181            break;
2182        default:
2183            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2184            break;
2185        }
2186    } else {
2187        if (s_bits == MO_64) {
2188            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2189        } else {
2190            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2191        }
2192    }
2193
2194    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2195    tcg_out32(s, MFSPR | RT(arg) | LR);
2196
2197    tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2198
2199    tcg_out_b(s, 0, lb->raddr);
2200    return true;
2201}
2202#else
2203
2204static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
2205                                   TCGReg addrhi, unsigned a_bits)
2206{
2207    unsigned a_mask = (1 << a_bits) - 1;
2208    TCGLabelQemuLdst *label = new_ldst_label(s);
2209
2210    label->is_ld = is_ld;
2211    label->addrlo_reg = addrlo;
2212    label->addrhi_reg = addrhi;
2213
2214    /* We expect a_bits to max out at 7, well within ANDI's 16-bit immediate. */
2215    tcg_debug_assert(a_bits < 16);
2216    tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
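    /* The record form andi. sets CR0; the branch below is taken, with
       link, when any low address bit is set (CR0[EQ] clear).  */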
2217
2218    label->label_ptr[0] = s->code_ptr;
2219    tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2220
2221    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
2222}
2223
2224static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
2225{
2226    if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2227        return false;
2228    }
2229
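    /*
     * Marshal the (hi, lo) address pair into consecutive argument
     * registers without overwriting either half before it has been
     * read; the final case is a full swap through R0.
     */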
2230    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2231        TCGReg arg = TCG_REG_R4;
2232
2233        arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
2234        if (l->addrlo_reg != arg) {
2235            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2236            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2237        } else if (l->addrhi_reg != arg + 1) {
2238            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
2239            tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
2240        } else {
2241            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
2242            tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
2243            tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
2244        }
2245    } else {
2246        tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
2247    }
2248    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
2249
2250    /* "Tail call" to the helper, with the return address back inline. */
2251    tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
2252                                          : helper_unaligned_st));
2253    return true;
2254}
2255
2256static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2257{
2258    return tcg_out_fail_alignment(s, l);
2259}
2260
2261static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
2262{
2263    return tcg_out_fail_alignment(s, l);
2264}
2265
2266#endif /* SOFTMMU */
2267
2268static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2269{
2270    TCGReg datalo, datahi, addrlo, rbase;
2271    TCGReg addrhi __attribute__((unused));
2272    MemOpIdx oi;
2273    MemOp opc, s_bits;
2274#ifdef CONFIG_SOFTMMU
2275    int mem_index;
2276    tcg_insn_unit *label_ptr;
2277#else
2278    unsigned a_bits;
2279#endif
2280
2281    datalo = *args++;
2282    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2283    addrlo = *args++;
2284    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2285    oi = *args++;
2286    opc = get_memop(oi);
2287    s_bits = opc & MO_SIZE;
2288
2289#ifdef CONFIG_SOFTMMU
2290    mem_index = get_mmuidx(oi);
2291    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2292
2293    /* Branch-and-link to the slow path if the TLB comparison failed.  */
2294    label_ptr = s->code_ptr;
2295    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2296
2297    rbase = TCG_REG_R3;
2298#else  /* !CONFIG_SOFTMMU */
2299    a_bits = get_alignment_bits(opc);
2300    if (a_bits) {
2301        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
2302    }
2303    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2304    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2305        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2306        addrlo = TCG_REG_TMP1;
2307    }
2308#endif
2309
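    /*
     * A 64-bit load on a 32-bit host takes two 32-bit loads, ordered
     * so that the address register is not clobbered before the second
     * load when it overlaps datahi.
     */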
2310    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2311        if (opc & MO_BSWAP) {
2312            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2313            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2314            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2315        } else if (rbase != 0) {
2316            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2317            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2318            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2319        } else if (addrlo == datahi) {
2320            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2321            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2322        } else {
2323            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2324            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2325        }
2326    } else {
2327        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2328        if (!have_isa_2_06 && insn == LDBRX) {
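            /* Without ISA 2.06's ldbrx, synthesize the byte-reversed
               64-bit load from two lwbrx halves merged with rldimi.  */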
2329            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2330            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2331            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2332            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2333        } else if (insn) {
2334            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2335        } else {
2336            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2337            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2338            insn = qemu_exts_opc[s_bits];
2339            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2340        }
2341    }
2342
2343#ifdef CONFIG_SOFTMMU
2344    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2345                        s->code_ptr, label_ptr);
2346#endif
2347}
2348
2349static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2350{
2351    TCGReg datalo, datahi, addrlo, rbase;
2352    TCGReg addrhi __attribute__((unused));
2353    MemOpIdx oi;
2354    MemOp opc, s_bits;
2355#ifdef CONFIG_SOFTMMU
2356    int mem_index;
2357    tcg_insn_unit *label_ptr;
2358#else
2359    unsigned a_bits;
2360#endif
2361
2362    datalo = *args++;
2363    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2364    addrlo = *args++;
2365    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2366    oi = *args++;
2367    opc = get_memop(oi);
2368    s_bits = opc & MO_SIZE;
2369
2370#ifdef CONFIG_SOFTMMU
2371    mem_index = get_mmuidx(oi);
2372    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2373
2374    /* Branch-and-link to the slow path if the TLB comparison failed.  */
2375    label_ptr = s->code_ptr;
2376    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2377
2378    rbase = TCG_REG_R3;
2379#else  /* !CONFIG_SOFTMMU */
2380    a_bits = get_alignment_bits(opc);
2381    if (a_bits) {
2382        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
2383    }
2384    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2385    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2386        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2387        addrlo = TCG_REG_TMP1;
2388    }
2389#endif
2390
2391    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2392        if (opc & MO_BSWAP) {
2393            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2394            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2395            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2396        } else if (rbase != 0) {
2397            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2398            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2399            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2400        } else {
2401            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2402            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2403        }
2404    } else {
2405        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2406        if (!have_isa_2_06 && insn == STDBRX) {
2407            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2408            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2409            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2410            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2411        } else {
2412            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2413        }
2414    }
2415
2416#ifdef CONFIG_SOFTMMU
2417    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2418                        s->code_ptr, label_ptr);
2419#endif
2420}
2421
2422static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2423{
2424    int i;
2425    for (i = 0; i < count; ++i) {
2426        p[i] = NOP;
2427    }
2428}
2429
2430/* Parameters for function call generation, used in tcg.c.  */
2431#define TCG_TARGET_STACK_ALIGN       16
2432
2433#ifdef _CALL_AIX
2434# define LINK_AREA_SIZE                (6 * SZR)
2435# define LR_OFFSET                     (1 * SZR)
2436# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2437#elif defined(_CALL_DARWIN)
2438# define LINK_AREA_SIZE                (6 * SZR)
2439# define LR_OFFSET                     (2 * SZR)
2440#elif TCG_TARGET_REG_BITS == 64
2441# if defined(_CALL_ELF) && _CALL_ELF == 2
2442#  define LINK_AREA_SIZE               (4 * SZR)
2443#  define LR_OFFSET                    (1 * SZR)
2444# endif
2445#else /* TCG_TARGET_REG_BITS == 32 */
2446# if defined(_CALL_SYSV)
2447#  define LINK_AREA_SIZE               (2 * SZR)
2448#  define LR_OFFSET                    (1 * SZR)
2449# endif
2450#endif
2451#ifndef LR_OFFSET
2452# error "Unhandled ABI"
2453#endif
2454#ifndef TCG_TARGET_CALL_STACK_OFFSET
2455# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2456#endif
2457
2458#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2459#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2460
2461#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2462                     + TCG_STATIC_CALL_ARGS_SIZE    \
2463                     + CPU_TEMP_BUF_SIZE            \
2464                     + REG_SAVE_SIZE                \
2465                     + TCG_TARGET_STACK_ALIGN - 1)  \
2466                    & -TCG_TARGET_STACK_ALIGN)
2467
2468#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
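
/*
 * Resulting frame layout, from low address (SP) up:
 *
 *   SP + 0                             back chain / link area
 *   SP + TCG_TARGET_CALL_STACK_OFFSET  outgoing helper-call arguments
 *   ...                                CPU_TEMP_BUF (see tcg_set_frame)
 *   SP + REG_SAVE_BOT                  callee-saved registers
 *   SP + FRAME_SIZE                    caller's frame
 */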
2469
2470static void tcg_target_qemu_prologue(TCGContext *s)
2471{
2472    int i;
2473
2474#ifdef _CALL_AIX
2475    const void **desc = (const void **)s->code_ptr;
2476    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2477    desc[1] = 0;                            /* environment pointer */
2478    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2479#endif
2480
2481    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2482                  CPU_TEMP_BUF_SIZE);
2483
2484    /* Prologue */
2485    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2486    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2487              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2488
2489    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2490        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2491                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2492    }
2493    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2494
2495#ifndef CONFIG_SOFTMMU
2496    if (guest_base) {
2497        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2498        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2499    }
2500#endif
2501
2502    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2503    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2504    if (USE_REG_TB) {
2505        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2506    }
2507    tcg_out32(s, BCCTR | BO_ALWAYS);
2508
2509    /* Epilogue */
2510    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2511
2512    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2513    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2514        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2515                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2516    }
2517    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2518    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2519    tcg_out32(s, BCLR | BO_ALWAYS);
2520}
2521
2522static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2523{
2524    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2525    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2526}
2527
2528static void tcg_out_goto_tb(TCGContext *s, int which)
2529{
2530    uintptr_t ptr = get_jmp_target_addr(s, which);
2531
2532    if (USE_REG_TB) {
2533        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
2534        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
2535
2536        /* Direct branch will be patched by tb_target_set_jmp_target. */
2537        set_jmp_insn_offset(s, which);
2538        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2539
2540        /* When branch is out of range, fall through to indirect. */
2541        tcg_out32(s, BCCTR | BO_ALWAYS);
2542
2543        /* For the unlinked case, need to reset TCG_REG_TB.  */
2544        set_jmp_reset_offset(s, which);
2545        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
2546                         -tcg_current_code_size(s));
2547    } else {
2548        /* Direct branch will be patched by tb_target_set_jmp_target. */
2549        set_jmp_insn_offset(s, which);
2550        tcg_out32(s, NOP);
2551
2552        /* When branch is out of range, fall through to indirect. */
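        /*
         * Load the target from the TB's jmp_target_addr slot.  The
         * constant splits as ptr = (ptr - (int16_t)ptr) + (int16_t)ptr,
         * so the sign-extended low 16 bits become the load displacement:
         * e.g. 0x12348000 -> movi 0x12350000, displacement -0x8000.
         */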
2553        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
2554        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
2555        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2556        tcg_out32(s, BCCTR | BO_ALWAYS);
2557        set_jmp_reset_offset(s, which);
2558    }
2559}
2560
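/*
 * Patch the first insn of the goto_tb slot: a direct branch when the
 * target is in range; otherwise mtctr when USE_REG_TB (the target was
 * already loaded into TCG_REG_TB), or a nop, in both cases falling
 * through to the indirect branch emitted by tcg_out_goto_tb above.
 */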
2561void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2562                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2563{
2564    uintptr_t addr = tb->jmp_target_addr[n];
2565    intptr_t diff = addr - jmp_rx;
2566    tcg_insn_unit insn;
2567
2568    if (in_range_b(diff)) {
2569        insn = B | (diff & 0x3fffffc);
2570    } else if (USE_REG_TB) {
2571        insn = MTSPR | RS(TCG_REG_TB) | CTR;
2572    } else {
2573        insn = NOP;
2574    }
2575
2576    qatomic_set((uint32_t *)jmp_rw, insn);
2577    flush_idcache_range(jmp_rx, jmp_rw, 4);
2578}
2579
2580static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2581                       const TCGArg args[TCG_MAX_OP_ARGS],
2582                       const int const_args[TCG_MAX_OP_ARGS])
2583{
2584    TCGArg a0, a1, a2;
2585
2586    switch (opc) {
2587    case INDEX_op_goto_ptr:
2588        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2589        if (USE_REG_TB) {
2590            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2591        }
2592        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2593        tcg_out32(s, BCCTR | BO_ALWAYS);
2594        break;
2595    case INDEX_op_br:
2596        {
2597            TCGLabel *l = arg_label(args[0]);
2598            uint32_t insn = B;
2599
2600            if (l->has_value) {
2601                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2602                                       l->u.value_ptr);
2603            } else {
2604                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2605            }
2606            tcg_out32(s, insn);
2607        }
2608        break;
2609    case INDEX_op_ld8u_i32:
2610    case INDEX_op_ld8u_i64:
2611        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2612        break;
2613    case INDEX_op_ld8s_i32:
2614    case INDEX_op_ld8s_i64:
2615        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2616        tcg_out_ext8s(s, args[0], args[0]);
2617        break;
2618    case INDEX_op_ld16u_i32:
2619    case INDEX_op_ld16u_i64:
2620        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2621        break;
2622    case INDEX_op_ld16s_i32:
2623    case INDEX_op_ld16s_i64:
2624        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2625        break;
2626    case INDEX_op_ld_i32:
2627    case INDEX_op_ld32u_i64:
2628        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2629        break;
2630    case INDEX_op_ld32s_i64:
2631        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2632        break;
2633    case INDEX_op_ld_i64:
2634        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2635        break;
2636    case INDEX_op_st8_i32:
2637    case INDEX_op_st8_i64:
2638        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2639        break;
2640    case INDEX_op_st16_i32:
2641    case INDEX_op_st16_i64:
2642        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2643        break;
2644    case INDEX_op_st_i32:
2645    case INDEX_op_st32_i64:
2646        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2647        break;
2648    case INDEX_op_st_i64:
2649        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2650        break;
2651
2652    case INDEX_op_add_i32:
2653        a0 = args[0], a1 = args[1], a2 = args[2];
2654        if (const_args[2]) {
2655        do_addi_32:
2656            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2657        } else {
2658            tcg_out32(s, ADD | TAB(a0, a1, a2));
2659        }
2660        break;
2661    case INDEX_op_sub_i32:
2662        a0 = args[0], a1 = args[1], a2 = args[2];
2663        if (const_args[1]) {
2664            if (const_args[2]) {
2665                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2666            } else {
2667                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2668            }
2669        } else if (const_args[2]) {
2670            a2 = -a2;
2671            goto do_addi_32;
2672        } else {
2673            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2674        }
2675        break;
2676
2677    case INDEX_op_and_i32:
2678        a0 = args[0], a1 = args[1], a2 = args[2];
2679        if (const_args[2]) {
2680            tcg_out_andi32(s, a0, a1, a2);
2681        } else {
2682            tcg_out32(s, AND | SAB(a1, a0, a2));
2683        }
2684        break;
2685    case INDEX_op_and_i64:
2686        a0 = args[0], a1 = args[1], a2 = args[2];
2687        if (const_args[2]) {
2688            tcg_out_andi64(s, a0, a1, a2);
2689        } else {
2690            tcg_out32(s, AND | SAB(a1, a0, a2));
2691        }
2692        break;
2693    case INDEX_op_or_i64:
2694    case INDEX_op_or_i32:
2695        a0 = args[0], a1 = args[1], a2 = args[2];
2696        if (const_args[2]) {
2697            tcg_out_ori32(s, a0, a1, a2);
2698        } else {
2699            tcg_out32(s, OR | SAB(a1, a0, a2));
2700        }
2701        break;
2702    case INDEX_op_xor_i64:
2703    case INDEX_op_xor_i32:
2704        a0 = args[0], a1 = args[1], a2 = args[2];
2705        if (const_args[2]) {
2706            tcg_out_xori32(s, a0, a1, a2);
2707        } else {
2708            tcg_out32(s, XOR | SAB(a1, a0, a2));
2709        }
2710        break;
2711    case INDEX_op_andc_i32:
2712        a0 = args[0], a1 = args[1], a2 = args[2];
2713        if (const_args[2]) {
2714            tcg_out_andi32(s, a0, a1, ~a2);
2715        } else {
2716            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2717        }
2718        break;
2719    case INDEX_op_andc_i64:
2720        a0 = args[0], a1 = args[1], a2 = args[2];
2721        if (const_args[2]) {
2722            tcg_out_andi64(s, a0, a1, ~a2);
2723        } else {
2724            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2725        }
2726        break;
2727    case INDEX_op_orc_i32:
2728        if (const_args[2]) {
2729            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2730            break;
2731        }
2732        /* FALLTHRU */
2733    case INDEX_op_orc_i64:
2734        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2735        break;
2736    case INDEX_op_eqv_i32:
2737        if (const_args[2]) {
2738            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2739            break;
2740        }
2741        /* FALLTHRU */
2742    case INDEX_op_eqv_i64:
2743        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2744        break;
2745    case INDEX_op_nand_i32:
2746    case INDEX_op_nand_i64:
2747        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2748        break;
2749    case INDEX_op_nor_i32:
2750    case INDEX_op_nor_i64:
2751        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2752        break;
2753
2754    case INDEX_op_clz_i32:
2755        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2756                      args[2], const_args[2]);
2757        break;
2758    case INDEX_op_ctz_i32:
2759        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2760                      args[2], const_args[2]);
2761        break;
2762    case INDEX_op_ctpop_i32:
2763        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2764        break;
2765
2766    case INDEX_op_clz_i64:
2767        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2768                      args[2], const_args[2]);
2769        break;
2770    case INDEX_op_ctz_i64:
2771        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2772                      args[2], const_args[2]);
2773        break;
2774    case INDEX_op_ctpop_i64:
2775        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2776        break;
2777
2778    case INDEX_op_mul_i32:
2779        a0 = args[0], a1 = args[1], a2 = args[2];
2780        if (const_args[2]) {
2781            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2782        } else {
2783            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2784        }
2785        break;
2786
2787    case INDEX_op_div_i32:
2788        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2789        break;
2790
2791    case INDEX_op_divu_i32:
2792        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2793        break;
2794
2795    case INDEX_op_rem_i32:
2796        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
2797        break;
2798
2799    case INDEX_op_remu_i32:
2800        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
2801        break;
2802
2803    case INDEX_op_shl_i32:
2804        if (const_args[2]) {
2805            /* Limit immediate shift count lest we create an illegal insn.  */
2806            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2807        } else {
2808            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2809        }
2810        break;
2811    case INDEX_op_shr_i32:
2812        if (const_args[2]) {
2813            /* Limit immediate shift count lest we create an illegal insn.  */
2814            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2815        } else {
2816            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2817        }
2818        break;
2819    case INDEX_op_sar_i32:
2820        if (const_args[2]) {
2821            tcg_out_sari32(s, args[0], args[1], args[2]);
2822        } else {
2823            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2824        }
2825        break;
2826    case INDEX_op_rotl_i32:
2827        if (const_args[2]) {
2828            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2829        } else {
2830            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2831                         | MB(0) | ME(31));
2832        }
2833        break;
2834    case INDEX_op_rotr_i32:
2835        if (const_args[2]) {
2836            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2837        } else {
2838            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2839            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2840                         | MB(0) | ME(31));
2841        }
2842        break;
2843
2844    case INDEX_op_brcond_i32:
2845        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2846                       arg_label(args[3]), TCG_TYPE_I32);
2847        break;
2848    case INDEX_op_brcond_i64:
2849        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2850                       arg_label(args[3]), TCG_TYPE_I64);
2851        break;
2852    case INDEX_op_brcond2_i32:
2853        tcg_out_brcond2(s, args, const_args);
2854        break;
2855
2856    case INDEX_op_neg_i32:
2857    case INDEX_op_neg_i64:
2858        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2859        break;
2860
2861    case INDEX_op_not_i32:
2862    case INDEX_op_not_i64:
2863        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2864        break;
2865
2866    case INDEX_op_add_i64:
2867        a0 = args[0], a1 = args[1], a2 = args[2];
2868        if (const_args[2]) {
2869        do_addi_64:
2870            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2871        } else {
2872            tcg_out32(s, ADD | TAB(a0, a1, a2));
2873        }
2874        break;
2875    case INDEX_op_sub_i64:
2876        a0 = args[0], a1 = args[1], a2 = args[2];
2877        if (const_args[1]) {
2878            if (const_args[2]) {
2879                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2880            } else {
2881                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2882            }
2883        } else if (const_args[2]) {
2884            a2 = -a2;
2885            goto do_addi_64;
2886        } else {
2887            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2888        }
2889        break;
2890
2891    case INDEX_op_shl_i64:
2892        if (const_args[2]) {
2893            /* Limit immediate shift count lest we create an illegal insn.  */
2894            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2895        } else {
2896            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2897        }
2898        break;
2899    case INDEX_op_shr_i64:
2900        if (const_args[2]) {
2901            /* Limit immediate shift count lest we create an illegal insn.  */
2902            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2903        } else {
2904            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2905        }
2906        break;
2907    case INDEX_op_sar_i64:
2908        if (const_args[2]) {
2909            tcg_out_sari64(s, args[0], args[1], args[2]);
2910        } else {
2911            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2912        }
2913        break;
2914    case INDEX_op_rotl_i64:
2915        if (const_args[2]) {
2916            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2917        } else {
2918            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2919        }
2920        break;
2921    case INDEX_op_rotr_i64:
2922        if (const_args[2]) {
2923            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2924        } else {
2925            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2926            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2927        }
2928        break;
2929
2930    case INDEX_op_mul_i64:
2931        a0 = args[0], a1 = args[1], a2 = args[2];
2932        if (const_args[2]) {
2933            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2934        } else {
2935            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2936        }
2937        break;
2938    case INDEX_op_div_i64:
2939        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2940        break;
2941    case INDEX_op_divu_i64:
2942        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2943        break;
2944    case INDEX_op_rem_i64:
2945        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
2946        break;
2947    case INDEX_op_remu_i64:
2948        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
2949        break;
2950
2951    case INDEX_op_qemu_ld_i32:
2952        tcg_out_qemu_ld(s, args, false);
2953        break;
2954    case INDEX_op_qemu_ld_i64:
2955        tcg_out_qemu_ld(s, args, true);
2956        break;
2957    case INDEX_op_qemu_st_i32:
2958        tcg_out_qemu_st(s, args, false);
2959        break;
2960    case INDEX_op_qemu_st_i64:
2961        tcg_out_qemu_st(s, args, true);
2962        break;
2963
2964    case INDEX_op_ext8s_i32:
2965    case INDEX_op_ext8s_i64:
2966        tcg_out_ext8s(s, args[0], args[1]);
2967        break;
2968    case INDEX_op_ext16s_i32:
2969    case INDEX_op_ext16s_i64:
2970        tcg_out_ext16s(s, args[0], args[1]);
2971        break;
2972    case INDEX_op_ext_i32_i64:
2973    case INDEX_op_ext32s_i64:
2974        tcg_out_ext32s(s, args[0], args[1]);
2975        break;
2976    case INDEX_op_extu_i32_i64:
2977        tcg_out_ext32u(s, args[0], args[1]);
2978        break;
2979
2980    case INDEX_op_setcond_i32:
2981        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2982                        const_args[2]);
2983        break;
2984    case INDEX_op_setcond_i64:
2985        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2986                        const_args[2]);
2987        break;
2988    case INDEX_op_setcond2_i32:
2989        tcg_out_setcond2(s, args, const_args);
2990        break;
2991
2992    case INDEX_op_bswap16_i32:
2993    case INDEX_op_bswap16_i64:
2994        tcg_out_bswap16(s, args[0], args[1], args[2]);
2995        break;
2996    case INDEX_op_bswap32_i32:
2997        tcg_out_bswap32(s, args[0], args[1], 0);
2998        break;
2999    case INDEX_op_bswap32_i64:
3000        tcg_out_bswap32(s, args[0], args[1], args[2]);
3001        break;
3002    case INDEX_op_bswap64_i64:
3003        tcg_out_bswap64(s, args[0], args[1]);
3004        break;
3005
3006    case INDEX_op_deposit_i32:
3007        if (const_args[2]) {
3008            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3009            tcg_out_andi32(s, args[0], args[0], ~mask);
3010        } else {
3011            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3012                        32 - args[3] - args[4], 31 - args[3]);
3013        }
3014        break;
3015    case INDEX_op_deposit_i64:
3016        if (const_args[2]) {
3017            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3018            tcg_out_andi64(s, args[0], args[0], ~mask);
3019        } else {
3020            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3021                        64 - args[3] - args[4]);
3022        }
3023        break;
3024
3025    case INDEX_op_extract_i32:
3026        tcg_out_rlw(s, RLWINM, args[0], args[1],
3027                    32 - args[2], 32 - args[3], 31);
3028        break;
3029    case INDEX_op_extract_i64:
3030        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3031        break;
3032
3033    case INDEX_op_movcond_i32:
3034        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3035                        args[3], args[4], const_args[2]);
3036        break;
3037    case INDEX_op_movcond_i64:
3038        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3039                        args[3], args[4], const_args[2]);
3040        break;
3041
3042#if TCG_TARGET_REG_BITS == 64
3043    case INDEX_op_add2_i64:
3044#else
3045    case INDEX_op_add2_i32:
3046#endif
3047        /* Note that the CA bit is defined based on the word size of the
3048           environment.  So in 64-bit mode it's always carry-out of bit 63.
3049           The fallback code using deposit works just as well for 32-bit.  */
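        /*
         * If the low-part destination overlaps an input of the
         * high-part insn, computing it in place would clobber that
         * input; divert the low result to R0 and move it afterwards.
         */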
3050        a0 = args[0], a1 = args[1];
3051        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3052            a0 = TCG_REG_R0;
3053        }
3054        if (const_args[4]) {
3055            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3056        } else {
3057            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3058        }
3059        if (const_args[5]) {
3060            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3061        } else {
3062            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3063        }
3064        if (a0 != args[0]) {
3065            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3066        }
3067        break;
3068
3069#if TCG_TARGET_REG_BITS == 64
3070    case INDEX_op_sub2_i64:
3071#else
3072    case INDEX_op_sub2_i32:
3073#endif
3074        a0 = args[0], a1 = args[1];
3075        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3076            a0 = TCG_REG_R0;
3077        }
3078        if (const_args[2]) {
3079            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3080        } else {
3081            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3082        }
3083        if (const_args[3]) {
3084            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3085        } else {
3086            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3087        }
3088        if (a0 != args[0]) {
3089            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3090        }
3091        break;
3092
3093    case INDEX_op_muluh_i32:
3094        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3095        break;
3096    case INDEX_op_mulsh_i32:
3097        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3098        break;
3099    case INDEX_op_muluh_i64:
3100        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3101        break;
3102    case INDEX_op_mulsh_i64:
3103        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3104        break;
3105
3106    case INDEX_op_mb:
3107        tcg_out_mb(s, args[0]);
3108        break;
3109
3110    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3111    case INDEX_op_mov_i64:
3112    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3113    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3114    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3115    default:
3116        tcg_abort();
3117    }
3118}
3119
3120int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3121{
3122    switch (opc) {
3123    case INDEX_op_and_vec:
3124    case INDEX_op_or_vec:
3125    case INDEX_op_xor_vec:
3126    case INDEX_op_andc_vec:
3127    case INDEX_op_not_vec:
3128    case INDEX_op_nor_vec:
3129    case INDEX_op_eqv_vec:
3130    case INDEX_op_nand_vec:
3131        return 1;
3132    case INDEX_op_orc_vec:
3133        return have_isa_2_07;
3134    case INDEX_op_add_vec:
3135    case INDEX_op_sub_vec:
3136    case INDEX_op_smax_vec:
3137    case INDEX_op_smin_vec:
3138    case INDEX_op_umax_vec:
3139    case INDEX_op_umin_vec:
3140    case INDEX_op_shlv_vec:
3141    case INDEX_op_shrv_vec:
3142    case INDEX_op_sarv_vec:
3143    case INDEX_op_rotlv_vec:
3144        return vece <= MO_32 || have_isa_2_07;
3145    case INDEX_op_ssadd_vec:
3146    case INDEX_op_sssub_vec:
3147    case INDEX_op_usadd_vec:
3148    case INDEX_op_ussub_vec:
3149        return vece <= MO_32;
3150    case INDEX_op_cmp_vec:
3151    case INDEX_op_shli_vec:
3152    case INDEX_op_shri_vec:
3153    case INDEX_op_sari_vec:
3154    case INDEX_op_rotli_vec:
3155        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3156    case INDEX_op_neg_vec:
3157        return vece >= MO_32 && have_isa_3_00;
3158    case INDEX_op_mul_vec:
3159        switch (vece) {
3160        case MO_8:
3161        case MO_16:
3162            return -1;
3163        case MO_32:
3164            return have_isa_2_07 ? 1 : -1;
3165        case MO_64:
3166            return have_isa_3_10;
3167        }
3168        return 0;
3169    case INDEX_op_bitsel_vec:
3170        return have_vsx;
3171    case INDEX_op_rotrv_vec:
3172        return -1;
3173    default:
3174        return 0;
3175    }
3176}
3177
3178static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3179                            TCGReg dst, TCGReg src)
3180{
3181    tcg_debug_assert(dst >= TCG_REG_V0);
3182
3183    /* Splat from integer reg allowed via constraints for v3.00.  */
3184    if (src < TCG_REG_V0) {
3185        tcg_debug_assert(have_isa_3_00);
3186        switch (vece) {
3187        case MO_64:
3188            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3189            return true;
3190        case MO_32:
3191            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3192            return true;
3193        default:
3194            /* Fail, so that we fall back on either dupm or mov+dup.  */
3195            return false;
3196        }
3197    }
3198
3199    /*
3200     * Recall we use (or emulate) VSX integer loads, so the integer is
3201     * right justified within the left (zero-index) double-word.
3202     */
3203    switch (vece) {
3204    case MO_8:
3205        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3206        break;
3207    case MO_16:
3208        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3209        break;
3210    case MO_32:
3211        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3212        break;
3213    case MO_64:
3214        if (have_vsx) {
3215            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3216            break;
3217        }
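        /* Without VSX, splat the left doubleword into both halves
           using two vector shift-double (vsldoi) insns.  */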
3218        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3219        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3220        break;
3221    default:
3222        g_assert_not_reached();
3223    }
3224    return true;
3225}
3226
3227static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3228                             TCGReg out, TCGReg base, intptr_t offset)
3229{
3230    int elt;
3231
3232    tcg_debug_assert(out >= TCG_REG_V0);
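    /*
     * Without a splatting load (lxvwsx/lxvdsx), lve{b,h,w}x loads the
     * element into its natural big-endian lane of the vector register,
     * and the vsplt broadcasts that lane.  On a little-endian host the
     * lane number is mirrored, hence the XOR of elt below.
     */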
3233    switch (vece) {
3234    case MO_8:
3235        if (have_isa_3_00) {
3236            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3237        } else {
3238            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3239        }
3240        elt = extract32(offset, 0, 4);
3241#if !HOST_BIG_ENDIAN
3242        elt ^= 15;
3243#endif
3244        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3245        break;
3246    case MO_16:
3247        tcg_debug_assert((offset & 1) == 0);
3248        if (have_isa_3_00) {
3249            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3250        } else {
3251            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3252        }
3253        elt = extract32(offset, 1, 3);
3254#if !HOST_BIG_ENDIAN
3255        elt ^= 7;
3256#endif
3257        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3258        break;
3259    case MO_32:
3260        if (have_isa_3_00) {
3261            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3262            break;
3263        }
3264        tcg_debug_assert((offset & 3) == 0);
3265        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3266        elt = extract32(offset, 2, 2);
3267#if !HOST_BIG_ENDIAN
3268        elt ^= 3;
3269#endif
3270        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3271        break;
3272    case MO_64:
3273        if (have_vsx) {
3274            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3275            break;
3276        }
3277        tcg_debug_assert((offset & 7) == 0);
3278        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3279        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3280        elt = extract32(offset, 3, 1);
3281#if !HOST_BIG_ENDIAN
3282        elt = !elt;
3283#endif
3284        if (elt) {
3285            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3286        } else {
3287            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3288        }
3289        break;
3290    default:
3291        g_assert_not_reached();
3292    }
3293    return true;
3294}
3295
3296static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3297                           unsigned vecl, unsigned vece,
3298                           const TCGArg args[TCG_MAX_OP_ARGS],
3299                           const int const_args[TCG_MAX_OP_ARGS])
3300{
3301    static const uint32_t
3302        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3303        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3304        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3305        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3306        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3307        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3308        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3309        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3310        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3311        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3312        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3313        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3314        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3315        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3316        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3317        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3318        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3319        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3320        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3321        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3322        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3323        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3324        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3325        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3326        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3327
3328    TCGType type = vecl + TCG_TYPE_V64;
3329    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3330    uint32_t insn;
3331
3332    switch (opc) {
3333    case INDEX_op_ld_vec:
3334        tcg_out_ld(s, type, a0, a1, a2);
3335        return;
3336    case INDEX_op_st_vec:
3337        tcg_out_st(s, type, a0, a1, a2);
3338        return;
3339    case INDEX_op_dupm_vec:
3340        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3341        return;
3342
3343    case INDEX_op_add_vec:
3344        insn = add_op[vece];
3345        break;
3346    case INDEX_op_sub_vec:
3347        insn = sub_op[vece];
3348        break;
3349    case INDEX_op_neg_vec:
3350        insn = neg_op[vece];
3351        a2 = a1;
3352        a1 = 0;
3353        break;
3354    case INDEX_op_mul_vec:
3355        insn = mul_op[vece];
3356        break;
3357    case INDEX_op_ssadd_vec:
3358        insn = ssadd_op[vece];
3359        break;
3360    case INDEX_op_sssub_vec:
3361        insn = sssub_op[vece];
3362        break;
3363    case INDEX_op_usadd_vec:
3364        insn = usadd_op[vece];
3365        break;
3366    case INDEX_op_ussub_vec:
3367        insn = ussub_op[vece];
3368        break;
3369    case INDEX_op_smin_vec:
3370        insn = smin_op[vece];
3371        break;
3372    case INDEX_op_umin_vec:
3373        insn = umin_op[vece];
3374        break;
3375    case INDEX_op_smax_vec:
3376        insn = smax_op[vece];
3377        break;
3378    case INDEX_op_umax_vec:
3379        insn = umax_op[vece];
3380        break;
3381    case INDEX_op_shlv_vec:
3382        insn = shlv_op[vece];
3383        break;
3384    case INDEX_op_shrv_vec:
3385        insn = shrv_op[vece];
3386        break;
3387    case INDEX_op_sarv_vec:
3388        insn = sarv_op[vece];
3389        break;
3390    case INDEX_op_and_vec:
3391        insn = VAND;
3392        break;
3393    case INDEX_op_or_vec:
3394        insn = VOR;
3395        break;
3396    case INDEX_op_xor_vec:
3397        insn = VXOR;
3398        break;
3399    case INDEX_op_andc_vec:
3400        insn = VANDC;
3401        break;
3402    case INDEX_op_not_vec:
3403        insn = VNOR;
3404        a2 = a1;
3405        break;
3406    case INDEX_op_orc_vec:
3407        insn = VORC;
3408        break;
3409    case INDEX_op_nand_vec:
3410        insn = VNAND;
3411        break;
3412    case INDEX_op_nor_vec:
3413        insn = VNOR;
3414        break;
3415    case INDEX_op_eqv_vec:
3416        insn = VEQV;
3417        break;
3418
3419    case INDEX_op_cmp_vec:
3420        switch (args[3]) {
3421        case TCG_COND_EQ:
3422            insn = eq_op[vece];
3423            break;
3424        case TCG_COND_NE:
3425            insn = ne_op[vece];
3426            break;
3427        case TCG_COND_GT:
3428            insn = gts_op[vece];
3429            break;
3430        case TCG_COND_GTU:
3431            insn = gtu_op[vece];
3432            break;
3433        default:
3434            g_assert_not_reached();
3435        }
3436        break;
3437
3438    case INDEX_op_bitsel_vec:
3439        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3440        return;
3441
3442    case INDEX_op_dup2_vec:
3443        assert(TCG_TARGET_REG_BITS == 32);
3444        /* With inputs a1 = xLxx, a2 = xHxx  */
3445        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3446        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3447        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3448        return;
3449
3450    case INDEX_op_ppc_mrgh_vec:
3451        insn = mrgh_op[vece];
3452        break;
3453    case INDEX_op_ppc_mrgl_vec:
3454        insn = mrgl_op[vece];
3455        break;
3456    case INDEX_op_ppc_muleu_vec:
3457        insn = muleu_op[vece];
3458        break;
3459    case INDEX_op_ppc_mulou_vec:
3460        insn = mulou_op[vece];
3461        break;
3462    case INDEX_op_ppc_pkum_vec:
3463        insn = pkum_op[vece];
3464        break;
3465    case INDEX_op_rotlv_vec:
3466        insn = rotl_op[vece];
3467        break;
3468    case INDEX_op_ppc_msum_vec:
3469        tcg_debug_assert(vece == MO_16);
3470        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3471        return;
3472
3473    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3474    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3475    default:
3476        g_assert_not_reached();
3477    }
3478
3479    tcg_debug_assert(insn != 0);
3480    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3481}
3482
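/*
 * Expand a shift-by-immediate via the variable-shift opcode: splat the
 * count into a temporary vector and shift by that.  The hardware uses
 * only the low log2(element-bits) bits of each count lane, so a byte
 * splat works for every element size.  E.g. a 32-bit shift by 20
 * becomes sextract32(20, 0, 5) = -12; splatting byte -12 (0xf4) gives
 * lanes of 0xf4f4f4f4, whose low 5 bits are again 20.
 */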
3483static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3484                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3485{
3486    TCGv_vec t1;
3487
3488    if (vece == MO_32) {
3489        /*
3490         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3491         * So using negative numbers gets us the 4th bit easily.
3492         */
3493        imm = sextract32(imm, 0, 5);
3494    } else {
3495        imm &= (8 << vece) - 1;
3496    }
3497
3498    /* Splat with bytes so that xxspltib can be used once ISA 2.07 allows MO_64. */
3499    t1 = tcg_constant_vec(type, MO_8, imm);
3500    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3501              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3502}
3503
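/*
 * The hardware provides only equality and greater-than comparisons
 * (plus not-equal on ISA 3.00, up to MO_32).  Every other condition is
 * reduced to those via the identities LT(a, b) = GT(b, a),
 * LE(a, b) = !GT(a, b) and GE(a, b) = !GT(b, a), and likewise for the
 * unsigned forms.
 */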
3504static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3505                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3506{
3507    bool need_swap = false, need_inv = false;
3508
3509    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3510
3511    switch (cond) {
3512    case TCG_COND_EQ:
3513    case TCG_COND_GT:
3514    case TCG_COND_GTU:
3515        break;
3516    case TCG_COND_NE:
3517        if (have_isa_3_00 && vece <= MO_32) {
3518            break;
3519        }
3520        /* fall through */
3521    case TCG_COND_LE:
3522    case TCG_COND_LEU:
3523        need_inv = true;
3524        break;
3525    case TCG_COND_LT:
3526    case TCG_COND_LTU:
3527        need_swap = true;
3528        break;
3529    case TCG_COND_GE:
3530    case TCG_COND_GEU:
3531        need_swap = need_inv = true;
3532        break;
3533    default:
3534        g_assert_not_reached();
3535    }
3536
3537    if (need_inv) {
3538        cond = tcg_invert_cond(cond);
3539    }
3540    if (need_swap) {
3541        TCGv_vec t1;
3542        t1 = v1, v1 = v2, v2 = t1;
3543        cond = tcg_swap_cond(cond);
3544    }
3545
3546    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3547              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3548
3549    if (need_inv) {
3550        tcg_gen_not_vec(vece, v0, v0);
3551    }
3552}
3553
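/*
 * Expand multiplication when no full element-wise multiply exists.
 * MO_8/MO_16 use the even/odd widening multiplies, merge the
 * double-width products back into order, and pack the low halves.
 * MO_32 (pre-2.07) splits each lane into 16-bit halves a = ah:al,
 * b = bh:bl, and computes
 *   a * b mod 2^32 = al*bl + ((ah*bl + al*bh) << 16)
 * where vmulouh yields al*bl in t2, and vmsumuhm of a against the
 * half-swapped b (b rotated left by 16) accumulates ah*bl + al*bh.
 */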
3554static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3555                           TCGv_vec v1, TCGv_vec v2)
3556{
3557    TCGv_vec t1 = tcg_temp_new_vec(type);
3558    TCGv_vec t2 = tcg_temp_new_vec(type);
3559    TCGv_vec c0, c16;
3560
3561    switch (vece) {
3562    case MO_8:
3563    case MO_16:
3564        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3565                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3566        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3567                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3568        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3569                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3570        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3571                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3572        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3573                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3574        break;
3575
3576    case MO_32:
3577        tcg_debug_assert(!have_isa_2_07);
3578        /*
3579         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3580         * So using -16 is a quick way to represent 16.
3581         */
3582        c16 = tcg_constant_vec(type, MO_8, -16);
3583        c0 = tcg_constant_vec(type, MO_8, 0);
3584
3585        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3586                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3587        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3588                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3589        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3590                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3591        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3592                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3593        tcg_gen_add_vec(MO_32, v0, t1, t2);
3594        break;
3595
3596    default:
3597        g_assert_not_reached();
3598    }
3599    tcg_temp_free_vec(t1);
3600    tcg_temp_free_vec(t2);
3601}
3602
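/*
 * Entry point for the opcodes that tcg_can_emit_vec_op reported as
 * expandable (-1): shifts and rotates by immediate, the full set of
 * comparisons, and the multiplies and right-rotates that lack direct
 * instructions.
 */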
3603void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3604                       TCGArg a0, ...)
3605{
3606    va_list va;
3607    TCGv_vec v0, v1, v2, t0;
3608    TCGArg a2;
3609
3610    va_start(va, a0);
3611    v0 = temp_tcgv_vec(arg_temp(a0));
3612    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3613    a2 = va_arg(va, TCGArg);
3614
3615    switch (opc) {
3616    case INDEX_op_shli_vec:
3617        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3618        break;
3619    case INDEX_op_shri_vec:
3620        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3621        break;
3622    case INDEX_op_sari_vec:
3623        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3624        break;
3625    case INDEX_op_rotli_vec:
3626        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3627        break;
3628    case INDEX_op_cmp_vec:
3629        v2 = temp_tcgv_vec(arg_temp(a2));
3630        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3631        break;
3632    case INDEX_op_mul_vec:
3633        v2 = temp_tcgv_vec(arg_temp(a2));
3634        expand_vec_mul(type, vece, v0, v1, v2);
3635        break;
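    /*
     * PPC has only a left-rotate instruction; rotate right by n is
     * implemented as rotate left by -n, which the modular rotate
     * count makes exact.
     */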
3636    case INDEX_op_rotrv_vec:
3637        v2 = temp_tcgv_vec(arg_temp(a2));
3638        t0 = tcg_temp_new_vec(type);
3639        tcg_gen_neg_vec(vece, t0, v2);
3640        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3641        tcg_temp_free_vec(t0);
3642        break;
3643    default:
3644        g_assert_not_reached();
3645    }
3646    va_end(va);
3647}
3648
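/*
 * Map each opcode to its register/constant constraints.  The letters
 * are defined in tcg-target-con-str.h: 'r' is any GPR, 'v' any vector
 * register, 'i' any immediate; the TCG_CT_CONST_* classes above appear
 * as 'I' (s16), 'T' (s32), 'U' (u32), 'M' (-1), 'W' (word size) and
 * 'Z' (zero), while 'L' and 'S' restrict softmmu qemu_ld/qemu_st
 * operands away from the registers used for the helper call arguments.
 */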
3649static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3650{
3651    switch (op) {
3652    case INDEX_op_goto_ptr:
3653        return C_O0_I1(r);
3654
3655    case INDEX_op_ld8u_i32:
3656    case INDEX_op_ld8s_i32:
3657    case INDEX_op_ld16u_i32:
3658    case INDEX_op_ld16s_i32:
3659    case INDEX_op_ld_i32:
3660    case INDEX_op_ctpop_i32:
3661    case INDEX_op_neg_i32:
3662    case INDEX_op_not_i32:
3663    case INDEX_op_ext8s_i32:
3664    case INDEX_op_ext16s_i32:
3665    case INDEX_op_bswap16_i32:
3666    case INDEX_op_bswap32_i32:
3667    case INDEX_op_extract_i32:
3668    case INDEX_op_ld8u_i64:
3669    case INDEX_op_ld8s_i64:
3670    case INDEX_op_ld16u_i64:
3671    case INDEX_op_ld16s_i64:
3672    case INDEX_op_ld32u_i64:
3673    case INDEX_op_ld32s_i64:
3674    case INDEX_op_ld_i64:
3675    case INDEX_op_ctpop_i64:
3676    case INDEX_op_neg_i64:
3677    case INDEX_op_not_i64:
3678    case INDEX_op_ext8s_i64:
3679    case INDEX_op_ext16s_i64:
3680    case INDEX_op_ext32s_i64:
3681    case INDEX_op_ext_i32_i64:
3682    case INDEX_op_extu_i32_i64:
3683    case INDEX_op_bswap16_i64:
3684    case INDEX_op_bswap32_i64:
3685    case INDEX_op_bswap64_i64:
3686    case INDEX_op_extract_i64:
3687        return C_O1_I1(r, r);
3688
3689    case INDEX_op_st8_i32:
3690    case INDEX_op_st16_i32:
3691    case INDEX_op_st_i32:
3692    case INDEX_op_st8_i64:
3693    case INDEX_op_st16_i64:
3694    case INDEX_op_st32_i64:
3695    case INDEX_op_st_i64:
3696        return C_O0_I2(r, r);
3697
3698    case INDEX_op_add_i32:
3699    case INDEX_op_and_i32:
3700    case INDEX_op_or_i32:
3701    case INDEX_op_xor_i32:
3702    case INDEX_op_andc_i32:
3703    case INDEX_op_orc_i32:
3704    case INDEX_op_eqv_i32:
3705    case INDEX_op_shl_i32:
3706    case INDEX_op_shr_i32:
3707    case INDEX_op_sar_i32:
3708    case INDEX_op_rotl_i32:
3709    case INDEX_op_rotr_i32:
3710    case INDEX_op_setcond_i32:
3711    case INDEX_op_and_i64:
3712    case INDEX_op_andc_i64:
3713    case INDEX_op_shl_i64:
3714    case INDEX_op_shr_i64:
3715    case INDEX_op_sar_i64:
3716    case INDEX_op_rotl_i64:
3717    case INDEX_op_rotr_i64:
3718    case INDEX_op_setcond_i64:
3719        return C_O1_I2(r, r, ri);
3720
3721    case INDEX_op_mul_i32:
3722    case INDEX_op_mul_i64:
3723        return C_O1_I2(r, r, rI);
3724
3725    case INDEX_op_div_i32:
3726    case INDEX_op_divu_i32:
3727    case INDEX_op_rem_i32:
3728    case INDEX_op_remu_i32:
3729    case INDEX_op_nand_i32:
3730    case INDEX_op_nor_i32:
3731    case INDEX_op_muluh_i32:
3732    case INDEX_op_mulsh_i32:
3733    case INDEX_op_orc_i64:
3734    case INDEX_op_eqv_i64:
3735    case INDEX_op_nand_i64:
3736    case INDEX_op_nor_i64:
3737    case INDEX_op_div_i64:
3738    case INDEX_op_divu_i64:
3739    case INDEX_op_rem_i64:
3740    case INDEX_op_remu_i64:
3741    case INDEX_op_mulsh_i64:
3742    case INDEX_op_muluh_i64:
3743        return C_O1_I2(r, r, r);
3744
3745    case INDEX_op_sub_i32:
3746        return C_O1_I2(r, rI, ri);
3747    case INDEX_op_add_i64:
3748        return C_O1_I2(r, r, rT);
3749    case INDEX_op_or_i64:
3750    case INDEX_op_xor_i64:
3751        return C_O1_I2(r, r, rU);
3752    case INDEX_op_sub_i64:
3753        return C_O1_I2(r, rI, rT);
3754    case INDEX_op_clz_i32:
3755    case INDEX_op_ctz_i32:
3756    case INDEX_op_clz_i64:
3757    case INDEX_op_ctz_i64:
3758        return C_O1_I2(r, r, rZW);
3759
3760    case INDEX_op_brcond_i32:
3761    case INDEX_op_brcond_i64:
3762        return C_O0_I2(r, ri);
3763
3764    case INDEX_op_movcond_i32:
3765    case INDEX_op_movcond_i64:
3766        return C_O1_I4(r, r, ri, rZ, rZ);
3767    case INDEX_op_deposit_i32:
3768    case INDEX_op_deposit_i64:
3769        return C_O1_I2(r, 0, rZ);
3770    case INDEX_op_brcond2_i32:
3771        return C_O0_I4(r, r, ri, ri);
3772    case INDEX_op_setcond2_i32:
3773        return C_O1_I4(r, r, r, ri, ri);
3774    case INDEX_op_add2_i64:
3775    case INDEX_op_add2_i32:
3776        return C_O2_I4(r, r, r, r, rI, rZM);
3777    case INDEX_op_sub2_i64:
3778    case INDEX_op_sub2_i32:
3779        return C_O2_I4(r, r, rI, rZM, r, r);
3780
3781    case INDEX_op_qemu_ld_i32:
3782        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3783                ? C_O1_I1(r, L)
3784                : C_O1_I2(r, L, L));
3785
3786    case INDEX_op_qemu_st_i32:
3787        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3788                ? C_O0_I2(S, S)
3789                : C_O0_I3(S, S, S));
3790
3791    case INDEX_op_qemu_ld_i64:
3792        return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
3793                : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
3794                : C_O2_I2(L, L, L, L));
3795
3796    case INDEX_op_qemu_st_i64:
3797        return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
3798                : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
3799                : C_O0_I4(S, S, S, S));
3800
3801    case INDEX_op_add_vec:
3802    case INDEX_op_sub_vec:
3803    case INDEX_op_mul_vec:
3804    case INDEX_op_and_vec:
3805    case INDEX_op_or_vec:
3806    case INDEX_op_xor_vec:
3807    case INDEX_op_andc_vec:
3808    case INDEX_op_orc_vec:
3809    case INDEX_op_nor_vec:
3810    case INDEX_op_eqv_vec:
3811    case INDEX_op_nand_vec:
3812    case INDEX_op_cmp_vec:
3813    case INDEX_op_ssadd_vec:
3814    case INDEX_op_sssub_vec:
3815    case INDEX_op_usadd_vec:
3816    case INDEX_op_ussub_vec:
3817    case INDEX_op_smax_vec:
3818    case INDEX_op_smin_vec:
3819    case INDEX_op_umax_vec:
3820    case INDEX_op_umin_vec:
3821    case INDEX_op_shlv_vec:
3822    case INDEX_op_shrv_vec:
3823    case INDEX_op_sarv_vec:
3824    case INDEX_op_rotlv_vec:
3825    case INDEX_op_rotrv_vec:
3826    case INDEX_op_ppc_mrgh_vec:
3827    case INDEX_op_ppc_mrgl_vec:
3828    case INDEX_op_ppc_muleu_vec:
3829    case INDEX_op_ppc_mulou_vec:
3830    case INDEX_op_ppc_pkum_vec:
3831    case INDEX_op_dup2_vec:
3832        return C_O1_I2(v, v, v);
3833
3834    case INDEX_op_not_vec:
3835    case INDEX_op_neg_vec:
3836        return C_O1_I1(v, v);
3837
3838    case INDEX_op_dup_vec:
3839        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3840
3841    case INDEX_op_ld_vec:
3842    case INDEX_op_dupm_vec:
3843        return C_O1_I1(v, r);
3844
3845    case INDEX_op_st_vec:
3846        return C_O0_I2(v, r);
3847
3848    case INDEX_op_bitsel_vec:
3849    case INDEX_op_ppc_msum_vec:
3850        return C_O1_I3(v, v, v, v);
3851
3852    default:
3853        g_assert_not_reached();
3854    }
3855}
3856
3857static void tcg_target_init(TCGContext *s)
3858{
3859    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3860    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3861
3862    have_isa = tcg_isa_base;
3863    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3864        have_isa = tcg_isa_2_06;
3865    }
3866#ifdef PPC_FEATURE2_ARCH_2_07
3867    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3868        have_isa = tcg_isa_2_07;
3869    }
3870#endif
3871#ifdef PPC_FEATURE2_ARCH_3_00
3872    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3873        have_isa = tcg_isa_3_00;
3874    }
3875#endif
3876#ifdef PPC_FEATURE2_ARCH_3_10
3877    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3878        have_isa = tcg_isa_3_10;
3879    }
3880#endif
3881
3882#ifdef PPC_FEATURE2_HAS_ISEL
3883    /* Prefer the kernel's explicit hwcap bit over inferring from the ISA level. */
3884    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3885#else
3886    /* Fall back to knowing Power7 (2.06) has ISEL. */
3887    have_isel = have_isa_2_06;
3888#endif
3889
3890    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3891        have_altivec = true;
3892        /* We only care about the portion of VSX that overlaps Altivec. */
3893        if (hwcap & PPC_FEATURE_HAS_VSX) {
3894            have_vsx = true;
3895        }
3896    }
3897
3898    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3899    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3900    if (have_altivec) {
3901        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3902        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3903    }
3904
3905    tcg_target_call_clobber_regs = 0;
3906    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3907    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3908    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3909    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3910    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3911    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3912    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3913    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3914    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3915    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3916    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3917    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3918
3919    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3920    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3921    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3922    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3923    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3924    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3925    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3926    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3927    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3928    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3929    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3930    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3931    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3932    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3933    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3934    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3935    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3936    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3937    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3938    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3939
3940    s->reserved_regs = 0;
3941    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3942    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3943#if defined(_CALL_SYSV)
3944    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3945#endif
3946#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3947    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3948#endif
3949    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3950    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3951    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3952    if (USE_REG_TB) {
3953        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3954    }
3955}
3956
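/*
 * Minimal DWARF unwind info for the generated code: one CIE plus one
 * FDE covering the entire code buffer, so that debuggers can unwind
 * from JIT-generated frames back into QEMU proper.
 */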
3957#ifdef __ELF__
3958typedef struct {
3959    DebugFrameCIE cie;
3960    DebugFrameFDEHeader fde;
3961    uint8_t fde_def_cfa[4];
3962    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3963} DebugFrame;
3964
3965/* We're expecting a 2 byte uleb128 encoded value.  */
3966QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3967
3968#if TCG_TARGET_REG_BITS == 64
3969# define ELF_HOST_MACHINE EM_PPC64
3970#else
3971# define ELF_HOST_MACHINE EM_PPC
3972#endif
3973
3974static DebugFrame debug_frame = {
3975    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3976    .cie.id = -1,
3977    .cie.version = 1,
3978    .cie.code_align = 1,
3979    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3980    .cie.return_column = 65,
3981
3982    /* Total FDE size does not include the "len" member.  */
3983    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3984
3985    .fde_def_cfa = {
3986        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3987        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3988        (FRAME_SIZE >> 7)
3989    },
3990    .fde_reg_ofs = {
3991        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3992        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3993    }
3994};
3995
3996void tcg_register_jit(const void *buf, size_t buf_size)
3997{
3998    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3999    int i;
4000
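    /*
     * Each callee-saved register gets a DW_CFA_offset entry: the opcode
     * byte is 0x80 | regno, followed by the register's CFA-relative
     * save slot expressed in data_align (-SZR) units.
     */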
4001    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4002        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4003        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4004    }
4005
4006    debug_frame.fde.func_start = (uintptr_t)buf;
4007    debug_frame.fde.func_len = buf_size;
4008
4009    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4010}
4011#endif /* __ELF__ */
4019