xref: /qemu/tcg/s390x/tcg-target.c.inc (revision 83ecdb18)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-ldst.c.inc"
33#include "../tcg-pool.c.inc"
34#include "elf.h"
35
/*
 * Backend-specific constant constraint bits, evaluated by
 * tcg_target_const_match().
 */
#define TCG_CT_CONST_S16        (1 << 8)    /* equals its own int16_t cast */
#define TCG_CT_CONST_S32        (1 << 9)    /* equals its own int32_t cast */
#define TCG_CT_CONST_S33        (1 << 10)   /* in [-0xffffffff, 0xffffffff] */
#define TCG_CT_CONST_ZERO       (1 << 11)   /* exactly zero */
#define TCG_CT_CONST_P32        (1 << 12)   /* one 32-bit half is all zero */
#define TCG_CT_CONST_INV        (1 << 13)   /* complement before P32/INVRISBG */
#define TCG_CT_CONST_INVRISBG   (1 << 14)   /* complement passes risbg_mask() */

/* Register-set masks: 16 bits starting at bit 0, 32 bits starting at bit 32. */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
46
47/*
48 * For softmmu, we need to avoid conflicts with the first 3
49 * argument registers to perform the tlb lookup, and to call
50 * the helper function.
51 */
52#ifdef CONFIG_SOFTMMU
53#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
54#else
55#define SOFTMMU_RESERVE_REGS 0
56#endif
57
58
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
61#define TCG_REG_NONE    0
62
/* A scratch register that may be used throughout the backend.  */
64#define TCG_TMP0        TCG_REG_R1
65
66#ifndef CONFIG_SOFTMMU
67#define TCG_GUEST_BASE_REG TCG_REG_R13
68#endif
69
70/* All of the following instructions are prefixed with their instruction
71   format, and are defined as 8- or 16-bit quantities, even when the two
72   halves of the 16-bit quantity may appear 32 bits apart in the insn.
73   This makes it easy to copy the values from the tables in Appendix B.  */
typedef enum S390Opcode {
    /* ---- RIL format ---- */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* ---- RI format ---- */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    /* ---- RIE-b format ---- */
    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    /* ---- RIE-c format ---- */
    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    /* ---- RIE-f format ---- */
    RIEf_RISBG   = 0xec55,

    /* ---- RIE-g format ---- */
    RIEg_LOCGHI  = 0xec46,

    /* ---- RRE format ---- */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* ---- RRF-a format (no m4 field used) ---- */
    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    /* ---- RRF-a format (with m4 field) ---- */
    RRFam_SELGR = 0xb9e3,

    /* ---- RRF-c format ---- */
    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    /* ---- RR format (8-bit opcodes) ---- */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* ---- RSY format ---- */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* ---- RS format (8-bit opcodes) ---- */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* ---- RXY format ---- */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* ---- RX format (8-bit opcodes) ---- */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* ---- VRI-a / VRI-b / VRI-c formats ---- */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* ---- VRR-a / VRR-c / VRR-e / VRR-f formats ---- */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* we leave the m5 cs field 0 */
    VRRc_VCHL   = 0xe7f9,   /* we leave the m5 cs field 0 */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* ---- VRS-a / VRS-b / VRS-c formats ---- */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* ---- VRX format ---- */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /* ---- No format prefix ---- */
    NOP         = 0x0707,
} S390Opcode;
334
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names for debug output, one entry per register
 * number.  Entries 0-15 name the general registers, entries 16-31 are
 * left null, and entries 32-63 name the vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* 0-15: general registers */
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",
    /* 16-31: no name */
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,
    /* 32-63: vector registers */
    "%v0",  "%v1",  "%v2",  "%v3",
    "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11",
    "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19",
    "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27",
    "%v28", "%v29", "%v30", "%v31",
};
#endif
346
347/* Since R6 is a potential argument register, choose it last of the
348   call-saved registers.  Likewise prefer the call-clobbered registers
349   in reverse order to maximize the chance of avoiding the arguments.  */
350static const int tcg_target_reg_alloc_order[] = {
351    /* Call saved registers.  */
352    TCG_REG_R13,
353    TCG_REG_R12,
354    TCG_REG_R11,
355    TCG_REG_R10,
356    TCG_REG_R9,
357    TCG_REG_R8,
358    TCG_REG_R7,
359    TCG_REG_R6,
360    /* Call clobbered registers.  */
361    TCG_REG_R14,
362    TCG_REG_R0,
363    TCG_REG_R1,
364    /* Argument registers, in reverse order of allocation.  */
365    TCG_REG_R5,
366    TCG_REG_R4,
367    TCG_REG_R3,
368    TCG_REG_R2,
369
370    /* V8-V15 are call saved, and omitted. */
371    TCG_REG_V0,
372    TCG_REG_V1,
373    TCG_REG_V2,
374    TCG_REG_V3,
375    TCG_REG_V4,
376    TCG_REG_V5,
377    TCG_REG_V6,
378    TCG_REG_V7,
379    TCG_REG_V16,
380    TCG_REG_V17,
381    TCG_REG_V18,
382    TCG_REG_V19,
383    TCG_REG_V20,
384    TCG_REG_V21,
385    TCG_REG_V22,
386    TCG_REG_V23,
387    TCG_REG_V24,
388    TCG_REG_V25,
389    TCG_REG_V26,
390    TCG_REG_V27,
391    TCG_REG_V28,
392    TCG_REG_V29,
393    TCG_REG_V30,
394    TCG_REG_V31,
395};
396
397static const int tcg_target_call_iarg_regs[] = {
398    TCG_REG_R2,
399    TCG_REG_R3,
400    TCG_REG_R4,
401    TCG_REG_R5,
402    TCG_REG_R6,
403};
404
405static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
406{
407    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
408    tcg_debug_assert(slot == 0);
409    return TCG_REG_R2;
410}
411
/*
 * Condition masks.  The four primitive masks are single bits; the
 * compound masks are the bitwise OR of the primitives they accept.
 */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1

#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)

/* No bit set / all four bits set. */
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
421
422/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
423static const uint8_t tcg_cond_to_s390_cond[] = {
424    [TCG_COND_EQ]  = S390_CC_EQ,
425    [TCG_COND_NE]  = S390_CC_NE,
426    [TCG_COND_LT]  = S390_CC_LT,
427    [TCG_COND_LE]  = S390_CC_LE,
428    [TCG_COND_GT]  = S390_CC_GT,
429    [TCG_COND_GE]  = S390_CC_GE,
430    [TCG_COND_LTU] = S390_CC_LT,
431    [TCG_COND_LEU] = S390_CC_LE,
432    [TCG_COND_GTU] = S390_CC_GT,
433    [TCG_COND_GEU] = S390_CC_GE,
434};
435
436/* Condition codes that result from a LOAD AND TEST.  Here, we have no
437   unsigned instruction variation, however since the test is vs zero we
438   can re-map the outcomes appropriately.  */
439static const uint8_t tcg_cond_to_ltr_cond[] = {
440    [TCG_COND_EQ]  = S390_CC_EQ,
441    [TCG_COND_NE]  = S390_CC_NE,
442    [TCG_COND_LT]  = S390_CC_LT,
443    [TCG_COND_LE]  = S390_CC_LE,
444    [TCG_COND_GT]  = S390_CC_GT,
445    [TCG_COND_GE]  = S390_CC_GE,
446    [TCG_COND_LTU] = S390_CC_NEVER,
447    [TCG_COND_LEU] = S390_CC_EQ,
448    [TCG_COND_GTU] = S390_CC_NE,
449    [TCG_COND_GEU] = S390_CC_ALWAYS,
450};
451
#ifdef CONFIG_SOFTMMU
/* Load helpers, indexed by the size, sign and byte-swap bits of the MemOp. */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    /* single byte */
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    /* little-endian */
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    /* big-endian */
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/* Store helpers, indexed by the size and byte-swap bits of the MemOp. */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    /* single byte */
    [MO_UB]   = helper_ret_stb_mmu,
    /* little-endian */
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    /* big-endian */
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
478
479static const tcg_insn_unit *tb_ret_addr;
480uint64_t s390_facilities[3];
481
482static inline bool is_general_reg(TCGReg r)
483{
484    return r <= TCG_REG_R15;
485}
486
487static inline bool is_vector_reg(TCGReg r)
488{
489    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
490}
491
492static bool patch_reloc(tcg_insn_unit *src_rw, int type,
493                        intptr_t value, intptr_t addend)
494{
495    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
496    intptr_t pcrel2;
497    uint32_t old;
498
499    value += addend;
500    pcrel2 = (tcg_insn_unit *)value - src_rx;
501
502    switch (type) {
503    case R_390_PC16DBL:
504        if (pcrel2 == (int16_t)pcrel2) {
505            tcg_patch16(src_rw, pcrel2);
506            return true;
507        }
508        break;
509    case R_390_PC32DBL:
510        if (pcrel2 == (int32_t)pcrel2) {
511            tcg_patch32(src_rw, pcrel2);
512            return true;
513        }
514        break;
515    case R_390_20:
516        if (value == sextract64(value, 0, 20)) {
517            old = *(uint32_t *)src_rw & 0xf00000ff;
518            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
519            tcg_patch32(src_rw, old);
520            return true;
521        }
522        break;
523    default:
524        g_assert_not_reached();
525    }
526    return false;
527}
528
/*
 * If every set bit of VAL lies within one aligned 16-bit field, return
 * the index (0 = least significant) of the first such field; otherwise
 * return -1.  Zero yields 0.
 */
static int is_const_p16(uint64_t val)
{
    int field = 0;

    while (field < 4) {
        uint64_t keep = (uint64_t)0xffff << (field * 16);

        if ((val & keep) == val) {
            return field;
        }
        field++;
    }
    return -1;
}
539
/*
 * Return 0 if the upper 32 bits of VAL are clear, 1 if only the lower
 * 32 bits are clear, and -1 if both halves hold set bits.
 */
static int is_const_p32(uint64_t val)
{
    uint32_t upper = val >> 32;
    uint32_t lower = (uint32_t)val;

    if (upper == 0) {
        return 0;
    }
    return lower == 0 ? 1 : -1;
}
550
/*
 * Return true if C is a single run of ones, possibly wrapping around
 * the ends of the 64-bit word.  Accepted shapes:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * All-zeros and all-ones are rejected.  Copied from gcc sources.
 */
static bool risbg_mask(uint64_t c)
{
    /*
     * Inverting does not change the number of 0<->1 transitions, so
     * normalize to a value whose least significant bit is zero.
     */
    uint64_t bits = (c & 1) ? ~c : c;
    uint64_t first, rest, second;

    /* All zeros or all ones: no transition at all. */
    if (bits == 0) {
        return false;
    }

    /* Lowest set bit marks the first transition. */
    first = bits & -bits;
    /* Invert and drop everything below the first transition. */
    rest = ~bits & -first;
    /* Lowest set bit of the remainder marks the second transition. */
    second = rest & -rest;

    /* Accept if nothing but ones follows (or there is no remainder). */
    return rest == -second;
}
582
583/* Test if a constant matches the constraint. */
584static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
585{
586    if (ct & TCG_CT_CONST) {
587        return 1;
588    }
589
590    if (type == TCG_TYPE_I32) {
591        val = (int32_t)val;
592    }
593
594    /* The following are mutually exclusive.  */
595    if (ct & TCG_CT_CONST_S16) {
596        return val == (int16_t)val;
597    } else if (ct & TCG_CT_CONST_S32) {
598        return val == (int32_t)val;
599    } else if (ct & TCG_CT_CONST_S33) {
600        return val >= -0xffffffffll && val <= 0xffffffffll;
601    } else if (ct & TCG_CT_CONST_ZERO) {
602        return val == 0;
603    }
604
605    if (ct & TCG_CT_CONST_INV) {
606        val = ~val;
607    }
608    /*
609     * Note that is_const_p16 is a subset of is_const_p32,
610     * so we don't need both constraints.
611     */
612    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
613        return true;
614    }
615    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
616        return true;
617    }
618
619    return 0;
620}
621
622/* Emit instructions according to the given instruction format.  */
623
624static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
625{
626    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
627}
628
629static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
630                             TCGReg r1, TCGReg r2)
631{
632    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
633}
634
635/* RRF-a without the m4 field */
636static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
637                              TCGReg r1, TCGReg r2, TCGReg r3)
638{
639    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
640}
641
642/* RRF-a with the m4 field */
643static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
644                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
645{
646    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
647}
648
649static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
650                              TCGReg r1, TCGReg r2, int m3)
651{
652    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
653}
654
655static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
656{
657    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
658}
659
660static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
661                             int i2, int m3)
662{
663    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
664    tcg_out32(s, (i2 << 16) | (op & 0xff));
665}
666
667static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
668{
669    tcg_out16(s, op | (r1 << 4));
670    tcg_out32(s, i2);
671}
672
673static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
674                            TCGReg b2, TCGReg r3, int disp)
675{
676    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
677              | (disp & 0xfff));
678}
679
680static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
681                             TCGReg b2, TCGReg r3, int disp)
682{
683    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
684    tcg_out32(s, (op & 0xff) | (b2 << 28)
685              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
686}
687
688#define tcg_out_insn_RX   tcg_out_insn_RS
689#define tcg_out_insn_RXY  tcg_out_insn_RSY
690
691static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
692{
693    /*
694     * Shift bit 4 of each regno to its corresponding bit of RXB.
695     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
696     * is the left-shift of the 4th operand.
697     */
698    return ((v1 & 0x10) << (4 + 3))
699         | ((v2 & 0x10) << (4 + 2))
700         | ((v3 & 0x10) << (4 + 1))
701         | ((v4 & 0x10) << (4 + 0));
702}
703
704static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
705                              TCGReg v1, uint16_t i2, int m3)
706{
707    tcg_debug_assert(is_vector_reg(v1));
708    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
709    tcg_out16(s, i2);
710    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
711}
712
713static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
714                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
715{
716    tcg_debug_assert(is_vector_reg(v1));
717    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
718    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
719    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
720}
721
722static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
723                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
724{
725    tcg_debug_assert(is_vector_reg(v1));
726    tcg_debug_assert(is_vector_reg(v3));
727    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
728    tcg_out16(s, i2);
729    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
730}
731
732static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
733                              TCGReg v1, TCGReg v2, int m3)
734{
735    tcg_debug_assert(is_vector_reg(v1));
736    tcg_debug_assert(is_vector_reg(v2));
737    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
738    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
739}
740
741static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
742                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
743{
744    tcg_debug_assert(is_vector_reg(v1));
745    tcg_debug_assert(is_vector_reg(v2));
746    tcg_debug_assert(is_vector_reg(v3));
747    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
748    tcg_out16(s, v3 << 12);
749    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
750}
751
752static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
753                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
754{
755    tcg_debug_assert(is_vector_reg(v1));
756    tcg_debug_assert(is_vector_reg(v2));
757    tcg_debug_assert(is_vector_reg(v3));
758    tcg_debug_assert(is_vector_reg(v4));
759    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
760    tcg_out16(s, v3 << 12);
761    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
762}
763
764static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
765                              TCGReg v1, TCGReg r2, TCGReg r3)
766{
767    tcg_debug_assert(is_vector_reg(v1));
768    tcg_debug_assert(is_general_reg(r2));
769    tcg_debug_assert(is_general_reg(r3));
770    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
771    tcg_out16(s, r3 << 12);
772    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
773}
774
775static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
776                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
777{
778    tcg_debug_assert(is_vector_reg(v1));
779    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
780    tcg_debug_assert(is_general_reg(b2));
781    tcg_debug_assert(is_vector_reg(v3));
782    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
783    tcg_out16(s, b2 << 12 | d2);
784    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
785}
786
787static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
788                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
789{
790    tcg_debug_assert(is_vector_reg(v1));
791    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
792    tcg_debug_assert(is_general_reg(b2));
793    tcg_debug_assert(is_general_reg(r3));
794    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
795    tcg_out16(s, b2 << 12 | d2);
796    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
797}
798
799static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
800                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
801{
802    tcg_debug_assert(is_general_reg(r1));
803    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
804    tcg_debug_assert(is_general_reg(b2));
805    tcg_debug_assert(is_vector_reg(v3));
806    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
807    tcg_out16(s, b2 << 12 | d2);
808    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
809}
810
811static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
812                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
813{
814    tcg_debug_assert(is_vector_reg(v1));
815    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
816    tcg_debug_assert(is_general_reg(x2));
817    tcg_debug_assert(is_general_reg(b2));
818    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
819    tcg_out16(s, (b2 << 12) | d2);
820    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
821}
822
823/* Emit an opcode with "type-checking" of the format.  */
824#define tcg_out_insn(S, FMT, OP, ...) \
825    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
826
827
828/* emit 64-bit shifts */
829static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
830                         TCGReg src, TCGReg sh_reg, int sh_imm)
831{
832    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
833}
834
835/* emit 32-bit shifts */
836static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
837                         TCGReg sh_reg, int sh_imm)
838{
839    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
840}
841
842static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
843{
844    if (src == dst) {
845        return true;
846    }
847    switch (type) {
848    case TCG_TYPE_I32:
849        if (likely(is_general_reg(dst) && is_general_reg(src))) {
850            tcg_out_insn(s, RR, LR, dst, src);
851            break;
852        }
853        /* fallthru */
854
855    case TCG_TYPE_I64:
856        if (likely(is_general_reg(dst))) {
857            if (likely(is_general_reg(src))) {
858                tcg_out_insn(s, RRE, LGR, dst, src);
859            } else {
860                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
861            }
862            break;
863        } else if (is_general_reg(src)) {
864            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
865            break;
866        }
867        /* fallthru */
868
869    case TCG_TYPE_V64:
870    case TCG_TYPE_V128:
871        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
872        break;
873
874    default:
875        g_assert_not_reached();
876    }
877    return true;
878}
879
880static const S390Opcode li_insns[4] = {
881    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
882};
883static const S390Opcode oi_insns[4] = {
884    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
885};
886static const S390Opcode lif_insns[2] = {
887    RIL_LLILF, RIL_LLIHF,
888};
889
890/* load a register with an immediate value */
891static void tcg_out_movi(TCGContext *s, TCGType type,
892                         TCGReg ret, tcg_target_long sval)
893{
894    tcg_target_ulong uval = sval;
895    ptrdiff_t pc_off;
896    int i;
897
898    if (type == TCG_TYPE_I32) {
899        uval = (uint32_t)sval;
900        sval = (int32_t)sval;
901    }
902
903    /* Try all 32-bit insns that can load it in one go.  */
904    if (sval >= -0x8000 && sval < 0x8000) {
905        tcg_out_insn(s, RI, LGHI, ret, sval);
906        return;
907    }
908
909    i = is_const_p16(uval);
910    if (i >= 0) {
911        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
912        return;
913    }
914
915    /* Try all 48-bit insns that can load it in one go.  */
916    if (sval == (int32_t)sval) {
917        tcg_out_insn(s, RIL, LGFI, ret, sval);
918        return;
919    }
920
921    i = is_const_p32(uval);
922    if (i >= 0) {
923        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
924        return;
925    }
926
927    /* Try for PC-relative address load.  For odd addresses, add one. */
928    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
929    if (pc_off == (int32_t)pc_off) {
930        tcg_out_insn(s, RIL, LARL, ret, pc_off);
931        if (sval & 1) {
932            tcg_out_insn(s, RI, AGHI, ret, 1);
933        }
934        return;
935    }
936
937    /* Otherwise, load it by parts. */
938    i = is_const_p16((uint32_t)uval);
939    if (i >= 0) {
940        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
941    } else {
942        tcg_out_insn(s, RIL, LLILF, ret, uval);
943    }
944    uval >>= 32;
945    i = is_const_p16(uval);
946    if (i >= 0) {
947        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
948    } else {
949        tcg_out_insn(s, RIL, OIHF, ret, uval);
950    }
951}
952
953/* Emit a load/store type instruction.  Inputs are:
954   DATA:     The register to be loaded or stored.
955   BASE+OFS: The effective address.
956   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
957   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
958
959static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
960                        TCGReg data, TCGReg base, TCGReg index,
961                        tcg_target_long ofs)
962{
963    if (ofs < -0x80000 || ofs >= 0x80000) {
964        /* Combine the low 20 bits of the offset with the actual load insn;
965           the high 44 bits must come from an immediate load.  */
966        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
967        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
968        ofs = low;
969
970        /* If we were already given an index register, add it in.  */
971        if (index != TCG_REG_NONE) {
972            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
973        }
974        index = TCG_TMP0;
975    }
976
977    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
978        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
979    } else {
980        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
981    }
982}
983
984static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
985                            TCGReg data, TCGReg base, TCGReg index,
986                            tcg_target_long ofs, int m3)
987{
988    if (ofs < 0 || ofs >= 0x1000) {
989        if (ofs >= -0x80000 && ofs < 0x80000) {
990            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
991            base = TCG_TMP0;
992            index = TCG_REG_NONE;
993            ofs = 0;
994        } else {
995            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
996            if (index != TCG_REG_NONE) {
997                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
998            }
999            index = TCG_TMP0;
1000            ofs = 0;
1001        }
1002    }
1003    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1004}
1005
1006/* load data without address translation or endianness conversion */
1007static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1008                       TCGReg base, intptr_t ofs)
1009{
1010    switch (type) {
1011    case TCG_TYPE_I32:
1012        if (likely(is_general_reg(data))) {
1013            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1014            break;
1015        }
1016        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1017        break;
1018
1019    case TCG_TYPE_I64:
1020        if (likely(is_general_reg(data))) {
1021            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1022            break;
1023        }
1024        /* fallthru */
1025
1026    case TCG_TYPE_V64:
1027        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1028        break;
1029
1030    case TCG_TYPE_V128:
1031        /* Hint quadword aligned.  */
1032        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1033        break;
1034
1035    default:
1036        g_assert_not_reached();
1037    }
1038}
1039
1040static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1041                       TCGReg base, intptr_t ofs)
1042{
1043    switch (type) {
1044    case TCG_TYPE_I32:
1045        if (likely(is_general_reg(data))) {
1046            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1047        } else {
1048            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1049        }
1050        break;
1051
1052    case TCG_TYPE_I64:
1053        if (likely(is_general_reg(data))) {
1054            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1055            break;
1056        }
1057        /* fallthru */
1058
1059    case TCG_TYPE_V64:
1060        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1061        break;
1062
1063    case TCG_TYPE_V128:
1064        /* Hint quadword aligned.  */
1065        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1066        break;
1067
1068    default:
1069        g_assert_not_reached();
1070    }
1071}
1072
1073static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1074                               TCGReg base, intptr_t ofs)
1075{
1076    return false;
1077}
1078
1079static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1080{
1081    return false;
1082}
1083
1084static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1085                             tcg_target_long imm)
1086{
1087    /* This function is only used for passing structs by reference. */
1088    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
1089}
1090
1091static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1092                                 int msb, int lsb, int ofs, int z)
1093{
1094    /* Format RIE-f */
1095    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1096    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1097    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1098}
1099
1100static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1101{
1102    tcg_out_insn(s, RRE, LGBR, dest, src);
1103}
1104
1105static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
1106{
1107    tcg_out_insn(s, RRE, LLGCR, dest, src);
1108}
1109
1110static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1111{
1112    tcg_out_insn(s, RRE, LGHR, dest, src);
1113}
1114
1115static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
1116{
1117    tcg_out_insn(s, RRE, LLGHR, dest, src);
1118}
1119
1120static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1121{
1122    tcg_out_insn(s, RRE, LGFR, dest, src);
1123}
1124
1125static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1126{
1127    tcg_out_insn(s, RRE, LLGFR, dest, src);
1128}
1129
1130static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
1131{
1132    tcg_out_ext32s(s, dest, src);
1133}
1134
1135static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
1136{
1137    tcg_out_ext32u(s, dest, src);
1138}
1139
1140static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
1141{
1142    tcg_out_mov(s, TCG_TYPE_I32, dest, src);
1143}
1144
1145static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1146{
1147    int msb, lsb;
1148    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1149        /* Achieve wraparound by swapping msb and lsb.  */
1150        msb = 64 - ctz64(~val);
1151        lsb = clz64(~val) - 1;
1152    } else {
1153        msb = clz64(val);
1154        lsb = 63 - ctz64(val);
1155    }
1156    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1157}
1158
1159static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1160{
1161    static const S390Opcode ni_insns[4] = {
1162        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1163    };
1164    static const S390Opcode nif_insns[2] = {
1165        RIL_NILF, RIL_NIHF
1166    };
1167    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1168    int i;
1169
1170    /* Look for the zero-extensions.  */
1171    if ((val & valid) == 0xffffffff) {
1172        tcg_out_ext32u(s, dest, dest);
1173        return;
1174    }
1175    if ((val & valid) == 0xff) {
1176        tcg_out_ext8u(s, dest, dest);
1177        return;
1178    }
1179    if ((val & valid) == 0xffff) {
1180        tcg_out_ext16u(s, dest, dest);
1181        return;
1182    }
1183
1184    i = is_const_p16(~val & valid);
1185    if (i >= 0) {
1186        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1187        return;
1188    }
1189
1190    i = is_const_p32(~val & valid);
1191    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1192    if (i >= 0) {
1193        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1194        return;
1195    }
1196
1197    if (risbg_mask(val)) {
1198        tgen_andi_risbg(s, dest, dest, val);
1199        return;
1200    }
1201
1202    g_assert_not_reached();
1203}
1204
1205static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1206{
1207    static const S390Opcode oif_insns[2] = {
1208        RIL_OILF, RIL_OIHF
1209    };
1210
1211    int i;
1212
1213    i = is_const_p16(val);
1214    if (i >= 0) {
1215        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1216        return;
1217    }
1218
1219    i = is_const_p32(val);
1220    if (i >= 0) {
1221        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1222        return;
1223    }
1224
1225    g_assert_not_reached();
1226}
1227
1228static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1229{
1230    switch (is_const_p32(val)) {
1231    case 0:
1232        tcg_out_insn(s, RIL, XILF, dest, val);
1233        break;
1234    case 1:
1235        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1236        break;
1237    default:
1238        g_assert_not_reached();
1239    }
1240}
1241
1242static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1243                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
1244{
1245    bool is_unsigned = is_unsigned_cond(c);
1246    TCGCond inv_c = tcg_invert_cond(c);
1247    S390Opcode op;
1248
1249    if (c2const) {
1250        if (c2 == 0) {
1251            if (!(is_unsigned && need_carry)) {
1252                if (type == TCG_TYPE_I32) {
1253                    tcg_out_insn(s, RR, LTR, r1, r1);
1254                } else {
1255                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1256                }
1257                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
1258                return tcg_cond_to_ltr_cond[c];
1259            }
1260        }
1261
1262        if (!is_unsigned && c2 == (int16_t)c2) {
1263            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1264            tcg_out_insn_RI(s, op, r1, c2);
1265            goto exit;
1266        }
1267
1268        if (type == TCG_TYPE_I32) {
1269            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1270            tcg_out_insn_RIL(s, op, r1, c2);
1271            goto exit;
1272        }
1273
1274        /*
1275         * Constraints are for a signed 33-bit operand, which is a
1276         * convenient superset of this signed/unsigned test.
1277         */
1278        if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1279            op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1280            tcg_out_insn_RIL(s, op, r1, c2);
1281            goto exit;
1282        }
1283
1284        /* Load everything else into a register. */
1285        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
1286        c2 = TCG_TMP0;
1287    }
1288
1289    if (type == TCG_TYPE_I32) {
1290        op = (is_unsigned ? RR_CLR : RR_CR);
1291        tcg_out_insn_RR(s, op, r1, c2);
1292    } else {
1293        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1294        tcg_out_insn_RRE(s, op, r1, c2);
1295    }
1296
1297 exit:
1298    *inv_cc = tcg_cond_to_s390_cond[inv_c];
1299    return tcg_cond_to_s390_cond[c];
1300}
1301
1302static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1303                    TCGArg c2, bool c2const, bool need_carry)
1304{
1305    int inv_cc;
1306    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1307}
1308
1309static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1310                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1311{
1312    int cc;
1313
1314    /* With LOC2, we can always emit the minimum 3 insns.  */
1315    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1316        /* Emit: d = 0, d = (cc ? 1 : d).  */
1317        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1318        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1319        tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
1320        return;
1321    }
1322
1323 restart:
1324    switch (cond) {
1325    case TCG_COND_NE:
1326        /* X != 0 is X > 0.  */
1327        if (c2const && c2 == 0) {
1328            cond = TCG_COND_GTU;
1329        } else {
1330            break;
1331        }
1332        /* fallthru */
1333
1334    case TCG_COND_GTU:
1335    case TCG_COND_GT:
1336        /* The result of a compare has CC=2 for GT and CC=3 unused.
1337           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1338        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1339        tcg_out_movi(s, type, dest, 0);
1340        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1341        return;
1342
1343    case TCG_COND_EQ:
1344        /* X == 0 is X <= 0.  */
1345        if (c2const && c2 == 0) {
1346            cond = TCG_COND_LEU;
1347        } else {
1348            break;
1349        }
1350        /* fallthru */
1351
1352    case TCG_COND_LEU:
1353    case TCG_COND_LE:
1354        /* As above, but we're looking for borrow, or !carry.
1355           The second insn computes d - d - borrow, or -1 for true
1356           and 0 for false.  So we must mask to 1 bit afterward.  */
1357        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1358        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1359        tgen_andi(s, type, dest, 1);
1360        return;
1361
1362    case TCG_COND_GEU:
1363    case TCG_COND_LTU:
1364    case TCG_COND_LT:
1365    case TCG_COND_GE:
1366        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1367        if (!c2const) {
1368            TCGReg t = c1;
1369            c1 = c2;
1370            c2 = t;
1371            cond = tcg_swap_cond(cond);
1372            goto restart;
1373        }
1374        break;
1375
1376    default:
1377        g_assert_not_reached();
1378    }
1379
1380    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1381    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1382    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1383    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1384    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1385}
1386
1387static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1388                             TCGArg v3, int v3const, TCGReg v4,
1389                             int cc, int inv_cc)
1390{
1391    TCGReg src;
1392
1393    if (v3const) {
1394        if (dest == v4) {
1395            if (HAVE_FACILITY(LOAD_ON_COND2)) {
1396                /* Emit: if (cc) dest = v3. */
1397                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1398                return;
1399            }
1400            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1401            src = TCG_TMP0;
1402        } else {
1403            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1404            tcg_out_insn(s, RI, LGHI, dest, v3);
1405            cc = inv_cc;
1406            src = v4;
1407        }
1408    } else {
1409        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1410            /* Emit: dest = cc ? v3 : v4. */
1411            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1412            return;
1413        }
1414        if (dest == v4) {
1415            src = v3;
1416        } else {
1417            tcg_out_mov(s, type, dest, v3);
1418            cc = inv_cc;
1419            src = v4;
1420        }
1421    }
1422
1423    /* Emit: if (cc) dest = src. */
1424    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1425}
1426
1427static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1428                         TCGReg c1, TCGArg c2, int c2const,
1429                         TCGArg v3, int v3const, TCGReg v4)
1430{
1431    int cc, inv_cc;
1432
1433    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1434    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1435}
1436
1437static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1438                     TCGArg a2, int a2const)
1439{
1440    /* Since this sets both R and R+1, we have no choice but to store the
1441       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1442    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1443    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1444
1445    if (a2const && a2 == 64) {
1446        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1447        return;
1448    }
1449
1450    /*
1451     * Conditions from FLOGR are:
1452     *   2 -> one bit found
1453     *   8 -> no one bit found
1454     */
1455    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1456}
1457
1458static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1459{
1460    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1461    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1462        if (type == TCG_TYPE_I32) {
1463            tcg_out_ext32u(s, dest, src);
1464            src = dest;
1465        }
1466        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1467        return;
1468    }
1469
1470    /* Without MIE3, each byte gets the count of bits for the byte. */
1471    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1472
1473    /* Multiply to sum each byte at the top of the word. */
1474    if (type == TCG_TYPE_I32) {
1475        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1476        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1477    } else {
1478        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1479        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1480        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
1481    }
1482}
1483
1484static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1485                         int ofs, int len, int z)
1486{
1487    int lsb = (63 - ofs);
1488    int msb = lsb - (len - 1);
1489    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1490}
1491
1492static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1493                         int ofs, int len)
1494{
1495    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1496}
1497
1498static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1499{
1500    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1501    if (off == (int16_t)off) {
1502        tcg_out_insn(s, RI, BRC, cc, off);
1503    } else if (off == (int32_t)off) {
1504        tcg_out_insn(s, RIL, BRCL, cc, off);
1505    } else {
1506        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1507        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1508    }
1509}
1510
1511static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1512{
1513    if (l->has_value) {
1514        tgen_gotoi(s, cc, l->u.value_ptr);
1515    } else {
1516        tcg_out16(s, RI_BRC | (cc << 4));
1517        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1518        s->code_ptr += 1;
1519    }
1520}
1521
1522static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1523                                TCGReg r1, TCGReg r2, TCGLabel *l)
1524{
1525    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1526    /* Format RIE-b */
1527    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1528    tcg_out16(s, 0);
1529    tcg_out16(s, cc << 12 | (opc & 0xff));
1530}
1531
1532static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1533                                    TCGReg r1, int i2, TCGLabel *l)
1534{
1535    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1536    /* Format RIE-c */
1537    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1538    tcg_out16(s, 0);
1539    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1540}
1541
1542static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1543                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1544{
1545    int cc;
1546    bool is_unsigned = is_unsigned_cond(c);
1547    bool in_range;
1548    S390Opcode opc;
1549
1550    cc = tcg_cond_to_s390_cond[c];
1551
1552    if (!c2const) {
1553        opc = (type == TCG_TYPE_I32
1554               ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1555               : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1556        tgen_compare_branch(s, opc, cc, r1, c2, l);
1557        return;
1558    }
1559
1560    /*
1561     * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1562     * If the immediate we've been given does not fit that range, we'll
1563     * fall back to separate compare and branch instructions using the
1564     * larger comparison range afforded by COMPARE IMMEDIATE.
1565     */
1566    if (type == TCG_TYPE_I32) {
1567        if (is_unsigned) {
1568            opc = RIEc_CLIJ;
1569            in_range = (uint32_t)c2 == (uint8_t)c2;
1570        } else {
1571            opc = RIEc_CIJ;
1572            in_range = (int32_t)c2 == (int8_t)c2;
1573        }
1574    } else {
1575        if (is_unsigned) {
1576            opc = RIEc_CLGIJ;
1577            in_range = (uint64_t)c2 == (uint8_t)c2;
1578        } else {
1579            opc = RIEc_CGIJ;
1580            in_range = (int64_t)c2 == (int8_t)c2;
1581        }
1582    }
1583    if (in_range) {
1584        tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1585        return;
1586    }
1587
1588    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1589    tgen_branch(s, cc, l);
1590}
1591
1592static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1593{
1594    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1595    if (off == (int32_t)off) {
1596        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1597    } else {
1598        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1599        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1600    }
1601}
1602
1603static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1604                         const TCGHelperInfo *info)
1605{
1606    tcg_out_call_int(s, dest);
1607}
1608
/*
 * Operands of a host memory access, as consumed by
 * tcg_out_qemu_ld_direct() and tcg_out_qemu_st_direct().
 */
typedef struct {
    TCGReg base;    /* base register */
    TCGReg index;   /* index register; may be TCG_REG_NONE */
    int disp;       /* displacement */
} HostAddress;
1614
1615static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1616                                   HostAddress h)
1617{
1618    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1619    case MO_UB:
1620        tcg_out_insn(s, RXY, LLGC, data, h.base, h.index, h.disp);
1621        break;
1622    case MO_SB:
1623        tcg_out_insn(s, RXY, LGB, data, h.base, h.index, h.disp);
1624        break;
1625
1626    case MO_UW | MO_BSWAP:
1627        /* swapped unsigned halfword load with upper bits zeroed */
1628        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1629        tcg_out_ext16u(s, data, data);
1630        break;
1631    case MO_UW:
1632        tcg_out_insn(s, RXY, LLGH, data, h.base, h.index, h.disp);
1633        break;
1634
1635    case MO_SW | MO_BSWAP:
1636        /* swapped sign-extended halfword load */
1637        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1638        tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
1639        break;
1640    case MO_SW:
1641        tcg_out_insn(s, RXY, LGH, data, h.base, h.index, h.disp);
1642        break;
1643
1644    case MO_UL | MO_BSWAP:
1645        /* swapped unsigned int load with upper bits zeroed */
1646        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1647        tcg_out_ext32u(s, data, data);
1648        break;
1649    case MO_UL:
1650        tcg_out_insn(s, RXY, LLGF, data, h.base, h.index, h.disp);
1651        break;
1652
1653    case MO_SL | MO_BSWAP:
1654        /* swapped sign-extended int load */
1655        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1656        tcg_out_ext32s(s, data, data);
1657        break;
1658    case MO_SL:
1659        tcg_out_insn(s, RXY, LGF, data, h.base, h.index, h.disp);
1660        break;
1661
1662    case MO_UQ | MO_BSWAP:
1663        tcg_out_insn(s, RXY, LRVG, data, h.base, h.index, h.disp);
1664        break;
1665    case MO_UQ:
1666        tcg_out_insn(s, RXY, LG, data, h.base, h.index, h.disp);
1667        break;
1668
1669    default:
1670        g_assert_not_reached();
1671    }
1672}
1673
1674static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1675                                   HostAddress h)
1676{
1677    switch (opc & (MO_SIZE | MO_BSWAP)) {
1678    case MO_UB:
1679        if (h.disp >= 0 && h.disp < 0x1000) {
1680            tcg_out_insn(s, RX, STC, data, h.base, h.index, h.disp);
1681        } else {
1682            tcg_out_insn(s, RXY, STCY, data, h.base, h.index, h.disp);
1683        }
1684        break;
1685
1686    case MO_UW | MO_BSWAP:
1687        tcg_out_insn(s, RXY, STRVH, data, h.base, h.index, h.disp);
1688        break;
1689    case MO_UW:
1690        if (h.disp >= 0 && h.disp < 0x1000) {
1691            tcg_out_insn(s, RX, STH, data, h.base, h.index, h.disp);
1692        } else {
1693            tcg_out_insn(s, RXY, STHY, data, h.base, h.index, h.disp);
1694        }
1695        break;
1696
1697    case MO_UL | MO_BSWAP:
1698        tcg_out_insn(s, RXY, STRV, data, h.base, h.index, h.disp);
1699        break;
1700    case MO_UL:
1701        if (h.disp >= 0 && h.disp < 0x1000) {
1702            tcg_out_insn(s, RX, ST, data, h.base, h.index, h.disp);
1703        } else {
1704            tcg_out_insn(s, RXY, STY, data, h.base, h.index, h.disp);
1705        }
1706        break;
1707
1708    case MO_UQ | MO_BSWAP:
1709        tcg_out_insn(s, RXY, STRVG, data, h.base, h.index, h.disp);
1710        break;
1711    case MO_UQ:
1712        tcg_out_insn(s, RXY, STG, data, h.base, h.index, h.disp);
1713        break;
1714
1715    default:
1716        g_assert_not_reached();
1717    }
1718}
1719
1720#if defined(CONFIG_SOFTMMU)
/*
 * We're expecting to use a 20-bit negative offset on the tlb memory ops.
 * Fail the build unless TLB_MASK_TABLE_OFS(0) lies in [-(1 << 19), 0].
 */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1724
1725/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1726   addend into R2.  Returns a register with the santitized guest address.  */
1727static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1728                               int mem_index, bool is_ld)
1729{
1730    unsigned s_bits = opc & MO_SIZE;
1731    unsigned a_bits = get_alignment_bits(opc);
1732    unsigned s_mask = (1 << s_bits) - 1;
1733    unsigned a_mask = (1 << a_bits) - 1;
1734    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1735    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1736    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1737    int ofs, a_off;
1738    uint64_t tlb_mask;
1739
1740    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1741                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1742    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1743    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1744
1745    /* For aligned accesses, we check the first byte and include the alignment
1746       bits within the address.  For unaligned access, we check that we don't
1747       cross pages using the address of the last byte of the access.  */
1748    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1749    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1750    if (a_off == 0) {
1751        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1752    } else {
1753        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1754        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1755    }
1756
1757    if (is_ld) {
1758        ofs = offsetof(CPUTLBEntry, addr_read);
1759    } else {
1760        ofs = offsetof(CPUTLBEntry, addr_write);
1761    }
1762    if (TARGET_LONG_BITS == 32) {
1763        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1764    } else {
1765        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1766    }
1767
1768    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1769                 offsetof(CPUTLBEntry, addend));
1770
1771    if (TARGET_LONG_BITS == 32) {
1772        tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
1773        return TCG_REG_R3;
1774    }
1775    return addr_reg;
1776}
1777
1778static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1779                                TCGType type, TCGReg data, TCGReg addr,
1780                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1781{
1782    TCGLabelQemuLdst *label = new_ldst_label(s);
1783
1784    label->is_ld = is_ld;
1785    label->oi = oi;
1786    label->type = type;
1787    label->datalo_reg = data;
1788    label->addrlo_reg = addr;
1789    label->raddr = tcg_splitwx_to_rx(raddr);
1790    label->label_ptr[0] = label_ptr;
1791}
1792
1793static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1794{
1795    TCGReg addr_reg = lb->addrlo_reg;
1796    TCGReg data_reg = lb->datalo_reg;
1797    MemOpIdx oi = lb->oi;
1798    MemOp opc = get_memop(oi);
1799
1800    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1801                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1802        return false;
1803    }
1804
1805    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1806    if (TARGET_LONG_BITS == 64) {
1807        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1808    }
1809    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1810    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1811    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1812    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1813
1814    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1815    return true;
1816}
1817
1818static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1819{
1820    TCGReg addr_reg = lb->addrlo_reg;
1821    TCGReg data_reg = lb->datalo_reg;
1822    MemOpIdx oi = lb->oi;
1823    MemOp opc = get_memop(oi);
1824    MemOp size = opc & MO_SIZE;
1825
1826    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1827                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1828        return false;
1829    }
1830
1831    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1832    if (TARGET_LONG_BITS == 64) {
1833        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1834    }
1835    tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1836                   TCG_REG_R4, lb->type, size, data_reg);
1837    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1838    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1839    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1840
1841    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1842    return true;
1843}
1844#else
1845static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1846                                   TCGReg addrlo, unsigned a_bits)
1847{
1848    unsigned a_mask = (1 << a_bits) - 1;
1849    TCGLabelQemuLdst *l = new_ldst_label(s);
1850
1851    l->is_ld = is_ld;
1852    l->addrlo_reg = addrlo;
1853
1854    /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1855    tcg_debug_assert(a_bits < 16);
1856    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1857
1858    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1859    l->label_ptr[0] = s->code_ptr;
1860    s->code_ptr += 1;
1861
1862    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1863}
1864
1865static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1866{
1867    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1868                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1869        return false;
1870    }
1871
1872    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1873    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1874
1875    /* "Tail call" to the helper, with the return address back inline. */
1876    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1877    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1878                                                 : helper_unaligned_st));
1879    return true;
1880}
1881
1882static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1883{
1884    return tcg_out_fail_alignment(s, l);
1885}
1886
1887static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1888{
1889    return tcg_out_fail_alignment(s, l);
1890}
1891
1892static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
1893{
1894    TCGReg index;
1895    int disp;
1896
1897    if (TARGET_LONG_BITS == 32) {
1898        tcg_out_ext32u(s, TCG_TMP0, addr_reg);
1899        addr_reg = TCG_TMP0;
1900    }
1901    if (guest_base < 0x80000) {
1902        index = TCG_REG_NONE;
1903        disp = guest_base;
1904    } else {
1905        index = TCG_GUEST_BASE_REG;
1906        disp = 0;
1907    }
1908    return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
1909}
1910#endif /* CONFIG_SOFTMMU */
1911
1912static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1913                            MemOpIdx oi, TCGType data_type)
1914{
1915    MemOp opc = get_memop(oi);
1916    HostAddress h;
1917
1918#ifdef CONFIG_SOFTMMU
1919    unsigned mem_index = get_mmuidx(oi);
1920    tcg_insn_unit *label_ptr;
1921
1922    h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1923    h.index = TCG_REG_R2;
1924    h.disp = 0;
1925
1926    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1927    label_ptr = s->code_ptr;
1928    s->code_ptr += 1;
1929
1930    tcg_out_qemu_ld_direct(s, opc, data_reg, h);
1931
1932    add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
1933                        s->code_ptr, label_ptr);
1934#else
1935    unsigned a_bits = get_alignment_bits(opc);
1936
1937    if (a_bits) {
1938        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1939    }
1940    h = tcg_prepare_user_ldst(s, addr_reg);
1941    tcg_out_qemu_ld_direct(s, opc, data_reg, h);
1942#endif
1943}
1944
1945static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1946                            MemOpIdx oi, TCGType data_type)
1947{
1948    MemOp opc = get_memop(oi);
1949    HostAddress h;
1950
1951#ifdef CONFIG_SOFTMMU
1952    unsigned mem_index = get_mmuidx(oi);
1953    tcg_insn_unit *label_ptr;
1954
1955    h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1956    h.index = TCG_REG_R2;
1957    h.disp = 0;
1958
1959    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1960    label_ptr = s->code_ptr;
1961    s->code_ptr += 1;
1962
1963    tcg_out_qemu_st_direct(s, opc, data_reg, h);
1964
1965    add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
1966                        s->code_ptr, label_ptr);
1967#else
1968    unsigned a_bits = get_alignment_bits(opc);
1969
1970    if (a_bits) {
1971        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1972    }
1973    h = tcg_prepare_user_ldst(s, addr_reg);
1974    tcg_out_qemu_st_direct(s, opc, data_reg, h);
1975#endif
1976}
1977
1978static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1979{
1980    /* Reuse the zeroing that exists for goto_ptr.  */
1981    if (a0 == 0) {
1982        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1983    } else {
1984        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1985        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1986    }
1987}
1988
1989static void tcg_out_goto_tb(TCGContext *s, int which)
1990{
1991    /*
1992     * Branch displacement must be aligned for atomic patching;
1993     * see if we need to add extra nop before branch
1994     */
1995    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1996        tcg_out16(s, NOP);
1997    }
1998    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1999    set_jmp_insn_offset(s, which);
2000    s->code_ptr += 2;
2001    set_jmp_reset_offset(s, which);
2002}
2003
2004void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2005                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2006{
2007    if (!HAVE_FACILITY(GEN_INST_EXT)) {
2008        return;
2009    }
2010    /* patch the branch destination */
2011    uintptr_t addr = tb->jmp_target_addr[n];
2012    intptr_t disp = addr - (jmp_rx - 2);
2013    qatomic_set((int32_t *)jmp_rw, disp / 2);
2014    /* no need to flush icache explicitly */
2015}
2016
/* Expands to the two case labels INDEX_op_<x>_i32 and INDEX_op_<x>_i64. */
#define OP_32_64(x)                          \
    case glue(glue(INDEX_op_, x), _i32):     \
    case glue(glue(INDEX_op_, x), _i64)
2020
2021static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2022                              const TCGArg args[TCG_MAX_OP_ARGS],
2023                              const int const_args[TCG_MAX_OP_ARGS])
2024{
2025    S390Opcode op, op2;
2026    TCGArg a0, a1, a2;
2027
2028    switch (opc) {
2029    case INDEX_op_goto_ptr:
2030        a0 = args[0];
2031        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2032        break;
2033
2034    OP_32_64(ld8u):
2035        /* ??? LLC (RXY format) is only present with the extended-immediate
2036           facility, whereas LLGC is always present.  */
2037        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2038        break;
2039
2040    OP_32_64(ld8s):
2041        /* ??? LB is no smaller than LGB, so no point to using it.  */
2042        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2043        break;
2044
2045    OP_32_64(ld16u):
2046        /* ??? LLH (RXY format) is only present with the extended-immediate
2047           facility, whereas LLGH is always present.  */
2048        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2049        break;
2050
2051    case INDEX_op_ld16s_i32:
2052        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2053        break;
2054
2055    case INDEX_op_ld_i32:
2056        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2057        break;
2058
2059    OP_32_64(st8):
2060        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2061                    TCG_REG_NONE, args[2]);
2062        break;
2063
2064    OP_32_64(st16):
2065        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2066                    TCG_REG_NONE, args[2]);
2067        break;
2068
2069    case INDEX_op_st_i32:
2070        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2071        break;
2072
2073    case INDEX_op_add_i32:
2074        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2075        if (const_args[2]) {
2076        do_addi_32:
2077            if (a0 == a1) {
2078                if (a2 == (int16_t)a2) {
2079                    tcg_out_insn(s, RI, AHI, a0, a2);
2080                    break;
2081                }
2082                tcg_out_insn(s, RIL, AFI, a0, a2);
2083                break;
2084            }
2085            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2086        } else if (a0 == a1) {
2087            tcg_out_insn(s, RR, AR, a0, a2);
2088        } else {
2089            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2090        }
2091        break;
2092    case INDEX_op_sub_i32:
2093        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2094        if (const_args[2]) {
2095            a2 = -a2;
2096            goto do_addi_32;
2097        } else if (a0 == a1) {
2098            tcg_out_insn(s, RR, SR, a0, a2);
2099        } else {
2100            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2101        }
2102        break;
2103
2104    case INDEX_op_and_i32:
2105        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2106        if (const_args[2]) {
2107            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2108            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2109        } else if (a0 == a1) {
2110            tcg_out_insn(s, RR, NR, a0, a2);
2111        } else {
2112            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2113        }
2114        break;
2115    case INDEX_op_or_i32:
2116        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2117        if (const_args[2]) {
2118            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2119            tgen_ori(s, a0, a2);
2120        } else if (a0 == a1) {
2121            tcg_out_insn(s, RR, OR, a0, a2);
2122        } else {
2123            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2124        }
2125        break;
2126    case INDEX_op_xor_i32:
2127        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2128        if (const_args[2]) {
2129            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2130            tcg_out_insn(s, RIL, XILF, a0, a2);
2131        } else if (a0 == a1) {
2132            tcg_out_insn(s, RR, XR, args[0], args[2]);
2133        } else {
2134            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2135        }
2136        break;
2137
2138    case INDEX_op_andc_i32:
2139        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2140        if (const_args[2]) {
2141            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2142            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2143	} else {
2144            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2145	}
2146        break;
2147    case INDEX_op_orc_i32:
2148        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2149        if (const_args[2]) {
2150            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2151            tgen_ori(s, a0, (uint32_t)~a2);
2152        } else {
2153            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2154        }
2155        break;
2156    case INDEX_op_eqv_i32:
2157        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2158        if (const_args[2]) {
2159            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2160            tcg_out_insn(s, RIL, XILF, a0, ~a2);
2161        } else {
2162            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2163        }
2164        break;
2165    case INDEX_op_nand_i32:
2166        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2167        break;
2168    case INDEX_op_nor_i32:
2169        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2170        break;
2171
2172    case INDEX_op_neg_i32:
2173        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2174        break;
2175    case INDEX_op_not_i32:
2176        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2177        break;
2178
2179    case INDEX_op_mul_i32:
2180        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2181        if (const_args[2]) {
2182            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2183            if (a2 == (int16_t)a2) {
2184                tcg_out_insn(s, RI, MHI, a0, a2);
2185            } else {
2186                tcg_out_insn(s, RIL, MSFI, a0, a2);
2187            }
2188        } else if (a0 == a1) {
2189            tcg_out_insn(s, RRE, MSR, a0, a2);
2190        } else {
2191            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2192        }
2193        break;
2194
2195    case INDEX_op_div2_i32:
2196        tcg_debug_assert(args[0] == args[2]);
2197        tcg_debug_assert(args[1] == args[3]);
2198        tcg_debug_assert((args[1] & 1) == 0);
2199        tcg_debug_assert(args[0] == args[1] + 1);
2200        tcg_out_insn(s, RR, DR, args[1], args[4]);
2201        break;
2202    case INDEX_op_divu2_i32:
2203        tcg_debug_assert(args[0] == args[2]);
2204        tcg_debug_assert(args[1] == args[3]);
2205        tcg_debug_assert((args[1] & 1) == 0);
2206        tcg_debug_assert(args[0] == args[1] + 1);
2207        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2208        break;
2209
2210    case INDEX_op_shl_i32:
2211        op = RS_SLL;
2212        op2 = RSY_SLLK;
2213    do_shift32:
2214        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2215        if (a0 == a1) {
2216            if (const_args[2]) {
2217                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2218            } else {
2219                tcg_out_sh32(s, op, a0, a2, 0);
2220            }
2221        } else {
2222            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2223            if (const_args[2]) {
2224                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2225            } else {
2226                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2227            }
2228        }
2229        break;
2230    case INDEX_op_shr_i32:
2231        op = RS_SRL;
2232        op2 = RSY_SRLK;
2233        goto do_shift32;
2234    case INDEX_op_sar_i32:
2235        op = RS_SRA;
2236        op2 = RSY_SRAK;
2237        goto do_shift32;
2238
2239    case INDEX_op_rotl_i32:
2240        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2241        if (const_args[2]) {
2242            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2243        } else {
2244            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2245        }
2246        break;
2247    case INDEX_op_rotr_i32:
2248        if (const_args[2]) {
2249            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2250                         TCG_REG_NONE, (32 - args[2]) & 31);
2251        } else {
2252            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2253            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2254        }
2255        break;
2256
2257    case INDEX_op_bswap16_i32:
2258        a0 = args[0], a1 = args[1], a2 = args[2];
2259        tcg_out_insn(s, RRE, LRVR, a0, a1);
2260        if (a2 & TCG_BSWAP_OS) {
2261            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2262        } else {
2263            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2264        }
2265        break;
2266    case INDEX_op_bswap16_i64:
2267        a0 = args[0], a1 = args[1], a2 = args[2];
2268        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2269        if (a2 & TCG_BSWAP_OS) {
2270            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2271        } else {
2272            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2273        }
2274        break;
2275
2276    case INDEX_op_bswap32_i32:
2277        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2278        break;
2279    case INDEX_op_bswap32_i64:
2280        a0 = args[0], a1 = args[1], a2 = args[2];
2281        tcg_out_insn(s, RRE, LRVR, a0, a1);
2282        if (a2 & TCG_BSWAP_OS) {
2283            tcg_out_ext32s(s, a0, a0);
2284        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2285            tcg_out_ext32u(s, a0, a0);
2286        }
2287        break;
2288
2289    case INDEX_op_add2_i32:
2290        if (const_args[4]) {
2291            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2292        } else {
2293            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2294        }
2295        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2296        break;
2297    case INDEX_op_sub2_i32:
2298        if (const_args[4]) {
2299            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2300        } else {
2301            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2302        }
2303        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2304        break;
2305
2306    case INDEX_op_br:
2307        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2308        break;
2309
2310    case INDEX_op_brcond_i32:
2311        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2312                    args[1], const_args[1], arg_label(args[3]));
2313        break;
2314    case INDEX_op_setcond_i32:
2315        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2316                     args[2], const_args[2]);
2317        break;
2318    case INDEX_op_movcond_i32:
2319        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2320                     args[2], const_args[2], args[3], const_args[3], args[4]);
2321        break;
2322
2323    case INDEX_op_qemu_ld_i32:
2324        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
2325        break;
2326    case INDEX_op_qemu_ld_i64:
2327        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
2328        break;
2329    case INDEX_op_qemu_st_i32:
2330        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
2331        break;
2332    case INDEX_op_qemu_st_i64:
2333        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
2334        break;
2335
2336    case INDEX_op_ld16s_i64:
2337        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2338        break;
2339    case INDEX_op_ld32u_i64:
2340        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2341        break;
2342    case INDEX_op_ld32s_i64:
2343        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2344        break;
2345    case INDEX_op_ld_i64:
2346        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2347        break;
2348
2349    case INDEX_op_st32_i64:
2350        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2351        break;
2352    case INDEX_op_st_i64:
2353        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2354        break;
2355
2356    case INDEX_op_add_i64:
2357        a0 = args[0], a1 = args[1], a2 = args[2];
2358        if (const_args[2]) {
2359        do_addi_64:
2360            if (a0 == a1) {
2361                if (a2 == (int16_t)a2) {
2362                    tcg_out_insn(s, RI, AGHI, a0, a2);
2363                    break;
2364                }
2365                if (a2 == (int32_t)a2) {
2366                    tcg_out_insn(s, RIL, AGFI, a0, a2);
2367                    break;
2368                }
2369                if (a2 == (uint32_t)a2) {
2370                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
2371                    break;
2372                }
2373                if (-a2 == (uint32_t)-a2) {
2374                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2375                    break;
2376                }
2377            }
2378            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2379        } else if (a0 == a1) {
2380            tcg_out_insn(s, RRE, AGR, a0, a2);
2381        } else {
2382            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2383        }
2384        break;
2385    case INDEX_op_sub_i64:
2386        a0 = args[0], a1 = args[1], a2 = args[2];
2387        if (const_args[2]) {
2388            a2 = -a2;
2389            goto do_addi_64;
2390        } else {
2391            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2392        }
2393        break;
2394
2395    case INDEX_op_and_i64:
2396        a0 = args[0], a1 = args[1], a2 = args[2];
2397        if (const_args[2]) {
2398            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2399            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2400        } else {
2401            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2402        }
2403        break;
2404    case INDEX_op_or_i64:
2405        a0 = args[0], a1 = args[1], a2 = args[2];
2406        if (const_args[2]) {
2407            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2408            tgen_ori(s, a0, a2);
2409        } else {
2410            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2411        }
2412        break;
2413    case INDEX_op_xor_i64:
2414        a0 = args[0], a1 = args[1], a2 = args[2];
2415        if (const_args[2]) {
2416            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2417            tgen_xori(s, a0, a2);
2418        } else {
2419            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2420        }
2421        break;
2422
2423    case INDEX_op_andc_i64:
2424        a0 = args[0], a1 = args[1], a2 = args[2];
2425        if (const_args[2]) {
2426            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2427            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2428        } else {
2429            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2430        }
2431        break;
2432    case INDEX_op_orc_i64:
2433        a0 = args[0], a1 = args[1], a2 = args[2];
2434        if (const_args[2]) {
2435            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2436            tgen_ori(s, a0, ~a2);
2437        } else {
2438            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2439        }
2440        break;
2441    case INDEX_op_eqv_i64:
2442        a0 = args[0], a1 = args[1], a2 = args[2];
2443        if (const_args[2]) {
2444            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2445            tgen_xori(s, a0, ~a2);
2446        } else {
2447            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2448        }
2449        break;
2450    case INDEX_op_nand_i64:
2451        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2452        break;
2453    case INDEX_op_nor_i64:
2454        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2455        break;
2456
2457    case INDEX_op_neg_i64:
2458        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2459        break;
2460    case INDEX_op_not_i64:
2461        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2462        break;
2463    case INDEX_op_bswap64_i64:
2464        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2465        break;
2466
2467    case INDEX_op_mul_i64:
2468        a0 = args[0], a1 = args[1], a2 = args[2];
2469        if (const_args[2]) {
2470            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2471            if (a2 == (int16_t)a2) {
2472                tcg_out_insn(s, RI, MGHI, a0, a2);
2473            } else {
2474                tcg_out_insn(s, RIL, MSGFI, a0, a2);
2475            }
2476        } else if (a0 == a1) {
2477            tcg_out_insn(s, RRE, MSGR, a0, a2);
2478        } else {
2479            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2480        }
2481        break;
2482
2483    case INDEX_op_div2_i64:
2484        /*
2485         * ??? We get an unnecessary sign-extension of the dividend
2486         * into op0 with this definition, but as we do in fact always
2487         * produce both quotient and remainder using INDEX_op_div_i64
2488         * instead requires jumping through even more hoops.
2489         */
2490        tcg_debug_assert(args[0] == args[2]);
2491        tcg_debug_assert(args[1] == args[3]);
2492        tcg_debug_assert((args[1] & 1) == 0);
2493        tcg_debug_assert(args[0] == args[1] + 1);
2494        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2495        break;
2496    case INDEX_op_divu2_i64:
2497        tcg_debug_assert(args[0] == args[2]);
2498        tcg_debug_assert(args[1] == args[3]);
2499        tcg_debug_assert((args[1] & 1) == 0);
2500        tcg_debug_assert(args[0] == args[1] + 1);
2501        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2502        break;
2503    case INDEX_op_mulu2_i64:
2504        tcg_debug_assert(args[0] == args[2]);
2505        tcg_debug_assert((args[1] & 1) == 0);
2506        tcg_debug_assert(args[0] == args[1] + 1);
2507        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2508        break;
2509    case INDEX_op_muls2_i64:
2510        tcg_debug_assert((args[1] & 1) == 0);
2511        tcg_debug_assert(args[0] == args[1] + 1);
2512        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2513        break;
2514
2515    case INDEX_op_shl_i64:
2516        op = RSY_SLLG;
2517    do_shift64:
2518        if (const_args[2]) {
2519            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2520        } else {
2521            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2522        }
2523        break;
2524    case INDEX_op_shr_i64:
2525        op = RSY_SRLG;
2526        goto do_shift64;
2527    case INDEX_op_sar_i64:
2528        op = RSY_SRAG;
2529        goto do_shift64;
2530
2531    case INDEX_op_rotl_i64:
2532        if (const_args[2]) {
2533            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2534                         TCG_REG_NONE, args[2]);
2535        } else {
2536            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2537        }
2538        break;
2539    case INDEX_op_rotr_i64:
2540        if (const_args[2]) {
2541            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2542                         TCG_REG_NONE, (64 - args[2]) & 63);
2543        } else {
2544            /* We can use the smaller 32-bit negate because only the
2545               low 6 bits are examined for the rotate.  */
2546            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2547            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2548        }
2549        break;
2550
2551    case INDEX_op_add2_i64:
2552        if (const_args[4]) {
2553            if ((int64_t)args[4] >= 0) {
2554                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2555            } else {
2556                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2557            }
2558        } else {
2559            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2560        }
2561        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2562        break;
2563    case INDEX_op_sub2_i64:
2564        if (const_args[4]) {
2565            if ((int64_t)args[4] >= 0) {
2566                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2567            } else {
2568                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2569            }
2570        } else {
2571            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2572        }
2573        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2574        break;
2575
2576    case INDEX_op_brcond_i64:
2577        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2578                    args[1], const_args[1], arg_label(args[3]));
2579        break;
2580    case INDEX_op_setcond_i64:
2581        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2582                     args[2], const_args[2]);
2583        break;
2584    case INDEX_op_movcond_i64:
2585        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2586                     args[2], const_args[2], args[3], const_args[3], args[4]);
2587        break;
2588
2589    OP_32_64(deposit):
2590        a0 = args[0], a1 = args[1], a2 = args[2];
2591        if (const_args[1]) {
2592            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2593        } else {
2594            /* Since we can't support "0Z" as a constraint, we allow a1 in
2595               any register.  Fix things up as if a matching constraint.  */
2596            if (a0 != a1) {
2597                TCGType type = (opc == INDEX_op_deposit_i64);
2598                if (a0 == a2) {
2599                    tcg_out_mov(s, type, TCG_TMP0, a2);
2600                    a2 = TCG_TMP0;
2601                }
2602                tcg_out_mov(s, type, a0, a1);
2603            }
2604            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2605        }
2606        break;
2607
2608    OP_32_64(extract):
2609        tgen_extract(s, args[0], args[1], args[2], args[3]);
2610        break;
2611
2612    case INDEX_op_clz_i64:
2613        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2614        break;
2615
2616    case INDEX_op_ctpop_i32:
2617        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2618        break;
2619    case INDEX_op_ctpop_i64:
2620        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2621        break;
2622
2623    case INDEX_op_mb:
2624        /* The host memory model is quite strong, we simply need to
2625           serialize the instruction stream.  */
2626        if (args[0] & TCG_MO_ST_LD) {
2627            /* fast-bcr-serialization facility (45) is present */
2628            tcg_out_insn(s, RR, BCR, 14, 0);
2629        }
2630        break;
2631
2632    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2633    case INDEX_op_mov_i64:
2634    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2635    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2636    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2637    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2638    case INDEX_op_ext8s_i64:
2639    case INDEX_op_ext8u_i32:
2640    case INDEX_op_ext8u_i64:
2641    case INDEX_op_ext16s_i32:
2642    case INDEX_op_ext16s_i64:
2643    case INDEX_op_ext16u_i32:
2644    case INDEX_op_ext16u_i64:
2645    case INDEX_op_ext32s_i64:
2646    case INDEX_op_ext32u_i64:
2647    case INDEX_op_ext_i32_i64:
2648    case INDEX_op_extu_i32_i64:
2649    case INDEX_op_extrl_i64_i32:
2650    default:
2651        g_assert_not_reached();
2652    }
2653}
2654
2655static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2656                            TCGReg dst, TCGReg src)
2657{
2658    if (is_general_reg(src)) {
2659        /* Replicate general register into two MO_64. */
2660        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2661        if (vece == MO_64) {
2662            return true;
2663        }
2664        src = dst;
2665    }
2666
2667    /*
2668     * Recall that the "standard" integer, within a vector, is the
2669     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2670     */
2671    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2672    return true;
2673}
2674
2675static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2676                             TCGReg dst, TCGReg base, intptr_t offset)
2677{
2678    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2679    return true;
2680}
2681
2682static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2683                             TCGReg dst, int64_t val)
2684{
2685    int i, mask, msb, lsb;
2686
2687    /* Look for int16_t elements.  */
2688    if (vece <= MO_16 ||
2689        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2690        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2691        return;
2692    }
2693
2694    /* Look for bit masks.  */
2695    if (vece == MO_32) {
2696        if (risbg_mask((int32_t)val)) {
2697            /* Handle wraparound by swapping msb and lsb.  */
2698            if ((val & 0x80000001u) == 0x80000001u) {
2699                msb = 32 - ctz32(~val);
2700                lsb = clz32(~val) - 1;
2701            } else {
2702                msb = clz32(val);
2703                lsb = 31 - ctz32(val);
2704            }
2705            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2706            return;
2707        }
2708    } else {
2709        if (risbg_mask(val)) {
2710            /* Handle wraparound by swapping msb and lsb.  */
2711            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2712                /* Handle wraparound by swapping msb and lsb.  */
2713                msb = 64 - ctz64(~val);
2714                lsb = clz64(~val) - 1;
2715            } else {
2716                msb = clz64(val);
2717                lsb = 63 - ctz64(val);
2718            }
2719            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2720            return;
2721        }
2722    }
2723
2724    /* Look for all bytes 0x00 or 0xff.  */
2725    for (i = mask = 0; i < 8; i++) {
2726        uint8_t byte = val >> (i * 8);
2727        if (byte == 0xff) {
2728            mask |= 1 << i;
2729        } else if (byte != 0) {
2730            break;
2731        }
2732    }
2733    if (i == 8) {
2734        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2735        return;
2736    }
2737
2738    /* Otherwise, stuff it in the constant pool.  */
2739    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2740    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2741    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2742}
2743
2744static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2745                           unsigned vecl, unsigned vece,
2746                           const TCGArg args[TCG_MAX_OP_ARGS],
2747                           const int const_args[TCG_MAX_OP_ARGS])
2748{
2749    TCGType type = vecl + TCG_TYPE_V64;
2750    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2751
2752    switch (opc) {
2753    case INDEX_op_ld_vec:
2754        tcg_out_ld(s, type, a0, a1, a2);
2755        break;
2756    case INDEX_op_st_vec:
2757        tcg_out_st(s, type, a0, a1, a2);
2758        break;
2759    case INDEX_op_dupm_vec:
2760        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2761        break;
2762
2763    case INDEX_op_abs_vec:
2764        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2765        break;
2766    case INDEX_op_neg_vec:
2767        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2768        break;
2769    case INDEX_op_not_vec:
2770        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2771        break;
2772
2773    case INDEX_op_add_vec:
2774        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2775        break;
2776    case INDEX_op_sub_vec:
2777        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2778        break;
2779    case INDEX_op_and_vec:
2780        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2781        break;
2782    case INDEX_op_andc_vec:
2783        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2784        break;
2785    case INDEX_op_mul_vec:
2786        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2787        break;
2788    case INDEX_op_or_vec:
2789        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2790        break;
2791    case INDEX_op_orc_vec:
2792        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2793        break;
2794    case INDEX_op_xor_vec:
2795        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2796        break;
2797    case INDEX_op_nand_vec:
2798        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2799        break;
2800    case INDEX_op_nor_vec:
2801        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2802        break;
2803    case INDEX_op_eqv_vec:
2804        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2805        break;
2806
2807    case INDEX_op_shli_vec:
2808        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2809        break;
2810    case INDEX_op_shri_vec:
2811        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2812        break;
2813    case INDEX_op_sari_vec:
2814        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2815        break;
2816    case INDEX_op_rotli_vec:
2817        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2818        break;
2819    case INDEX_op_shls_vec:
2820        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2821        break;
2822    case INDEX_op_shrs_vec:
2823        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2824        break;
2825    case INDEX_op_sars_vec:
2826        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2827        break;
2828    case INDEX_op_rotls_vec:
2829        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2830        break;
2831    case INDEX_op_shlv_vec:
2832        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2833        break;
2834    case INDEX_op_shrv_vec:
2835        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2836        break;
2837    case INDEX_op_sarv_vec:
2838        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2839        break;
2840    case INDEX_op_rotlv_vec:
2841        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2842        break;
2843
2844    case INDEX_op_smin_vec:
2845        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2846        break;
2847    case INDEX_op_smax_vec:
2848        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2849        break;
2850    case INDEX_op_umin_vec:
2851        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2852        break;
2853    case INDEX_op_umax_vec:
2854        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2855        break;
2856
2857    case INDEX_op_bitsel_vec:
2858        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2859        break;
2860
2861    case INDEX_op_cmp_vec:
2862        switch ((TCGCond)args[3]) {
2863        case TCG_COND_EQ:
2864            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2865            break;
2866        case TCG_COND_GT:
2867            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2868            break;
2869        case TCG_COND_GTU:
2870            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2871            break;
2872        default:
2873            g_assert_not_reached();
2874        }
2875        break;
2876
2877    case INDEX_op_s390_vuph_vec:
2878        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2879        break;
2880    case INDEX_op_s390_vupl_vec:
2881        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2882        break;
2883    case INDEX_op_s390_vpks_vec:
2884        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2885        break;
2886
2887    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2888    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2889    default:
2890        g_assert_not_reached();
2891    }
2892}
2893
2894int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2895{
2896    switch (opc) {
2897    case INDEX_op_abs_vec:
2898    case INDEX_op_add_vec:
2899    case INDEX_op_and_vec:
2900    case INDEX_op_andc_vec:
2901    case INDEX_op_bitsel_vec:
2902    case INDEX_op_eqv_vec:
2903    case INDEX_op_nand_vec:
2904    case INDEX_op_neg_vec:
2905    case INDEX_op_nor_vec:
2906    case INDEX_op_not_vec:
2907    case INDEX_op_or_vec:
2908    case INDEX_op_orc_vec:
2909    case INDEX_op_rotli_vec:
2910    case INDEX_op_rotls_vec:
2911    case INDEX_op_rotlv_vec:
2912    case INDEX_op_sari_vec:
2913    case INDEX_op_sars_vec:
2914    case INDEX_op_sarv_vec:
2915    case INDEX_op_shli_vec:
2916    case INDEX_op_shls_vec:
2917    case INDEX_op_shlv_vec:
2918    case INDEX_op_shri_vec:
2919    case INDEX_op_shrs_vec:
2920    case INDEX_op_shrv_vec:
2921    case INDEX_op_smax_vec:
2922    case INDEX_op_smin_vec:
2923    case INDEX_op_sub_vec:
2924    case INDEX_op_umax_vec:
2925    case INDEX_op_umin_vec:
2926    case INDEX_op_xor_vec:
2927        return 1;
2928    case INDEX_op_cmp_vec:
2929    case INDEX_op_cmpsel_vec:
2930    case INDEX_op_rotrv_vec:
2931        return -1;
2932    case INDEX_op_mul_vec:
2933        return vece < MO_64;
2934    case INDEX_op_ssadd_vec:
2935    case INDEX_op_sssub_vec:
2936        return vece < MO_64 ? -1 : 0;
2937    default:
2938        return 0;
2939    }
2940}
2941
2942static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2943                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2944{
2945    bool need_swap = false, need_inv = false;
2946
2947    switch (cond) {
2948    case TCG_COND_EQ:
2949    case TCG_COND_GT:
2950    case TCG_COND_GTU:
2951        break;
2952    case TCG_COND_NE:
2953    case TCG_COND_LE:
2954    case TCG_COND_LEU:
2955        need_inv = true;
2956        break;
2957    case TCG_COND_LT:
2958    case TCG_COND_LTU:
2959        need_swap = true;
2960        break;
2961    case TCG_COND_GE:
2962    case TCG_COND_GEU:
2963        need_swap = need_inv = true;
2964        break;
2965    default:
2966        g_assert_not_reached();
2967    }
2968
2969    if (need_inv) {
2970        cond = tcg_invert_cond(cond);
2971    }
2972    if (need_swap) {
2973        TCGv_vec t1;
2974        t1 = v1, v1 = v2, v2 = t1;
2975        cond = tcg_swap_cond(cond);
2976    }
2977
2978    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2979              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2980
2981    return need_inv;
2982}
2983
2984static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2985                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2986{
2987    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2988        tcg_gen_not_vec(vece, v0, v0);
2989    }
2990}
2991
2992static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2993                              TCGv_vec c1, TCGv_vec c2,
2994                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2995{
2996    TCGv_vec t = tcg_temp_new_vec(type);
2997
2998    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2999        /* Invert the sense of the compare by swapping arguments.  */
3000        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3001    } else {
3002        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3003    }
3004    tcg_temp_free_vec(t);
3005}
3006
3007static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3008                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3009{
3010    TCGv_vec h1 = tcg_temp_new_vec(type);
3011    TCGv_vec h2 = tcg_temp_new_vec(type);
3012    TCGv_vec l1 = tcg_temp_new_vec(type);
3013    TCGv_vec l2 = tcg_temp_new_vec(type);
3014
3015    tcg_debug_assert (vece < MO_64);
3016
3017    /* Unpack with sign-extension. */
3018    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3019              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3020    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3021              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3022
3023    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3024              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3025    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3026              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3027
3028    /* Arithmetic on a wider element size. */
3029    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3030              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3031    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3032              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3033
3034    /* Pack with saturation. */
3035    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3036              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3037
3038    tcg_temp_free_vec(h1);
3039    tcg_temp_free_vec(h2);
3040    tcg_temp_free_vec(l1);
3041    tcg_temp_free_vec(l2);
3042}
3043
3044void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3045                       TCGArg a0, ...)
3046{
3047    va_list va;
3048    TCGv_vec v0, v1, v2, v3, v4, t0;
3049
3050    va_start(va, a0);
3051    v0 = temp_tcgv_vec(arg_temp(a0));
3052    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3053    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3054
3055    switch (opc) {
3056    case INDEX_op_cmp_vec:
3057        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3058        break;
3059
3060    case INDEX_op_cmpsel_vec:
3061        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3062        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3063        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3064        break;
3065
3066    case INDEX_op_rotrv_vec:
3067        t0 = tcg_temp_new_vec(type);
3068        tcg_gen_neg_vec(vece, t0, v2);
3069        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3070        tcg_temp_free_vec(t0);
3071        break;
3072
3073    case INDEX_op_ssadd_vec:
3074        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3075        break;
3076    case INDEX_op_sssub_vec:
3077        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3078        break;
3079
3080    default:
3081        g_assert_not_reached();
3082    }
3083    va_end(va);
3084}
3085
3086static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3087{
3088    switch (op) {
3089    case INDEX_op_goto_ptr:
3090        return C_O0_I1(r);
3091
3092    case INDEX_op_ld8u_i32:
3093    case INDEX_op_ld8u_i64:
3094    case INDEX_op_ld8s_i32:
3095    case INDEX_op_ld8s_i64:
3096    case INDEX_op_ld16u_i32:
3097    case INDEX_op_ld16u_i64:
3098    case INDEX_op_ld16s_i32:
3099    case INDEX_op_ld16s_i64:
3100    case INDEX_op_ld_i32:
3101    case INDEX_op_ld32u_i64:
3102    case INDEX_op_ld32s_i64:
3103    case INDEX_op_ld_i64:
3104        return C_O1_I1(r, r);
3105
3106    case INDEX_op_st8_i32:
3107    case INDEX_op_st8_i64:
3108    case INDEX_op_st16_i32:
3109    case INDEX_op_st16_i64:
3110    case INDEX_op_st_i32:
3111    case INDEX_op_st32_i64:
3112    case INDEX_op_st_i64:
3113        return C_O0_I2(r, r);
3114
3115    case INDEX_op_add_i32:
3116    case INDEX_op_add_i64:
3117    case INDEX_op_shl_i64:
3118    case INDEX_op_shr_i64:
3119    case INDEX_op_sar_i64:
3120    case INDEX_op_rotl_i32:
3121    case INDEX_op_rotl_i64:
3122    case INDEX_op_rotr_i32:
3123    case INDEX_op_rotr_i64:
3124    case INDEX_op_setcond_i32:
3125        return C_O1_I2(r, r, ri);
3126    case INDEX_op_setcond_i64:
3127        return C_O1_I2(r, r, rA);
3128
3129    case INDEX_op_clz_i64:
3130        return C_O1_I2(r, r, rI);
3131
3132    case INDEX_op_sub_i32:
3133    case INDEX_op_sub_i64:
3134    case INDEX_op_and_i32:
3135    case INDEX_op_or_i32:
3136    case INDEX_op_xor_i32:
3137        return C_O1_I2(r, r, ri);
3138    case INDEX_op_and_i64:
3139        return C_O1_I2(r, r, rNKR);
3140    case INDEX_op_or_i64:
3141    case INDEX_op_xor_i64:
3142        return C_O1_I2(r, r, rK);
3143
3144    case INDEX_op_andc_i32:
3145    case INDEX_op_orc_i32:
3146    case INDEX_op_eqv_i32:
3147        return C_O1_I2(r, r, ri);
3148    case INDEX_op_andc_i64:
3149        return C_O1_I2(r, r, rKR);
3150    case INDEX_op_orc_i64:
3151    case INDEX_op_eqv_i64:
3152        return C_O1_I2(r, r, rNK);
3153
3154    case INDEX_op_nand_i32:
3155    case INDEX_op_nand_i64:
3156    case INDEX_op_nor_i32:
3157    case INDEX_op_nor_i64:
3158        return C_O1_I2(r, r, r);
3159
3160    case INDEX_op_mul_i32:
3161        return (HAVE_FACILITY(MISC_INSN_EXT2)
3162                ? C_O1_I2(r, r, ri)
3163                : C_O1_I2(r, 0, ri));
3164    case INDEX_op_mul_i64:
3165        return (HAVE_FACILITY(MISC_INSN_EXT2)
3166                ? C_O1_I2(r, r, rJ)
3167                : C_O1_I2(r, 0, rJ));
3168
3169    case INDEX_op_shl_i32:
3170    case INDEX_op_shr_i32:
3171    case INDEX_op_sar_i32:
3172        return C_O1_I2(r, r, ri);
3173
3174    case INDEX_op_brcond_i32:
3175        return C_O0_I2(r, ri);
3176    case INDEX_op_brcond_i64:
3177        return C_O0_I2(r, rA);
3178
3179    case INDEX_op_bswap16_i32:
3180    case INDEX_op_bswap16_i64:
3181    case INDEX_op_bswap32_i32:
3182    case INDEX_op_bswap32_i64:
3183    case INDEX_op_bswap64_i64:
3184    case INDEX_op_neg_i32:
3185    case INDEX_op_neg_i64:
3186    case INDEX_op_not_i32:
3187    case INDEX_op_not_i64:
3188    case INDEX_op_ext8s_i32:
3189    case INDEX_op_ext8s_i64:
3190    case INDEX_op_ext8u_i32:
3191    case INDEX_op_ext8u_i64:
3192    case INDEX_op_ext16s_i32:
3193    case INDEX_op_ext16s_i64:
3194    case INDEX_op_ext16u_i32:
3195    case INDEX_op_ext16u_i64:
3196    case INDEX_op_ext32s_i64:
3197    case INDEX_op_ext32u_i64:
3198    case INDEX_op_ext_i32_i64:
3199    case INDEX_op_extu_i32_i64:
3200    case INDEX_op_extract_i32:
3201    case INDEX_op_extract_i64:
3202    case INDEX_op_ctpop_i32:
3203    case INDEX_op_ctpop_i64:
3204        return C_O1_I1(r, r);
3205
3206    case INDEX_op_qemu_ld_i32:
3207    case INDEX_op_qemu_ld_i64:
3208        return C_O1_I1(r, L);
3209    case INDEX_op_qemu_st_i64:
3210    case INDEX_op_qemu_st_i32:
3211        return C_O0_I2(L, L);
3212
3213    case INDEX_op_deposit_i32:
3214    case INDEX_op_deposit_i64:
3215        return C_O1_I2(r, rZ, r);
3216
3217    case INDEX_op_movcond_i32:
3218        return C_O1_I4(r, r, ri, rI, r);
3219    case INDEX_op_movcond_i64:
3220        return C_O1_I4(r, r, rA, rI, r);
3221
3222    case INDEX_op_div2_i32:
3223    case INDEX_op_div2_i64:
3224    case INDEX_op_divu2_i32:
3225    case INDEX_op_divu2_i64:
3226        return C_O2_I3(o, m, 0, 1, r);
3227
3228    case INDEX_op_mulu2_i64:
3229        return C_O2_I2(o, m, 0, r);
3230    case INDEX_op_muls2_i64:
3231        return C_O2_I2(o, m, r, r);
3232
3233    case INDEX_op_add2_i32:
3234    case INDEX_op_sub2_i32:
3235        return C_O2_I4(r, r, 0, 1, ri, r);
3236
3237    case INDEX_op_add2_i64:
3238    case INDEX_op_sub2_i64:
3239        return C_O2_I4(r, r, 0, 1, rA, r);
3240
3241    case INDEX_op_st_vec:
3242        return C_O0_I2(v, r);
3243    case INDEX_op_ld_vec:
3244    case INDEX_op_dupm_vec:
3245        return C_O1_I1(v, r);
3246    case INDEX_op_dup_vec:
3247        return C_O1_I1(v, vr);
3248    case INDEX_op_abs_vec:
3249    case INDEX_op_neg_vec:
3250    case INDEX_op_not_vec:
3251    case INDEX_op_rotli_vec:
3252    case INDEX_op_sari_vec:
3253    case INDEX_op_shli_vec:
3254    case INDEX_op_shri_vec:
3255    case INDEX_op_s390_vuph_vec:
3256    case INDEX_op_s390_vupl_vec:
3257        return C_O1_I1(v, v);
3258    case INDEX_op_add_vec:
3259    case INDEX_op_sub_vec:
3260    case INDEX_op_and_vec:
3261    case INDEX_op_andc_vec:
3262    case INDEX_op_or_vec:
3263    case INDEX_op_orc_vec:
3264    case INDEX_op_xor_vec:
3265    case INDEX_op_nand_vec:
3266    case INDEX_op_nor_vec:
3267    case INDEX_op_eqv_vec:
3268    case INDEX_op_cmp_vec:
3269    case INDEX_op_mul_vec:
3270    case INDEX_op_rotlv_vec:
3271    case INDEX_op_rotrv_vec:
3272    case INDEX_op_shlv_vec:
3273    case INDEX_op_shrv_vec:
3274    case INDEX_op_sarv_vec:
3275    case INDEX_op_smax_vec:
3276    case INDEX_op_smin_vec:
3277    case INDEX_op_umax_vec:
3278    case INDEX_op_umin_vec:
3279    case INDEX_op_s390_vpks_vec:
3280        return C_O1_I2(v, v, v);
3281    case INDEX_op_rotls_vec:
3282    case INDEX_op_shls_vec:
3283    case INDEX_op_shrs_vec:
3284    case INDEX_op_sars_vec:
3285        return C_O1_I2(v, v, r);
3286    case INDEX_op_bitsel_vec:
3287        return C_O1_I3(v, v, v, v);
3288
3289    default:
3290        g_assert_not_reached();
3291    }
3292}
3293
/*
 * Fallback definition of the vector-registers hwcap bit.
 *
 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.  Some
 * distros have fixed this up locally, others have not, so the name may be
 * missing from the system headers.
 */
#if !defined(HWCAP_S390_VXRS)
#define HWCAP_S390_VXRS 2048
#endif
3301
3302static void query_s390_facilities(void)
3303{
3304    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3305    const char *which;
3306
3307    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3308       is present on all 64-bit systems, but let's check for it anyway.  */
3309    if (hwcap & HWCAP_S390_STFLE) {
3310        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3311        register void *r1 __asm__("1") = s390_facilities;
3312
3313        /* stfle 0(%r1) */
3314        asm volatile(".word 0xb2b0,0x1000"
3315                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3316    }
3317
3318    /*
3319     * Use of vector registers requires os support beyond the facility bit.
3320     * If the kernel does not advertise support, disable the facility bits.
3321     * There is nothing else we currently care about in the 3rd word, so
3322     * disable VECTOR with one store.
3323     */
3324    if (!(hwcap & HWCAP_S390_VXRS)) {
3325        s390_facilities[2] = 0;
3326    }
3327
3328    /*
3329     * Minimum supported cpu revision is z196.
3330     * Check for all required facilities.
3331     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3332     */
3333    if (!HAVE_FACILITY(LONG_DISP)) {
3334        which = "long-displacement";
3335        goto fail;
3336    }
3337    if (!HAVE_FACILITY(EXT_IMM)) {
3338        which = "extended-immediate";
3339        goto fail;
3340    }
3341    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3342        which = "general-instructions-extension";
3343        goto fail;
3344    }
3345    /*
3346     * Facility 45 is a big bin that contains: distinct-operands,
3347     * fast-BCR-serialization, high-word, population-count,
3348     * interlocked-access-1, and load/store-on-condition-1
3349     */
3350    if (!HAVE_FACILITY(45)) {
3351        which = "45";
3352        goto fail;
3353    }
3354    return;
3355
3356 fail:
3357    error_report("%s: missing required facility %s", __func__, which);
3358    exit(EXIT_FAILURE);
3359}
3360
3361static void tcg_target_init(TCGContext *s)
3362{
3363    query_s390_facilities();
3364
3365    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3366    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3367    if (HAVE_FACILITY(VECTOR)) {
3368        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3369        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3370    }
3371
3372    tcg_target_call_clobber_regs = 0;
3373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3375    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3376    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3377    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3379    /* The r6 register is technically call-saved, but it's also a parameter
3380       register, so it can get killed by setup for the qemu_st helper.  */
3381    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3382    /* The return register can be considered call-clobbered.  */
3383    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3384
3385    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3386    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3387    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3388    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3389    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3390    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3391    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3392    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3393    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3394    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3395    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3396    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3397    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3398    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3399    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3400    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3401    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3402    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3403    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3404    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3405    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3406    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3407    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3408    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3409
3410    s->reserved_regs = 0;
3411    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3412    /* XXX many insns can't be used with R0, so we better avoid it for now */
3413    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3414    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3415}
3416
/*
 * Number of bytes by which the prologue lowers the stack pointer: the
 * call-stack offset, the static call-argument area and the TCG temporary
 * buffer, in that order.
 */
#define FRAME_SIZE                                      \
    ((int)(TCG_TARGET_CALL_STACK_OFFSET                 \
           + TCG_STATIC_CALL_ARGS_SIZE                  \
           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3420
3421static void tcg_target_qemu_prologue(TCGContext *s)
3422{
3423    /* stmg %r6,%r15,48(%r15) (save registers) */
3424    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3425
3426    /* aghi %r15,-frame_size */
3427    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3428
3429    tcg_set_frame(s, TCG_REG_CALL_STACK,
3430                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3431                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3432
3433#ifndef CONFIG_SOFTMMU
3434    if (guest_base >= 0x80000) {
3435        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3436        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3437    }
3438#endif
3439
3440    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3441
3442    /* br %r3 (go to TB) */
3443    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3444
3445    /*
3446     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3447     * and fall through to the rest of the epilogue.
3448     */
3449    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3450    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3451
3452    /* TB epilogue */
3453    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3454
3455    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3456    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3457                 FRAME_SIZE + 48);
3458
3459    /* br %r14 (return) */
3460    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3461}
3462
3463static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3464{
3465    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3466}
3467
3468typedef struct {
3469    DebugFrameHeader h;
3470    uint8_t fde_def_cfa[4];
3471    uint8_t fde_reg_ofs[18];
3472} DebugFrame;
3473
3474/* We're expecting a 2 byte uleb128 encoded value.  */
3475QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3476
3477#define ELF_HOST_MACHINE  EM_S390
3478
3479static const DebugFrame debug_frame = {
3480    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3481    .h.cie.id = -1,
3482    .h.cie.version = 1,
3483    .h.cie.code_align = 1,
3484    .h.cie.data_align = 8,                /* sleb128 8 */
3485    .h.cie.return_column = TCG_REG_R14,
3486
3487    /* Total FDE size does not include the "len" member.  */
3488    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3489
3490    .fde_def_cfa = {
3491        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3492        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3493        (FRAME_SIZE >> 7)
3494    },
3495    .fde_reg_ofs = {
3496        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3497        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3498        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3499        0x89, 9,                        /* DW_CFA_offset, %r92, 72 */
3500        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3501        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3502        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3503        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3504        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3505    }
3506};
3507
3508void tcg_register_jit(const void *buf, size_t buf_size)
3509{
3510    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3511}
3512