xref: /qemu/tcg/s390x/tcg-target.c.inc (revision dbd9e084)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-pool.c.inc"
33#include "elf.h"
34
35/* ??? The translation blocks produced by TCG are generally small enough to
36   be entirely reachable with a 16-bit displacement.  Leaving the option for
37   a 32-bit displacement here Just In Case.  */
38#define USE_LONG_BRANCHES 0
39
/*
 * Backend-specific constant-operand constraint flags, tested by
 * tcg_target_const_match().
 */
#define TCG_CT_CONST_S16   0x100    /* value fits in a signed 16-bit field */
#define TCG_CT_CONST_S32   0x200    /* value fits in a signed 32-bit field */
#define TCG_CT_CONST_S33   0x400    /* value is within [-0xffffffff, 0xffffffff] */
#define TCG_CT_CONST_ZERO  0x800    /* value is exactly zero */

/*
 * Register-set masks for the general and vector register files.
 * NOTE(review): assumes MAKE_64BIT_MASK(shift, length); that would put the
 * general registers at bits 0-15 and the vector registers at bits 32-63,
 * which matches the layout of tcg_target_reg_names[] -- confirm.
 */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
47
/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.  Without softmmu nothing is reserved.
 * NOTE(review): assumes MAKE_64BIT_MASK(shift, length), i.e. three
 * consecutive register bits starting at TCG_REG_R2 -- confirm.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
58
59
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
62#define TCG_REG_NONE    0
63
/* A scratch register that may be used throughout the backend.  */
65#define TCG_TMP0        TCG_REG_R1
66
67/* A scratch register that holds a pointer to the beginning of the TB.
68   We don't need this when we have pc-relative loads with the general
69   instructions extension facility.  */
70#define TCG_REG_TB      TCG_REG_R12
71#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))
72
/*
 * Only defined when softmmu is not configured.
 * NOTE(review): presumably the register that holds the guest base address
 * for user-mode memory accesses; its users are outside this chunk -- confirm.
 */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
76
77/* All of the following instructions are prefixed with their instruction
78   format, and are defined as 8- or 16-bit quantities, even when the two
79   halves of the 16-bit quantity may appear 32 bits apart in the insn.
80   This makes it easy to copy the values from the tables in Appendix B.  */
/*
 * Opcode constants, named <FORMAT>_<MNEMONIC>.  The format prefix is what
 * the tcg_out_insn() macro pastes together to pick the matching
 * tcg_out_insn_<FORMAT>() emitter.
 */
typedef enum S390Opcode {
    /* RIL format */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI format */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,

    /* RIE format */
    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    /* RRE format */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF format */
    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    /* RR format (8-bit opcodes) */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY format */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* RS format (8-bit opcodes) */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY format (emitted through tcg_out_insn_RSY, see the RXY alias) */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* RX format (8-bit opcodes; emitted through tcg_out_insn_RS) */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* VRI formats (vector, immediate operand) */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* VRR formats (vector, register operands) */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNO    = 0xe76b,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* VRS formats (vector, base + displacement operand) */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* VRX format (vector, base + index + displacement operand) */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /*
     * A complete 16-bit instruction rather than an opcode: in the RR layout
     * (opcode << 8 | r1 << 4 | r2) this is RR_BCR with r1 (mask) = 0, r2 = 7.
     */
    NOP         = 0x0707,
} S390Opcode;
318
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by TCGReg number: 16 general
 * registers, 16 unused slots (no name, NULL), then 32 vector registers.
 * The unused slots are spelled NULL rather than 0 because this is a
 * table of pointers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif
330
331/* Since R6 is a potential argument register, choose it last of the
332   call-saved registers.  Likewise prefer the call-clobbered registers
333   in reverse order to maximize the chance of avoiding the arguments.  */
334static const int tcg_target_reg_alloc_order[] = {
335    /* Call saved registers.  */
336    TCG_REG_R13,
337    TCG_REG_R12,
338    TCG_REG_R11,
339    TCG_REG_R10,
340    TCG_REG_R9,
341    TCG_REG_R8,
342    TCG_REG_R7,
343    TCG_REG_R6,
344    /* Call clobbered registers.  */
345    TCG_REG_R14,
346    TCG_REG_R0,
347    TCG_REG_R1,
348    /* Argument registers, in reverse order of allocation.  */
349    TCG_REG_R5,
350    TCG_REG_R4,
351    TCG_REG_R3,
352    TCG_REG_R2,
353
354    /* V8-V15 are call saved, and omitted. */
355    TCG_REG_V0,
356    TCG_REG_V1,
357    TCG_REG_V2,
358    TCG_REG_V3,
359    TCG_REG_V4,
360    TCG_REG_V5,
361    TCG_REG_V6,
362    TCG_REG_V7,
363    TCG_REG_V16,
364    TCG_REG_V17,
365    TCG_REG_V18,
366    TCG_REG_V19,
367    TCG_REG_V20,
368    TCG_REG_V21,
369    TCG_REG_V22,
370    TCG_REG_V23,
371    TCG_REG_V24,
372    TCG_REG_V25,
373    TCG_REG_V26,
374    TCG_REG_V27,
375    TCG_REG_V28,
376    TCG_REG_V29,
377    TCG_REG_V30,
378    TCG_REG_V31,
379};
380
381static const int tcg_target_call_iarg_regs[] = {
382    TCG_REG_R2,
383    TCG_REG_R3,
384    TCG_REG_R4,
385    TCG_REG_R5,
386    TCG_REG_R6,
387};
388
389static const int tcg_target_call_oarg_regs[] = {
390    TCG_REG_R2,
391};
392
/*
 * Condition-mask values.  The four single-bit masks are named for the
 * outcome they select; the remaining masks are unions of those bits.
 */
#define S390_CC_EQ      0x8
#define S390_CC_LT      0x4
#define S390_CC_GT      0x2
#define S390_CC_OV      0x1
#define S390_CC_NE      (S390_CC_GT | S390_CC_LT)
#define S390_CC_LE      (S390_CC_EQ | S390_CC_LT)
#define S390_CC_GE      (S390_CC_EQ | S390_CC_GT)
#define S390_CC_NEVER   0x0
#define S390_CC_ALWAYS  0xf
402
403/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
404static const uint8_t tcg_cond_to_s390_cond[] = {
405    [TCG_COND_EQ]  = S390_CC_EQ,
406    [TCG_COND_NE]  = S390_CC_NE,
407    [TCG_COND_LT]  = S390_CC_LT,
408    [TCG_COND_LE]  = S390_CC_LE,
409    [TCG_COND_GT]  = S390_CC_GT,
410    [TCG_COND_GE]  = S390_CC_GE,
411    [TCG_COND_LTU] = S390_CC_LT,
412    [TCG_COND_LEU] = S390_CC_LE,
413    [TCG_COND_GTU] = S390_CC_GT,
414    [TCG_COND_GEU] = S390_CC_GE,
415};
416
417/* Condition codes that result from a LOAD AND TEST.  Here, we have no
418   unsigned instruction variation, however since the test is vs zero we
419   can re-map the outcomes appropriately.  */
420static const uint8_t tcg_cond_to_ltr_cond[] = {
421    [TCG_COND_EQ]  = S390_CC_EQ,
422    [TCG_COND_NE]  = S390_CC_NE,
423    [TCG_COND_LT]  = S390_CC_LT,
424    [TCG_COND_LE]  = S390_CC_LE,
425    [TCG_COND_GT]  = S390_CC_GT,
426    [TCG_COND_GE]  = S390_CC_GE,
427    [TCG_COND_LTU] = S390_CC_NEVER,
428    [TCG_COND_LEU] = S390_CC_EQ,
429    [TCG_COND_GTU] = S390_CC_NE,
430    [TCG_COND_GEU] = S390_CC_ALWAYS,
431};
432
#ifdef CONFIG_SOFTMMU
/* Softmmu load helpers, indexed by the size/sign/byte-swap bits of the memop. */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    /* 8-bit: no endianness */
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    /* 16-bit */
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,

    /* 32-bit */
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,

    /* 64-bit */
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* Softmmu store helpers, indexed by the size/byte-swap bits of the memop. */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    /* 8-bit: no endianness */
    [MO_UB]   = helper_ret_stb_mmu,

    /* 16-bit */
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_BEUW] = helper_be_stw_mmu,

    /* 32-bit */
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_BEUL] = helper_be_stl_mmu,

    /* 64-bit */
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
#endif
459
/*
 * NOTE(review): presumably the code address that translation blocks return
 * to; it is assigned outside this chunk -- confirm.
 */
static const tcg_insn_unit *tb_ret_addr;
/*
 * Non-static, so visible to other translation units.
 * NOTE(review): presumably the host facility bits queried by
 * HAVE_FACILITY(); the code that fills it is outside this chunk -- confirm.
 */
uint64_t s390_facilities[3];
462
463static inline bool is_general_reg(TCGReg r)
464{
465    return r <= TCG_REG_R15;
466}
467
468static inline bool is_vector_reg(TCGReg r)
469{
470    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
471}
472
473static bool patch_reloc(tcg_insn_unit *src_rw, int type,
474                        intptr_t value, intptr_t addend)
475{
476    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
477    intptr_t pcrel2;
478    uint32_t old;
479
480    value += addend;
481    pcrel2 = (tcg_insn_unit *)value - src_rx;
482
483    switch (type) {
484    case R_390_PC16DBL:
485        if (pcrel2 == (int16_t)pcrel2) {
486            tcg_patch16(src_rw, pcrel2);
487            return true;
488        }
489        break;
490    case R_390_PC32DBL:
491        if (pcrel2 == (int32_t)pcrel2) {
492            tcg_patch32(src_rw, pcrel2);
493            return true;
494        }
495        break;
496    case R_390_20:
497        if (value == sextract64(value, 0, 20)) {
498            old = *(uint32_t *)src_rw & 0xf00000ff;
499            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
500            tcg_patch32(src_rw, old);
501            return true;
502        }
503        break;
504    default:
505        g_assert_not_reached();
506    }
507    return false;
508}
509
510/* Test if a constant matches the constraint. */
511static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
512{
513    if (ct & TCG_CT_CONST) {
514        return 1;
515    }
516
517    if (type == TCG_TYPE_I32) {
518        val = (int32_t)val;
519    }
520
521    /* The following are mutually exclusive.  */
522    if (ct & TCG_CT_CONST_S16) {
523        return val == (int16_t)val;
524    } else if (ct & TCG_CT_CONST_S32) {
525        return val == (int32_t)val;
526    } else if (ct & TCG_CT_CONST_S33) {
527        return val >= -0xffffffffll && val <= 0xffffffffll;
528    } else if (ct & TCG_CT_CONST_ZERO) {
529        return val == 0;
530    }
531
532    return 0;
533}
534
535/* Emit instructions according to the given instruction format.  */
536
537static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
538{
539    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
540}
541
542static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
543                             TCGReg r1, TCGReg r2)
544{
545    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
546}
547
548static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
549                             TCGReg r1, TCGReg r2, int m3)
550{
551    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
552}
553
554static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
555{
556    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
557}
558
559static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
560                             int i2, int m3)
561{
562    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
563    tcg_out32(s, (i2 << 16) | (op & 0xff));
564}
565
566static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
567{
568    tcg_out16(s, op | (r1 << 4));
569    tcg_out32(s, i2);
570}
571
572static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
573                            TCGReg b2, TCGReg r3, int disp)
574{
575    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
576              | (disp & 0xfff));
577}
578
579static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
580                             TCGReg b2, TCGReg r3, int disp)
581{
582    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
583    tcg_out32(s, (op & 0xff) | (b2 << 28)
584              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
585}
586
/*
 * The RX and RXY emitters reuse the RS and RSY ones unchanged: callers
 * pass (data, base, index, disp), so the index register lands in the
 * field those emitters call R3.
 */
#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY
589
590static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
591{
592    /*
593     * Shift bit 4 of each regno to its corresponding bit of RXB.
594     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
595     * is the left-shift of the 4th operand.
596     */
597    return ((v1 & 0x10) << (4 + 3))
598         | ((v2 & 0x10) << (4 + 2))
599         | ((v3 & 0x10) << (4 + 1))
600         | ((v4 & 0x10) << (4 + 0));
601}
602
603static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
604                              TCGReg v1, uint16_t i2, int m3)
605{
606    tcg_debug_assert(is_vector_reg(v1));
607    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
608    tcg_out16(s, i2);
609    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
610}
611
612static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
613                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
614{
615    tcg_debug_assert(is_vector_reg(v1));
616    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
617    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
618    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
619}
620
621static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
622                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
623{
624    tcg_debug_assert(is_vector_reg(v1));
625    tcg_debug_assert(is_vector_reg(v3));
626    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
627    tcg_out16(s, i2);
628    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
629}
630
631static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
632                              TCGReg v1, TCGReg v2, int m3)
633{
634    tcg_debug_assert(is_vector_reg(v1));
635    tcg_debug_assert(is_vector_reg(v2));
636    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
637    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
638}
639
640static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
641                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
642{
643    tcg_debug_assert(is_vector_reg(v1));
644    tcg_debug_assert(is_vector_reg(v2));
645    tcg_debug_assert(is_vector_reg(v3));
646    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
647    tcg_out16(s, v3 << 12);
648    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
649}
650
651static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
652                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
653{
654    tcg_debug_assert(is_vector_reg(v1));
655    tcg_debug_assert(is_vector_reg(v2));
656    tcg_debug_assert(is_vector_reg(v3));
657    tcg_debug_assert(is_vector_reg(v4));
658    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
659    tcg_out16(s, v3 << 12);
660    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
661}
662
663static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
664                              TCGReg v1, TCGReg r2, TCGReg r3)
665{
666    tcg_debug_assert(is_vector_reg(v1));
667    tcg_debug_assert(is_general_reg(r2));
668    tcg_debug_assert(is_general_reg(r3));
669    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
670    tcg_out16(s, r3 << 12);
671    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
672}
673
674static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
675                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
676{
677    tcg_debug_assert(is_vector_reg(v1));
678    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
679    tcg_debug_assert(is_general_reg(b2));
680    tcg_debug_assert(is_vector_reg(v3));
681    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
682    tcg_out16(s, b2 << 12 | d2);
683    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
684}
685
686static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
687                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
688{
689    tcg_debug_assert(is_vector_reg(v1));
690    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
691    tcg_debug_assert(is_general_reg(b2));
692    tcg_debug_assert(is_general_reg(r3));
693    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
694    tcg_out16(s, b2 << 12 | d2);
695    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
696}
697
698static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
699                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
700{
701    tcg_debug_assert(is_general_reg(r1));
702    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
703    tcg_debug_assert(is_general_reg(b2));
704    tcg_debug_assert(is_vector_reg(v3));
705    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
706    tcg_out16(s, b2 << 12 | d2);
707    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
708}
709
710static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
711                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
712{
713    tcg_debug_assert(is_vector_reg(v1));
714    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
715    tcg_debug_assert(is_general_reg(x2));
716    tcg_debug_assert(is_general_reg(b2));
717    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
718    tcg_out16(s, (b2 << 12) | d2);
719    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
720}
721
/*
 * Emit an opcode with "type-checking" of the format.
 * FMT selects the emitter tcg_out_insn_<FMT> and, pasted with OP, the
 * opcode constant <FMT>_<OP>; pairing a mnemonic with a format under
 * which it is not declared therefore fails to compile.
 */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
725
726
727/* emit 64-bit shifts */
728static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
729                         TCGReg src, TCGReg sh_reg, int sh_imm)
730{
731    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
732}
733
734/* emit 32-bit shifts */
735static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
736                         TCGReg sh_reg, int sh_imm)
737{
738    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
739}
740
741static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
742{
743    if (src == dst) {
744        return true;
745    }
746    switch (type) {
747    case TCG_TYPE_I32:
748        if (likely(is_general_reg(dst) && is_general_reg(src))) {
749            tcg_out_insn(s, RR, LR, dst, src);
750            break;
751        }
752        /* fallthru */
753
754    case TCG_TYPE_I64:
755        if (likely(is_general_reg(dst))) {
756            if (likely(is_general_reg(src))) {
757                tcg_out_insn(s, RRE, LGR, dst, src);
758            } else {
759                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
760            }
761            break;
762        } else if (is_general_reg(src)) {
763            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
764            break;
765        }
766        /* fallthru */
767
768    case TCG_TYPE_V64:
769    case TCG_TYPE_V128:
770        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
771        break;
772
773    default:
774        g_assert_not_reached();
775    }
776    return true;
777}
778
779static const S390Opcode lli_insns[4] = {
780    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
781};
782
783static bool maybe_out_small_movi(TCGContext *s, TCGType type,
784                                 TCGReg ret, tcg_target_long sval)
785{
786    tcg_target_ulong uval = sval;
787    int i;
788
789    if (type == TCG_TYPE_I32) {
790        uval = (uint32_t)sval;
791        sval = (int32_t)sval;
792    }
793
794    /* Try all 32-bit insns that can load it in one go.  */
795    if (sval >= -0x8000 && sval < 0x8000) {
796        tcg_out_insn(s, RI, LGHI, ret, sval);
797        return true;
798    }
799
800    for (i = 0; i < 4; i++) {
801        tcg_target_long mask = 0xffffull << i*16;
802        if ((uval & mask) == uval) {
803            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
804            return true;
805        }
806    }
807
808    return false;
809}
810
/*
 * Load a register with an immediate value.
 *
 * Strategies are tried from cheapest to most expensive:
 *   1. a single 32-bit instruction (maybe_out_small_movi);
 *   2. a single 48-bit immediate load, if the extended-immediate
 *      facility is present;
 *   3. an address computation: pc-relative LARL for even values, or
 *      an offset from TCG_REG_TB for odd ones;
 *   4. two instructions for a 32-bit unsigned value;
 *   5. a load from the constant pool.
 * IN_PROLOGUE disables every path that relies on TCG_REG_TB.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long sval, bool in_prologue)
{
    tcg_target_ulong uval;

    /* Try all 32-bit insns that can load it in one go.  */
    if (maybe_out_small_movi(s, type, ret, sval)) {
        return;
    }

    uval = sval;
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if (sval == (int32_t)sval) {
            tcg_out_insn(s, RIL, LGFI, ret, sval);
            return;
        }
        if (uval <= 0xffffffff) {
            tcg_out_insn(s, RIL, LLILF, ret, uval);
            return;
        }
        if ((uval & 0xffffffff) == 0) {
            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
            return;
        }
    }

    /* Try for PC-relative address load.  For odd addresses,
       attempt to use an offset from the start of the TB.  */
    if ((sval & 1) == 0) {
        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
        if (off == (int32_t)off) {
            tcg_out_insn(s, RIL, LARL, ret, off);
            return;
        }
    } else if (USE_REG_TB && !in_prologue) {
        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
        if (off == sextract64(off, 0, 20)) {
            /* This is certain to be an address within TB, and therefore
               OFF will be negative; don't try RX_LA.  */
            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
            return;
        }
    }

    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
       that LLILL, LLILH, LLILF above did not succeed, we know that
       both insns are required.  */
    if (uval <= 0xffffffff) {
        tcg_out_insn(s, RI, LLILL, ret, uval);
        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  */
    if (HAVE_FACILITY(GEN_INST_EXT)) {
        tcg_out_insn(s, RIL, LGRL, ret, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
    } else if (USE_REG_TB && !in_prologue) {
        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Register 0 as a base means "no register", so use the scratch
           register to hold the pool address when RET is R0.  */
        TCGReg base = ret ? ret : TCG_TMP0;
        tcg_out_insn(s, RIL, LARL, base, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
    }
}
886
887static void tcg_out_movi(TCGContext *s, TCGType type,
888                         TCGReg ret, tcg_target_long sval)
889{
890    tcg_out_movi_int(s, type, ret, sval, false);
891}
892
893/* Emit a load/store type instruction.  Inputs are:
894   DATA:     The register to be loaded or stored.
895   BASE+OFS: The effective address.
896   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
897   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
898
899static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
900                        TCGReg data, TCGReg base, TCGReg index,
901                        tcg_target_long ofs)
902{
903    if (ofs < -0x80000 || ofs >= 0x80000) {
904        /* Combine the low 20 bits of the offset with the actual load insn;
905           the high 44 bits must come from an immediate load.  */
906        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
907        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
908        ofs = low;
909
910        /* If we were already given an index register, add it in.  */
911        if (index != TCG_REG_NONE) {
912            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
913        }
914        index = TCG_TMP0;
915    }
916
917    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
918        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
919    } else {
920        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
921    }
922}
923
924static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
925                            TCGReg data, TCGReg base, TCGReg index,
926                            tcg_target_long ofs, int m3)
927{
928    if (ofs < 0 || ofs >= 0x1000) {
929        if (ofs >= -0x80000 && ofs < 0x80000) {
930            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
931            base = TCG_TMP0;
932            index = TCG_REG_NONE;
933            ofs = 0;
934        } else {
935            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
936            if (index != TCG_REG_NONE) {
937                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
938            }
939            index = TCG_TMP0;
940            ofs = 0;
941        }
942    }
943    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
944}
945
946/* load data without address translation or endianness conversion */
947static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
948                       TCGReg base, intptr_t ofs)
949{
950    switch (type) {
951    case TCG_TYPE_I32:
952        if (likely(is_general_reg(data))) {
953            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
954            break;
955        }
956        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
957        break;
958
959    case TCG_TYPE_I64:
960        if (likely(is_general_reg(data))) {
961            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
962            break;
963        }
964        /* fallthru */
965
966    case TCG_TYPE_V64:
967        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
968        break;
969
970    case TCG_TYPE_V128:
971        /* Hint quadword aligned.  */
972        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
973        break;
974
975    default:
976        g_assert_not_reached();
977    }
978}
979
980static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
981                       TCGReg base, intptr_t ofs)
982{
983    switch (type) {
984    case TCG_TYPE_I32:
985        if (likely(is_general_reg(data))) {
986            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
987        } else {
988            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
989        }
990        break;
991
992    case TCG_TYPE_I64:
993        if (likely(is_general_reg(data))) {
994            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
995            break;
996        }
997        /* fallthru */
998
999    case TCG_TYPE_V64:
1000        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1001        break;
1002
1003    case TCG_TYPE_V128:
1004        /* Hint quadword aligned.  */
1005        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1006        break;
1007
1008    default:
1009        g_assert_not_reached();
1010    }
1011}
1012
1013static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1014                               TCGReg base, intptr_t ofs)
1015{
1016    return false;
1017}
1018
1019/* load data from an absolute host address */
1020static void tcg_out_ld_abs(TCGContext *s, TCGType type,
1021                           TCGReg dest, const void *abs)
1022{
1023    intptr_t addr = (intptr_t)abs;
1024
1025    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
1026        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
1027        if (disp == (int32_t)disp) {
1028            if (type == TCG_TYPE_I32) {
1029                tcg_out_insn(s, RIL, LRL, dest, disp);
1030            } else {
1031                tcg_out_insn(s, RIL, LGRL, dest, disp);
1032            }
1033            return;
1034        }
1035    }
1036    if (USE_REG_TB) {
1037        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
1038        if (disp == sextract64(disp, 0, 20)) {
1039            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
1040            return;
1041        }
1042    }
1043
1044    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
1045    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
1046}
1047
1048static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1049                                 int msb, int lsb, int ofs, int z)
1050{
1051    /* Format RIE-f */
1052    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
1053    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1054    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
1055}
1056
1057static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1058{
1059    if (HAVE_FACILITY(EXT_IMM)) {
1060        tcg_out_insn(s, RRE, LGBR, dest, src);
1061        return;
1062    }
1063
1064    if (type == TCG_TYPE_I32) {
1065        if (dest == src) {
1066            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
1067        } else {
1068            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
1069        }
1070        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
1071    } else {
1072        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
1073        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
1074    }
1075}
1076
1077static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1078{
1079    if (HAVE_FACILITY(EXT_IMM)) {
1080        tcg_out_insn(s, RRE, LLGCR, dest, src);
1081        return;
1082    }
1083
1084    if (dest == src) {
1085        tcg_out_movi(s, type, TCG_TMP0, 0xff);
1086        src = TCG_TMP0;
1087    } else {
1088        tcg_out_movi(s, type, dest, 0xff);
1089    }
1090    if (type == TCG_TYPE_I32) {
1091        tcg_out_insn(s, RR, NR, dest, src);
1092    } else {
1093        tcg_out_insn(s, RRE, NGR, dest, src);
1094    }
1095}
1096
1097static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1098{
1099    if (HAVE_FACILITY(EXT_IMM)) {
1100        tcg_out_insn(s, RRE, LGHR, dest, src);
1101        return;
1102    }
1103
1104    if (type == TCG_TYPE_I32) {
1105        if (dest == src) {
1106            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
1107        } else {
1108            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
1109        }
1110        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
1111    } else {
1112        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
1113        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
1114    }
1115}
1116
1117static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1118{
1119    if (HAVE_FACILITY(EXT_IMM)) {
1120        tcg_out_insn(s, RRE, LLGHR, dest, src);
1121        return;
1122    }
1123
1124    if (dest == src) {
1125        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
1126        src = TCG_TMP0;
1127    } else {
1128        tcg_out_movi(s, type, dest, 0xffff);
1129    }
1130    if (type == TCG_TYPE_I32) {
1131        tcg_out_insn(s, RR, NR, dest, src);
1132    } else {
1133        tcg_out_insn(s, RRE, NGR, dest, src);
1134    }
1135}
1136
1137static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1138{
1139    tcg_out_insn(s, RRE, LGFR, dest, src);
1140}
1141
1142static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1143{
1144    tcg_out_insn(s, RRE, LLGFR, dest, src);
1145}
1146
/*
 * Return true when C is a single contiguous run of one bits, where the run
 * is allowed to wrap around from bit 63 to bit 0.  Accepted shapes:
 *    0....01....1
 *    1....10....0
 *    1..10..01..1
 *    0..01..10..0
 * All-zeros and all-ones are rejected.  Copied from gcc sources.
 */
static inline bool risbg_mask(uint64_t c)
{
    /* Inverting keeps the number of 0/1 transitions; force bit 0 clear.  */
    uint64_t bits = (c & 1) ? ~c : c;
    uint64_t first, rest, second;

    /* All zeros or all ones: no transition at all.  */
    if (bits == 0) {
        return false;
    }

    /* Lowest set bit marks the first transition.  */
    first = bits & -bits;
    /* Flip, then drop everything below the first transition.  */
    rest = ~bits & -first;
    /* Lowest set bit of what remains is the second transition, if any.  */
    second = rest & -rest;

    /* Accept if ones run from there up to bit 63, or if nothing remains.  */
    return rest == -second;
}
1176
1177static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1178{
1179    int msb, lsb;
1180    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1181        /* Achieve wraparound by swapping msb and lsb.  */
1182        msb = 64 - ctz64(~val);
1183        lsb = clz64(~val) - 1;
1184    } else {
1185        msb = clz64(val);
1186        lsb = 63 - ctz64(val);
1187    }
1188    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1189}
1190
1191static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1192{
1193    static const S390Opcode ni_insns[4] = {
1194        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1195    };
1196    static const S390Opcode nif_insns[2] = {
1197        RIL_NILF, RIL_NIHF
1198    };
1199    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1200    int i;
1201
1202    /* Look for the zero-extensions.  */
1203    if ((val & valid) == 0xffffffff) {
1204        tgen_ext32u(s, dest, dest);
1205        return;
1206    }
1207    if (HAVE_FACILITY(EXT_IMM)) {
1208        if ((val & valid) == 0xff) {
1209            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1210            return;
1211        }
1212        if ((val & valid) == 0xffff) {
1213            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1214            return;
1215        }
1216    }
1217
1218    /* Try all 32-bit insns that can perform it in one go.  */
1219    for (i = 0; i < 4; i++) {
1220        tcg_target_ulong mask = ~(0xffffull << i*16);
1221        if (((val | ~valid) & mask) == mask) {
1222            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
1223            return;
1224        }
1225    }
1226
1227    /* Try all 48-bit insns that can perform it in one go.  */
1228    if (HAVE_FACILITY(EXT_IMM)) {
1229        for (i = 0; i < 2; i++) {
1230            tcg_target_ulong mask = ~(0xffffffffull << i*32);
1231            if (((val | ~valid) & mask) == mask) {
1232                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
1233                return;
1234            }
1235        }
1236    }
1237    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
1238        tgen_andi_risbg(s, dest, dest, val);
1239        return;
1240    }
1241
1242    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1243    if (USE_REG_TB) {
1244        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1245            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1246            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
1247                           tcg_tbrel_diff(s, NULL));
1248            return;
1249        }
1250    } else {
1251        tcg_out_movi(s, type, TCG_TMP0, val);
1252    }
1253    if (type == TCG_TYPE_I32) {
1254        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
1255    } else {
1256        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
1257    }
1258}
1259
1260static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1261{
1262    static const S390Opcode oi_insns[4] = {
1263        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
1264    };
1265    static const S390Opcode oif_insns[2] = {
1266        RIL_OILF, RIL_OIHF
1267    };
1268
1269    int i;
1270
1271    /* Look for no-op.  */
1272    if (unlikely(val == 0)) {
1273        return;
1274    }
1275
1276    /* Try all 32-bit insns that can perform it in one go.  */
1277    for (i = 0; i < 4; i++) {
1278        tcg_target_ulong mask = (0xffffull << i*16);
1279        if ((val & mask) != 0 && (val & ~mask) == 0) {
1280            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
1281            return;
1282        }
1283    }
1284
1285    /* Try all 48-bit insns that can perform it in one go.  */
1286    if (HAVE_FACILITY(EXT_IMM)) {
1287        for (i = 0; i < 2; i++) {
1288            tcg_target_ulong mask = (0xffffffffull << i*32);
1289            if ((val & mask) != 0 && (val & ~mask) == 0) {
1290                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
1291                return;
1292            }
1293        }
1294    }
1295
1296    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1297    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1298        if (type == TCG_TYPE_I32) {
1299            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
1300        } else {
1301            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
1302        }
1303    } else if (USE_REG_TB) {
1304        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1305        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1306                       tcg_tbrel_diff(s, NULL));
1307    } else {
1308        /* Perform the OR via sequential modifications to the high and
1309           low parts.  Do this via recursion to handle 16-bit vs 32-bit
1310           masks in each half.  */
1311        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1312        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
1313        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
1314    }
1315}
1316
1317static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1318{
1319    /* Try all 48-bit insns that can perform it in one go.  */
1320    if (HAVE_FACILITY(EXT_IMM)) {
1321        if ((val & 0xffffffff00000000ull) == 0) {
1322            tcg_out_insn(s, RIL, XILF, dest, val);
1323            return;
1324        }
1325        if ((val & 0x00000000ffffffffull) == 0) {
1326            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1327            return;
1328        }
1329    }
1330
1331    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1332    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1333        if (type == TCG_TYPE_I32) {
1334            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
1335        } else {
1336            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
1337        }
1338    } else if (USE_REG_TB) {
1339        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1340        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1341                       tcg_tbrel_diff(s, NULL));
1342    } else {
1343        /* Perform the xor by parts.  */
1344        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1345        if (val & 0xffffffff) {
1346            tcg_out_insn(s, RIL, XILF, dest, val);
1347        }
1348        if (val > 0xffffffff) {
1349            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1350        }
1351    }
1352}
1353
1354static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1355                    TCGArg c2, bool c2const, bool need_carry)
1356{
1357    bool is_unsigned = is_unsigned_cond(c);
1358    S390Opcode op;
1359
1360    if (c2const) {
1361        if (c2 == 0) {
1362            if (!(is_unsigned && need_carry)) {
1363                if (type == TCG_TYPE_I32) {
1364                    tcg_out_insn(s, RR, LTR, r1, r1);
1365                } else {
1366                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1367                }
1368                return tcg_cond_to_ltr_cond[c];
1369            }
1370        }
1371
1372        if (!is_unsigned && c2 == (int16_t)c2) {
1373            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1374            tcg_out_insn_RI(s, op, r1, c2);
1375            goto exit;
1376        }
1377
1378        if (HAVE_FACILITY(EXT_IMM)) {
1379            if (type == TCG_TYPE_I32) {
1380                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1381                tcg_out_insn_RIL(s, op, r1, c2);
1382                goto exit;
1383            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1384                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1385                tcg_out_insn_RIL(s, op, r1, c2);
1386                goto exit;
1387            }
1388        }
1389
1390        /* Use the constant pool, but not for small constants.  */
1391        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
1392            c2 = TCG_TMP0;
1393            /* fall through to reg-reg */
1394        } else if (USE_REG_TB) {
1395            if (type == TCG_TYPE_I32) {
1396                op = (is_unsigned ? RXY_CLY : RXY_CY);
1397                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1398                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
1399                               4 - tcg_tbrel_diff(s, NULL));
1400            } else {
1401                op = (is_unsigned ? RXY_CLG : RXY_CG);
1402                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1403                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
1404                               tcg_tbrel_diff(s, NULL));
1405            }
1406            goto exit;
1407        } else {
1408            if (type == TCG_TYPE_I32) {
1409                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
1410                tcg_out_insn_RIL(s, op, r1, 0);
1411                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
1412                               s->code_ptr - 2, 2 + 4);
1413            } else {
1414                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
1415                tcg_out_insn_RIL(s, op, r1, 0);
1416                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
1417            }
1418            goto exit;
1419        }
1420    }
1421
1422    if (type == TCG_TYPE_I32) {
1423        op = (is_unsigned ? RR_CLR : RR_CR);
1424        tcg_out_insn_RR(s, op, r1, c2);
1425    } else {
1426        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1427        tcg_out_insn_RRE(s, op, r1, c2);
1428    }
1429
1430 exit:
1431    return tcg_cond_to_s390_cond[c];
1432}
1433
1434static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1435                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1436{
1437    int cc;
1438    bool have_loc;
1439
1440    /* With LOC2, we can always emit the minimum 3 insns.  */
1441    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1442        /* Emit: d = 0, d = (cc ? 1 : d).  */
1443        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1444        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1445        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
1446        return;
1447    }
1448
1449    have_loc = HAVE_FACILITY(LOAD_ON_COND);
1450
1451    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
1452 restart:
1453    switch (cond) {
1454    case TCG_COND_NE:
1455        /* X != 0 is X > 0.  */
1456        if (c2const && c2 == 0) {
1457            cond = TCG_COND_GTU;
1458        } else {
1459            break;
1460        }
1461        /* fallthru */
1462
1463    case TCG_COND_GTU:
1464    case TCG_COND_GT:
1465        /* The result of a compare has CC=2 for GT and CC=3 unused.
1466           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1467        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1468        tcg_out_movi(s, type, dest, 0);
1469        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1470        return;
1471
1472    case TCG_COND_EQ:
1473        /* X == 0 is X <= 0.  */
1474        if (c2const && c2 == 0) {
1475            cond = TCG_COND_LEU;
1476        } else {
1477            break;
1478        }
1479        /* fallthru */
1480
1481    case TCG_COND_LEU:
1482    case TCG_COND_LE:
1483        /* As above, but we're looking for borrow, or !carry.
1484           The second insn computes d - d - borrow, or -1 for true
1485           and 0 for false.  So we must mask to 1 bit afterward.  */
1486        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1487        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1488        tgen_andi(s, type, dest, 1);
1489        return;
1490
1491    case TCG_COND_GEU:
1492    case TCG_COND_LTU:
1493    case TCG_COND_LT:
1494    case TCG_COND_GE:
1495        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1496        if (c2const) {
1497            if (have_loc) {
1498                break;
1499            }
1500            tcg_out_movi(s, type, TCG_TMP0, c2);
1501            c2 = c1;
1502            c2const = 0;
1503            c1 = TCG_TMP0;
1504        } else {
1505            TCGReg t = c1;
1506            c1 = c2;
1507            c2 = t;
1508        }
1509        cond = tcg_swap_cond(cond);
1510        goto restart;
1511
1512    default:
1513        g_assert_not_reached();
1514    }
1515
1516    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1517    if (have_loc) {
1518        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1519        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1520        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1521        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
1522    } else {
1523        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
1524        tcg_out_movi(s, type, dest, 1);
1525        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1526        tcg_out_movi(s, type, dest, 0);
1527    }
1528}
1529
1530static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1531                         TCGReg c1, TCGArg c2, int c2const,
1532                         TCGArg v3, int v3const)
1533{
1534    int cc;
1535    if (HAVE_FACILITY(LOAD_ON_COND)) {
1536        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1537        if (v3const) {
1538            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
1539        } else {
1540            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
1541        }
1542    } else {
1543        c = tcg_invert_cond(c);
1544        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1545
1546        /* Emit: if (cc) goto over; dest = r3; over:  */
1547        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1548        tcg_out_insn(s, RRE, LGR, dest, v3);
1549    }
1550}
1551
1552static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1553                     TCGArg a2, int a2const)
1554{
1555    /* Since this sets both R and R+1, we have no choice but to store the
1556       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1557    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1558    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1559
1560    if (a2const && a2 == 64) {
1561        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1562    } else {
1563        if (a2const) {
1564            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
1565        } else {
1566            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
1567        }
1568        if (HAVE_FACILITY(LOAD_ON_COND)) {
1569            /* Emit: if (one bit found) dest = r0.  */
1570            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
1571        } else {
1572            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
1573            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
1574            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
1575        }
1576    }
1577}
1578
1579static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1580                         int ofs, int len, int z)
1581{
1582    int lsb = (63 - ofs);
1583    int msb = lsb - (len - 1);
1584    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1585}
1586
1587static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1588                         int ofs, int len)
1589{
1590    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1591}
1592
1593static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1594{
1595    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1596    if (off == (int16_t)off) {
1597        tcg_out_insn(s, RI, BRC, cc, off);
1598    } else if (off == (int32_t)off) {
1599        tcg_out_insn(s, RIL, BRCL, cc, off);
1600    } else {
1601        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1602        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1603    }
1604}
1605
1606static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1607{
1608    if (l->has_value) {
1609        tgen_gotoi(s, cc, l->u.value_ptr);
1610    } else if (USE_LONG_BRANCHES) {
1611        tcg_out16(s, RIL_BRCL | (cc << 4));
1612        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
1613        s->code_ptr += 2;
1614    } else {
1615        tcg_out16(s, RI_BRC | (cc << 4));
1616        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1617        s->code_ptr += 1;
1618    }
1619}
1620
1621static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1622                                TCGReg r1, TCGReg r2, TCGLabel *l)
1623{
1624    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1625    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1626    tcg_out16(s, 0);
1627    tcg_out16(s, cc << 12 | (opc & 0xff));
1628}
1629
1630static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1631                                    TCGReg r1, int i2, TCGLabel *l)
1632{
1633    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1634    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1635    tcg_out16(s, 0);
1636    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1637}
1638
1639static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1640                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1641{
1642    int cc;
1643
1644    if (HAVE_FACILITY(GEN_INST_EXT)) {
1645        bool is_unsigned = is_unsigned_cond(c);
1646        bool in_range;
1647        S390Opcode opc;
1648
1649        cc = tcg_cond_to_s390_cond[c];
1650
1651        if (!c2const) {
1652            opc = (type == TCG_TYPE_I32
1653                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1654                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1655            tgen_compare_branch(s, opc, cc, r1, c2, l);
1656            return;
1657        }
1658
1659        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1660           If the immediate we've been given does not fit that range, we'll
1661           fall back to separate compare and branch instructions using the
1662           larger comparison range afforded by COMPARE IMMEDIATE.  */
1663        if (type == TCG_TYPE_I32) {
1664            if (is_unsigned) {
1665                opc = RIE_CLIJ;
1666                in_range = (uint32_t)c2 == (uint8_t)c2;
1667            } else {
1668                opc = RIE_CIJ;
1669                in_range = (int32_t)c2 == (int8_t)c2;
1670            }
1671        } else {
1672            if (is_unsigned) {
1673                opc = RIE_CLGIJ;
1674                in_range = (uint64_t)c2 == (uint8_t)c2;
1675            } else {
1676                opc = RIE_CGIJ;
1677                in_range = (int64_t)c2 == (int8_t)c2;
1678            }
1679        }
1680        if (in_range) {
1681            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1682            return;
1683        }
1684    }
1685
1686    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1687    tgen_branch(s, cc, l);
1688}
1689
1690static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
1691{
1692    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1693    if (off == (int32_t)off) {
1694        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1695    } else {
1696        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1697        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1698    }
1699}
1700
1701static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1702                                   TCGReg base, TCGReg index, int disp)
1703{
1704    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1705    case MO_UB:
1706        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1707        break;
1708    case MO_SB:
1709        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1710        break;
1711
1712    case MO_UW | MO_BSWAP:
1713        /* swapped unsigned halfword load with upper bits zeroed */
1714        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1715        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1716        break;
1717    case MO_UW:
1718        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1719        break;
1720
1721    case MO_SW | MO_BSWAP:
1722        /* swapped sign-extended halfword load */
1723        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1724        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1725        break;
1726    case MO_SW:
1727        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1728        break;
1729
1730    case MO_UL | MO_BSWAP:
1731        /* swapped unsigned int load with upper bits zeroed */
1732        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1733        tgen_ext32u(s, data, data);
1734        break;
1735    case MO_UL:
1736        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1737        break;
1738
1739    case MO_SL | MO_BSWAP:
1740        /* swapped sign-extended int load */
1741        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1742        tgen_ext32s(s, data, data);
1743        break;
1744    case MO_SL:
1745        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1746        break;
1747
1748    case MO_Q | MO_BSWAP:
1749        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1750        break;
1751    case MO_Q:
1752        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1753        break;
1754
1755    default:
1756        tcg_abort();
1757    }
1758}
1759
1760static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1761                                   TCGReg base, TCGReg index, int disp)
1762{
1763    switch (opc & (MO_SIZE | MO_BSWAP)) {
1764    case MO_UB:
1765        if (disp >= 0 && disp < 0x1000) {
1766            tcg_out_insn(s, RX, STC, data, base, index, disp);
1767        } else {
1768            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1769        }
1770        break;
1771
1772    case MO_UW | MO_BSWAP:
1773        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1774        break;
1775    case MO_UW:
1776        if (disp >= 0 && disp < 0x1000) {
1777            tcg_out_insn(s, RX, STH, data, base, index, disp);
1778        } else {
1779            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1780        }
1781        break;
1782
1783    case MO_UL | MO_BSWAP:
1784        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1785        break;
1786    case MO_UL:
1787        if (disp >= 0 && disp < 0x1000) {
1788            tcg_out_insn(s, RX, ST, data, base, index, disp);
1789        } else {
1790            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1791        }
1792        break;
1793
1794    case MO_Q | MO_BSWAP:
1795        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1796        break;
1797    case MO_Q:
1798        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1799        break;
1800
1801    default:
1802        tcg_abort();
1803    }
1804}
1805
1806#if defined(CONFIG_SOFTMMU)
1807#include "../tcg-ldst.c.inc"
1808
1809/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1810QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1811QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1812
1813/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1814   addend into R2.  Returns a register with the santitized guest address.  */
1815static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1816                               int mem_index, bool is_ld)
1817{
1818    unsigned s_bits = opc & MO_SIZE;
1819    unsigned a_bits = get_alignment_bits(opc);
1820    unsigned s_mask = (1 << s_bits) - 1;
1821    unsigned a_mask = (1 << a_bits) - 1;
1822    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1823    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1824    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1825    int ofs, a_off;
1826    uint64_t tlb_mask;
1827
1828    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1829                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1830    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1831    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1832
1833    /* For aligned accesses, we check the first byte and include the alignment
1834       bits within the address.  For unaligned access, we check that we don't
1835       cross pages using the address of the last byte of the access.  */
1836    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1837    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1838    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1839        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1840    } else {
1841        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1842        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1843    }
1844
1845    if (is_ld) {
1846        ofs = offsetof(CPUTLBEntry, addr_read);
1847    } else {
1848        ofs = offsetof(CPUTLBEntry, addr_write);
1849    }
1850    if (TARGET_LONG_BITS == 32) {
1851        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1852    } else {
1853        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1854    }
1855
1856    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1857                 offsetof(CPUTLBEntry, addend));
1858
1859    if (TARGET_LONG_BITS == 32) {
1860        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1861        return TCG_REG_R3;
1862    }
1863    return addr_reg;
1864}
1865
1866static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1867                                TCGReg data, TCGReg addr,
1868                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1869{
1870    TCGLabelQemuLdst *label = new_ldst_label(s);
1871
1872    label->is_ld = is_ld;
1873    label->oi = oi;
1874    label->datalo_reg = data;
1875    label->addrlo_reg = addr;
1876    label->raddr = tcg_splitwx_to_rx(raddr);
1877    label->label_ptr[0] = label_ptr;
1878}
1879
1880static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1881{
1882    TCGReg addr_reg = lb->addrlo_reg;
1883    TCGReg data_reg = lb->datalo_reg;
1884    MemOpIdx oi = lb->oi;
1885    MemOp opc = get_memop(oi);
1886
1887    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1888                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1889        return false;
1890    }
1891
1892    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1893    if (TARGET_LONG_BITS == 64) {
1894        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1895    }
1896    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1897    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1898    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1899    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1900
1901    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1902    return true;
1903}
1904
1905static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1906{
1907    TCGReg addr_reg = lb->addrlo_reg;
1908    TCGReg data_reg = lb->datalo_reg;
1909    MemOpIdx oi = lb->oi;
1910    MemOp opc = get_memop(oi);
1911
1912    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1913                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1914        return false;
1915    }
1916
1917    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1918    if (TARGET_LONG_BITS == 64) {
1919        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1920    }
1921    switch (opc & MO_SIZE) {
1922    case MO_UB:
1923        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1924        break;
1925    case MO_UW:
1926        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1927        break;
1928    case MO_UL:
1929        tgen_ext32u(s, TCG_REG_R4, data_reg);
1930        break;
1931    case MO_Q:
1932        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1933        break;
1934    default:
1935        tcg_abort();
1936    }
1937    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1938    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1939    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1940
1941    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1942    return true;
1943}
1944#else
1945static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1946                                  TCGReg *index_reg, tcg_target_long *disp)
1947{
1948    if (TARGET_LONG_BITS == 32) {
1949        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1950        *addr_reg = TCG_TMP0;
1951    }
1952    if (guest_base < 0x80000) {
1953        *index_reg = TCG_REG_NONE;
1954        *disp = guest_base;
1955    } else {
1956        *index_reg = TCG_GUEST_BASE_REG;
1957        *disp = 0;
1958    }
1959}
1960#endif /* CONFIG_SOFTMMU */
1961
1962static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1963                            MemOpIdx oi)
1964{
1965    MemOp opc = get_memop(oi);
1966#ifdef CONFIG_SOFTMMU
1967    unsigned mem_index = get_mmuidx(oi);
1968    tcg_insn_unit *label_ptr;
1969    TCGReg base_reg;
1970
1971    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1972
1973    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1974    label_ptr = s->code_ptr;
1975    s->code_ptr += 1;
1976
1977    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1978
1979    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1980#else
1981    TCGReg index_reg;
1982    tcg_target_long disp;
1983
1984    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1985    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1986#endif
1987}
1988
1989static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1990                            MemOpIdx oi)
1991{
1992    MemOp opc = get_memop(oi);
1993#ifdef CONFIG_SOFTMMU
1994    unsigned mem_index = get_mmuidx(oi);
1995    tcg_insn_unit *label_ptr;
1996    TCGReg base_reg;
1997
1998    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1999
2000    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2001    label_ptr = s->code_ptr;
2002    s->code_ptr += 1;
2003
2004    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2005
2006    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2007#else
2008    TCGReg index_reg;
2009    tcg_target_long disp;
2010
2011    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2012    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2013#endif
2014}
2015
/*
 * Expand to the two case labels INDEX_op_<opname>_i32 and
 * INDEX_op_<opname>_i64, so that a single switch arm can serve both
 * widths of an opcode.  Written as "OP_32_64(ld8u):" -- the trailing
 * colon of the second label is supplied at the point of use.
 */
#define OP_32_64(opname)                          \
        case glue(glue(INDEX_op_, opname), _i32): \
        case glue(glue(INDEX_op_, opname), _i64)
2019
/*
 * Emit host instructions for a single TCG opcode.
 *
 * @s:          code generation context the instructions are appended to
 * @opc:        the TCG opcode to translate
 * @args:       the opcode's operands; their meaning depends on @opc
 * @const_args: per-operand flags; where an operand may be an immediate,
 *              const_args[i] is tested to choose between the immediate and
 *              the register encoding of args[i]
 *
 * Several cases share code through labels inside the switch (do_addi_32,
 * do_shift32, do_addi_64, do_shift64); those labels are entered both by
 * falling into them and by goto from a sibling case.
 *
 * mov_i32, mov_i64 and call are emitted elsewhere (see the comments on
 * their case labels); they, and any opcode without a case here, reach
 * tcg_abort().
 */
2020static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2021                              const TCGArg args[TCG_MAX_OP_ARGS],
2022                              const int const_args[TCG_MAX_OP_ARGS])
2023{
2024    S390Opcode op, op2;
2025    TCGArg a0, a1, a2;
2026
2027    switch (opc) {
2028    case INDEX_op_exit_tb:
2029        /* Reuse the zeroing that exists for goto_ptr.  */
2030        a0 = args[0];
2031        if (a0 == 0) {
2032            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2033        } else {
2034            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2035            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2036        }
2037        break;
2038
2039    case INDEX_op_goto_tb:
2040        a0 = args[0];
2041        if (s->tb_jmp_insn_offset) {
2042            /*
2043             * branch displacement must be aligned for atomic patching;
2044             * see if we need to add extra nop before branch
2045             */
2046            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2047                tcg_out16(s, NOP);
2048            }
2049            tcg_debug_assert(!USE_REG_TB);
            /*
             * Only the opcode halfword of the BRCL is written.  The offset
             * of its displacement field is recorded in tb_jmp_insn_offset[]
             * and the field itself is stepped over without being filled in.
             */
2050            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2051            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2052            s->code_ptr += 2;
2053        } else {
2054            /* load address stored at s->tb_jmp_target_addr + a0 */
2055            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2056                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2057            /* and go there */
2058            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2059        }
2060        set_jmp_reset_offset(s, a0);
2061
2062        /* For the unlinked path of goto_tb, we need to reset
2063           TCG_REG_TB to the beginning of this TB.  */
2064        if (USE_REG_TB) {
2065            int ofs = -tcg_current_code_size(s);
2066            /* All TB are restricted to 64KiB by unwind info. */
2067            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2068            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2069                         TCG_REG_TB, TCG_REG_NONE, ofs);
2070        }
2071        break;
2072
2073    case INDEX_op_goto_ptr:
2074        a0 = args[0];
2075        if (USE_REG_TB) {
2076            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2077        }
2078        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2079        break;
2080
2081    OP_32_64(ld8u):
2082        /* ??? LLC (RXY format) is only present with the extended-immediate
2083           facility, whereas LLGC is always present.  */
2084        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2085        break;
2086
2087    OP_32_64(ld8s):
2088        /* ??? LB is no smaller than LGB, so no point to using it.  */
2089        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2090        break;
2091
2092    OP_32_64(ld16u):
2093        /* ??? LLH (RXY format) is only present with the extended-immediate
2094           facility, whereas LLGH is always present.  */
2095        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2096        break;
2097
2098    case INDEX_op_ld16s_i32:
2099        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2100        break;
2101
2102    case INDEX_op_ld_i32:
2103        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2104        break;
2105
2106    OP_32_64(st8):
2107        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2108                    TCG_REG_NONE, args[2]);
2109        break;
2110
2111    OP_32_64(st16):
2112        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2113                    TCG_REG_NONE, args[2]);
2114        break;
2115
2116    case INDEX_op_st_i32:
2117        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2118        break;
2119
2120    case INDEX_op_add_i32:
2121        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2122        if (const_args[2]) {
        /* Also entered from sub_i32, with the constant already negated.  */
2123        do_addi_32:
2124            if (a0 == a1) {
2125                if (a2 == (int16_t)a2) {
2126                    tcg_out_insn(s, RI, AHI, a0, a2);
2127                    break;
2128                }
2129                if (HAVE_FACILITY(EXT_IMM)) {
2130                    tcg_out_insn(s, RIL, AFI, a0, a2);
2131                    break;
2132                }
2133            }
2134            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2135        } else if (a0 == a1) {
2136            tcg_out_insn(s, RR, AR, a0, a2);
2137        } else {
2138            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2139        }
2140        break;
2141    case INDEX_op_sub_i32:
2142        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2143        if (const_args[2]) {
2144            a2 = -a2;
2145            goto do_addi_32;
2146        } else if (a0 == a1) {
2147            tcg_out_insn(s, RR, SR, a0, a2);
2148        } else {
2149            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2150        }
2151        break;
2152
2153    case INDEX_op_and_i32:
2154        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2155        if (const_args[2]) {
2156            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2157            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2158        } else if (a0 == a1) {
2159            tcg_out_insn(s, RR, NR, a0, a2);
2160        } else {
2161            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2162        }
2163        break;
2164    case INDEX_op_or_i32:
2165        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2166        if (const_args[2]) {
2167            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2168            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2169        } else if (a0 == a1) {
2170            tcg_out_insn(s, RR, OR, a0, a2);
2171        } else {
2172            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2173        }
2174        break;
2175    case INDEX_op_xor_i32:
2176        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2177        if (const_args[2]) {
2178            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2179            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2180        } else if (a0 == a1) {
2181            tcg_out_insn(s, RR, XR, args[0], args[2]);
2182        } else {
2183            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2184        }
2185        break;
2186
2187    case INDEX_op_neg_i32:
2188        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2189        break;
2190
2191    case INDEX_op_mul_i32:
2192        if (const_args[2]) {
2193            if ((int32_t)args[2] == (int16_t)args[2]) {
2194                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2195            } else {
2196                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2197            }
2198        } else {
2199            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2200        }
2201        break;
2202
        /*
         * NOTE(review): for the div2/divu2 (and mulu2) cases only one
         * operand is taken from args[]; the other register is fixed at R2,
         * presumably guaranteed by operand constraints defined elsewhere.
         */
2203    case INDEX_op_div2_i32:
2204        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2205        break;
2206    case INDEX_op_divu2_i32:
2207        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2208        break;
2209
2210    case INDEX_op_shl_i32:
2211        op = RS_SLL;
2212        op2 = RSY_SLLK;
    /*
     * Shared by shl/shr/sar_i32: "op" is the form used when destination and
     * source are the same register, "op2" the form with a separate source.
     */
2213    do_shift32:
2214        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2215        if (a0 == a1) {
2216            if (const_args[2]) {
2217                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2218            } else {
2219                tcg_out_sh32(s, op, a0, a2, 0);
2220            }
2221        } else {
2222            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2223            if (const_args[2]) {
2224                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2225            } else {
2226                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2227            }
2228        }
2229        break;
2230    case INDEX_op_shr_i32:
2231        op = RS_SRL;
2232        op2 = RSY_SRLK;
2233        goto do_shift32;
2234    case INDEX_op_sar_i32:
2235        op = RS_SRA;
2236        op2 = RSY_SRAK;
2237        goto do_shift32;
2238
2239    case INDEX_op_rotl_i32:
2240        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2241        if (const_args[2]) {
2242            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2243        } else {
2244            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2245        }
2246        break;
2247    case INDEX_op_rotr_i32:
        /*
         * Rotate right is emitted as a rotate left: by (32 - n) & 31 for a
         * constant count, or by the negated count (in TCG_TMP0) otherwise.
         */
2248        if (const_args[2]) {
2249            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2250                         TCG_REG_NONE, (32 - args[2]) & 31);
2251        } else {
2252            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2253            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2254        }
2255        break;
2256
2257    case INDEX_op_ext8s_i32:
2258        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2259        break;
2260    case INDEX_op_ext16s_i32:
2261        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2262        break;
2263    case INDEX_op_ext8u_i32:
2264        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2265        break;
2266    case INDEX_op_ext16u_i32:
2267        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2268        break;
2269
2270    case INDEX_op_bswap16_i32:
        /*
         * Byte-reverse the whole register, then shift the swapped halfword
         * down from the top: arithmetically when TCG_BSWAP_OS is set in the
         * flags operand, logically otherwise.
         */
2271        a0 = args[0], a1 = args[1], a2 = args[2];
2272        tcg_out_insn(s, RRE, LRVR, a0, a1);
2273        if (a2 & TCG_BSWAP_OS) {
2274            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2275        } else {
2276            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2277        }
2278        break;
2279    case INDEX_op_bswap16_i64:
2280        a0 = args[0], a1 = args[1], a2 = args[2];
2281        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2282        if (a2 & TCG_BSWAP_OS) {
2283            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2284        } else {
2285            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2286        }
2287        break;
2288
2289    case INDEX_op_bswap32_i32:
2290        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2291        break;
2292    case INDEX_op_bswap32_i64:
2293        a0 = args[0], a1 = args[1], a2 = args[2];
2294        tcg_out_insn(s, RRE, LRVR, a0, a1);
2295        if (a2 & TCG_BSWAP_OS) {
2296            tgen_ext32s(s, a0, a0);
2297        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2298            tgen_ext32u(s, a0, a0);
2299        }
2300        break;
2301
2302    case INDEX_op_add2_i32:
        /* Low half first, then the high half with the carry.  */
2303        if (const_args[4]) {
2304            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2305        } else {
2306            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2307        }
2308        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2309        break;
2310    case INDEX_op_sub2_i32:
2311        if (const_args[4]) {
2312            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2313        } else {
2314            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2315        }
2316        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2317        break;
2318
2319    case INDEX_op_br:
2320        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2321        break;
2322
2323    case INDEX_op_brcond_i32:
2324        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2325                    args[1], const_args[1], arg_label(args[3]));
2326        break;
2327    case INDEX_op_setcond_i32:
2328        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2329                     args[2], const_args[2]);
2330        break;
2331    case INDEX_op_movcond_i32:
2332        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2333                     args[2], const_args[2], args[3], const_args[3]);
2334        break;
2335
2336    case INDEX_op_qemu_ld_i32:
2337        /* ??? Technically we can use a non-extending instruction.  */
2338    case INDEX_op_qemu_ld_i64:
2339        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2340        break;
2341    case INDEX_op_qemu_st_i32:
2342    case INDEX_op_qemu_st_i64:
2343        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2344        break;
2345
2346    case INDEX_op_ld16s_i64:
2347        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2348        break;
2349    case INDEX_op_ld32u_i64:
2350        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2351        break;
2352    case INDEX_op_ld32s_i64:
2353        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2354        break;
2355    case INDEX_op_ld_i64:
2356        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2357        break;
2358
2359    case INDEX_op_st32_i64:
2360        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2361        break;
2362    case INDEX_op_st_i64:
2363        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2364        break;
2365
2366    case INDEX_op_add_i64:
2367        a0 = args[0], a1 = args[1], a2 = args[2];
2368        if (const_args[2]) {
        /* Also entered from sub_i64, with the constant already negated.  */
2369        do_addi_64:
2370            if (a0 == a1) {
2371                if (a2 == (int16_t)a2) {
2372                    tcg_out_insn(s, RI, AGHI, a0, a2);
2373                    break;
2374                }
2375                if (HAVE_FACILITY(EXT_IMM)) {
2376                    if (a2 == (int32_t)a2) {
2377                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2378                        break;
2379                    } else if (a2 == (uint32_t)a2) {
2380                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2381                        break;
2382                    } else if (-a2 == (uint32_t)-a2) {
2383                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2384                        break;
2385                    }
2386                }
2387            }
2388            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2389        } else if (a0 == a1) {
2390            tcg_out_insn(s, RRE, AGR, a0, a2);
2391        } else {
2392            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2393        }
2394        break;
2395    case INDEX_op_sub_i64:
2396        a0 = args[0], a1 = args[1], a2 = args[2];
2397        if (const_args[2]) {
2398            a2 = -a2;
2399            goto do_addi_64;
2400        } else if (a0 == a1) {
2401            tcg_out_insn(s, RRE, SGR, a0, a2);
2402        } else {
2403            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2404        }
2405        break;
2406
2407    case INDEX_op_and_i64:
2408        a0 = args[0], a1 = args[1], a2 = args[2];
2409        if (const_args[2]) {
2410            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2411            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2412        } else if (a0 == a1) {
2413            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
2414        } else {
2415            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2416        }
2417        break;
2418    case INDEX_op_or_i64:
2419        a0 = args[0], a1 = args[1], a2 = args[2];
2420        if (const_args[2]) {
2421            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2422            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2423        } else if (a0 == a1) {
2424            tcg_out_insn(s, RRE, OGR, a0, a2);
2425        } else {
2426            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2427        }
2428        break;
2429    case INDEX_op_xor_i64:
2430        a0 = args[0], a1 = args[1], a2 = args[2];
2431        if (const_args[2]) {
2432            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2433            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2434        } else if (a0 == a1) {
2435            tcg_out_insn(s, RRE, XGR, a0, a2);
2436        } else {
2437            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2438        }
2439        break;
2440
2441    case INDEX_op_neg_i64:
2442        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2443        break;
2444    case INDEX_op_bswap64_i64:
2445        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2446        break;
2447
2448    case INDEX_op_mul_i64:
2449        if (const_args[2]) {
2450            if (args[2] == (int16_t)args[2]) {
2451                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2452            } else {
2453                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2454            }
2455        } else {
2456            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2457        }
2458        break;
2459
2460    case INDEX_op_div2_i64:
2461        /* ??? We get an unnecessary sign-extension of the dividend
2462           into R3 with this definition, but as we do in fact always
2463           produce both quotient and remainder using INDEX_op_div_i64
2464           instead requires jumping through even more hoops.  */
2465        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2466        break;
2467    case INDEX_op_divu2_i64:
2468        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2469        break;
2470    case INDEX_op_mulu2_i64:
2471        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2472        break;
2473
2474    case INDEX_op_shl_i64:
2475        op = RSY_SLLG;
    /* Shared by shl/shr/sar_i64; "op" selects the shift instruction.  */
2476    do_shift64:
2477        if (const_args[2]) {
2478            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2479        } else {
2480            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2481        }
2482        break;
2483    case INDEX_op_shr_i64:
2484        op = RSY_SRLG;
2485        goto do_shift64;
2486    case INDEX_op_sar_i64:
2487        op = RSY_SRAG;
2488        goto do_shift64;
2489
2490    case INDEX_op_rotl_i64:
2491        if (const_args[2]) {
2492            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2493                         TCG_REG_NONE, args[2]);
2494        } else {
2495            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2496        }
2497        break;
2498    case INDEX_op_rotr_i64:
2499        if (const_args[2]) {
2500            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2501                         TCG_REG_NONE, (64 - args[2]) & 63);
2502        } else {
2503            /* We can use the smaller 32-bit negate because only the
2504               low 6 bits are examined for the rotate.  */
2505            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2506            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2507        }
2508        break;
2509
2510    case INDEX_op_ext8s_i64:
2511        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2512        break;
2513    case INDEX_op_ext16s_i64:
2514        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2515        break;
2516    case INDEX_op_ext_i32_i64:
2517    case INDEX_op_ext32s_i64:
2518        tgen_ext32s(s, args[0], args[1]);
2519        break;
2520    case INDEX_op_ext8u_i64:
2521        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2522        break;
2523    case INDEX_op_ext16u_i64:
2524        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2525        break;
2526    case INDEX_op_extu_i32_i64:
2527    case INDEX_op_ext32u_i64:
2528        tgen_ext32u(s, args[0], args[1]);
2529        break;
2530
2531    case INDEX_op_add2_i64:
        /*
         * A negative constant low half is added by subtracting its
         * negation (and vice versa in sub2_i64 below).
         */
2532        if (const_args[4]) {
2533            if ((int64_t)args[4] >= 0) {
2534                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2535            } else {
2536                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2537            }
2538        } else {
2539            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2540        }
2541        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2542        break;
2543    case INDEX_op_sub2_i64:
2544        if (const_args[4]) {
2545            if ((int64_t)args[4] >= 0) {
2546                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2547            } else {
2548                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2549            }
2550        } else {
2551            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2552        }
2553        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2554        break;
2555
2556    case INDEX_op_brcond_i64:
2557        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2558                    args[1], const_args[1], arg_label(args[3]));
2559        break;
2560    case INDEX_op_setcond_i64:
2561        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2562                     args[2], const_args[2]);
2563        break;
2564    case INDEX_op_movcond_i64:
2565        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2566                     args[2], const_args[2], args[3], const_args[3]);
2567        break;
2568
2569    OP_32_64(deposit):
2570        a0 = args[0], a1 = args[1], a2 = args[2];
2571        if (const_args[1]) {
2572            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2573        } else {
2574            /* Since we can't support "0Z" as a constraint, we allow a1 in
2575               any register.  Fix things up as if a matching constraint.  */
2576            if (a0 != a1) {
                /*
                 * NOTE(review): a comparison result is used as a TCGType;
                 * this relies on TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1 --
                 * confirm against the enum definition.
                 */
2577                TCGType type = (opc == INDEX_op_deposit_i64);
                /* Save a2 before the move below overwrites it via a0.  */
2578                if (a0 == a2) {
2579                    tcg_out_mov(s, type, TCG_TMP0, a2);
2580                    a2 = TCG_TMP0;
2581                }
2582                tcg_out_mov(s, type, a0, a1);
2583            }
2584            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2585        }
2586        break;
2587
2588    OP_32_64(extract):
2589        tgen_extract(s, args[0], args[1], args[2], args[3]);
2590        break;
2591
2592    case INDEX_op_clz_i64:
2593        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2594        break;
2595
2596    case INDEX_op_mb:
2597        /* The host memory model is quite strong, we simply need to
2598           serialize the instruction stream.  */
2599        if (args[0] & TCG_MO_ST_LD) {
2600            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2601        }
2602        break;
2603
2604    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2605    case INDEX_op_mov_i64:
2606    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2607    default:
2608        tcg_abort();
2609    }
2610}
2611
2612static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2613                            TCGReg dst, TCGReg src)
2614{
2615    if (is_general_reg(src)) {
2616        /* Replicate general register into two MO_64. */
2617        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2618        if (vece == MO_64) {
2619            return true;
2620        }
2621    }
2622
2623    /*
2624     * Recall that the "standard" integer, within a vector, is the
2625     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2626     */
2627    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2628    return true;
2629}
2630
2631static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2632                             TCGReg dst, TCGReg base, intptr_t offset)
2633{
2634    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2635    return true;
2636}
2637
2638static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2639                             TCGReg dst, int64_t val)
2640{
2641    int i, mask, msb, lsb;
2642
2643    /* Look for int16_t elements.  */
2644    if (vece <= MO_16 ||
2645        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2646        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2647        return;
2648    }
2649
2650    /* Look for bit masks.  */
2651    if (vece == MO_32) {
2652        if (risbg_mask((int32_t)val)) {
2653            /* Handle wraparound by swapping msb and lsb.  */
2654            if ((val & 0x80000001u) == 0x80000001u) {
2655                msb = 32 - ctz32(~val);
2656                lsb = clz32(~val) - 1;
2657            } else {
2658                msb = clz32(val);
2659                lsb = 31 - ctz32(val);
2660            }
2661            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
2662            return;
2663        }
2664    } else {
2665        if (risbg_mask(val)) {
2666            /* Handle wraparound by swapping msb and lsb.  */
2667            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2668                /* Handle wraparound by swapping msb and lsb.  */
2669                msb = 64 - ctz64(~val);
2670                lsb = clz64(~val) - 1;
2671            } else {
2672                msb = clz64(val);
2673                lsb = 63 - ctz64(val);
2674            }
2675            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
2676            return;
2677        }
2678    }
2679
2680    /* Look for all bytes 0x00 or 0xff.  */
2681    for (i = mask = 0; i < 8; i++) {
2682        uint8_t byte = val >> (i * 8);
2683        if (byte == 0xff) {
2684            mask |= 1 << i;
2685        } else if (byte != 0) {
2686            break;
2687        }
2688    }
2689    if (i == 8) {
2690        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2691        return;
2692    }
2693
2694    /* Otherwise, stuff it in the constant pool.  */
2695    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2696    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2697    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2698}
2699
2700static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2701                           unsigned vecl, unsigned vece,
2702                           const TCGArg *args, const int *const_args)
2703{
2704    TCGType type = vecl + TCG_TYPE_V64;
2705    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2706
2707    switch (opc) {
2708    case INDEX_op_ld_vec:
2709        tcg_out_ld(s, type, a0, a1, a2);
2710        break;
2711    case INDEX_op_st_vec:
2712        tcg_out_st(s, type, a0, a1, a2);
2713        break;
2714    case INDEX_op_dupm_vec:
2715        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2716        break;
2717
2718    case INDEX_op_abs_vec:
2719        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2720        break;
2721    case INDEX_op_neg_vec:
2722        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2723        break;
2724    case INDEX_op_not_vec:
2725        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2726        break;
2727
2728    case INDEX_op_add_vec:
2729        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2730        break;
2731    case INDEX_op_sub_vec:
2732        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2733        break;
2734    case INDEX_op_and_vec:
2735        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2736        break;
2737    case INDEX_op_andc_vec:
2738        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2739        break;
2740    case INDEX_op_mul_vec:
2741        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2742        break;
2743    case INDEX_op_or_vec:
2744        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2745        break;
2746    case INDEX_op_orc_vec:
2747        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2748        break;
2749    case INDEX_op_xor_vec:
2750        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2751        break;
2752
2753    case INDEX_op_shli_vec:
2754        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2755        break;
2756    case INDEX_op_shri_vec:
2757        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2758        break;
2759    case INDEX_op_sari_vec:
2760        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2761        break;
2762    case INDEX_op_rotli_vec:
2763        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2764        break;
2765    case INDEX_op_shls_vec:
2766        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2767        break;
2768    case INDEX_op_shrs_vec:
2769        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2770        break;
2771    case INDEX_op_sars_vec:
2772        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2773        break;
2774    case INDEX_op_rotls_vec:
2775        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2776        break;
2777    case INDEX_op_shlv_vec:
2778        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2779        break;
2780    case INDEX_op_shrv_vec:
2781        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2782        break;
2783    case INDEX_op_sarv_vec:
2784        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2785        break;
2786    case INDEX_op_rotlv_vec:
2787        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2788        break;
2789
2790    case INDEX_op_smin_vec:
2791        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2792        break;
2793    case INDEX_op_smax_vec:
2794        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2795        break;
2796    case INDEX_op_umin_vec:
2797        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2798        break;
2799    case INDEX_op_umax_vec:
2800        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2801        break;
2802
2803    case INDEX_op_bitsel_vec:
2804        tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
2805        break;
2806
2807    case INDEX_op_cmp_vec:
2808        switch ((TCGCond)args[3]) {
2809        case TCG_COND_EQ:
2810            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2811            break;
2812        case TCG_COND_GT:
2813            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2814            break;
2815        case TCG_COND_GTU:
2816            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2817            break;
2818        default:
2819            g_assert_not_reached();
2820        }
2821        break;
2822
2823    case INDEX_op_s390_vuph_vec:
2824        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2825        break;
2826    case INDEX_op_s390_vupl_vec:
2827        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2828        break;
2829    case INDEX_op_s390_vpks_vec:
2830        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2831        break;
2832
2833    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2834    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2835    default:
2836        g_assert_not_reached();
2837    }
2838}
2839
2840int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2841{
2842    switch (opc) {
2843    case INDEX_op_abs_vec:
2844    case INDEX_op_add_vec:
2845    case INDEX_op_and_vec:
2846    case INDEX_op_andc_vec:
2847    case INDEX_op_bitsel_vec:
2848    case INDEX_op_neg_vec:
2849    case INDEX_op_not_vec:
2850    case INDEX_op_or_vec:
2851    case INDEX_op_orc_vec:
2852    case INDEX_op_rotli_vec:
2853    case INDEX_op_rotls_vec:
2854    case INDEX_op_rotlv_vec:
2855    case INDEX_op_sari_vec:
2856    case INDEX_op_sars_vec:
2857    case INDEX_op_sarv_vec:
2858    case INDEX_op_shli_vec:
2859    case INDEX_op_shls_vec:
2860    case INDEX_op_shlv_vec:
2861    case INDEX_op_shri_vec:
2862    case INDEX_op_shrs_vec:
2863    case INDEX_op_shrv_vec:
2864    case INDEX_op_smax_vec:
2865    case INDEX_op_smin_vec:
2866    case INDEX_op_sub_vec:
2867    case INDEX_op_umax_vec:
2868    case INDEX_op_umin_vec:
2869    case INDEX_op_xor_vec:
2870        return 1;
2871    case INDEX_op_cmp_vec:
2872    case INDEX_op_cmpsel_vec:
2873    case INDEX_op_rotrv_vec:
2874        return -1;
2875    case INDEX_op_mul_vec:
2876        return vece < MO_64;
2877    case INDEX_op_ssadd_vec:
2878    case INDEX_op_sssub_vec:
2879        return vece < MO_64 ? -1 : 0;
2880    default:
2881        return 0;
2882    }
2883}
2884
2885static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2886                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2887{
2888    bool need_swap = false, need_inv = false;
2889
2890    switch (cond) {
2891    case TCG_COND_EQ:
2892    case TCG_COND_GT:
2893    case TCG_COND_GTU:
2894        break;
2895    case TCG_COND_NE:
2896    case TCG_COND_LE:
2897    case TCG_COND_LEU:
2898        need_inv = true;
2899        break;
2900    case TCG_COND_LT:
2901    case TCG_COND_LTU:
2902        need_swap = true;
2903        break;
2904    case TCG_COND_GE:
2905    case TCG_COND_GEU:
2906        need_swap = need_inv = true;
2907        break;
2908    default:
2909        g_assert_not_reached();
2910    }
2911
2912    if (need_inv) {
2913        cond = tcg_invert_cond(cond);
2914    }
2915    if (need_swap) {
2916        TCGv_vec t1;
2917        t1 = v1, v1 = v2, v2 = t1;
2918        cond = tcg_swap_cond(cond);
2919    }
2920
2921    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2922              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2923
2924    return need_inv;
2925}
2926
2927static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2928                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2929{
2930    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2931        tcg_gen_not_vec(vece, v0, v0);
2932    }
2933}
2934
2935static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2936                              TCGv_vec c1, TCGv_vec c2,
2937                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2938{
2939    TCGv_vec t = tcg_temp_new_vec(type);
2940
2941    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2942        /* Invert the sense of the compare by swapping arguments.  */
2943        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2944    } else {
2945        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
2946    }
2947    tcg_temp_free_vec(t);
2948}
2949
2950static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
2951                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
2952{
2953    TCGv_vec h1 = tcg_temp_new_vec(type);
2954    TCGv_vec h2 = tcg_temp_new_vec(type);
2955    TCGv_vec l1 = tcg_temp_new_vec(type);
2956    TCGv_vec l2 = tcg_temp_new_vec(type);
2957
2958    tcg_debug_assert (vece < MO_64);
2959
2960    /* Unpack with sign-extension. */
2961    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2962              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
2963    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2964              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
2965
2966    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2967              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
2968    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2969              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
2970
2971    /* Arithmetic on a wider element size. */
2972    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
2973              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
2974    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
2975              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
2976
2977    /* Pack with saturation. */
2978    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
2979              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
2980
2981    tcg_temp_free_vec(h1);
2982    tcg_temp_free_vec(h2);
2983    tcg_temp_free_vec(l1);
2984    tcg_temp_free_vec(l2);
2985}
2986
2987void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2988                       TCGArg a0, ...)
2989{
2990    va_list va;
2991    TCGv_vec v0, v1, v2, v3, v4, t0;
2992
2993    va_start(va, a0);
2994    v0 = temp_tcgv_vec(arg_temp(a0));
2995    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2996    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2997
2998    switch (opc) {
2999    case INDEX_op_cmp_vec:
3000        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3001        break;
3002
3003    case INDEX_op_cmpsel_vec:
3004        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3005        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3006        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3007        break;
3008
3009    case INDEX_op_rotrv_vec:
3010        t0 = tcg_temp_new_vec(type);
3011        tcg_gen_neg_vec(vece, t0, v2);
3012        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3013        tcg_temp_free_vec(t0);
3014        break;
3015
3016    case INDEX_op_ssadd_vec:
3017        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3018        break;
3019    case INDEX_op_sssub_vec:
3020        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3021        break;
3022
3023    default:
3024        g_assert_not_reached();
3025    }
3026    va_end(va);
3027}
3028
3029static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3030{
3031    switch (op) {
3032    case INDEX_op_goto_ptr:
3033        return C_O0_I1(r);
3034
3035    case INDEX_op_ld8u_i32:
3036    case INDEX_op_ld8u_i64:
3037    case INDEX_op_ld8s_i32:
3038    case INDEX_op_ld8s_i64:
3039    case INDEX_op_ld16u_i32:
3040    case INDEX_op_ld16u_i64:
3041    case INDEX_op_ld16s_i32:
3042    case INDEX_op_ld16s_i64:
3043    case INDEX_op_ld_i32:
3044    case INDEX_op_ld32u_i64:
3045    case INDEX_op_ld32s_i64:
3046    case INDEX_op_ld_i64:
3047        return C_O1_I1(r, r);
3048
3049    case INDEX_op_st8_i32:
3050    case INDEX_op_st8_i64:
3051    case INDEX_op_st16_i32:
3052    case INDEX_op_st16_i64:
3053    case INDEX_op_st_i32:
3054    case INDEX_op_st32_i64:
3055    case INDEX_op_st_i64:
3056        return C_O0_I2(r, r);
3057
3058    case INDEX_op_add_i32:
3059    case INDEX_op_add_i64:
3060    case INDEX_op_shl_i64:
3061    case INDEX_op_shr_i64:
3062    case INDEX_op_sar_i64:
3063    case INDEX_op_rotl_i32:
3064    case INDEX_op_rotl_i64:
3065    case INDEX_op_rotr_i32:
3066    case INDEX_op_rotr_i64:
3067    case INDEX_op_clz_i64:
3068    case INDEX_op_setcond_i32:
3069    case INDEX_op_setcond_i64:
3070        return C_O1_I2(r, r, ri);
3071
3072    case INDEX_op_sub_i32:
3073    case INDEX_op_sub_i64:
3074    case INDEX_op_and_i32:
3075    case INDEX_op_and_i64:
3076    case INDEX_op_or_i32:
3077    case INDEX_op_or_i64:
3078    case INDEX_op_xor_i32:
3079    case INDEX_op_xor_i64:
3080        return (HAVE_FACILITY(DISTINCT_OPS)
3081                ? C_O1_I2(r, r, ri)
3082                : C_O1_I2(r, 0, ri));
3083
3084    case INDEX_op_mul_i32:
3085        /* If we have the general-instruction-extensions, then we have
3086           MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
3087           have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */
3088        return (HAVE_FACILITY(GEN_INST_EXT)
3089                ? C_O1_I2(r, 0, ri)
3090                : C_O1_I2(r, 0, rI));
3091
3092    case INDEX_op_mul_i64:
3093        return (HAVE_FACILITY(GEN_INST_EXT)
3094                ? C_O1_I2(r, 0, rJ)
3095                : C_O1_I2(r, 0, rI));
3096
3097    case INDEX_op_shl_i32:
3098    case INDEX_op_shr_i32:
3099    case INDEX_op_sar_i32:
3100        return (HAVE_FACILITY(DISTINCT_OPS)
3101                ? C_O1_I2(r, r, ri)
3102                : C_O1_I2(r, 0, ri));
3103
3104    case INDEX_op_brcond_i32:
3105    case INDEX_op_brcond_i64:
3106        return C_O0_I2(r, ri);
3107
3108    case INDEX_op_bswap16_i32:
3109    case INDEX_op_bswap16_i64:
3110    case INDEX_op_bswap32_i32:
3111    case INDEX_op_bswap32_i64:
3112    case INDEX_op_bswap64_i64:
3113    case INDEX_op_neg_i32:
3114    case INDEX_op_neg_i64:
3115    case INDEX_op_ext8s_i32:
3116    case INDEX_op_ext8s_i64:
3117    case INDEX_op_ext8u_i32:
3118    case INDEX_op_ext8u_i64:
3119    case INDEX_op_ext16s_i32:
3120    case INDEX_op_ext16s_i64:
3121    case INDEX_op_ext16u_i32:
3122    case INDEX_op_ext16u_i64:
3123    case INDEX_op_ext32s_i64:
3124    case INDEX_op_ext32u_i64:
3125    case INDEX_op_ext_i32_i64:
3126    case INDEX_op_extu_i32_i64:
3127    case INDEX_op_extract_i32:
3128    case INDEX_op_extract_i64:
3129        return C_O1_I1(r, r);
3130
3131    case INDEX_op_qemu_ld_i32:
3132    case INDEX_op_qemu_ld_i64:
3133        return C_O1_I1(r, L);
3134    case INDEX_op_qemu_st_i64:
3135    case INDEX_op_qemu_st_i32:
3136        return C_O0_I2(L, L);
3137
3138    case INDEX_op_deposit_i32:
3139    case INDEX_op_deposit_i64:
3140        return C_O1_I2(r, rZ, r);
3141
3142    case INDEX_op_movcond_i32:
3143    case INDEX_op_movcond_i64:
3144        return (HAVE_FACILITY(LOAD_ON_COND2)
3145                ? C_O1_I4(r, r, ri, rI, 0)
3146                : C_O1_I4(r, r, ri, r, 0));
3147
3148    case INDEX_op_div2_i32:
3149    case INDEX_op_div2_i64:
3150    case INDEX_op_divu2_i32:
3151    case INDEX_op_divu2_i64:
3152        return C_O2_I3(b, a, 0, 1, r);
3153
3154    case INDEX_op_mulu2_i64:
3155        return C_O2_I2(b, a, 0, r);
3156
3157    case INDEX_op_add2_i32:
3158    case INDEX_op_sub2_i32:
3159        return (HAVE_FACILITY(EXT_IMM)
3160                ? C_O2_I4(r, r, 0, 1, ri, r)
3161                : C_O2_I4(r, r, 0, 1, r, r));
3162
3163    case INDEX_op_add2_i64:
3164    case INDEX_op_sub2_i64:
3165        return (HAVE_FACILITY(EXT_IMM)
3166                ? C_O2_I4(r, r, 0, 1, rA, r)
3167                : C_O2_I4(r, r, 0, 1, r, r));
3168
3169    case INDEX_op_st_vec:
3170        return C_O0_I2(v, r);
3171    case INDEX_op_ld_vec:
3172    case INDEX_op_dupm_vec:
3173        return C_O1_I1(v, r);
3174    case INDEX_op_dup_vec:
3175        return C_O1_I1(v, vr);
3176    case INDEX_op_abs_vec:
3177    case INDEX_op_neg_vec:
3178    case INDEX_op_not_vec:
3179    case INDEX_op_rotli_vec:
3180    case INDEX_op_sari_vec:
3181    case INDEX_op_shli_vec:
3182    case INDEX_op_shri_vec:
3183    case INDEX_op_s390_vuph_vec:
3184    case INDEX_op_s390_vupl_vec:
3185        return C_O1_I1(v, v);
3186    case INDEX_op_add_vec:
3187    case INDEX_op_sub_vec:
3188    case INDEX_op_and_vec:
3189    case INDEX_op_andc_vec:
3190    case INDEX_op_or_vec:
3191    case INDEX_op_orc_vec:
3192    case INDEX_op_xor_vec:
3193    case INDEX_op_cmp_vec:
3194    case INDEX_op_mul_vec:
3195    case INDEX_op_rotlv_vec:
3196    case INDEX_op_rotrv_vec:
3197    case INDEX_op_shlv_vec:
3198    case INDEX_op_shrv_vec:
3199    case INDEX_op_sarv_vec:
3200    case INDEX_op_smax_vec:
3201    case INDEX_op_smin_vec:
3202    case INDEX_op_umax_vec:
3203    case INDEX_op_umin_vec:
3204    case INDEX_op_s390_vpks_vec:
3205        return C_O1_I2(v, v, v);
3206    case INDEX_op_rotls_vec:
3207    case INDEX_op_shls_vec:
3208    case INDEX_op_shrs_vec:
3209    case INDEX_op_sars_vec:
3210        return C_O1_I2(v, v, r);
3211    case INDEX_op_bitsel_vec:
3212        return C_O1_I3(v, v, v, v);
3213
3214    default:
3215        g_assert_not_reached();
3216    }
3217}
3218
3219/*
3220 * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
3221 * Some distros have fixed this up locally, others have not.
3222 */
3223#ifndef HWCAP_S390_VXRS
3224#define HWCAP_S390_VXRS 2048
3225#endif
3226
3227static void query_s390_facilities(void)
3228{
3229    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3230
3231    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3232       is present on all 64-bit systems, but let's check for it anyway.  */
3233    if (hwcap & HWCAP_S390_STFLE) {
3234        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3235        register void *r1 __asm__("1") = s390_facilities;
3236
3237        /* stfle 0(%r1) */
3238        asm volatile(".word 0xb2b0,0x1000"
3239                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3240    }
3241
3242    /*
3243     * Use of vector registers requires os support beyond the facility bit.
3244     * If the kernel does not advertise support, disable the facility bits.
3245     * There is nothing else we currently care about in the 3rd word, so
3246     * disable VECTOR with one store.
3247     */
3248    if (!(hwcap & HWCAP_S390_VXRS)) {
3249        s390_facilities[2] = 0;
3250    }
3251}
3252
3253static void tcg_target_init(TCGContext *s)
3254{
3255    query_s390_facilities();
3256
3257    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3258    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3259    if (HAVE_FACILITY(VECTOR)) {
3260        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3261        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3262    }
3263
3264    tcg_target_call_clobber_regs = 0;
3265    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3266    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3267    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3268    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3269    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3270    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3271    /* The r6 register is technically call-saved, but it's also a parameter
3272       register, so it can get killed by setup for the qemu_st helper.  */
3273    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3274    /* The return register can be considered call-clobbered.  */
3275    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3276
3277    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3278    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3279    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3280    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3281    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3282    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3283    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3284    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3285    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3286    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3287    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3288    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3289    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3290    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3291    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3292    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3293    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3294    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3295    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3296    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3297    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3298    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3299    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3300    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3301
3302    s->reserved_regs = 0;
3303    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3304    /* XXX many insns can't be used with R0, so we better avoid it for now */
3305    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3306    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3307    if (USE_REG_TB) {
3308        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
3309    }
3310}
3311
3312#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3313                           + TCG_STATIC_CALL_ARGS_SIZE           \
3314                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3315
3316static void tcg_target_qemu_prologue(TCGContext *s)
3317{
3318    /* stmg %r6,%r15,48(%r15) (save registers) */
3319    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3320
3321    /* aghi %r15,-frame_size */
3322    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3323
3324    tcg_set_frame(s, TCG_REG_CALL_STACK,
3325                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3326                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3327
3328#ifndef CONFIG_SOFTMMU
3329    if (guest_base >= 0x80000) {
3330        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
3331        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3332    }
3333#endif
3334
3335    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3336    if (USE_REG_TB) {
3337        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
3338                    tcg_target_call_iarg_regs[1]);
3339    }
3340
3341    /* br %r3 (go to TB) */
3342    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3343
3344    /*
3345     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3346     * and fall through to the rest of the epilogue.
3347     */
3348    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3349    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3350
3351    /* TB epilogue */
3352    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3353
3354    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3355    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3356                 FRAME_SIZE + 48);
3357
3358    /* br %r14 (return) */
3359    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3360}
3361
3362static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3363{
3364    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3365}
3366
/*
 * Unwind description for the generated code: the common header followed
 * by the raw call-frame instruction bytes filled in by debug_frame.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa with a 2-byte uleb128 offset */
    uint8_t fde_reg_ofs[18];    /* nine 2-byte DW_CFA_offset entries */
} DebugFrame;
3372
3373/* We're expecting a 2 byte uleb128 encoded value.  */
3374QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3375
3376#define ELF_HOST_MACHINE  EM_S390
3377
/*
 * Call-frame information describing the frame set up by the prologue:
 * the CFA is the stack register plus FRAME_SIZE, and %r6-%r14 are saved
 * at consecutive 8-byte slots starting at offset 48.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 8,                /* sleb128 8 */
    .h.cie.return_column = TCG_REG_R14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    /* Each operand is a slot index, scaled by data_align (8) to bytes. */
    .fde_reg_ofs = {
        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
    }
};
3406
3407void tcg_register_jit(const void *buf, size_t buf_size)
3408{
3409    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3410}
3411