/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* We only support generating code for 64-bit mode.  */
#if TCG_TARGET_REG_BITS != 64
#error "unsupported code generation mode"
#endif

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "elf.h"

#define TCG_CT_CONST_S16        (1 << 8)
#define TCG_CT_CONST_S32        (1 << 9)
#define TCG_CT_CONST_S33        (1 << 10)
#define TCG_CT_CONST_ZERO       (1 << 11)
#define TCG_CT_CONST_P32        (1 << 12)
#define TCG_CT_CONST_INV        (1 << 13)
#define TCG_CT_CONST_INVRISBG   (1 << 14)

#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif


/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif

/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.  */
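
/*
 * As an illustrative example of the split encoding (not used by the
 * backend itself): RIL_BRASL = 0xc005 is emitted by tcg_out_insn_RIL()
 * as the halfword 0xc005 | (r1 << 4), i.e. opcode byte 0xc0, the R1
 * field, and the 0x5 extension, followed by the 32-bit immediate.
 * For RXY_LG = 0xe304, tcg_out_insn_RXY() places the 0xe3 half in the
 * first halfword and the 0x04 half in the final byte, 32 bits later.
 */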
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    RIEf_RISBG   = 0xec55,

    RIEg_LOCGHI  = 0xec46,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    RRFam_SELGR = 0xb9e3,

    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif

/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot == 0);
    return TCG_REG_R2;
}

#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15

/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
static const uint8_t tcg_cond_to_s390_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};

/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation, however since the test is vs zero we
   can re-map the outcomes appropriately.  */
static const uint8_t tcg_cond_to_ltr_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,
    [TCG_COND_LEU] = S390_CC_EQ,
    [TCG_COND_GTU] = S390_CC_NE,
    [TCG_COND_GEU] = S390_CC_ALWAYS,
};

#ifdef CONFIG_SOFTMMU
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif

static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];

static inline bool is_general_reg(TCGReg r)
{
    return r <= TCG_REG_R15;
}

static inline bool is_vector_reg(TCGReg r)
{
    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
}

static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t pcrel2;
    uint32_t old;

    value += addend;
    pcrel2 = (tcg_insn_unit *)value - src_rx;

    switch (type) {
    case R_390_PC16DBL:
        if (pcrel2 == (int16_t)pcrel2) {
            tcg_patch16(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_PC32DBL:
        if (pcrel2 == (int32_t)pcrel2) {
            tcg_patch32(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_20:
        if (value == sextract64(value, 0, 20)) {
            old = *(uint32_t *)src_rw & 0xf00000ff;
            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
            tcg_patch32(src_rw, old);
            return true;
        }
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}
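
/*
 * A worked example for the R_390_20 case above (illustrative): a
 * displacement of -4 sign-extends to the 20-bit value 0xffffc, which
 * splits into DL = 0xffc and DH = 0xff, matching the long-displacement
 * layout produced by tcg_out_insn_RSY() below; the 0xf00000ff mask
 * preserves the base register field and the low opcode byte.
 */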

static int is_const_p16(uint64_t val)
{
    for (int i = 0; i < 4; ++i) {
        uint64_t mask = 0xffffull << (i * 16);
        if ((val & ~mask) == 0) {
            return i;
        }
    }
    return -1;
}

static int is_const_p32(uint64_t val)
{
    if ((val & 0xffffffff00000000ull) == 0) {
        return 0;
    }
    if ((val & 0x00000000ffffffffull) == 0) {
        return 1;
    }
    return -1;
}

/*
 * Accept bit patterns like these:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * Copied from gcc sources.
 */
static bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    /* We don't change the number of transitions by inverting,
       so make sure we start with the LSB zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* Reject all zeros or all ones.  */
    if (c == 0) {
        return false;
    }
    /* Find the first transition.  */
    lsb = c & -c;
    /* Invert to look for a second transition.  */
    c = ~c;
    /* Erase the first transition.  */
    c &= -lsb;
    /* Find the second transition, if any.  */
    lsb = c & -c;
    /* Match if all the bits are 1's, or if c is zero.  */
    return c == -lsb;
}
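
/*
 * Worked example (illustrative): c = 0x0ff0, pattern 0..01..10..0.
 *   lsb = c & -c        -> 0x0010          (first transition found)
 *   c = ~c, c &= -lsb   -> 0xfff...ff000   (first transition erased)
 *   lsb = c & -c        -> 0x1000          (second transition)
 *   c == -lsb           -> true, so 0x0ff0 is a valid RISBG mask.
 * By contrast, c = 0xa (0b1010) has two separate runs of ones and
 * fails the final test.
 */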

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* The following are mutually exclusive.  */
    if (ct & TCG_CT_CONST_S16) {
        return val == (int16_t)val;
    } else if (ct & TCG_CT_CONST_S32) {
        return val == (int32_t)val;
    } else if (ct & TCG_CT_CONST_S33) {
        return val >= -0xffffffffll && val <= 0xffffffffll;
    } else if (ct & TCG_CT_CONST_ZERO) {
        return val == 0;
    }

    if (ct & TCG_CT_CONST_INV) {
        val = ~val;
    }
    /*
     * Note that is_const_p16 is a subset of is_const_p32,
     * so we don't need both constraints.
     */
    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
        return true;
    }
    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
        return true;
    }

    return 0;
}

/* Emit instructions according to the given instruction format.  */

static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
{
    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2)
{
    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}

/* RRF-a without the m4 field */
static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
                              TCGReg r1, TCGReg r2, TCGReg r3)
{
    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
}

/* RRF-a with the m4 field */
static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
{
    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
                              TCGReg r1, TCGReg r2, int m3)
{
    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}

static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}

static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
                             int i2, int m3)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
    tcg_out32(s, (i2 << 16) | (op & 0xff));
}

static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out16(s, op | (r1 << 4));
    tcg_out32(s, i2);
}

static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
                            TCGReg b2, TCGReg r3, int disp)
{
    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
              | (disp & 0xfff));
}

static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
                             TCGReg b2, TCGReg r3, int disp)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
    tcg_out32(s, (op & 0xff) | (b2 << 28)
              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
}

#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    /*
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
     * is the left-shift of the 4th operand.
     */
    return ((v1 & 0x10) << (4 + 3))
         | ((v2 & 0x10) << (4 + 2))
         | ((v3 & 0x10) << (4 + 1))
         | ((v4 & 0x10) << (4 + 0));
}
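
/*
 * Worked example (illustrative): for v1 = %v17 (regno 49, 0b110001),
 * the low nibble 1 goes into the usual 4-bit register field, while
 * (49 & 0x10) << 7 = 0x800 sets the first RXB bit.  Registers %v0-%v15
 * have bit 4 clear and contribute nothing, so operands restricted to
 * that range leave RXB zero.
 */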

static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_debug_assert(is_vector_reg(v4));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
}

static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg r2, TCGReg r3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_general_reg(r2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
    tcg_out16(s, r3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}

static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_general_reg(r1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(x2));
    tcg_debug_assert(is_general_reg(b2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
    tcg_out16(s, (b2 << 12) | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
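
/*
 * For example, tcg_out_insn(s, RRE, LGR, dst, src) expands to
 * tcg_out_insn_RRE(s, RRE_LGR, dst, src); naming an opcode that is
 * not spelled RRE_* fails to compile, which is the "type-check".
 */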


/* emit 64-bit shifts */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}

/* emit 32-bit shifts */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    if (src == dst) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(dst) && is_general_reg(src))) {
            tcg_out_insn(s, RR, LR, dst, src);
            break;
        }
        /* fallthru */

    case TCG_TYPE_I64:
        if (likely(is_general_reg(dst))) {
            if (likely(is_general_reg(src))) {
                tcg_out_insn(s, RRE, LGR, dst, src);
            } else {
                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
            }
            break;
        } else if (is_general_reg(src)) {
            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static const S390Opcode li_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};
static const S390Opcode oi_insns[4] = {
    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
};
static const S390Opcode lif_insns[2] = {
    RIL_LLILF, RIL_LLIHF,
};

/* load a register with an immediate value */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    ptrdiff_t pc_off;
    int i;

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return;
    }

    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
        return;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (sval == (int32_t)sval) {
        tcg_out_insn(s, RIL, LGFI, ret, sval);
        return;
    }

    i = is_const_p32(uval);
    if (i >= 0) {
        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
        return;
    }

    /* Try for PC-relative address load.  For odd addresses, add one. */
    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
    if (pc_off == (int32_t)pc_off) {
        tcg_out_insn(s, RIL, LARL, ret, pc_off);
        if (sval & 1) {
            tcg_out_insn(s, RI, AGHI, ret, 1);
        }
        return;
    }

    /* Otherwise, load it by parts. */
    i = is_const_p16((uint32_t)uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, LLILF, ret, uval);
    }
    uval >>= 32;
    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, OIHF, ret, uval);
    }
}
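
/*
 * Illustrative expansions of the strategy above (assuming the
 * PC-relative form is out of range where it is tried):
 *   0x00000000ffff0000 -> LLILH 0xffff           (single 16-bit part)
 *   0xffffffff80000000 -> LGFI                   (signed 32-bit)
 *   0x00ff00ff00ff00ff -> LLILF 0x00ff00ff; OIHF 0x00ff00ff (by parts)
 * Odd addresses reachable from the PC are formed as LARL of the value
 * minus one plus AGHI 1, since LARL can only produce even addresses.
 */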

/* Emit a load/store type instruction.  Inputs are:
   DATA:     The register to be loaded or stored.
   BASE+OFS: The effective address.
   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */

static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                        TCGReg data, TCGReg base, TCGReg index,
                        tcg_target_long ofs)
{
    if (ofs < -0x80000 || ofs >= 0x80000) {
        /* Combine the low 20 bits of the offset with the actual load insn;
           the high 44 bits must come from an immediate load.  */
        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
        ofs = low;

        /* If we were already given an index register, add it in.  */
        if (index != TCG_REG_NONE) {
            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
        }
        index = TCG_TMP0;
    }

    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
    } else {
        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
    }
}
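
/*
 * Worked example (illustrative): ofs = 0x123456 exceeds the signed
 * 20-bit range, so it splits as low = 0x23456 with 0x100000 loaded
 * into TCG_TMP0.  For ofs = 0x87654 the low 20 bits sign-extend to a
 * negative value, giving low = -0x789ac and the same 0x100000 in
 * TCG_TMP0.
 */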

static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
                            TCGReg data, TCGReg base, TCGReg index,
                            tcg_target_long ofs, int m3)
{
    if (ofs < 0 || ofs >= 0x1000) {
        if (ofs >= -0x80000 && ofs < 0x80000) {
            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
            base = TCG_TMP0;
            index = TCG_REG_NONE;
            ofs = 0;
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
            if (index != TCG_REG_NONE) {
                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
            }
            index = TCG_TMP0;
            ofs = 0;
        }
    }
    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
}

/* load data without address translation or endianness conversion */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
            break;
        }
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
        } else {
            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
        }
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
}

static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                 int msb, int lsb, int ofs, int z)
{
    /* Format RIE-f */
    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGBR, dest, src);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGCR, dest, src);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGHR, dest, src);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGHR, dest, src);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32s(s, dest, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32u(s, dest, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_mov(s, TCG_TYPE_I32, dest, src);
}

static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
{
    int msb, lsb;
    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
        /* Achieve wraparound by swapping msb and lsb.  */
        msb = 64 - ctz64(~val);
        lsb = clz64(~val) - 1;
    } else {
        msb = clz64(val);
        lsb = 63 - ctz64(val);
    }
    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
}
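
/*
 * Worked example (illustrative): val = 0x00ffff00 gives msb = 40 and
 * lsb = 55, selecting IBM-numbered bits 40..55.  A wraparound mask
 * such as 0xf00000000000000f takes the first branch and swaps the
 * pair to msb = 60, lsb = 3, which RISBG interprets modulo 64 as the
 * range covering bits 60..63 and 0..3.
 */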

static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode ni_insns[4] = {
        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
    };
    static const S390Opcode nif_insns[2] = {
        RIL_NILF, RIL_NIHF
    };
    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
    int i;

    /* Look for the zero-extensions.  */
    if ((val & valid) == 0xffffffff) {
        tcg_out_ext32u(s, dest, dest);
        return;
    }
    if ((val & valid) == 0xff) {
        tcg_out_ext8u(s, dest, dest);
        return;
    }
    if ((val & valid) == 0xffff) {
        tcg_out_ext16u(s, dest, dest);
        return;
    }

    i = is_const_p16(~val & valid);
    if (i >= 0) {
        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
        return;
    }

    i = is_const_p32(~val & valid);
    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
    if (i >= 0) {
        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
        return;
    }

    if (risbg_mask(val)) {
        tgen_andi_risbg(s, dest, dest, val);
        return;
    }

    g_assert_not_reached();
}
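
/*
 * Illustrative dispatch for the cases above: val = 0xff becomes
 * tcg_out_ext8u (LLGCR); val = 0xfffffffff0ffffff clears bits only in
 * one 16-bit group and becomes NILH 0xf0ff; val = 0x0000fffffffff000
 * is a single run of ones and falls through to tgen_andi_risbg().
 */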

static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
{
    static const S390Opcode oif_insns[2] = {
        RIL_OILF, RIL_OIHF
    };

    int i;

    i = is_const_p16(val);
    if (i >= 0) {
        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
        return;
    }

    i = is_const_p32(val);
    if (i >= 0) {
        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
        return;
    }

    g_assert_not_reached();
}

static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
{
    switch (is_const_p32(val)) {
    case 0:
        tcg_out_insn(s, RIL, XILF, dest, val);
        break;
    case 1:
        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
        break;
    default:
        g_assert_not_reached();
    }
}

static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
{
    bool is_unsigned = is_unsigned_cond(c);
    TCGCond inv_c = tcg_invert_cond(c);
    S390Opcode op;

    if (c2const) {
        if (c2 == 0) {
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
                return tcg_cond_to_ltr_cond[c];
            }
        }

        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        if (type == TCG_TYPE_I32) {
            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
            tcg_out_insn_RIL(s, op, r1, c2);
            goto exit;
        }

        /*
         * Constraints are for a signed 33-bit operand, which is a
         * convenient superset of this signed/unsigned test.
         */
        if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
            op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
            tcg_out_insn_RIL(s, op, r1, c2);
            goto exit;
        }

        /* Load everything else into a register. */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
        c2 = TCG_TMP0;
    }

    if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    *inv_cc = tcg_cond_to_s390_cond[inv_c];
    return tcg_cond_to_s390_cond[c];
}

static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                    TCGArg c2, bool c2const, bool need_carry)
{
    int inv_cc;
    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
}

static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
    int cc;

    /* With LOC2, we can always emit the minimum 3 insns.  */
    if (HAVE_FACILITY(LOAD_ON_COND2)) {
        /* Emit: d = 0, d = (cc ? 1 : d).  */
        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
        return;
    }

 restart:
    switch (cond) {
    case TCG_COND_NE:
        /* X != 0 is X > 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_GTU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_GTU:
    case TCG_COND_GT:
        /* The result of a compare has CC=2 for GT and CC=3 unused.
           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_movi(s, type, dest, 0);
        tcg_out_insn(s, RRE, ALCGR, dest, dest);
        return;

    case TCG_COND_EQ:
        /* X == 0 is X <= 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_LEU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_LEU:
    case TCG_COND_LE:
        /* As above, but we're looking for borrow, or !carry.
           The second insn computes d - d - borrow, or -1 for true
           and 0 for false.  So we must mask to 1 bit afterward.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_insn(s, RRE, SLBGR, dest, dest);
        tgen_andi(s, type, dest, 1);
        return;

    case TCG_COND_GEU:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
        if (!c2const) {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
            cond = tcg_swap_cond(cond);
            goto restart;
        }
        break;

    default:
        g_assert_not_reached();
    }

    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
}
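
/*
 * Worked example for the carry/borrow tricks above (illustrative):
 * for setcond GTU, the comparison leaves CC = 2 exactly when the
 * condition holds; ALCGR then computes dest = 0 + 0 + carry, where
 * the carry is (CC & 2), so dest becomes the 0/1 result directly.
 * For LEU the borrow is the inverse, and SLBGR yields 0 or -1, hence
 * the trailing "and 1".
 */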

static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
                             TCGArg v3, int v3const, TCGReg v4,
                             int cc, int inv_cc)
{
    TCGReg src;

    if (v3const) {
        if (dest == v4) {
            if (HAVE_FACILITY(LOAD_ON_COND2)) {
                /* Emit: if (cc) dest = v3. */
                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
                return;
            }
            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
            src = TCG_TMP0;
        } else {
            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
            tcg_out_insn(s, RI, LGHI, dest, v3);
            cc = inv_cc;
            src = v4;
        }
    } else {
        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
            /* Emit: dest = cc ? v3 : v4. */
            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
            return;
        }
        if (dest == v4) {
            src = v3;
        } else {
            tcg_out_mov(s, type, dest, v3);
            cc = inv_cc;
            src = v4;
        }
    }

    /* Emit: if (cc) dest = src. */
    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
}

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
                         TCGReg c1, TCGArg c2, int c2const,
                         TCGArg v3, int v3const, TCGReg v4)
{
    int cc, inv_cc;

    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
}

static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
                     TCGArg a2, int a2const)
{
    /* Since this sets both R and R+1, we have no choice but to store the
       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);

    if (a2const && a2 == 64) {
        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
        return;
    }

    /*
     * Conditions from FLOGR are:
     *   2 -> one bit found
     *   8 -> no one bit found
     */
    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
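
/*
 * Worked example (illustrative): for a1 = 0x0000ffff00000000, FLOGR
 * places 16 (the count of leading zeros) in %r0 and sets the CC
 * matched by mask 2; for a1 = 0 it places 64 in %r0 and sets the CC
 * matched by mask 8, so tgen_movcond_int() substitutes the a2 default
 * exactly when no one bit was found.
 */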

static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, dest, src);
            src = dest;
        }
        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
        return;
    }

    /* Without MIE3, each byte gets the count of bits for the byte. */
    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);

    /* Multiply to sum each byte at the top of the word. */
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
    }
}

static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len, int z)
{
    int lsb = (63 - ofs);
    int msb = lsb - (len - 1);
    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
}

static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len)
{
    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}
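
/*
 * Worked example (illustrative): tgen_deposit with ofs = 8, len = 16
 * computes msb = 40, lsb = 55 and rotates the source left by 8, so
 * its low 16 bits land in bits 8..23 of the destination; z selects
 * whether the remaining destination bits are kept or zeroed.
 * tgen_extract with ofs = 8, len = 16 emits RISBG with rotate 56
 * (i.e. -8 mod 64) and range 48..63, leaving the extracted field
 * zero-extended in the low 16 bits.
 */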
1497
1498static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1499{
1500    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1501    if (off == (int16_t)off) {
1502        tcg_out_insn(s, RI, BRC, cc, off);
1503    } else if (off == (int32_t)off) {
1504        tcg_out_insn(s, RIL, BRCL, cc, off);
1505    } else {
1506        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1507        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1508    }
1509}
1510
1511static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1512{
1513    if (l->has_value) {
1514        tgen_gotoi(s, cc, l->u.value_ptr);
1515    } else {
1516        tcg_out16(s, RI_BRC | (cc << 4));
1517        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1518        s->code_ptr += 1;
1519    }
1520}
1521
1522static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1523                                TCGReg r1, TCGReg r2, TCGLabel *l)
1524{
1525    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1526    /* Format RIE-b */
1527    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1528    tcg_out16(s, 0);
1529    tcg_out16(s, cc << 12 | (opc & 0xff));
1530}
1531
1532static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1533                                    TCGReg r1, int i2, TCGLabel *l)
1534{
1535    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1536    /* Format RIE-c */
1537    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1538    tcg_out16(s, 0);
1539    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1540}
1541
1542static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1543                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1544{
1545    int cc;
1546    bool is_unsigned = is_unsigned_cond(c);
1547    bool in_range;
1548    S390Opcode opc;
1549
1550    cc = tcg_cond_to_s390_cond[c];
1551
1552    if (!c2const) {
1553        opc = (type == TCG_TYPE_I32
1554               ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1555               : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1556        tgen_compare_branch(s, opc, cc, r1, c2, l);
1557        return;
1558    }
1559
1560    /*
1561     * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1562     * If the immediate we've been given does not fit that range, we'll
1563     * fall back to separate compare and branch instructions using the
1564     * larger comparison range afforded by COMPARE IMMEDIATE.
1565     */
1566    if (type == TCG_TYPE_I32) {
1567        if (is_unsigned) {
1568            opc = RIEc_CLIJ;
1569            in_range = (uint32_t)c2 == (uint8_t)c2;
1570        } else {
1571            opc = RIEc_CIJ;
1572            in_range = (int32_t)c2 == (int8_t)c2;
1573        }
1574    } else {
1575        if (is_unsigned) {
1576            opc = RIEc_CLGIJ;
1577            in_range = (uint64_t)c2 == (uint8_t)c2;
1578        } else {
1579            opc = RIEc_CGIJ;
1580            in_range = (int64_t)c2 == (int8_t)c2;
1581        }
1582    }
1583    if (in_range) {
1584        tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1585        return;
1586    }
1587
1588    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1589    tgen_branch(s, cc, l);
1590}
1591
1592static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1593{
1594    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1595    if (off == (int32_t)off) {
1596        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1597    } else {
1598        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1599        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1600    }
1601}
1602
1603static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1604                         const TCGHelperInfo *info)
1605{
1606    tcg_out_call_int(s, dest);
1607}
1608
1609static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1610                                   TCGReg base, TCGReg index, int disp)
1611{
1612    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1613    case MO_UB:
1614        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1615        break;
1616    case MO_SB:
1617        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1618        break;
1619
1620    case MO_UW | MO_BSWAP:
1621        /* swapped unsigned halfword load with upper bits zeroed */
1622        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1623        tcg_out_ext16u(s, data, data);
1624        break;
1625    case MO_UW:
1626        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1627        break;
1628
1629    case MO_SW | MO_BSWAP:
1630        /* swapped sign-extended halfword load */
1631        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1632        tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
1633        break;
1634    case MO_SW:
1635        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1636        break;
1637
1638    case MO_UL | MO_BSWAP:
1639        /* swapped unsigned int load with upper bits zeroed */
1640        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1641        tcg_out_ext32u(s, data, data);
1642        break;
1643    case MO_UL:
1644        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1645        break;
1646
1647    case MO_SL | MO_BSWAP:
1648        /* swapped sign-extended int load */
1649        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1650        tcg_out_ext32s(s, data, data);
1651        break;
1652    case MO_SL:
1653        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1654        break;
1655
1656    case MO_UQ | MO_BSWAP:
1657        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1658        break;
1659    case MO_UQ:
1660        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1661        break;
1662
1663    default:
1664        g_assert_not_reached();
1665    }
1666}
1667
1668static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1669                                   TCGReg base, TCGReg index, int disp)
1670{
1671    switch (opc & (MO_SIZE | MO_BSWAP)) {
1672    case MO_UB:
1673        if (disp >= 0 && disp < 0x1000) {
1674            tcg_out_insn(s, RX, STC, data, base, index, disp);
1675        } else {
1676            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1677        }
1678        break;
1679
1680    case MO_UW | MO_BSWAP:
1681        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1682        break;
1683    case MO_UW:
1684        if (disp >= 0 && disp < 0x1000) {
1685            tcg_out_insn(s, RX, STH, data, base, index, disp);
1686        } else {
1687            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1688        }
1689        break;
1690
1691    case MO_UL | MO_BSWAP:
1692        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1693        break;
1694    case MO_UL:
1695        if (disp >= 0 && disp < 0x1000) {
1696            tcg_out_insn(s, RX, ST, data, base, index, disp);
1697        } else {
1698            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1699        }
1700        break;
1701
1702    case MO_UQ | MO_BSWAP:
1703        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1704        break;
1705    case MO_UQ:
1706        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1707        break;
1708
1709    default:
1710        g_assert_not_reached();
1711    }
1712}
1713
1714#if defined(CONFIG_SOFTMMU)
1715/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1716QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1717QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1718
1719/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1720   addend into R2.  Returns a register with the santitized guest address.  */
1721static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1722                               int mem_index, bool is_ld)
1723{
1724    unsigned s_bits = opc & MO_SIZE;
1725    unsigned a_bits = get_alignment_bits(opc);
1726    unsigned s_mask = (1 << s_bits) - 1;
1727    unsigned a_mask = (1 << a_bits) - 1;
1728    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1729    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1730    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1731    int ofs, a_off;
1732    uint64_t tlb_mask;
1733
1734    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1735                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1736    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1737    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1738
1739    /* For aligned accesses, we check the first byte and include the alignment
1740       bits within the address.  For unaligned access, we check that we don't
1741       cross pages using the address of the last byte of the access.  */
1742    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1743    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1744    if (a_off == 0) {
1745        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1746    } else {
1747        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1748        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1749    }
1750
1751    if (is_ld) {
1752        ofs = offsetof(CPUTLBEntry, addr_read);
1753    } else {
1754        ofs = offsetof(CPUTLBEntry, addr_write);
1755    }
1756    if (TARGET_LONG_BITS == 32) {
1757        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1758    } else {
1759        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1760    }
1761
1762    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1763                 offsetof(CPUTLBEntry, addend));
1764
1765    if (TARGET_LONG_BITS == 32) {
1766        tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
1767        return TCG_REG_R3;
1768    }
1769    return addr_reg;
1770}
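
/*
 * Illustrative only: for a 64-bit guest, the fast-path sequence emitted
 * above looks roughly like
 *
 *     srlg   %r2,addr,(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     ng     %r2,mask_off(env)
 *     ag     %r2,table_off(env)
 *     risbg  %r3,addr,...              # mask page and alignment bits
 *     cg     %r3,addr_read(%r2)        # leaves CC for the caller's BRC
 *     lg     %r2,addend(%r2)
 *
 * with the caller emitting the conditional branch to the slow path.
 */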
1771
1772static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1773                                TCGReg data, TCGReg addr,
1774                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1775{
1776    TCGLabelQemuLdst *label = new_ldst_label(s);
1777
1778    label->is_ld = is_ld;
1779    label->oi = oi;
1780    label->datalo_reg = data;
1781    label->addrlo_reg = addr;
1782    label->raddr = tcg_splitwx_to_rx(raddr);
1783    label->label_ptr[0] = label_ptr;
1784}
1785
1786static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1787{
1788    TCGReg addr_reg = lb->addrlo_reg;
1789    TCGReg data_reg = lb->datalo_reg;
1790    MemOpIdx oi = lb->oi;
1791    MemOp opc = get_memop(oi);
1792
1793    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1794                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1795        return false;
1796    }
1797
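    /* Marshal the helper arguments (env, addr, oi, retaddr), matching
       the signature of the qemu_ld helpers. */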
1798    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1799    if (TARGET_LONG_BITS == 64) {
1800        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1801    }
1802    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1803    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1804    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1805    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1806
1807    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1808    return true;
1809}
1810
1811static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1812{
1813    TCGReg addr_reg = lb->addrlo_reg;
1814    TCGReg data_reg = lb->datalo_reg;
1815    MemOpIdx oi = lb->oi;
1816    MemOp opc = get_memop(oi);
1817    MemOp size = opc & MO_SIZE;
1818
1819    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1820                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1821        return false;
1822    }
1823
1824    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1825    if (TARGET_LONG_BITS == 64) {
1826        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1827    }
1828    tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1829                   TCG_REG_R4, lb->type, size, data_reg);
1830    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1831    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1832    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1833
1834    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1835    return true;
1836}
1837#else
1838static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1839                                   TCGReg addrlo, unsigned a_bits)
1840{
1841    unsigned a_mask = (1 << a_bits) - 1;
1842    TCGLabelQemuLdst *l = new_ldst_label(s);
1843
1844    l->is_ld = is_ld;
1845    l->addrlo_reg = addrlo;
1846
1847    /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1848    tcg_debug_assert(a_bits < 16);
1849    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1850
1851    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1852    l->label_ptr[0] = s->code_ptr;
1853    s->code_ptr += 1;
1854
1855    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1856}
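
/*
 * Sketch of the emitted check for, e.g., a 4-byte-aligned access
 * (a_bits == 2):
 *
 *     tmll   addr,3           # test the low two address bits
 *     brc    7,slow_path      # CC in {1,2,3} means a bit was set
 *
 * with the branch displacement filled in later by patch_reloc.
 */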
1857
1858static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1859{
1860    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1861                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1862        return false;
1863    }
1864
1865    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1866    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1867
1868    /* "Tail call" to the helper, with the return address back inline. */
1869    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1870    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1871                                                 : helper_unaligned_st));
1872    return true;
1873}
1874
1875static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1876{
1877    return tcg_out_fail_alignment(s, l);
1878}
1879
1880static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1881{
1882    return tcg_out_fail_alignment(s, l);
1883}
1884
1885static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1886                                  TCGReg *index_reg, tcg_target_long *disp)
1887{
1888    if (TARGET_LONG_BITS == 32) {
1889        tcg_out_ext32u(s, TCG_TMP0, *addr_reg);
1890        *addr_reg = TCG_TMP0;
1891    }
1892    if (guest_base < 0x80000) {
1893        *index_reg = TCG_REG_NONE;
1894        *disp = guest_base;
1895    } else {
1896        *index_reg = TCG_GUEST_BASE_REG;
1897        *disp = 0;
1898    }
1899}
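
/*
 * For example, with guest_base == 0x10000 the direct access below can
 * fold the base into the 20-bit RXY displacement (e.g. lg data,
 * 0x10000(addr)); a guest_base of 0x80000 or more instead goes through
 * TCG_GUEST_BASE_REG, which the prologue initializes in that case.
 */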
1900#endif /* CONFIG_SOFTMMU */
1901
1902static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1903                            MemOpIdx oi)
1904{
1905    MemOp opc = get_memop(oi);
1906#ifdef CONFIG_SOFTMMU
1907    unsigned mem_index = get_mmuidx(oi);
1908    tcg_insn_unit *label_ptr;
1909    TCGReg base_reg;
1910
1911    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1912
1913    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1914    label_ptr = s->code_ptr;
1915    s->code_ptr += 1;
1916
1917    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1918
1919    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1920#else
1921    TCGReg index_reg;
1922    tcg_target_long disp;
1923    unsigned a_bits = get_alignment_bits(opc);
1924
1925    if (a_bits) {
1926        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1927    }
1928    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1929    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1930#endif
1931}
1932
1933static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1934                            MemOpIdx oi)
1935{
1936    MemOp opc = get_memop(oi);
1937#ifdef CONFIG_SOFTMMU
1938    unsigned mem_index = get_mmuidx(oi);
1939    tcg_insn_unit *label_ptr;
1940    TCGReg base_reg;
1941
1942    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1943
1944    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1945    label_ptr = s->code_ptr;
1946    s->code_ptr += 1;
1947
1948    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1949
1950    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1951#else
1952    TCGReg index_reg;
1953    tcg_target_long disp;
1954    unsigned a_bits = get_alignment_bits(opc);
1955
1956    if (a_bits) {
1957        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1958    }
1959    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1960    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1961#endif
1962}
1963
1964static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1965{
1966    /* Reuse the zeroing that exists for goto_ptr.  */
1967    if (a0 == 0) {
1968        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1969    } else {
1970        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1971        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1972    }
1973}
1974
1975static void tcg_out_goto_tb(TCGContext *s, int which)
1976{
1977    /*
1978     * The branch displacement must be 4-byte aligned for atomic
1979     * patching; see if we need to add an extra nop before the branch.
1980     */
1981    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1982        tcg_out16(s, NOP);
1983    }
1984    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1985    set_jmp_insn_offset(s, which);
1986    s->code_ptr += 2;
1987    set_jmp_reset_offset(s, which);
1988}
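
/*
 * Schematically the emitted sequence is
 *
 *     [nop]                   # only if needed for alignment
 *     brcl   0xf,disp         # 6 bytes; 32-bit disp at offset 2
 *
 * so the optional padding guarantees the displacement word is 4-byte
 * aligned and can be rewritten with a single atomic store.
 */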
1989
1990void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1991                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1992{
1993    if (!HAVE_FACILITY(GEN_INST_EXT)) {
1994        return;
1995    }
1996    /* patch the branch destination */
1997    uintptr_t addr = tb->jmp_target_addr[n];
1998    intptr_t disp = addr - (jmp_rx - 2);
1999    qatomic_set((int32_t *)jmp_rw, disp / 2);
2000    /* no need to flush icache explicitly */
2001}
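
/*
 * The BRCL displacement is counted in halfwords, hence the division by
 * 2, and jmp_rx - 2 rebases it to the start of the 6-byte branch
 * instruction.  As noted above, s390x needs no explicit icache flush
 * after such a cross-modifying write.
 */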
2002
2003# define OP_32_64(x) \
2004        case glue(glue(INDEX_op_,x),_i32): \
2005        case glue(glue(INDEX_op_,x),_i64)
2006
2007static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2008                              const TCGArg args[TCG_MAX_OP_ARGS],
2009                              const int const_args[TCG_MAX_OP_ARGS])
2010{
2011    S390Opcode op, op2;
2012    TCGArg a0, a1, a2;
2013
2014    switch (opc) {
2015    case INDEX_op_goto_ptr:
2016        a0 = args[0];
2017        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2018        break;
2019
2020    OP_32_64(ld8u):
2021        /* ??? LLC (RXY format) is only present with the extended-immediate
2022           facility, whereas LLGC is always present.  */
2023        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2024        break;
2025
2026    OP_32_64(ld8s):
2027        /* ??? LB is no smaller than LGB, so there is no point in using it.  */
2028        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2029        break;
2030
2031    OP_32_64(ld16u):
2032        /* ??? LLH (RXY format) is only present with the extended-immediate
2033           facility, whereas LLGH is always present.  */
2034        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2035        break;
2036
2037    case INDEX_op_ld16s_i32:
2038        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2039        break;
2040
2041    case INDEX_op_ld_i32:
2042        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2043        break;
2044
2045    OP_32_64(st8):
2046        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2047                    TCG_REG_NONE, args[2]);
2048        break;
2049
2050    OP_32_64(st16):
2051        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2052                    TCG_REG_NONE, args[2]);
2053        break;
2054
2055    case INDEX_op_st_i32:
2056        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2057        break;
2058
2059    case INDEX_op_add_i32:
2060        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2061        if (const_args[2]) {
2062        do_addi_32:
2063            if (a0 == a1) {
2064                if (a2 == (int16_t)a2) {
2065                    tcg_out_insn(s, RI, AHI, a0, a2);
2066                    break;
2067                }
2068                tcg_out_insn(s, RIL, AFI, a0, a2);
2069                break;
2070            }
2071            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2072        } else if (a0 == a1) {
2073            tcg_out_insn(s, RR, AR, a0, a2);
2074        } else {
2075            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2076        }
2077        break;
2078    case INDEX_op_sub_i32:
2079        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2080        if (const_args[2]) {
2081            a2 = -a2;
2082            goto do_addi_32;
2083        } else if (a0 == a1) {
2084            tcg_out_insn(s, RR, SR, a0, a2);
2085        } else {
2086            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2087        }
2088        break;
2089
2090    case INDEX_op_and_i32:
2091        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2092        if (const_args[2]) {
2093            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2094            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2095        } else if (a0 == a1) {
2096            tcg_out_insn(s, RR, NR, a0, a2);
2097        } else {
2098            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2099        }
2100        break;
2101    case INDEX_op_or_i32:
2102        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2103        if (const_args[2]) {
2104            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2105            tgen_ori(s, a0, a2);
2106        } else if (a0 == a1) {
2107            tcg_out_insn(s, RR, OR, a0, a2);
2108        } else {
2109            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2110        }
2111        break;
2112    case INDEX_op_xor_i32:
2113        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2114        if (const_args[2]) {
2115            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2116            tcg_out_insn(s, RIL, XILF, a0, a2);
2117        } else if (a0 == a1) {
2118            tcg_out_insn(s, RR, XR, args[0], args[2]);
2119        } else {
2120            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2121        }
2122        break;
2123
2124    case INDEX_op_andc_i32:
2125        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2126        if (const_args[2]) {
2127            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2128            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2129        } else {
2130            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2131        }
2132        break;
2133    case INDEX_op_orc_i32:
2134        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2135        if (const_args[2]) {
2136            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2137            tgen_ori(s, a0, (uint32_t)~a2);
2138        } else {
2139            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2140        }
2141        break;
2142    case INDEX_op_eqv_i32:
2143        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2144        if (const_args[2]) {
2145            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2146            tcg_out_insn(s, RIL, XILF, a0, ~a2);
2147        } else {
2148            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2149        }
2150        break;
2151    case INDEX_op_nand_i32:
2152        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2153        break;
2154    case INDEX_op_nor_i32:
2155        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2156        break;
2157
2158    case INDEX_op_neg_i32:
2159        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2160        break;
2161    case INDEX_op_not_i32:
2162        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2163        break;
2164
2165    case INDEX_op_mul_i32:
2166        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2167        if (const_args[2]) {
2168            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2169            if (a2 == (int16_t)a2) {
2170                tcg_out_insn(s, RI, MHI, a0, a2);
2171            } else {
2172                tcg_out_insn(s, RIL, MSFI, a0, a2);
2173            }
2174        } else if (a0 == a1) {
2175            tcg_out_insn(s, RRE, MSR, a0, a2);
2176        } else {
2177            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2178        }
2179        break;
2180
2181    case INDEX_op_div2_i32:
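        /*
         * DR divides the 64-bit value in an even/odd register pair by
         * args[4], leaving the remainder in the even register (args[1])
         * and the quotient in the odd one (args[0]); the asserts below
         * enforce that pairing.
         */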
2182        tcg_debug_assert(args[0] == args[2]);
2183        tcg_debug_assert(args[1] == args[3]);
2184        tcg_debug_assert((args[1] & 1) == 0);
2185        tcg_debug_assert(args[0] == args[1] + 1);
2186        tcg_out_insn(s, RR, DR, args[1], args[4]);
2187        break;
2188    case INDEX_op_divu2_i32:
2189        tcg_debug_assert(args[0] == args[2]);
2190        tcg_debug_assert(args[1] == args[3]);
2191        tcg_debug_assert((args[1] & 1) == 0);
2192        tcg_debug_assert(args[0] == args[1] + 1);
2193        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2194        break;
2195
2196    case INDEX_op_shl_i32:
2197        op = RS_SLL;
2198        op2 = RSY_SLLK;
2199    do_shift32:
2200        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2201        if (a0 == a1) {
2202            if (const_args[2]) {
2203                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2204            } else {
2205                tcg_out_sh32(s, op, a0, a2, 0);
2206            }
2207        } else {
2208            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2209            if (const_args[2]) {
2210                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2211            } else {
2212                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2213            }
2214        }
2215        break;
2216    case INDEX_op_shr_i32:
2217        op = RS_SRL;
2218        op2 = RSY_SRLK;
2219        goto do_shift32;
2220    case INDEX_op_sar_i32:
2221        op = RS_SRA;
2222        op2 = RSY_SRAK;
2223        goto do_shift32;
2224
2225    case INDEX_op_rotl_i32:
2226        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2227        if (const_args[2]) {
2228            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2229        } else {
2230            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2231        }
2232        break;
2233    case INDEX_op_rotr_i32:
2234        if (const_args[2]) {
2235            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2236                         TCG_REG_NONE, (32 - args[2]) & 31);
2237        } else {
2238            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2239            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2240        }
2241        break;
2242
2243    case INDEX_op_bswap16_i32:
2244        a0 = args[0], a1 = args[1], a2 = args[2];
2245        tcg_out_insn(s, RRE, LRVR, a0, a1);
2246        if (a2 & TCG_BSWAP_OS) {
2247            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2248        } else {
2249            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2250        }
2251        break;
2252    case INDEX_op_bswap16_i64:
2253        a0 = args[0], a1 = args[1], a2 = args[2];
2254        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2255        if (a2 & TCG_BSWAP_OS) {
2256            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2257        } else {
2258            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2259        }
2260        break;
2261
2262    case INDEX_op_bswap32_i32:
2263        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2264        break;
2265    case INDEX_op_bswap32_i64:
2266        a0 = args[0], a1 = args[1], a2 = args[2];
2267        tcg_out_insn(s, RRE, LRVR, a0, a1);
2268        if (a2 & TCG_BSWAP_OS) {
2269            tcg_out_ext32s(s, a0, a0);
2270        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2271            tcg_out_ext32u(s, a0, a0);
2272        }
2273        break;
2274
2275    case INDEX_op_add2_i32:
2276        if (const_args[4]) {
2277            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2278        } else {
2279            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2280        }
2281        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2282        break;
2283    case INDEX_op_sub2_i32:
2284        if (const_args[4]) {
2285            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2286        } else {
2287            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2288        }
2289        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2290        break;
2291
2292    case INDEX_op_br:
2293        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2294        break;
2295
2296    case INDEX_op_brcond_i32:
2297        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2298                    args[1], const_args[1], arg_label(args[3]));
2299        break;
2300    case INDEX_op_setcond_i32:
2301        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2302                     args[2], const_args[2]);
2303        break;
2304    case INDEX_op_movcond_i32:
2305        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2306                     args[2], const_args[2], args[3], const_args[3], args[4]);
2307        break;
2308
2309    case INDEX_op_qemu_ld_i32:
2310        /* ??? Technically we can use a non-extending instruction.  */
2311    case INDEX_op_qemu_ld_i64:
2312        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2313        break;
2314    case INDEX_op_qemu_st_i32:
2315    case INDEX_op_qemu_st_i64:
2316        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2317        break;
2318
2319    case INDEX_op_ld16s_i64:
2320        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2321        break;
2322    case INDEX_op_ld32u_i64:
2323        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2324        break;
2325    case INDEX_op_ld32s_i64:
2326        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2327        break;
2328    case INDEX_op_ld_i64:
2329        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2330        break;
2331
2332    case INDEX_op_st32_i64:
2333        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2334        break;
2335    case INDEX_op_st_i64:
2336        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2337        break;
2338
2339    case INDEX_op_add_i64:
2340        a0 = args[0], a1 = args[1], a2 = args[2];
2341        if (const_args[2]) {
2342        do_addi_64:
2343            if (a0 == a1) {
2344                if (a2 == (int16_t)a2) {
2345                    tcg_out_insn(s, RI, AGHI, a0, a2);
2346                    break;
2347                }
2348                if (a2 == (int32_t)a2) {
2349                    tcg_out_insn(s, RIL, AGFI, a0, a2);
2350                    break;
2351                }
2352                if (a2 == (uint32_t)a2) {
2353                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
2354                    break;
2355                }
2356                if (-a2 == (uint32_t)-a2) {
2357                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2358                    break;
2359                }
2360            }
2361            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2362        } else if (a0 == a1) {
2363            tcg_out_insn(s, RRE, AGR, a0, a2);
2364        } else {
2365            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2366        }
2367        break;
2368    case INDEX_op_sub_i64:
2369        a0 = args[0], a1 = args[1], a2 = args[2];
2370        if (const_args[2]) {
2371            a2 = -a2;
2372            goto do_addi_64;
2373        } else {
2374            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2375        }
2376        break;
2377
2378    case INDEX_op_and_i64:
2379        a0 = args[0], a1 = args[1], a2 = args[2];
2380        if (const_args[2]) {
2381            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2382            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2383        } else {
2384            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2385        }
2386        break;
2387    case INDEX_op_or_i64:
2388        a0 = args[0], a1 = args[1], a2 = args[2];
2389        if (const_args[2]) {
2390            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2391            tgen_ori(s, a0, a2);
2392        } else {
2393            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2394        }
2395        break;
2396    case INDEX_op_xor_i64:
2397        a0 = args[0], a1 = args[1], a2 = args[2];
2398        if (const_args[2]) {
2399            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2400            tgen_xori(s, a0, a2);
2401        } else {
2402            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2403        }
2404        break;
2405
2406    case INDEX_op_andc_i64:
2407        a0 = args[0], a1 = args[1], a2 = args[2];
2408        if (const_args[2]) {
2409            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2410            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2411        } else {
2412            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2413        }
2414        break;
2415    case INDEX_op_orc_i64:
2416        a0 = args[0], a1 = args[1], a2 = args[2];
2417        if (const_args[2]) {
2418            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2419            tgen_ori(s, a0, ~a2);
2420        } else {
2421            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2422        }
2423        break;
2424    case INDEX_op_eqv_i64:
2425        a0 = args[0], a1 = args[1], a2 = args[2];
2426        if (const_args[2]) {
2427            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2428            tgen_xori(s, a0, ~a2);
2429        } else {
2430            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2431        }
2432        break;
2433    case INDEX_op_nand_i64:
2434        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2435        break;
2436    case INDEX_op_nor_i64:
2437        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2438        break;
2439
2440    case INDEX_op_neg_i64:
2441        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2442        break;
2443    case INDEX_op_not_i64:
2444        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2445        break;
2446    case INDEX_op_bswap64_i64:
2447        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2448        break;
2449
2450    case INDEX_op_mul_i64:
2451        a0 = args[0], a1 = args[1], a2 = args[2];
2452        if (const_args[2]) {
2453            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2454            if (a2 == (int16_t)a2) {
2455                tcg_out_insn(s, RI, MGHI, a0, a2);
2456            } else {
2457                tcg_out_insn(s, RIL, MSGFI, a0, a2);
2458            }
2459        } else if (a0 == a1) {
2460            tcg_out_insn(s, RRE, MSGR, a0, a2);
2461        } else {
2462            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2463        }
2464        break;
2465
2466    case INDEX_op_div2_i64:
2467        /*
2468         * ??? We get an unnecessary sign-extension of the dividend
2469         * into op0 with this definition, but since we do in fact always
2470         * produce both quotient and remainder, using INDEX_op_div_i64
2471         * instead would require jumping through even more hoops.
2472         */
2473        tcg_debug_assert(args[0] == args[2]);
2474        tcg_debug_assert(args[1] == args[3]);
2475        tcg_debug_assert((args[1] & 1) == 0);
2476        tcg_debug_assert(args[0] == args[1] + 1);
2477        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2478        break;
2479    case INDEX_op_divu2_i64:
2480        tcg_debug_assert(args[0] == args[2]);
2481        tcg_debug_assert(args[1] == args[3]);
2482        tcg_debug_assert((args[1] & 1) == 0);
2483        tcg_debug_assert(args[0] == args[1] + 1);
2484        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2485        break;
2486    case INDEX_op_mulu2_i64:
2487        tcg_debug_assert(args[0] == args[2]);
2488        tcg_debug_assert((args[1] & 1) == 0);
2489        tcg_debug_assert(args[0] == args[1] + 1);
2490        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2491        break;
2492    case INDEX_op_muls2_i64:
2493        tcg_debug_assert((args[1] & 1) == 0);
2494        tcg_debug_assert(args[0] == args[1] + 1);
2495        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2496        break;
2497
2498    case INDEX_op_shl_i64:
2499        op = RSY_SLLG;
2500    do_shift64:
2501        if (const_args[2]) {
2502            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2503        } else {
2504            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2505        }
2506        break;
2507    case INDEX_op_shr_i64:
2508        op = RSY_SRLG;
2509        goto do_shift64;
2510    case INDEX_op_sar_i64:
2511        op = RSY_SRAG;
2512        goto do_shift64;
2513
2514    case INDEX_op_rotl_i64:
2515        if (const_args[2]) {
2516            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2517                         TCG_REG_NONE, args[2]);
2518        } else {
2519            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2520        }
2521        break;
2522    case INDEX_op_rotr_i64:
2523        if (const_args[2]) {
2524            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2525                         TCG_REG_NONE, (64 - args[2]) & 63);
2526        } else {
2527            /* We can use the smaller 32-bit negate because only the
2528               low 6 bits are examined for the rotate.  */
2529            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2530            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2531        }
2532        break;
2533
2534    case INDEX_op_add2_i64:
2535        if (const_args[4]) {
2536            if ((int64_t)args[4] >= 0) {
2537                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2538            } else {
2539                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2540            }
2541        } else {
2542            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2543        }
2544        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2545        break;
2546    case INDEX_op_sub2_i64:
2547        if (const_args[4]) {
2548            if ((int64_t)args[4] >= 0) {
2549                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2550            } else {
2551                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2552            }
2553        } else {
2554            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2555        }
2556        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2557        break;
2558
2559    case INDEX_op_brcond_i64:
2560        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2561                    args[1], const_args[1], arg_label(args[3]));
2562        break;
2563    case INDEX_op_setcond_i64:
2564        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2565                     args[2], const_args[2]);
2566        break;
2567    case INDEX_op_movcond_i64:
2568        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2569                     args[2], const_args[2], args[3], const_args[3], args[4]);
2570        break;
2571
2572    OP_32_64(deposit):
2573        a0 = args[0], a1 = args[1], a2 = args[2];
2574        if (const_args[1]) {
2575            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2576        } else {
2577            /* Since we can't support "0Z" as a constraint, we allow a1 in
2578               any register.  Fix things up as if it were a matching constraint.  */
2579            if (a0 != a1) {
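                /* Relies on TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1. */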
2580                TCGType type = (opc == INDEX_op_deposit_i64);
2581                if (a0 == a2) {
2582                    tcg_out_mov(s, type, TCG_TMP0, a2);
2583                    a2 = TCG_TMP0;
2584                }
2585                tcg_out_mov(s, type, a0, a1);
2586            }
2587            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2588        }
2589        break;
2590
2591    OP_32_64(extract):
2592        tgen_extract(s, args[0], args[1], args[2], args[3]);
2593        break;
2594
2595    case INDEX_op_clz_i64:
2596        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2597        break;
2598
2599    case INDEX_op_ctpop_i32:
2600        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2601        break;
2602    case INDEX_op_ctpop_i64:
2603        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2604        break;
2605
2606    case INDEX_op_mb:
2607        /* The host memory model is quite strong; we simply need to
2608           serialize the instruction stream.  */
2609        if (args[0] & TCG_MO_ST_LD) {
2610            /* fast-bcr-serialization facility (45) is present */
2611            tcg_out_insn(s, RR, BCR, 14, 0);
2612        }
2613        break;
2614
2615    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2616    case INDEX_op_mov_i64:
2617    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2618    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2619    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2620    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2621    case INDEX_op_ext8s_i64:
2622    case INDEX_op_ext8u_i32:
2623    case INDEX_op_ext8u_i64:
2624    case INDEX_op_ext16s_i32:
2625    case INDEX_op_ext16s_i64:
2626    case INDEX_op_ext16u_i32:
2627    case INDEX_op_ext16u_i64:
2628    case INDEX_op_ext32s_i64:
2629    case INDEX_op_ext32u_i64:
2630    case INDEX_op_ext_i32_i64:
2631    case INDEX_op_extu_i32_i64:
2632    case INDEX_op_extrl_i64_i32:
2633    default:
2634        g_assert_not_reached();
2635    }
2636}
2637
2638static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2639                            TCGReg dst, TCGReg src)
2640{
2641    if (is_general_reg(src)) {
2642        /* Replicate general register into two MO_64. */
2643        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2644        if (vece == MO_64) {
2645            return true;
2646        }
2647        src = dst;
2648    }
2649
2650    /*
2651     * Recall that the "standard" integer, within a vector, is the
2652     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2653     */
2654    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2655    return true;
2656}
2657
2658static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2659                             TCGReg dst, TCGReg base, intptr_t offset)
2660{
2661    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2662    return true;
2663}
2664
2665static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2666                             TCGReg dst, int64_t val)
2667{
2668    int i, mask, msb, lsb;
2669
2670    /* Look for int16_t elements.  */
2671    if (vece <= MO_16 ||
2672        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2673        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2674        return;
2675    }
2676
2677    /* Look for bit masks.  */
2678    if (vece == MO_32) {
2679        if (risbg_mask((int32_t)val)) {
2680            /* Handle wraparound by swapping msb and lsb.  */
2681            if ((val & 0x80000001u) == 0x80000001u) {
2682                msb = 32 - ctz32(~val);
2683                lsb = clz32(~val) - 1;
2684            } else {
2685                msb = clz32(val);
2686                lsb = 31 - ctz32(val);
2687            }
2688            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2689            return;
2690        }
2691    } else {
2692        if (risbg_mask(val)) {
2693            /* Handle wraparound by swapping msb and lsb.  */
2694            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2696                msb = 64 - ctz64(~val);
2697                lsb = clz64(~val) - 1;
2698            } else {
2699                msb = clz64(val);
2700                lsb = 63 - ctz64(val);
2701            }
2702            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2703            return;
2704        }
2705    }
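
    /*
     * Worked example for the mask path above: val == 0x00ffff00 at
     * MO_32 gives msb = clz32(val) = 8 and lsb = 31 - ctz32(val) = 23,
     * so VGM sets bit positions 8..23 (counting the MSB as bit 0) in
     * each 32-bit element.
     */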
2706
2707    /* Look for all bytes 0x00 or 0xff.  */
2708    for (i = mask = 0; i < 8; i++) {
2709        uint8_t byte = val >> (i * 8);
2710        if (byte == 0xff) {
2711            mask |= 1 << i;
2712        } else if (byte != 0) {
2713            break;
2714        }
2715    }
2716    if (i == 8) {
2717        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2718        return;
2719    }
2720
2721    /* Otherwise, stuff it in the constant pool.  */
2722    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2723    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2724    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2725}
2726
2727static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2728                           unsigned vecl, unsigned vece,
2729                           const TCGArg args[TCG_MAX_OP_ARGS],
2730                           const int const_args[TCG_MAX_OP_ARGS])
2731{
2732    TCGType type = vecl + TCG_TYPE_V64;
2733    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2734
2735    switch (opc) {
2736    case INDEX_op_ld_vec:
2737        tcg_out_ld(s, type, a0, a1, a2);
2738        break;
2739    case INDEX_op_st_vec:
2740        tcg_out_st(s, type, a0, a1, a2);
2741        break;
2742    case INDEX_op_dupm_vec:
2743        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2744        break;
2745
2746    case INDEX_op_abs_vec:
2747        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2748        break;
2749    case INDEX_op_neg_vec:
2750        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2751        break;
2752    case INDEX_op_not_vec:
2753        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2754        break;
2755
2756    case INDEX_op_add_vec:
2757        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2758        break;
2759    case INDEX_op_sub_vec:
2760        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2761        break;
2762    case INDEX_op_and_vec:
2763        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2764        break;
2765    case INDEX_op_andc_vec:
2766        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2767        break;
2768    case INDEX_op_mul_vec:
2769        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2770        break;
2771    case INDEX_op_or_vec:
2772        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2773        break;
2774    case INDEX_op_orc_vec:
2775        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2776        break;
2777    case INDEX_op_xor_vec:
2778        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2779        break;
2780    case INDEX_op_nand_vec:
2781        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2782        break;
2783    case INDEX_op_nor_vec:
2784        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2785        break;
2786    case INDEX_op_eqv_vec:
2787        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2788        break;
2789
2790    case INDEX_op_shli_vec:
2791        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2792        break;
2793    case INDEX_op_shri_vec:
2794        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2795        break;
2796    case INDEX_op_sari_vec:
2797        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2798        break;
2799    case INDEX_op_rotli_vec:
2800        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2801        break;
2802    case INDEX_op_shls_vec:
2803        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2804        break;
2805    case INDEX_op_shrs_vec:
2806        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2807        break;
2808    case INDEX_op_sars_vec:
2809        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2810        break;
2811    case INDEX_op_rotls_vec:
2812        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2813        break;
2814    case INDEX_op_shlv_vec:
2815        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2816        break;
2817    case INDEX_op_shrv_vec:
2818        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2819        break;
2820    case INDEX_op_sarv_vec:
2821        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2822        break;
2823    case INDEX_op_rotlv_vec:
2824        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2825        break;
2826
2827    case INDEX_op_smin_vec:
2828        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2829        break;
2830    case INDEX_op_smax_vec:
2831        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2832        break;
2833    case INDEX_op_umin_vec:
2834        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2835        break;
2836    case INDEX_op_umax_vec:
2837        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2838        break;
2839
2840    case INDEX_op_bitsel_vec:
2841        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2842        break;
2843
2844    case INDEX_op_cmp_vec:
2845        switch ((TCGCond)args[3]) {
2846        case TCG_COND_EQ:
2847            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2848            break;
2849        case TCG_COND_GT:
2850            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2851            break;
2852        case TCG_COND_GTU:
2853            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2854            break;
2855        default:
2856            g_assert_not_reached();
2857        }
2858        break;
2859
2860    case INDEX_op_s390_vuph_vec:
2861        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2862        break;
2863    case INDEX_op_s390_vupl_vec:
2864        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2865        break;
2866    case INDEX_op_s390_vpks_vec:
2867        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2868        break;
2869
2870    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2871    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2872    default:
2873        g_assert_not_reached();
2874    }
2875}
2876
2877int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2878{
2879    switch (opc) {
2880    case INDEX_op_abs_vec:
2881    case INDEX_op_add_vec:
2882    case INDEX_op_and_vec:
2883    case INDEX_op_andc_vec:
2884    case INDEX_op_bitsel_vec:
2885    case INDEX_op_eqv_vec:
2886    case INDEX_op_nand_vec:
2887    case INDEX_op_neg_vec:
2888    case INDEX_op_nor_vec:
2889    case INDEX_op_not_vec:
2890    case INDEX_op_or_vec:
2891    case INDEX_op_orc_vec:
2892    case INDEX_op_rotli_vec:
2893    case INDEX_op_rotls_vec:
2894    case INDEX_op_rotlv_vec:
2895    case INDEX_op_sari_vec:
2896    case INDEX_op_sars_vec:
2897    case INDEX_op_sarv_vec:
2898    case INDEX_op_shli_vec:
2899    case INDEX_op_shls_vec:
2900    case INDEX_op_shlv_vec:
2901    case INDEX_op_shri_vec:
2902    case INDEX_op_shrs_vec:
2903    case INDEX_op_shrv_vec:
2904    case INDEX_op_smax_vec:
2905    case INDEX_op_smin_vec:
2906    case INDEX_op_sub_vec:
2907    case INDEX_op_umax_vec:
2908    case INDEX_op_umin_vec:
2909    case INDEX_op_xor_vec:
2910        return 1;
2911    case INDEX_op_cmp_vec:
2912    case INDEX_op_cmpsel_vec:
2913    case INDEX_op_rotrv_vec:
2914        return -1;
2915    case INDEX_op_mul_vec:
2916        return vece < MO_64;
2917    case INDEX_op_ssadd_vec:
2918    case INDEX_op_sssub_vec:
2919        return vece < MO_64 ? -1 : 0;
2920    default:
2921        return 0;
2922    }
2923}
2924
2925static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2926                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2927{
2928    bool need_swap = false, need_inv = false;
2929
2930    switch (cond) {
2931    case TCG_COND_EQ:
2932    case TCG_COND_GT:
2933    case TCG_COND_GTU:
2934        break;
2935    case TCG_COND_NE:
2936    case TCG_COND_LE:
2937    case TCG_COND_LEU:
2938        need_inv = true;
2939        break;
2940    case TCG_COND_LT:
2941    case TCG_COND_LTU:
2942        need_swap = true;
2943        break;
2944    case TCG_COND_GE:
2945    case TCG_COND_GEU:
2946        need_swap = need_inv = true;
2947        break;
2948    default:
2949        g_assert_not_reached();
2950    }
2951
2952    if (need_inv) {
2953        cond = tcg_invert_cond(cond);
2954    }
2955    if (need_swap) {
2956        TCGv_vec t1;
2957        t1 = v1, v1 = v2, v2 = t1;
2958        cond = tcg_swap_cond(cond);
2959    }
2960
2961    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2962              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2963
2964    return need_inv;
2965}
2966
2967static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2968                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2969{
2970    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2971        tcg_gen_not_vec(vece, v0, v0);
2972    }
2973}
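
/*
 * E.g. a NE comparison becomes VCEQ followed by a NOT (VNO), and LT
 * becomes GT with swapped operands, since the ISA provides only the
 * EQ/GT/GTU element comparisons.
 */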
2974
2975static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2976                              TCGv_vec c1, TCGv_vec c2,
2977                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2978{
2979    TCGv_vec t = tcg_temp_new_vec(type);
2980
2981    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2982        /* Invert the sense of the compare by swapping arguments.  */
2983        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2984    } else {
2985        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
2986    }
2987    tcg_temp_free_vec(t);
2988}
2989
2990static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
2991                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
2992{
2993    TCGv_vec h1 = tcg_temp_new_vec(type);
2994    TCGv_vec h2 = tcg_temp_new_vec(type);
2995    TCGv_vec l1 = tcg_temp_new_vec(type);
2996    TCGv_vec l2 = tcg_temp_new_vec(type);
2997
2998    tcg_debug_assert(vece < MO_64);
2999
3000    /* Unpack with sign-extension. */
3001    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3002              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3003    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3004              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3005
3006    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3007              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3008    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3009              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3010
3011    /* Arithmetic on a wider element size. */
3012    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3013              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3014    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3015              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3016
3017    /* Pack with saturation. */
3018    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3019              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3020
3021    tcg_temp_free_vec(h1);
3022    tcg_temp_free_vec(h2);
3023    tcg_temp_free_vec(l1);
3024    tcg_temp_free_vec(l2);
3025}
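
/*
 * Schematically, ssadd_vec at MO_8 becomes
 *
 *     vuph.b  h1,v1          vuph.b  h2,v2
 *     vupl.b  l1,v1          vupl.b  l2,v2
 *     va.h    h1,h1,h2       va.h    l1,l1,l2
 *     vpks.h  v0,h1,l1
 *
 * i.e. widen both inputs, add (or subtract) at twice the element size,
 * then pack back down with signed saturation.
 */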
3026
3027void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3028                       TCGArg a0, ...)
3029{
3030    va_list va;
3031    TCGv_vec v0, v1, v2, v3, v4, t0;
3032
3033    va_start(va, a0);
3034    v0 = temp_tcgv_vec(arg_temp(a0));
3035    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3036    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3037
3038    switch (opc) {
3039    case INDEX_op_cmp_vec:
3040        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3041        break;
3042
3043    case INDEX_op_cmpsel_vec:
3044        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3045        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3046        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3047        break;
3048
3049    case INDEX_op_rotrv_vec:
3050        t0 = tcg_temp_new_vec(type);
3051        tcg_gen_neg_vec(vece, t0, v2);
3052        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3053        tcg_temp_free_vec(t0);
3054        break;
3055
3056    case INDEX_op_ssadd_vec:
3057        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3058        break;
3059    case INDEX_op_sssub_vec:
3060        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3061        break;
3062
3063    default:
3064        g_assert_not_reached();
3065    }
3066    va_end(va);
3067}
3068
3069static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3070{
3071    switch (op) {
3072    case INDEX_op_goto_ptr:
3073        return C_O0_I1(r);
3074
3075    case INDEX_op_ld8u_i32:
3076    case INDEX_op_ld8u_i64:
3077    case INDEX_op_ld8s_i32:
3078    case INDEX_op_ld8s_i64:
3079    case INDEX_op_ld16u_i32:
3080    case INDEX_op_ld16u_i64:
3081    case INDEX_op_ld16s_i32:
3082    case INDEX_op_ld16s_i64:
3083    case INDEX_op_ld_i32:
3084    case INDEX_op_ld32u_i64:
3085    case INDEX_op_ld32s_i64:
3086    case INDEX_op_ld_i64:
3087        return C_O1_I1(r, r);
3088
3089    case INDEX_op_st8_i32:
3090    case INDEX_op_st8_i64:
3091    case INDEX_op_st16_i32:
3092    case INDEX_op_st16_i64:
3093    case INDEX_op_st_i32:
3094    case INDEX_op_st32_i64:
3095    case INDEX_op_st_i64:
3096        return C_O0_I2(r, r);
3097
3098    case INDEX_op_add_i32:
3099    case INDEX_op_add_i64:
3100    case INDEX_op_shl_i64:
3101    case INDEX_op_shr_i64:
3102    case INDEX_op_sar_i64:
3103    case INDEX_op_rotl_i32:
3104    case INDEX_op_rotl_i64:
3105    case INDEX_op_rotr_i32:
3106    case INDEX_op_rotr_i64:
3107    case INDEX_op_setcond_i32:
3108        return C_O1_I2(r, r, ri);
3109    case INDEX_op_setcond_i64:
3110        return C_O1_I2(r, r, rA);
3111
3112    case INDEX_op_clz_i64:
3113        return C_O1_I2(r, r, rI);
3114
3115    case INDEX_op_sub_i32:
3116    case INDEX_op_sub_i64:
3117    case INDEX_op_and_i32:
3118    case INDEX_op_or_i32:
3119    case INDEX_op_xor_i32:
3120        return C_O1_I2(r, r, ri);
3121    case INDEX_op_and_i64:
3122        return C_O1_I2(r, r, rNKR);
3123    case INDEX_op_or_i64:
3124    case INDEX_op_xor_i64:
3125        return C_O1_I2(r, r, rK);
3126
3127    case INDEX_op_andc_i32:
3128    case INDEX_op_orc_i32:
3129    case INDEX_op_eqv_i32:
3130        return C_O1_I2(r, r, ri);
3131    case INDEX_op_andc_i64:
3132        return C_O1_I2(r, r, rKR);
3133    case INDEX_op_orc_i64:
3134    case INDEX_op_eqv_i64:
3135        return C_O1_I2(r, r, rNK);
3136
3137    case INDEX_op_nand_i32:
3138    case INDEX_op_nand_i64:
3139    case INDEX_op_nor_i32:
3140    case INDEX_op_nor_i64:
3141        return C_O1_I2(r, r, r);
3142
3143    case INDEX_op_mul_i32:
3144        return (HAVE_FACILITY(MISC_INSN_EXT2)
3145                ? C_O1_I2(r, r, ri)
3146                : C_O1_I2(r, 0, ri));
3147    case INDEX_op_mul_i64:
3148        return (HAVE_FACILITY(MISC_INSN_EXT2)
3149                ? C_O1_I2(r, r, rJ)
3150                : C_O1_I2(r, 0, rJ));
3151
3152    case INDEX_op_shl_i32:
3153    case INDEX_op_shr_i32:
3154    case INDEX_op_sar_i32:
3155        return C_O1_I2(r, r, ri);
3156
3157    case INDEX_op_brcond_i32:
3158        return C_O0_I2(r, ri);
3159    case INDEX_op_brcond_i64:
3160        return C_O0_I2(r, rA);
3161
3162    case INDEX_op_bswap16_i32:
3163    case INDEX_op_bswap16_i64:
3164    case INDEX_op_bswap32_i32:
3165    case INDEX_op_bswap32_i64:
3166    case INDEX_op_bswap64_i64:
3167    case INDEX_op_neg_i32:
3168    case INDEX_op_neg_i64:
3169    case INDEX_op_not_i32:
3170    case INDEX_op_not_i64:
3171    case INDEX_op_ext8s_i32:
3172    case INDEX_op_ext8s_i64:
3173    case INDEX_op_ext8u_i32:
3174    case INDEX_op_ext8u_i64:
3175    case INDEX_op_ext16s_i32:
3176    case INDEX_op_ext16s_i64:
3177    case INDEX_op_ext16u_i32:
3178    case INDEX_op_ext16u_i64:
3179    case INDEX_op_ext32s_i64:
3180    case INDEX_op_ext32u_i64:
3181    case INDEX_op_ext_i32_i64:
3182    case INDEX_op_extu_i32_i64:
3183    case INDEX_op_extract_i32:
3184    case INDEX_op_extract_i64:
3185    case INDEX_op_ctpop_i32:
3186    case INDEX_op_ctpop_i64:
3187        return C_O1_I1(r, r);
3188
3189    case INDEX_op_qemu_ld_i32:
3190    case INDEX_op_qemu_ld_i64:
3191        return C_O1_I1(r, L);
3192    case INDEX_op_qemu_st_i64:
3193    case INDEX_op_qemu_st_i32:
3194        return C_O0_I2(L, L);
3195
3196    case INDEX_op_deposit_i32:
3197    case INDEX_op_deposit_i64:
3198        return C_O1_I2(r, rZ, r);
3199
3200    case INDEX_op_movcond_i32:
3201        return C_O1_I4(r, r, ri, rI, r);
3202    case INDEX_op_movcond_i64:
3203        return C_O1_I4(r, r, rA, rI, r);
3204
3205    case INDEX_op_div2_i32:
3206    case INDEX_op_div2_i64:
3207    case INDEX_op_divu2_i32:
3208    case INDEX_op_divu2_i64:
3209        return C_O2_I3(o, m, 0, 1, r);
3210
3211    case INDEX_op_mulu2_i64:
3212        return C_O2_I2(o, m, 0, r);
3213    case INDEX_op_muls2_i64:
3214        return C_O2_I2(o, m, r, r);
3215
3216    case INDEX_op_add2_i32:
3217    case INDEX_op_sub2_i32:
3218        return C_O2_I4(r, r, 0, 1, ri, r);
3219
3220    case INDEX_op_add2_i64:
3221    case INDEX_op_sub2_i64:
3222        return C_O2_I4(r, r, 0, 1, rA, r);
3223
3224    case INDEX_op_st_vec:
3225        return C_O0_I2(v, r);
3226    case INDEX_op_ld_vec:
3227    case INDEX_op_dupm_vec:
3228        return C_O1_I1(v, r);
3229    case INDEX_op_dup_vec:
3230        return C_O1_I1(v, vr);
3231    case INDEX_op_abs_vec:
3232    case INDEX_op_neg_vec:
3233    case INDEX_op_not_vec:
3234    case INDEX_op_rotli_vec:
3235    case INDEX_op_sari_vec:
3236    case INDEX_op_shli_vec:
3237    case INDEX_op_shri_vec:
3238    case INDEX_op_s390_vuph_vec:
3239    case INDEX_op_s390_vupl_vec:
3240        return C_O1_I1(v, v);
3241    case INDEX_op_add_vec:
3242    case INDEX_op_sub_vec:
3243    case INDEX_op_and_vec:
3244    case INDEX_op_andc_vec:
3245    case INDEX_op_or_vec:
3246    case INDEX_op_orc_vec:
3247    case INDEX_op_xor_vec:
3248    case INDEX_op_nand_vec:
3249    case INDEX_op_nor_vec:
3250    case INDEX_op_eqv_vec:
3251    case INDEX_op_cmp_vec:
3252    case INDEX_op_mul_vec:
3253    case INDEX_op_rotlv_vec:
3254    case INDEX_op_rotrv_vec:
3255    case INDEX_op_shlv_vec:
3256    case INDEX_op_shrv_vec:
3257    case INDEX_op_sarv_vec:
3258    case INDEX_op_smax_vec:
3259    case INDEX_op_smin_vec:
3260    case INDEX_op_umax_vec:
3261    case INDEX_op_umin_vec:
3262    case INDEX_op_s390_vpks_vec:
3263        return C_O1_I2(v, v, v);
3264    case INDEX_op_rotls_vec:
3265    case INDEX_op_shls_vec:
3266    case INDEX_op_shrs_vec:
3267    case INDEX_op_sars_vec:
3268        return C_O1_I2(v, v, r);
3269    case INDEX_op_bitsel_vec:
3270        return C_O1_I3(v, v, v, v);
3271
3272    default:
3273        g_assert_not_reached();
3274    }
3275}
3276
3277/*
3278 * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
3279 * Some distros have fixed this up locally, others have not.
3280 */
3281#ifndef HWCAP_S390_VXRS
3282#define HWCAP_S390_VXRS 2048
3283#endif
3284
3285static void query_s390_facilities(void)
3286{
3287    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3288    const char *which;
3289
3290    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3291       is present on all 64-bit systems, but let's check for it anyway.  */
3292    if (hwcap & HWCAP_S390_STFLE) {
3293        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3294        register void *r1 __asm__("1") = s390_facilities;
3295
3296        /* stfle 0(%r1) */
3297        asm volatile(".word 0xb2b0,0x1000"
3298                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3299    }
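
    /*
     * STFLE stores r0 + 1 doublewords of facility bits, numbered from
     * the most-significant bit of the first doubleword (facility 0)
     * downward; the HAVE_FACILITY tests index into this array.
     */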
3300
3301    /*
3302     * Use of vector registers requires OS support beyond the facility bit.
3303     * If the kernel does not advertise support, disable the facility bits.
3304     * There is nothing else we currently care about in the 3rd word, so
3305     * disable VECTOR with one store.
3306     */
3307    if (!(hwcap & HWCAP_S390_VXRS)) {
3308        s390_facilities[2] = 0;
3309    }
3310
3311    /*
3312     * Minimum supported cpu revision is z196.
3313     * Check for all required facilities.
3314     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3315     */
3316    if (!HAVE_FACILITY(LONG_DISP)) {
3317        which = "long-displacement";
3318        goto fail;
3319    }
3320    if (!HAVE_FACILITY(EXT_IMM)) {
3321        which = "extended-immediate";
3322        goto fail;
3323    }
3324    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3325        which = "general-instructions-extension";
3326        goto fail;
3327    }
3328    /*
3329     * Facility 45 is a big bin that contains: distinct-operands,
3330     * fast-BCR-serialization, high-word, population-count,
3331     * interlocked-access-1, and load/store-on-condition-1
3332     */
3333    if (!HAVE_FACILITY(45)) {
3334        which = "45";
3335        goto fail;
3336    }
3337    return;
3338
3339 fail:
3340    error_report("%s: missing required facility %s", __func__, which);
3341    exit(EXIT_FAILURE);
3342}
3343
3344static void tcg_target_init(TCGContext *s)
3345{
3346    query_s390_facilities();
3347
3348    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3349    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3350    if (HAVE_FACILITY(VECTOR)) {
3351        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3352        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3353    }
3354
3355    tcg_target_call_clobber_regs = 0;
3356    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3357    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3358    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3359    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3360    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3361    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3362    /* The r6 register is technically call-saved, but it's also a parameter
3363       register, so it can get killed by setup for the qemu_st helper.  */
3364    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3365    /* The return-address register %r14 can be considered call-clobbered.  */
3366    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3367
3368    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3369    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3370    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3371    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3372    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3375    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3376    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3377    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3379    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3380    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3381    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3382    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3383    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3384    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3385    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3386    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3387    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3388    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3389    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3390    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3391    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3392
3393    s->reserved_regs = 0;
3394    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3395    /* XXX many insns can't be used with R0, so we'd better avoid it for now.  */
3396    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3397    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3398}
3399
3400#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3401                           + TCG_STATIC_CALL_ARGS_SIZE           \
3402                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
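/*
 * As a worked example, with the common TCG values (a 160-byte s390x ELF
 * ABI register save area for TCG_TARGET_CALL_STACK_OFFSET, 128 bytes of
 * TCG_STATIC_CALL_ARGS_SIZE, and 128 longs of temp buffer) this comes to
 * 160 + 128 + 1024 = 1312 bytes; treat those figures as assumptions,
 * since the constants are defined elsewhere.
 */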
3403
3404static void tcg_target_qemu_prologue(TCGContext *s)
3405{
3406    /* stmg %r6,%r15,48(%r15) (save registers) */
3407    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3408
3409    /* aghi %r15,-frame_size */
3410    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3411
3412    tcg_set_frame(s, TCG_REG_CALL_STACK,
3413                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3414                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3415
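    /*
     * A guest_base below 0x80000 fits in the signed 20-bit displacement
     * field of the long-displacement memory instructions and can be
     * folded into each access directly; only larger values need to be
     * kept in a reserved register.
     */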
3416#ifndef CONFIG_SOFTMMU
3417    if (guest_base >= 0x80000) {
3418        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3419        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3420    }
3421#endif
3422
3423    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3424
3425    /* br %r3 (go to TB) */
3426    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3427
3428    /*
3429     * Return path for goto_ptr.  Set the return value to 0, à la exit_tb,
3430     * and fall through to the rest of the epilogue.
3431     */
3432    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3433    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3434
3435    /* TB epilogue */
3436    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3437
3438    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3439    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3440                 FRAME_SIZE + 48);
3441
3442    /* br %r14 (return) */
3443    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3444}
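/*
 * Roughly, the prologue and epilogue emitted above are (FS = FRAME_SIZE;
 * the exact mnemonics and register choices are illustrative, since
 * tcg_out_movi/tcg_out_mov may pick different encodings):
 *
 *     stmg  %r6,%r15,48(%r15)      # save call-saved regs in caller frame
 *     aghi  %r15,-FS               # allocate our frame
 *     [load guest_base into %r13]  # user-only, if guest_base >= 0x80000
 *     lgr   %r10,%r2               # first argument (env) -> TCG_AREG0
 *     br    %r3                    # enter the translation block
 * tcg_code_gen_epilogue:
 *     lghi  %r2,0                  # return value 0, as for exit_tb
 * tb_ret_addr:
 *     lmg   %r6,%r15,FS+48(%r15)   # restore regs, including the old %r15
 *     br    %r14                   # return to the caller of the prologue
 */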
3445
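/*
 * Fill with 0x07 bytes: each resulting 0x0707 halfword decodes as
 * "bcr 0,%r7", the canonical s390x nop ("nopr %r7").
 */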
3446static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3447{
3448    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3449}
3450
3451typedef struct {
3452    DebugFrameHeader h;
3453    uint8_t fde_def_cfa[4];
3454    uint8_t fde_reg_ofs[18];
3455} DebugFrame;
3456
3457/* We're expecting a 2-byte uleb128-encoded value.  */
3458QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
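/*
 * A worked example of the hand-rolled uleb128 in fde_def_cfa below: a
 * frame size of 1312 (0x520) splits into its low seven bits 0x20 with
 * the continuation bit set (0xa0), followed by 1312 >> 7 = 10 (0x0a).
 */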
3459
3460#define ELF_HOST_MACHINE  EM_S390
3461
3462static const DebugFrame debug_frame = {
3463    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3464    .h.cie.id = -1,
3465    .h.cie.version = 1,
3466    .h.cie.code_align = 1,
3467    .h.cie.data_align = 8,                /* sleb128 8 */
3468    .h.cie.return_column = TCG_REG_R14,
3469
3470    /* Total FDE size does not include the "len" member.  */
3471    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3472
3473    .fde_def_cfa = {
3474        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3475        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3476        (FRAME_SIZE >> 7)
3477    },
3478    .fde_reg_ofs = {
3479        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3480        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3481        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3482        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
3483        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3484        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3485        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3486        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3487        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3488    }
3489};
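/*
 * Note on the encoding above: each DW_CFA_offset entry is (0x80 | regno)
 * followed by a factored offset that is scaled by .h.cie.data_align.
 * With data_align = 8, the factored value 6 for %r6 denotes CFA+48,
 * matching the stmg in the prologue.
 */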
3490
3491void tcg_register_jit(const void *buf, size_t buf_size)
3492{
3493    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3494}
3495