xref: /qemu/tcg/s390x/tcg-target.c.inc (revision 7653b1ea)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27#include "../tcg-ldst.c.inc"
28#include "../tcg-pool.c.inc"
29#include "elf.h"
30
/*
 * Backend-specific constant constraint flags.  Each one is tested by
 * tcg_target_const_match() against a candidate immediate operand.
 */
#define TCG_CT_CONST_S16       (1 << 8)   /* fits a signed 16-bit value */
#define TCG_CT_CONST_S32       (1 << 9)   /* fits a signed 32-bit value */
#define TCG_CT_CONST_U32       (1 << 10)  /* fits an unsigned 32-bit value */
#define TCG_CT_CONST_ZERO      (1 << 11)  /* is exactly zero */
#define TCG_CT_CONST_P32       (1 << 12)  /* at most one 32-bit half non-zero */
#define TCG_CT_CONST_INV       (1 << 13)  /* complement before P32/INVRISBG */
#define TCG_CT_CONST_INVRISBG  (1 << 14)  /* complement passes risbg_mask() */
#define TCG_CT_CONST_CMP       (1 << 15)  /* accepted range depends on cond */
39
/* Register-set masks: bits 0-15 are the general registers, bits 32-63
   are the vector registers (matching the TCG register numbering).  */
#define ALL_GENERAL_REGS        MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS         MAKE_64BIT_MASK(32, 32)
42
43/* Several places within the instruction set 0 means "no register"
44   rather than TCG_REG_R0.  */
45#define TCG_REG_NONE    0
46
/* A scratch register that may be used throughout the backend.  */
48#define TCG_TMP0        TCG_REG_R1
49
50#define TCG_GUEST_BASE_REG TCG_REG_R13
51
/* Opcode constants.  Every name starts with its instruction format, and
   every value is the 8- or 16-bit opcode exactly as listed in the
   Appendix B tables, even for formats where the two opcode bytes end up
   32 bits apart in the encoded instruction.  Keeping the table form
   makes the values easy to cross-check.  */
typedef enum S390Opcode {
    /* RIL: register + 32-bit immediate (see tcg_out_insn_RIL). */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI: register + 16-bit immediate (see tcg_out_insn_RI). */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,
    RI_TMLH     = 0xa700,
    RI_TMHL     = 0xa703,
    RI_TMHH     = 0xa702,

    /* RIE-b: compare two registers and branch. */
    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    /* RIE-c: compare register with immediate and branch. */
    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    /* RIE-f */
    RIEf_RISBG   = 0xec55,

    /* RIE-g (see tcg_out_insn_RIEg). */
    RIEg_LOCGHI  = 0xec46,

    /* RRE: two registers, 16-bit opcode (see tcg_out_insn_RRE). */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_ALGFR   = 0xb91a,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF-a without m4: three registers (see tcg_out_insn_RRFa). */
    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    /* RRF-a with m4 (see tcg_out_insn_RRFam). */
    RRFam_SELGR = 0xb9e3,

    /* RRF-c: two registers plus m3 (see tcg_out_insn_RRFc). */
    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    /* RR: two registers, 8-bit opcode (see tcg_out_insn_RR). */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY: 20-bit displacement (see tcg_out_insn_RSY). */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* RS: 12-bit displacement (see tcg_out_insn_RS). */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY: emitted through the RSY encoder (tcg_out_insn_RXY alias). */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LPQ     = 0xe38f,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STPQ    = 0xe38e,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* RX: emitted through the RS encoder (tcg_out_insn_RX alias). */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* VRI-a/b/c: vector register with immediate. */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* VRR-a/c/e/f: vector register-to-register. */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* VRS-a/b/c: vector with base + 12-bit displacement. */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* VRX: vector load/store with base + index + displacement. */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /* A complete 16-bit instruction rather than a bare opcode: in RR
       layout this is RR_BCR (0x07) with first field 0 and second 7.  */
    NOP         = 0x0707,
} S390Opcode;
322
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by TCG register number.
 * Slots 0-15 are the general registers, 16-31 have no name,
 * and 32-63 are the vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* 0-15: general registers */
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",
    /* 16-31: unnamed */
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,
    0, 0, 0, 0,
    /* 32-63: vector registers */
    "%v0",  "%v1",  "%v2",  "%v3",
    "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11",
    "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19",
    "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27",
    "%v28", "%v29", "%v30", "%v31",
};
#endif
334
335/* Since R6 is a potential argument register, choose it last of the
336   call-saved registers.  Likewise prefer the call-clobbered registers
337   in reverse order to maximize the chance of avoiding the arguments.  */
338static const int tcg_target_reg_alloc_order[] = {
339    /* Call saved registers.  */
340    TCG_REG_R13,
341    TCG_REG_R12,
342    TCG_REG_R11,
343    TCG_REG_R10,
344    TCG_REG_R9,
345    TCG_REG_R8,
346    TCG_REG_R7,
347    TCG_REG_R6,
348    /* Call clobbered registers.  */
349    TCG_REG_R14,
350    TCG_REG_R0,
351    TCG_REG_R1,
352    /* Argument registers, in reverse order of allocation.  */
353    TCG_REG_R5,
354    TCG_REG_R4,
355    TCG_REG_R3,
356    TCG_REG_R2,
357
358    /* V8-V15 are call saved, and omitted. */
359    TCG_REG_V0,
360    TCG_REG_V1,
361    TCG_REG_V2,
362    TCG_REG_V3,
363    TCG_REG_V4,
364    TCG_REG_V5,
365    TCG_REG_V6,
366    TCG_REG_V7,
367    TCG_REG_V16,
368    TCG_REG_V17,
369    TCG_REG_V18,
370    TCG_REG_V19,
371    TCG_REG_V20,
372    TCG_REG_V21,
373    TCG_REG_V22,
374    TCG_REG_V23,
375    TCG_REG_V24,
376    TCG_REG_V25,
377    TCG_REG_V26,
378    TCG_REG_V27,
379    TCG_REG_V28,
380    TCG_REG_V29,
381    TCG_REG_V30,
382    TCG_REG_V31,
383};
384
385static const int tcg_target_call_iarg_regs[] = {
386    TCG_REG_R2,
387    TCG_REG_R3,
388    TCG_REG_R4,
389    TCG_REG_R5,
390    TCG_REG_R6,
391};
392
393static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
394{
395    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
396    tcg_debug_assert(slot == 0);
397    return TCG_REG_R2;
398}
399
/*
 * 4-bit condition masks for conditional instructions.  The names give
 * each bit the meaning it has after a compare; the composite masks are
 * simply ORs of the basic ones.
 */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15

/* Masks for use after a test-under-mask style instruction.  */
#define S390_TM_EQ      8  /* CC == 0 */
#define S390_TM_NE      7  /* CC in {1,2,3} */
412
413/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
414static const uint8_t tcg_cond_to_s390_cond[16] = {
415    [TCG_COND_EQ]  = S390_CC_EQ,
416    [TCG_COND_NE]  = S390_CC_NE,
417    [TCG_COND_TSTEQ] = S390_CC_EQ,
418    [TCG_COND_TSTNE] = S390_CC_NE,
419    [TCG_COND_LT]  = S390_CC_LT,
420    [TCG_COND_LE]  = S390_CC_LE,
421    [TCG_COND_GT]  = S390_CC_GT,
422    [TCG_COND_GE]  = S390_CC_GE,
423    [TCG_COND_LTU] = S390_CC_LT,
424    [TCG_COND_LEU] = S390_CC_LE,
425    [TCG_COND_GTU] = S390_CC_GT,
426    [TCG_COND_GEU] = S390_CC_GE,
427};
428
429/* Condition codes that result from a LOAD AND TEST.  Here, we have no
430   unsigned instruction variation, however since the test is vs zero we
431   can re-map the outcomes appropriately.  */
432static const uint8_t tcg_cond_to_ltr_cond[16] = {
433    [TCG_COND_EQ]  = S390_CC_EQ,
434    [TCG_COND_NE]  = S390_CC_NE,
435    [TCG_COND_TSTEQ] = S390_CC_ALWAYS,
436    [TCG_COND_TSTNE] = S390_CC_NEVER,
437    [TCG_COND_LT]  = S390_CC_LT,
438    [TCG_COND_LE]  = S390_CC_LE,
439    [TCG_COND_GT]  = S390_CC_GT,
440    [TCG_COND_GE]  = S390_CC_GE,
441    [TCG_COND_LTU] = S390_CC_NEVER,
442    [TCG_COND_LEU] = S390_CC_EQ,
443    [TCG_COND_GTU] = S390_CC_NE,
444    [TCG_COND_GEU] = S390_CC_ALWAYS,
445};
446
447static const tcg_insn_unit *tb_ret_addr;
448uint64_t s390_facilities[3];
449
450static inline bool is_general_reg(TCGReg r)
451{
452    return r <= TCG_REG_R15;
453}
454
455static inline bool is_vector_reg(TCGReg r)
456{
457    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
458}
459
460static bool patch_reloc(tcg_insn_unit *src_rw, int type,
461                        intptr_t value, intptr_t addend)
462{
463    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
464    intptr_t pcrel2;
465    uint32_t old;
466
467    value += addend;
468    pcrel2 = (tcg_insn_unit *)value - src_rx;
469
470    switch (type) {
471    case R_390_PC16DBL:
472        if (pcrel2 == (int16_t)pcrel2) {
473            tcg_patch16(src_rw, pcrel2);
474            return true;
475        }
476        break;
477    case R_390_PC32DBL:
478        if (pcrel2 == (int32_t)pcrel2) {
479            tcg_patch32(src_rw, pcrel2);
480            return true;
481        }
482        break;
483    case R_390_20:
484        if (value == sextract64(value, 0, 20)) {
485            old = *(uint32_t *)src_rw & 0xf00000ff;
486            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
487            tcg_patch32(src_rw, old);
488            return true;
489        }
490        break;
491    default:
492        g_assert_not_reached();
493    }
494    return false;
495}
496
/*
 * If every set bit of VAL lies inside one aligned 16-bit chunk, return
 * the index of that chunk (0 = bits 0-15 ... 3 = bits 48-63); zero
 * yields 0.  Otherwise return -1.
 */
static int is_const_p16(uint64_t val)
{
    int chunk = 0;

    while (chunk < 4) {
        uint64_t outside = ~(0xffffull << (chunk * 16));

        if (!(val & outside)) {
            return chunk;
        }
        chunk++;
    }
    return -1;
}
507
/*
 * If every set bit of VAL lies inside one 32-bit half, return which
 * half: 0 for the low half (also for zero), 1 for the high half.
 * Otherwise return -1.
 */
static int is_const_p32(uint64_t val)
{
    const uint64_t high_half = 0xffffffff00000000ull;
    const uint64_t low_half = 0x00000000ffffffffull;

    if (!(val & high_half)) {
        return 0;
    }
    return (val & low_half) ? -1 : 1;
}
518
/*
 * Decide whether C is a single run of ones, where the run is allowed to
 * wrap around from bit 63 to bit 0.  Accepted shapes:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * All-zeros and all-ones are rejected.  Algorithm copied from gcc.
 */
static bool risbg_mask(uint64_t c)
{
    uint64_t low_bit;
    uint64_t rest;

    /* Complementing keeps the number of 0/1 transitions, so normalise
       to a value whose least significant bit is zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* After normalising, all-zeros and all-ones both arrive as zero.  */
    if (c == 0) {
        return false;
    }
    /* Lowest set bit: where the first transition happens.  */
    low_bit = c & -c;
    /* Complement and drop everything below the first transition, so
       that a second transition shows up as the lowest set bit.  */
    rest = ~c & -low_bit;
    low_bit = rest & -rest;
    /* Accept when REST is ones from that bit upward, or is zero.  */
    return rest == -low_bit;
}
550
551/* Test if a constant matches the constraint. */
552static bool tcg_target_const_match(int64_t val, int ct,
553                                   TCGType type, TCGCond cond, int vece)
554{
555    uint64_t uval = val;
556
557    if (ct & TCG_CT_CONST) {
558        return true;
559    }
560    if (type == TCG_TYPE_I32) {
561        uval = (uint32_t)val;
562        val = (int32_t)val;
563    }
564
565    if (ct & TCG_CT_CONST_CMP) {
566        switch (cond) {
567        case TCG_COND_EQ:
568        case TCG_COND_NE:
569            ct |= TCG_CT_CONST_S32 | TCG_CT_CONST_U32;  /* CGFI or CLGFI */
570            break;
571        case TCG_COND_LT:
572        case TCG_COND_GE:
573        case TCG_COND_LE:
574        case TCG_COND_GT:
575            ct |= TCG_CT_CONST_S32;  /* CGFI */
576            break;
577        case TCG_COND_LTU:
578        case TCG_COND_GEU:
579        case TCG_COND_LEU:
580        case TCG_COND_GTU:
581            ct |= TCG_CT_CONST_U32;  /* CLGFI */
582            break;
583        case TCG_COND_TSTNE:
584        case TCG_COND_TSTEQ:
585            if (is_const_p16(uval) >= 0) {
586                return true;  /* TMxx */
587            }
588            if (risbg_mask(uval)) {
589                return true;  /* RISBG */
590            }
591            break;
592        default:
593            g_assert_not_reached();
594        }
595    }
596
597    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
598        return true;
599    }
600    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
601        return true;
602    }
603    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
604        return true;
605    }
606    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
607        return true;
608    }
609
610    if (ct & TCG_CT_CONST_INV) {
611        val = ~val;
612    }
613    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
614        return true;
615    }
616    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
617        return true;
618    }
619    return false;
620}
621
622/* Emit instructions according to the given instruction format.  */
623
624static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
625{
626    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
627}
628
629static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
630                             TCGReg r1, TCGReg r2)
631{
632    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
633}
634
635/* RRF-a without the m4 field */
636static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
637                              TCGReg r1, TCGReg r2, TCGReg r3)
638{
639    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
640}
641
642/* RRF-a with the m4 field */
643static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
644                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
645{
646    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
647}
648
649static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
650                              TCGReg r1, TCGReg r2, int m3)
651{
652    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
653}
654
655static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
656{
657    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
658}
659
660static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
661                             int i2, int m3)
662{
663    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
664    tcg_out32(s, (i2 << 16) | (op & 0xff));
665}
666
667static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
668{
669    tcg_out16(s, op | (r1 << 4));
670    tcg_out32(s, i2);
671}
672
673static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
674                            TCGReg b2, TCGReg r3, int disp)
675{
676    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
677              | (disp & 0xfff));
678}
679
680static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
681                             TCGReg b2, TCGReg r3, int disp)
682{
683    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
684    tcg_out32(s, (op & 0xff) | (b2 << 28)
685              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
686}
687
/* RX shares the RS encoder and RXY shares the RSY encoder; the argument
   in the r3 position is then the index register.  */
#define tcg_out_insn_RX     tcg_out_insn_RS
#define tcg_out_insn_RXY    tcg_out_insn_RSY
690
691static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
692{
693    /*
694     * Shift bit 4 of each regno to its corresponding bit of RXB.
695     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
696     * is the left-shift of the 4th operand.
697     */
698    return ((v1 & 0x10) << (4 + 3))
699         | ((v2 & 0x10) << (4 + 2))
700         | ((v3 & 0x10) << (4 + 1))
701         | ((v4 & 0x10) << (4 + 0));
702}
703
704static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
705                              TCGReg v1, uint16_t i2, int m3)
706{
707    tcg_debug_assert(is_vector_reg(v1));
708    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
709    tcg_out16(s, i2);
710    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
711}
712
713static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
714                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
715{
716    tcg_debug_assert(is_vector_reg(v1));
717    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
718    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
719    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
720}
721
722static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
723                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
724{
725    tcg_debug_assert(is_vector_reg(v1));
726    tcg_debug_assert(is_vector_reg(v3));
727    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
728    tcg_out16(s, i2);
729    tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
730}
731
732static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
733                              TCGReg v1, TCGReg v2, int m3)
734{
735    tcg_debug_assert(is_vector_reg(v1));
736    tcg_debug_assert(is_vector_reg(v2));
737    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
738    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
739}
740
741static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
742                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
743{
744    tcg_debug_assert(is_vector_reg(v1));
745    tcg_debug_assert(is_vector_reg(v2));
746    tcg_debug_assert(is_vector_reg(v3));
747    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
748    tcg_out16(s, v3 << 12);
749    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
750}
751
752static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
753                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
754{
755    tcg_debug_assert(is_vector_reg(v1));
756    tcg_debug_assert(is_vector_reg(v2));
757    tcg_debug_assert(is_vector_reg(v3));
758    tcg_debug_assert(is_vector_reg(v4));
759    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
760    tcg_out16(s, v3 << 12);
761    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
762}
763
764static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
765                              TCGReg v1, TCGReg r2, TCGReg r3)
766{
767    tcg_debug_assert(is_vector_reg(v1));
768    tcg_debug_assert(is_general_reg(r2));
769    tcg_debug_assert(is_general_reg(r3));
770    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
771    tcg_out16(s, r3 << 12);
772    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
773}
774
775static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
776                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
777{
778    tcg_debug_assert(is_vector_reg(v1));
779    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
780    tcg_debug_assert(is_general_reg(b2));
781    tcg_debug_assert(is_vector_reg(v3));
782    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
783    tcg_out16(s, b2 << 12 | d2);
784    tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
785}
786
787static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
788                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
789{
790    tcg_debug_assert(is_vector_reg(v1));
791    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
792    tcg_debug_assert(is_general_reg(b2));
793    tcg_debug_assert(is_general_reg(r3));
794    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
795    tcg_out16(s, b2 << 12 | d2);
796    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
797}
798
799static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
800                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
801{
802    tcg_debug_assert(is_general_reg(r1));
803    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
804    tcg_debug_assert(is_general_reg(b2));
805    tcg_debug_assert(is_vector_reg(v3));
806    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
807    tcg_out16(s, b2 << 12 | d2);
808    tcg_out16(s, (op & 0x00ff) | RXB(0, v3, 0, 0) | (m4 << 12));
809}
810
811static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
812                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
813{
814    tcg_debug_assert(is_vector_reg(v1));
815    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
816    tcg_debug_assert(is_general_reg(x2));
817    tcg_debug_assert(is_general_reg(b2));
818    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
819    tcg_out16(s, (b2 << 12) | d2);
820    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
821}
822
/* Emit an opcode with "type-checking" of the format: FORMAT selects the
   tcg_out_insn_<FORMAT> encoder and is also pasted onto MNEMONIC to
   name the <FORMAT>_<MNEMONIC> opcode constant, so an opcode cannot be
   paired with the wrong encoder.  */
#define tcg_out_insn(CTX, FORMAT, MNEMONIC, ...) \
    glue(tcg_out_insn_,FORMAT)(CTX, glue(glue(FORMAT,_),MNEMONIC), ## __VA_ARGS__)
826
827
828/* emit 64-bit shifts */
829static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
830                         TCGReg src, TCGReg sh_reg, int sh_imm)
831{
832    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
833}
834
835/* emit 32-bit shifts */
836static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
837                         TCGReg sh_reg, int sh_imm)
838{
839    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
840}
841
842static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
843{
844    if (src == dst) {
845        return true;
846    }
847    switch (type) {
848    case TCG_TYPE_I32:
849        if (likely(is_general_reg(dst) && is_general_reg(src))) {
850            tcg_out_insn(s, RR, LR, dst, src);
851            break;
852        }
853        /* fallthru */
854
855    case TCG_TYPE_I64:
856        if (likely(is_general_reg(dst))) {
857            if (likely(is_general_reg(src))) {
858                tcg_out_insn(s, RRE, LGR, dst, src);
859            } else {
860                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
861            }
862            break;
863        } else if (is_general_reg(src)) {
864            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
865            break;
866        }
867        /* fallthru */
868
869    case TCG_TYPE_V64:
870    case TCG_TYPE_V128:
871        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
872        break;
873
874    default:
875        g_assert_not_reached();
876    }
877    return true;
878}
879
880static const S390Opcode li_insns[4] = {
881    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
882};
883static const S390Opcode oi_insns[4] = {
884    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
885};
886static const S390Opcode lif_insns[2] = {
887    RIL_LLILF, RIL_LLIHF,
888};
889static const S390Opcode tm_insns[4] = {
890    RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
891};
892
893/* load a register with an immediate value */
894static void tcg_out_movi(TCGContext *s, TCGType type,
895                         TCGReg ret, tcg_target_long sval)
896{
897    tcg_target_ulong uval = sval;
898    ptrdiff_t pc_off;
899    int i;
900
901    if (type == TCG_TYPE_I32) {
902        uval = (uint32_t)sval;
903        sval = (int32_t)sval;
904    }
905
906    /* Try all 32-bit insns that can load it in one go.  */
907    if (sval >= -0x8000 && sval < 0x8000) {
908        tcg_out_insn(s, RI, LGHI, ret, sval);
909        return;
910    }
911
912    i = is_const_p16(uval);
913    if (i >= 0) {
914        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
915        return;
916    }
917
918    /* Try all 48-bit insns that can load it in one go.  */
919    if (sval == (int32_t)sval) {
920        tcg_out_insn(s, RIL, LGFI, ret, sval);
921        return;
922    }
923
924    i = is_const_p32(uval);
925    if (i >= 0) {
926        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
927        return;
928    }
929
930    /* Try for PC-relative address load.  For odd addresses, add one. */
931    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
932    if (pc_off == (int32_t)pc_off) {
933        tcg_out_insn(s, RIL, LARL, ret, pc_off);
934        if (sval & 1) {
935            tcg_out_insn(s, RI, AGHI, ret, 1);
936        }
937        return;
938    }
939
940    /* Otherwise, load it by parts. */
941    i = is_const_p16((uint32_t)uval);
942    if (i >= 0) {
943        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
944    } else {
945        tcg_out_insn(s, RIL, LLILF, ret, uval);
946    }
947    uval >>= 32;
948    i = is_const_p16(uval);
949    if (i >= 0) {
950        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
951    } else {
952        tcg_out_insn(s, RIL, OIHF, ret, uval);
953    }
954}
955
956/* Emit a load/store type instruction.  Inputs are:
957   DATA:     The register to be loaded or stored.
958   BASE+OFS: The effective address.
959   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
960   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
961
962static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
963                        TCGReg data, TCGReg base, TCGReg index,
964                        tcg_target_long ofs)
965{
966    if (ofs < -0x80000 || ofs >= 0x80000) {
967        /* Combine the low 20 bits of the offset with the actual load insn;
968           the high 44 bits must come from an immediate load.  */
969        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
970        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
971        ofs = low;
972
973        /* If we were already given an index register, add it in.  */
974        if (index != TCG_REG_NONE) {
975            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
976        }
977        index = TCG_TMP0;
978    }
979
980    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
981        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
982    } else {
983        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
984    }
985}
986
987static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
988                            TCGReg data, TCGReg base, TCGReg index,
989                            tcg_target_long ofs, int m3)
990{
991    if (ofs < 0 || ofs >= 0x1000) {
992        if (ofs >= -0x80000 && ofs < 0x80000) {
993            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
994            base = TCG_TMP0;
995            index = TCG_REG_NONE;
996            ofs = 0;
997        } else {
998            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
999            if (index != TCG_REG_NONE) {
1000                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
1001            }
1002            index = TCG_TMP0;
1003            ofs = 0;
1004        }
1005    }
1006    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1007}
1008
1009/* load data without address translation or endianness conversion */
1010static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1011                       TCGReg base, intptr_t ofs)
1012{
1013    switch (type) {
1014    case TCG_TYPE_I32:
1015        if (likely(is_general_reg(data))) {
1016            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1017            break;
1018        }
1019        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1020        break;
1021
1022    case TCG_TYPE_I64:
1023        if (likely(is_general_reg(data))) {
1024            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1025            break;
1026        }
1027        /* fallthru */
1028
1029    case TCG_TYPE_V64:
1030        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1031        break;
1032
1033    case TCG_TYPE_V128:
1034        /* Hint quadword aligned.  */
1035        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1036        break;
1037
1038    default:
1039        g_assert_not_reached();
1040    }
1041}
1042
1043static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1044                       TCGReg base, intptr_t ofs)
1045{
1046    switch (type) {
1047    case TCG_TYPE_I32:
1048        if (likely(is_general_reg(data))) {
1049            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1050        } else {
1051            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1052        }
1053        break;
1054
1055    case TCG_TYPE_I64:
1056        if (likely(is_general_reg(data))) {
1057            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1058            break;
1059        }
1060        /* fallthru */
1061
1062    case TCG_TYPE_V64:
1063        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1064        break;
1065
1066    case TCG_TYPE_V128:
1067        /* Hint quadword aligned.  */
1068        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1069        break;
1070
1071    default:
1072        g_assert_not_reached();
1073    }
1074}
1075
/*
 * Store-immediate hook.  This implementation emits nothing and always
 * returns false; all arguments are unused.
 */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1081
/*
 * Register-exchange hook.  This implementation emits nothing and always
 * returns false; all arguments are unused.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1086
/*
 * Compute RD = RS + IMM as an address calculation: LA/LAY is emitted
 * through tcg_out_mem, which may overwrite TCG_TMP0 when IMM does not
 * fit a 20-bit displacement.
 */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
}
1093
1094static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1095                                 int msb, int lsb, int ofs, int z)
1096{
1097    /* Format RIE-f */
1098    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1099    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1100    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1101}
1102
/*
 * Sign-extend the low 8 bits of SRC into DEST by emitting LGBR.
 * TYPE is unused: the same instruction is emitted for every type.
 */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGBR, dest, src);
}
1107
/* Zero-extend the low 8 bits of SRC into DEST by emitting LLGCR. */
static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGCR, dest, src);
}
1112
/*
 * Sign-extend the low 16 bits of SRC into DEST by emitting LGHR.
 * TYPE is unused: the same instruction is emitted for every type.
 */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGHR, dest, src);
}
1117
/* Zero-extend the low 16 bits of SRC into DEST by emitting LLGHR. */
static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGHR, dest, src);
}
1122
/* Sign-extend the low 32 bits of SRC into DEST by emitting LGFR. */
static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}
1127
/* Zero-extend the low 32 bits of SRC into DEST by emitting LLGFR. */
static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}
1132
/* Sign-extend a 32-bit value to 64 bits; same as tcg_out_ext32s. */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32s(s, dest, src);
}
1137
/* Zero-extend a 32-bit value to 64 bits; same as tcg_out_ext32u. */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32u(s, dest, src);
}
1142
/*
 * Take the low 32 bits of a 64-bit value: implemented as a plain
 * 32-bit register move from SRC to DEST.
 */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_mov(s, TCG_TYPE_I32, dest, src);
}
1147
1148static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1149{
1150    int msb, lsb;
1151    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1152        /* Achieve wraparound by swapping msb and lsb.  */
1153        msb = 64 - ctz64(~val);
1154        lsb = clz64(~val) - 1;
1155    } else {
1156        msb = clz64(val);
1157        lsb = 63 - ctz64(val);
1158    }
1159    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1160}
1161
1162static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1163{
1164    static const S390Opcode ni_insns[4] = {
1165        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1166    };
1167    static const S390Opcode nif_insns[2] = {
1168        RIL_NILF, RIL_NIHF
1169    };
1170    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1171    int i;
1172
1173    /* Look for the zero-extensions.  */
1174    if ((val & valid) == 0xffffffff) {
1175        tcg_out_ext32u(s, dest, dest);
1176        return;
1177    }
1178    if ((val & valid) == 0xff) {
1179        tcg_out_ext8u(s, dest, dest);
1180        return;
1181    }
1182    if ((val & valid) == 0xffff) {
1183        tcg_out_ext16u(s, dest, dest);
1184        return;
1185    }
1186
1187    i = is_const_p16(~val & valid);
1188    if (i >= 0) {
1189        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1190        return;
1191    }
1192
1193    i = is_const_p32(~val & valid);
1194    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1195    if (i >= 0) {
1196        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1197        return;
1198    }
1199
1200    if (risbg_mask(val)) {
1201        tgen_andi_risbg(s, dest, dest, val);
1202        return;
1203    }
1204
1205    g_assert_not_reached();
1206}
1207
1208static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1209{
1210    static const S390Opcode oif_insns[2] = {
1211        RIL_OILF, RIL_OIHF
1212    };
1213
1214    int i;
1215
1216    i = is_const_p16(val);
1217    if (i >= 0) {
1218        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1219        return;
1220    }
1221
1222    i = is_const_p32(val);
1223    if (i >= 0) {
1224        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1225        return;
1226    }
1227
1228    g_assert_not_reached();
1229}
1230
1231static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1232{
1233    switch (is_const_p32(val)) {
1234    case 0:
1235        tcg_out_insn(s, RIL, XILF, dest, val);
1236        break;
1237    case 1:
1238        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1239        break;
1240    default:
1241        g_assert_not_reached();
1242    }
1243}
1244
/*
 * Emit the instruction that sets the condition code for "R1 <C> C2".
 *
 *   TYPE:        TCG_TYPE_I32 or a 64-bit type; selects operand width.
 *   C2, C2CONST: second operand, a constant when C2CONST is set and a
 *                register otherwise.
 *   NEED_CARRY:  must be false for test conditions; for an unsigned
 *                comparison against constant zero it rules out the
 *                LTR/LTGR form.
 *   INV_CC:      receives the branch mask for the inverse of C.
 *
 * Returns the branch mask for C.  The masks come from a different table
 * (or expression) depending on which instruction was emitted, so the
 * return value and *INV_CC must always be used as a pair.
 * TCG_REG_R0 is overwritten by the test forms that have to compute an AND.
 */
static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
{
    bool is_unsigned = is_unsigned_cond(c);
    TCGCond inv_c = tcg_invert_cond(c);
    S390Opcode op;

    if (is_tst_cond(c)) {
        tcg_debug_assert(!need_carry);

        if (!c2const) {
            /* Register mask: AND into R0, only the condition code is used. */
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
            } else {
                tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
            }
            goto exit;
        }

        /* Ignore the high half of a constant mask for a 32-bit test. */
        if (type == TCG_TYPE_I32) {
            c2 = (uint32_t)c2;
        }

        /* Mask confined to one 16-bit piece: use a test-under-mask insn. */
        int i = is_const_p16(c2);
        if (i >= 0) {
            tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
            *inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
            /* The mask is 4 bits wide, so ^ 15 yields its complement. */
            return *inv_cc ^ 15;
        }

        if (risbg_mask(c2)) {
            tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
            goto exit;
        }
        g_assert_not_reached();
    }

    if (c2const) {
        if (c2 == 0) {
            /* Compare against zero by loading R1 onto itself and testing. */
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
                return tcg_cond_to_ltr_cond[c];
            }
        }

        /* Signed compare with a constant that fits 16 bits. */
        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        /* Any 32-bit constant works for a 32-bit compare. */
        if (type == TCG_TYPE_I32) {
            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
            tcg_out_insn_RIL(s, op, r1, c2);
            goto exit;
        }

        /* Should match TCG_CT_CONST_CMP. */
        switch (c) {
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            tcg_debug_assert(c2 == (int32_t)c2);
            op = RIL_CGFI;
            break;
        case TCG_COND_EQ:
        case TCG_COND_NE:
            /* Equality may use either the signed or the unsigned form. */
            if (c2 == (int32_t)c2) {
                op = RIL_CGFI;
                break;
            }
            /* fall through */
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            tcg_debug_assert(c2 == (uint32_t)c2);
            op = RIL_CLGFI;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_out_insn_RIL(s, op, r1, c2);
    } else if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    /* Every path that gets here uses the ordinary comparison masks. */
    *inv_cc = tcg_cond_to_s390_cond[inv_c];
    return tcg_cond_to_s390_cond[c];
}
1346
1347static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1348                    TCGArg c2, bool c2const, bool need_carry)
1349{
1350    int inv_cc;
1351    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1352}
1353
1354static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1355                         TCGReg dest, TCGReg c1, TCGArg c2,
1356                         bool c2const, bool neg)
1357{
1358    int cc;
1359
1360    /* With LOC2, we can always emit the minimum 3 insns.  */
1361    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1362        /* Emit: d = 0, d = (cc ? 1 : d).  */
1363        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1364        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1365        tcg_out_insn(s, RIEg, LOCGHI, dest, neg ? -1 : 1, cc);
1366        return;
1367    }
1368
1369    switch (cond) {
1370    case TCG_COND_GEU:
1371    case TCG_COND_LTU:
1372    case TCG_COND_LT:
1373    case TCG_COND_GE:
1374        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1375        if (!c2const) {
1376            TCGReg t = c1;
1377            c1 = c2;
1378            c2 = t;
1379            cond = tcg_swap_cond(cond);
1380        }
1381        break;
1382    default:
1383        break;
1384    }
1385
1386    switch (cond) {
1387    case TCG_COND_NE:
1388        /* X != 0 is X > 0.  */
1389        if (c2const && c2 == 0) {
1390            cond = TCG_COND_GTU;
1391        } else {
1392            break;
1393        }
1394        /* fallthru */
1395
1396    case TCG_COND_GTU:
1397    case TCG_COND_GT:
1398        /*
1399         * The result of a compare has CC=2 for GT and CC=3 unused.
1400         * ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.
1401         */
1402        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1403        tcg_out_movi(s, type, dest, 0);
1404        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1405        if (neg) {
1406            if (type == TCG_TYPE_I32) {
1407                tcg_out_insn(s, RR, LCR, dest, dest);
1408            } else {
1409                tcg_out_insn(s, RRE, LCGR, dest, dest);
1410            }
1411        }
1412        return;
1413
1414    case TCG_COND_EQ:
1415        /* X == 0 is X <= 0.  */
1416        if (c2const && c2 == 0) {
1417            cond = TCG_COND_LEU;
1418        } else {
1419            break;
1420        }
1421        /* fallthru */
1422
1423    case TCG_COND_LEU:
1424    case TCG_COND_LE:
1425        /*
1426         * As above, but we're looking for borrow, or !carry.
1427         * The second insn computes d - d - borrow, or -1 for true
1428         * and 0 for false.  So we must mask to 1 bit afterward.
1429         */
1430        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1431        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1432        if (!neg) {
1433            tgen_andi(s, type, dest, 1);
1434        }
1435        return;
1436
1437    default:
1438        g_assert_not_reached();
1439    }
1440
1441    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1442    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1443    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1444    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, neg ? -1 : 1);
1445    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1446}
1447
1448static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1449                             TCGArg v3, int v3const, TCGReg v4,
1450                             int cc, int inv_cc)
1451{
1452    TCGReg src;
1453
1454    if (v3const) {
1455        if (dest == v4) {
1456            if (HAVE_FACILITY(LOAD_ON_COND2)) {
1457                /* Emit: if (cc) dest = v3. */
1458                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1459                return;
1460            }
1461            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1462            src = TCG_TMP0;
1463        } else {
1464            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1465            tcg_out_insn(s, RI, LGHI, dest, v3);
1466            cc = inv_cc;
1467            src = v4;
1468        }
1469    } else {
1470        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1471            /* Emit: dest = cc ? v3 : v4. */
1472            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1473            return;
1474        }
1475        if (dest == v4) {
1476            src = v3;
1477        } else {
1478            tcg_out_mov(s, type, dest, v3);
1479            cc = inv_cc;
1480            src = v4;
1481        }
1482    }
1483
1484    /* Emit: if (cc) dest = src. */
1485    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1486}
1487
1488static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1489                         TCGReg c1, TCGArg c2, int c2const,
1490                         TCGArg v3, int v3const, TCGReg v4)
1491{
1492    int cc, inv_cc;
1493
1494    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1495    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1496}
1497
1498static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1499                     TCGArg a2, int a2const)
1500{
1501    /* Since this sets both R and R+1, we have no choice but to store the
1502       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1503    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1504    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1505
1506    if (a2const && a2 == 64) {
1507        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1508        return;
1509    }
1510
1511    /*
1512     * Conditions from FLOGR are:
1513     *   2 -> one bit found
1514     *   8 -> no one bit found
1515     */
1516    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1517}
1518
1519static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1520{
1521    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1522    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1523        if (type == TCG_TYPE_I32) {
1524            tcg_out_ext32u(s, dest, src);
1525            src = dest;
1526        }
1527        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1528        return;
1529    }
1530
1531    /* Without MIE3, each byte gets the count of bits for the byte. */
1532    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1533
1534    /* Multiply to sum each byte at the top of the word. */
1535    if (type == TCG_TYPE_I32) {
1536        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1537        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1538    } else {
1539        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1540        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1541        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
1542    }
1543}
1544
1545static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1546                         int ofs, int len, int z)
1547{
1548    int lsb = (63 - ofs);
1549    int msb = lsb - (len - 1);
1550    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1551}
1552
1553static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1554                         int ofs, int len)
1555{
1556    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1557}
1558
1559static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1560{
1561    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1562    if (off == (int16_t)off) {
1563        tcg_out_insn(s, RI, BRC, cc, off);
1564    } else if (off == (int32_t)off) {
1565        tcg_out_insn(s, RIL, BRCL, cc, off);
1566    } else {
1567        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1568        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1569    }
1570}
1571
1572static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1573{
1574    if (l->has_value) {
1575        tgen_gotoi(s, cc, l->u.value_ptr);
1576    } else {
1577        tcg_out16(s, RI_BRC | (cc << 4));
1578        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1579        s->code_ptr += 1;
1580    }
1581}
1582
1583static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1584                                TCGReg r1, TCGReg r2, TCGLabel *l)
1585{
1586    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1587    /* Format RIE-b */
1588    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1589    tcg_out16(s, 0);
1590    tcg_out16(s, cc << 12 | (opc & 0xff));
1591}
1592
1593static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1594                                    TCGReg r1, int i2, TCGLabel *l)
1595{
1596    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1597    /* Format RIE-c */
1598    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1599    tcg_out16(s, 0);
1600    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1601}
1602
1603static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1604                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1605{
1606    int cc;
1607
1608    if (!is_tst_cond(c)) {
1609        bool is_unsigned = is_unsigned_cond(c);
1610        bool in_range;
1611        S390Opcode opc;
1612
1613        cc = tcg_cond_to_s390_cond[c];
1614
1615        if (!c2const) {
1616            opc = (type == TCG_TYPE_I32
1617                   ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1618                   : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1619            tgen_compare_branch(s, opc, cc, r1, c2, l);
1620            return;
1621        }
1622
1623        /*
1624         * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1625         * If the immediate we've been given does not fit that range, we'll
1626         * fall back to separate compare and branch instructions using the
1627         * larger comparison range afforded by COMPARE IMMEDIATE.
1628         */
1629        if (type == TCG_TYPE_I32) {
1630            if (is_unsigned) {
1631                opc = RIEc_CLIJ;
1632                in_range = (uint32_t)c2 == (uint8_t)c2;
1633            } else {
1634                opc = RIEc_CIJ;
1635                in_range = (int32_t)c2 == (int8_t)c2;
1636            }
1637        } else {
1638            if (is_unsigned) {
1639                opc = RIEc_CLGIJ;
1640                in_range = (uint64_t)c2 == (uint8_t)c2;
1641            } else {
1642                opc = RIEc_CGIJ;
1643                in_range = (int64_t)c2 == (int8_t)c2;
1644            }
1645        }
1646        if (in_range) {
1647            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1648            return;
1649        }
1650    }
1651
1652    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1653    tgen_branch(s, cc, l);
1654}
1655
1656static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1657{
1658    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1659    if (off == (int32_t)off) {
1660        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1661    } else {
1662        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1663        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1664    }
1665}
1666
/*
 * Emit a call to the helper at DEST.  INFO is unused: the call sequence
 * does not depend on the helper's signature.
 */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, dest);
}
1672
/*
 * Host address of a guest memory access, in the base + index +
 * displacement form that the memory instructions take.
 */
typedef struct {
    TCGReg base;        /* base register, or TCG_REG_NONE */
    TCGReg index;       /* index register, or TCG_REG_NONE */
    int disp;           /* displacement */
    TCGAtomAlign aa;    /* atomicity and alignment of the access */
} HostAddress;
1679
1680bool tcg_target_has_memory_bswap(MemOp memop)
1681{
1682    TCGAtomAlign aa;
1683
1684    if ((memop & MO_SIZE) <= MO_64) {
1685        return true;
1686    }
1687
1688    /*
1689     * Reject 16-byte memop with 16-byte atomicity,
1690     * but do allow a pair of 64-bit operations.
1691     */
1692    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
1693    return aa.atom <= MO_64;
1694}
1695
1696static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1697                                   HostAddress h)
1698{
1699    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1700    case MO_UB:
1701        tcg_out_insn(s, RXY, LLGC, data, h.base, h.index, h.disp);
1702        break;
1703    case MO_SB:
1704        tcg_out_insn(s, RXY, LGB, data, h.base, h.index, h.disp);
1705        break;
1706
1707    case MO_UW | MO_BSWAP:
1708        /* swapped unsigned halfword load with upper bits zeroed */
1709        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1710        tcg_out_ext16u(s, data, data);
1711        break;
1712    case MO_UW:
1713        tcg_out_insn(s, RXY, LLGH, data, h.base, h.index, h.disp);
1714        break;
1715
1716    case MO_SW | MO_BSWAP:
1717        /* swapped sign-extended halfword load */
1718        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1719        tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
1720        break;
1721    case MO_SW:
1722        tcg_out_insn(s, RXY, LGH, data, h.base, h.index, h.disp);
1723        break;
1724
1725    case MO_UL | MO_BSWAP:
1726        /* swapped unsigned int load with upper bits zeroed */
1727        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1728        tcg_out_ext32u(s, data, data);
1729        break;
1730    case MO_UL:
1731        tcg_out_insn(s, RXY, LLGF, data, h.base, h.index, h.disp);
1732        break;
1733
1734    case MO_SL | MO_BSWAP:
1735        /* swapped sign-extended int load */
1736        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1737        tcg_out_ext32s(s, data, data);
1738        break;
1739    case MO_SL:
1740        tcg_out_insn(s, RXY, LGF, data, h.base, h.index, h.disp);
1741        break;
1742
1743    case MO_UQ | MO_BSWAP:
1744        tcg_out_insn(s, RXY, LRVG, data, h.base, h.index, h.disp);
1745        break;
1746    case MO_UQ:
1747        tcg_out_insn(s, RXY, LG, data, h.base, h.index, h.disp);
1748        break;
1749
1750    default:
1751        g_assert_not_reached();
1752    }
1753}
1754
1755static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1756                                   HostAddress h)
1757{
1758    switch (opc & (MO_SIZE | MO_BSWAP)) {
1759    case MO_UB:
1760        if (h.disp >= 0 && h.disp < 0x1000) {
1761            tcg_out_insn(s, RX, STC, data, h.base, h.index, h.disp);
1762        } else {
1763            tcg_out_insn(s, RXY, STCY, data, h.base, h.index, h.disp);
1764        }
1765        break;
1766
1767    case MO_UW | MO_BSWAP:
1768        tcg_out_insn(s, RXY, STRVH, data, h.base, h.index, h.disp);
1769        break;
1770    case MO_UW:
1771        if (h.disp >= 0 && h.disp < 0x1000) {
1772            tcg_out_insn(s, RX, STH, data, h.base, h.index, h.disp);
1773        } else {
1774            tcg_out_insn(s, RXY, STHY, data, h.base, h.index, h.disp);
1775        }
1776        break;
1777
1778    case MO_UL | MO_BSWAP:
1779        tcg_out_insn(s, RXY, STRV, data, h.base, h.index, h.disp);
1780        break;
1781    case MO_UL:
1782        if (h.disp >= 0 && h.disp < 0x1000) {
1783            tcg_out_insn(s, RX, ST, data, h.base, h.index, h.disp);
1784        } else {
1785            tcg_out_insn(s, RXY, STY, data, h.base, h.index, h.disp);
1786        }
1787        break;
1788
1789    case MO_UQ | MO_BSWAP:
1790        tcg_out_insn(s, RXY, STRVG, data, h.base, h.index, h.disp);
1791        break;
1792    case MO_UQ:
1793        tcg_out_insn(s, RXY, STG, data, h.base, h.index, h.disp);
1794        break;
1795
1796    default:
1797        g_assert_not_reached();
1798    }
1799}
1800
/*
 * Parameters handed to the common load/store helper argument code by
 * the slow paths below: one temporary register, TCG_TMP0.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_TMP0 }
};
1804
1805static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1806{
1807    MemOp opc = get_memop(lb->oi);
1808
1809    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1810                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1811        return false;
1812    }
1813
1814    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1815    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1816    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1817
1818    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1819    return true;
1820}
1821
1822static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1823{
1824    MemOp opc = get_memop(lb->oi);
1825
1826    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1827                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1828        return false;
1829    }
1830
1831    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1832    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1833
1834    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1835    return true;
1836}
1837
/*
 * The TLB mask and table are read with RXY instructions relative to
 * env, whose signed 20-bit displacement reaches down to -(1 << 19).
 */
#define MIN_TLB_MASK_TABLE_OFS  -(1 << 19)
1840
/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 *
 * @addr_reg holds the guest address, @oi the memory operation and mmu
 * index, and @is_ld selects the read or write TLB comparator.
 * TCG_TMP0 is overwritten in system-mode, and in user-mode for 32-bit
 * guest addresses; TCG_REG_R0 is overwritten in system-mode.
 * The caller must still fill in the type, datalo_reg and raddr fields
 * of a returned label.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
    /* Low address bits that must be zero for the required alignment. */
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1 << s_bits) - 1;
        int mem_index = get_mmuidx(oi);
        int fast_off = tlb_mask_table_ofs(s, mem_index);
        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
        int ofs, a_off;
        uint64_t tlb_mask;

        /* System-mode always has a slow path. */
        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /*
         * TMP0 = address of the TLB entry: shift the guest address into
         * an entry offset, mask it with the table mask, add the table.
         */
        tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
                     s->page_bits - CPU_TLB_ENTRY_BITS);

        tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
        tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);

        /*
         * For aligned accesses, we check the first byte and include the
         * alignment bits within the address.  For unaligned access, we
         * check that we don't cross pages using the address of the last
         * byte of the access.
         */
        a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
        tlb_mask = (uint64_t)s->page_mask | a_mask;
        /* R0 = the value to compare against the TLB entry. */
        if (a_off == 0) {
            tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
        } else {
            tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
            tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
        }

        if (is_ld) {
            ofs = offsetof(CPUTLBEntry, addr_read);
        } else {
            ofs = offsetof(CPUTLBEntry, addr_write);
        }
        if (addr_type == TCG_TYPE_I32) {
            /* Compare only the 4 bytes that hold the low half. */
            ofs += HOST_BIG_ENDIAN * 4;
            tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
        } else {
            tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
        }

        /*
         * Branch to the slow path on mismatch.  Only the opcode halfword
         * is emitted; the displacement halfword is reserved, and its
         * address recorded, for the slow path to patch.
         */
        tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
        ldst->label_ptr[0] = s->code_ptr++;

        /* On a hit, the entry's addend becomes the index register. */
        h->index = TCG_TMP0;
        tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
                     offsetof(CPUTLBEntry, addend));

        if (addr_type == TCG_TYPE_I32) {
            /* Add the 32-bit guest address into the index directly. */
            tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
            h->base = TCG_REG_NONE;
        } else {
            h->base = addr_reg;
        }
        h->disp = 0;
    } else {
        /* User-mode needs a slow path only when alignment is checked. */
        if (a_mask) {
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            tcg_debug_assert(a_mask <= 0xffff);
            tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);

            /* As above: opcode now, displacement patched later. */
            tcg_out16(s, RI_BRC | (S390_TM_NE << 4));
            ldst->label_ptr[0] = s->code_ptr++;
        }

        h->base = addr_reg;
        if (addr_type == TCG_TYPE_I32) {
            /* Use a zero-extended copy of a 32-bit guest address. */
            tcg_out_ext32u(s, TCG_TMP0, addr_reg);
            h->base = TCG_TMP0;
        }
        /* A small guest_base fits in the displacement. */
        if (guest_base < 0x80000) {
            h->index = TCG_REG_NONE;
            h->disp = guest_base;
        } else {
            h->index = TCG_GUEST_BASE_REG;
            h->disp = 0;
        }
    }

    return ldst;
}
1951
1952static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1953                            MemOpIdx oi, TCGType data_type)
1954{
1955    TCGLabelQemuLdst *ldst;
1956    HostAddress h;
1957
1958    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1959    tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
1960
1961    if (ldst) {
1962        ldst->type = data_type;
1963        ldst->datalo_reg = data_reg;
1964        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1965    }
1966}
1967
1968static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1969                            MemOpIdx oi, TCGType data_type)
1970{
1971    TCGLabelQemuLdst *ldst;
1972    HostAddress h;
1973
1974    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1975    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1976
1977    if (ldst) {
1978        ldst->type = data_type;
1979        ldst->datalo_reg = data_reg;
1980        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1981    }
1982}
1983
1984static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1985                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1986{
1987    TCGLabel *l1 = NULL, *l2 = NULL;
1988    TCGLabelQemuLdst *ldst;
1989    HostAddress h;
1990    bool need_bswap;
1991    bool use_pair;
1992    S390Opcode insn;
1993
1994    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1995
1996    use_pair = h.aa.atom < MO_128;
1997    need_bswap = get_memop(oi) & MO_BSWAP;
1998
1999    if (!use_pair) {
2000        /*
2001         * Atomicity requires we use LPQ.  If we've already checked for
2002         * 16-byte alignment, that's all we need.  If we arrive with
2003         * lesser alignment, we have determined that less than 16-byte
2004         * alignment can be satisfied with two 8-byte loads.
2005         */
2006        if (h.aa.align < MO_128) {
2007            use_pair = true;
2008            l1 = gen_new_label();
2009            l2 = gen_new_label();
2010
2011            tcg_out_insn(s, RI, TMLL, addr_reg, 15);
2012            tgen_branch(s, S390_TM_NE, l1);
2013        }
2014
2015        tcg_debug_assert(!need_bswap);
2016        tcg_debug_assert(datalo & 1);
2017        tcg_debug_assert(datahi == datalo - 1);
2018        insn = is_ld ? RXY_LPQ : RXY_STPQ;
2019        tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
2020
2021        if (use_pair) {
2022            tgen_branch(s, S390_CC_ALWAYS, l2);
2023            tcg_out_label(s, l1);
2024        }
2025    }
2026    if (use_pair) {
2027        TCGReg d1, d2;
2028
2029        if (need_bswap) {
2030            d1 = datalo, d2 = datahi;
2031            insn = is_ld ? RXY_LRVG : RXY_STRVG;
2032        } else {
2033            d1 = datahi, d2 = datalo;
2034            insn = is_ld ? RXY_LG : RXY_STG;
2035        }
2036
2037        if (h.base == d1 || h.index == d1) {
2038            tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
2039            h.base = TCG_TMP0;
2040            h.index = TCG_REG_NONE;
2041            h.disp = 0;
2042        }
2043        tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
2044        tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
2045    }
2046    if (l2) {
2047        tcg_out_label(s, l2);
2048    }
2049
2050    if (ldst) {
2051        ldst->type = TCG_TYPE_I128;
2052        ldst->datalo_reg = datalo;
2053        ldst->datahi_reg = datahi;
2054        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2055    }
2056}
2057
2058static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2059{
2060    /* Reuse the zeroing that exists for goto_ptr.  */
2061    if (a0 == 0) {
2062        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2063    } else {
2064        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2065        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2066    }
2067}
2068
2069static void tcg_out_goto_tb(TCGContext *s, int which)
2070{
2071    /*
2072     * Branch displacement must be aligned for atomic patching;
2073     * see if we need to add extra nop before branch
2074     */
2075    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2076        tcg_out16(s, NOP);
2077    }
2078    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2079    set_jmp_insn_offset(s, which);
2080    s->code_ptr += 2;
2081    set_jmp_reset_offset(s, which);
2082}
2083
2084void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2085                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2086{
2087    if (!HAVE_FACILITY(GEN_INST_EXT)) {
2088        return;
2089    }
2090    /* patch the branch destination */
2091    uintptr_t addr = tb->jmp_target_addr[n];
2092    intptr_t disp = addr - (jmp_rx - 2);
2093    qatomic_set((int32_t *)jmp_rw, disp / 2);
2094    /* no need to flush icache explicitly */
2095}
2096
/*
 * Expand to the pair of case labels INDEX_op_<x>_i32 and INDEX_op_<x>_i64.
 * The final label is left without its colon, so uses are written as
 * "OP_32_64(x):" and read like an ordinary case label.
 */
# define OP_32_64(x) \
        case glue(glue(INDEX_op_,x),_i32): \
        case glue(glue(INDEX_op_,x),_i64)
2100
/*
 * Emit host code for a single TCG opcode.
 *
 * @s:          code generation context the instructions are written to
 * @opc:        the opcode to translate
 * @args:       operands of the opcode, laid out as each case below expects
 *              (register numbers, immediates, labels, MemOpIdx, ...)
 * @const_args: const_args[i] is non-zero when args[i] holds an immediate
 *              value rather than a register number
 *
 * The opcodes collected in the final case list are emitted elsewhere (see
 * the notes on those cases); reaching them, or any opcode that has no case
 * at all, ends in g_assert_not_reached().
 */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg args[TCG_MAX_OP_ARGS],
                              const int const_args[TCG_MAX_OP_ARGS])
{
    S390Opcode op, op2;
    TCGArg a0, a1, a2;

    switch (opc) {
    case INDEX_op_goto_ptr:
        a0 = args[0];
        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
        break;

    OP_32_64(ld8u):
        /* ??? LLC (RXY format) is only present with the extended-immediate
           facility, whereas LLGC is always present.  */
        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
        break;

    OP_32_64(ld8s):
        /* ??? LB is no smaller than LGB, so no point to using it.  */
        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
        break;

    OP_32_64(ld16u):
        /* ??? LLH (RXY format) is only present with the extended-immediate
           facility, whereas LLGH is always present.  */
        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
        break;

    case INDEX_op_ld16s_i32:
        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
        break;

    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
                    TCG_REG_NONE, args[2]);
        break;

    OP_32_64(st16):
        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
                    TCG_REG_NONE, args[2]);
        break;

    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    case INDEX_op_add_i32:
        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
        if (const_args[2]) {
        /* Also entered from sub_i32 with the constant already negated. */
        do_addi_32:
            if (a0 == a1) {
                /* In-place add: pick the 16-bit immediate form if it fits. */
                if (a2 == (int16_t)a2) {
                    tcg_out_insn(s, RI, AHI, a0, a2);
                    break;
                }
                tcg_out_insn(s, RIL, AFI, a0, a2);
                break;
            }
            /* Distinct destination: form a1 + a2 as an address. */
            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
        } else if (a0 == a1) {
            tcg_out_insn(s, RR, AR, a0, a2);
        } else {
            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
        }
        break;
    case INDEX_op_sub_i32:
        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
        if (const_args[2]) {
            /* Subtracting a constant is adding its negation. */
            a2 = -a2;
            goto do_addi_32;
        } else if (a0 == a1) {
            tcg_out_insn(s, RR, SR, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
        }
        break;

    /*
     * 32-bit logical operations.  With a constant operand the source is
     * first copied to the destination and the immediate form is applied
     * in place; otherwise the two-operand form is used when a0 == a1 and
     * the three-operand form when they differ.
     */
    case INDEX_op_and_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tgen_andi(s, TCG_TYPE_I32, a0, a2);
        } else if (a0 == a1) {
            tcg_out_insn(s, RR, NR, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
        }
        break;
    case INDEX_op_or_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tgen_ori(s, a0, a2);
        } else if (a0 == a1) {
            tcg_out_insn(s, RR, OR, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
        }
        break;
    case INDEX_op_xor_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tcg_out_insn(s, RIL, XILF, a0, a2);
        } else if (a0 == a1) {
            tcg_out_insn(s, RR, XR, args[0], args[2]);
        } else {
            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
        }
        break;

    /*
     * Operations with a complemented second operand.  A constant operand
     * is complemented here and handed to the plain and/or/xor immediate
     * path; a register operand uses the dedicated three-operand insn.
     */
    case INDEX_op_andc_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
	} else {
            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
	}
        break;
    case INDEX_op_orc_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tgen_ori(s, a0, (uint32_t)~a2);
        } else {
            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
        }
        break;
    case INDEX_op_eqv_i32:
        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tcg_out_insn(s, RIL, XILF, a0, ~a2);
        } else {
            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
        }
        break;
    case INDEX_op_nand_i32:
        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
        break;
    case INDEX_op_nor_i32:
        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
        break;

    case INDEX_op_neg_i32:
        tcg_out_insn(s, RR, LCR, args[0], args[1]);
        break;
    case INDEX_op_not_i32:
        /* NOT is emitted as NOR of the source with itself. */
        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
        break;

    case INDEX_op_mul_i32:
        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            if (a2 == (int16_t)a2) {
                tcg_out_insn(s, RI, MHI, a0, a2);
            } else {
                tcg_out_insn(s, RIL, MSFI, a0, a2);
            }
        } else if (a0 == a1) {
            tcg_out_insn(s, RRE, MSR, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
        }
        break;

    /*
     * Division: the asserts require each output to alias the matching
     * input and args[1]/args[0] to form an even/odd register pair; the
     * instruction is given the even register and the divisor args[4].
     */
    case INDEX_op_div2_i32:
        tcg_debug_assert(args[0] == args[2]);
        tcg_debug_assert(args[1] == args[3]);
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RR, DR, args[1], args[4]);
        break;
    case INDEX_op_divu2_i32:
        tcg_debug_assert(args[0] == args[2]);
        tcg_debug_assert(args[1] == args[3]);
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
        break;

    case INDEX_op_shl_i32:
        op = RS_SLL;
        op2 = RSY_SLLK;
    /*
     * Shared tail for the 32-bit shifts: "op" is the in-place form used
     * when a0 == a1, "op2" the form with a separate destination.
     */
    do_shift32:
        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
        if (a0 == a1) {
            if (const_args[2]) {
                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
            } else {
                tcg_out_sh32(s, op, a0, a2, 0);
            }
        } else {
            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
            if (const_args[2]) {
                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
            } else {
                tcg_out_sh64(s, op2, a0, a1, a2, 0);
            }
        }
        break;
    case INDEX_op_shr_i32:
        op = RS_SRL;
        op2 = RSY_SRLK;
        goto do_shift32;
    case INDEX_op_sar_i32:
        op = RS_SRA;
        op2 = RSY_SRAK;
        goto do_shift32;

    case INDEX_op_rotl_i32:
        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
        if (const_args[2]) {
            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
        } else {
            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
        }
        break;
    case INDEX_op_rotr_i32:
        /* Rotate right is emitted as rotate left by the negated count. */
        if (const_args[2]) {
            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
                         TCG_REG_NONE, (32 - args[2]) & 31);
        } else {
            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
        }
        break;

    /*
     * bswap16: reverse the whole register, then shift the two swapped
     * bytes back down to the bottom.  TCG_BSWAP_OS selects an arithmetic
     * shift, otherwise a logical shift is used.
     */
    case INDEX_op_bswap16_i32:
        a0 = args[0], a1 = args[1], a2 = args[2];
        tcg_out_insn(s, RRE, LRVR, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
        } else {
            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
        }
        break;
    case INDEX_op_bswap16_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        tcg_out_insn(s, RRE, LRVGR, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
        } else {
            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
        }
        break;

    case INDEX_op_bswap32_i32:
        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
        break;
    case INDEX_op_bswap32_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        tcg_out_insn(s, RRE, LRVR, a0, a1);
        /*
         * Extend the 32-bit result as the flags request: sign-extend for
         * OS; zero-extend only when OZ is set and IZ is not.
         */
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, a0, a0);
        }
        break;

    /*
     * Double-word add/sub: the low part args[0] is combined with args[4]
     * first, then the high part args[1] with args[5].  Only args[4] may
     * be a constant here.  Presumably the operand constraints tie
     * args[2]/args[3] to args[0]/args[1] -- TODO confirm.
     */
    case INDEX_op_add2_i32:
        if (const_args[4]) {
            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
        } else {
            tcg_out_insn(s, RR, ALR, args[0], args[4]);
        }
        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
        break;
    case INDEX_op_sub2_i32:
        if (const_args[4]) {
            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
        } else {
            tcg_out_insn(s, RR, SLR, args[0], args[4]);
        }
        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
        break;

    case INDEX_op_br:
        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
        break;

    case INDEX_op_brcond_i32:
        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
                    args[1], const_args[1], arg_label(args[3]));
        break;
    case INDEX_op_setcond_i32:
        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
                     args[2], const_args[2], false);
        break;
    case INDEX_op_negsetcond_i32:
        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
                     args[2], const_args[2], true);
        break;
    case INDEX_op_movcond_i32:
        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
                     args[2], const_args[2], args[3], const_args[3], args[4]);
        break;

    /*
     * Guest memory accesses.  The _a32/_a64 variants share one
     * implementation; only the data type differs between the cases.
     */
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

    case INDEX_op_ld16s_i64:
        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;

    case INDEX_op_st32_i64:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;

    case INDEX_op_add_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
        /* Also entered from sub_i64 with the constant already negated. */
        do_addi_64:
            if (a0 == a1) {
                /*
                 * In-place add: try the immediate forms in turn.  If none
                 * of the range checks matches, fall out of this block to
                 * the address-forming path below.
                 */
                if (a2 == (int16_t)a2) {
                    tcg_out_insn(s, RI, AGHI, a0, a2);
                    break;
                }
                if (a2 == (int32_t)a2) {
                    tcg_out_insn(s, RIL, AGFI, a0, a2);
                    break;
                }
                if (a2 == (uint32_t)a2) {
                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
                    break;
                }
                if (-a2 == (uint32_t)-a2) {
                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
                    break;
                }
            }
            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
        } else if (a0 == a1) {
            tcg_out_insn(s, RRE, AGR, a0, a2);
        } else {
            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
        }
        break;
    case INDEX_op_sub_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            /* Subtracting a constant is adding its negation. */
            a2 = -a2;
            goto do_addi_64;
        } else {
            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
        }
        break;

    /*
     * 64-bit logical operations: a constant operand is applied in place
     * after copying the source; a register operand always uses the
     * three-operand instruction.
     */
    case INDEX_op_and_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
        } else {
            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
        }
        break;
    case INDEX_op_or_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_ori(s, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
        }
        break;
    case INDEX_op_xor_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_xori(s, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
        } else {
            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
        }
        break;
    case INDEX_op_orc_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_ori(s, a0, ~a2);
        } else {
            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
        }
        break;
    case INDEX_op_eqv_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            tgen_xori(s, a0, ~a2);
        } else {
            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
        }
        break;
    case INDEX_op_nand_i64:
        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
        break;
    case INDEX_op_nor_i64:
        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
        break;

    case INDEX_op_neg_i64:
        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
        break;
    case INDEX_op_not_i64:
        /* NOT is emitted as NOR of the source with itself. */
        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
        break;
    case INDEX_op_bswap64_i64:
        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
        break;

    case INDEX_op_mul_i64:
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[2]) {
            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
            if (a2 == (int16_t)a2) {
                tcg_out_insn(s, RI, MGHI, a0, a2);
            } else {
                tcg_out_insn(s, RIL, MSGFI, a0, a2);
            }
        } else if (a0 == a1) {
            tcg_out_insn(s, RRE, MSGR, a0, a2);
        } else {
            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
        }
        break;

    case INDEX_op_div2_i64:
        /*
         * ??? We get an unnecessary sign-extension of the dividend
         * into op0 with this definition, but as we do in fact always
         * produce both quotient and remainder using INDEX_op_div_i64
         * instead requires jumping through even more hoops.
         */
        tcg_debug_assert(args[0] == args[2]);
        tcg_debug_assert(args[1] == args[3]);
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
        break;
    case INDEX_op_divu2_i64:
        tcg_debug_assert(args[0] == args[2]);
        tcg_debug_assert(args[1] == args[3]);
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
        break;
    /*
     * Widening multiplies: the asserts require args[1]/args[0] to be an
     * even/odd register pair; for mulu2 args[0] must also alias args[2].
     */
    case INDEX_op_mulu2_i64:
        tcg_debug_assert(args[0] == args[2]);
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
        break;
    case INDEX_op_muls2_i64:
        tcg_debug_assert((args[1] & 1) == 0);
        tcg_debug_assert(args[0] == args[1] + 1);
        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
        break;

    case INDEX_op_shl_i64:
        op = RSY_SLLG;
    /* Shared tail for the 64-bit shifts; "op" selects the instruction. */
    do_shift64:
        if (const_args[2]) {
            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
        } else {
            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
        }
        break;
    case INDEX_op_shr_i64:
        op = RSY_SRLG;
        goto do_shift64;
    case INDEX_op_sar_i64:
        op = RSY_SRAG;
        goto do_shift64;

    case INDEX_op_rotl_i64:
        if (const_args[2]) {
            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
                         TCG_REG_NONE, args[2]);
        } else {
            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
        }
        break;
    case INDEX_op_rotr_i64:
        /* Rotate right is emitted as rotate left by the negated count. */
        if (const_args[2]) {
            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
                         TCG_REG_NONE, (64 - args[2]) & 63);
        } else {
            /* We can use the smaller 32-bit negate because only the
               low 6 bits are examined for the rotate.  */
            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
        }
        break;

    /*
     * 64-bit double-word add/sub.  A constant low operand is emitted with
     * whichever of ALGFI/SLGFI lets the immediate be passed as a
     * non-negative value: a negative constant is negated and the opposite
     * operation is used.
     */
    case INDEX_op_add2_i64:
        if (const_args[4]) {
            if ((int64_t)args[4] >= 0) {
                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
            } else {
                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
            }
        } else {
            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
        }
        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
        break;
    case INDEX_op_sub2_i64:
        if (const_args[4]) {
            if ((int64_t)args[4] >= 0) {
                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
            } else {
                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
            }
        } else {
            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
        }
        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
        break;

    case INDEX_op_brcond_i64:
        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
                    args[1], const_args[1], arg_label(args[3]));
        break;
    case INDEX_op_setcond_i64:
        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
                     args[2], const_args[2], false);
        break;
    case INDEX_op_negsetcond_i64:
        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
                     args[2], const_args[2], true);
        break;
    case INDEX_op_movcond_i64:
        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
                     args[2], const_args[2], args[3], const_args[3], args[4]);
        break;

    OP_32_64(deposit):
        a0 = args[0], a1 = args[1], a2 = args[2];
        if (const_args[1]) {
            /* Constant a1: let tgen_deposit handle it (last argument 1). */
            tgen_deposit(s, a0, a2, args[3], args[4], 1);
        } else {
            /* Since we can't support "0Z" as a constraint, we allow a1 in
               any register.  Fix things up as if a matching constraint.  */
            if (a0 != a1) {
                /*
                 * NOTE(review): the comparison yields 0 or 1 and is used
                 * directly as a TCGType; presumably TCG_TYPE_I32 == 0 and
                 * TCG_TYPE_I64 == 1 -- confirm against the enum.
                 */
                TCGType type = (opc == INDEX_op_deposit_i64);
                if (a0 == a2) {
                    /* Save a2 before the move below overwrites it. */
                    tcg_out_mov(s, type, TCG_TMP0, a2);
                    a2 = TCG_TMP0;
                }
                tcg_out_mov(s, type, a0, a1);
            }
            tgen_deposit(s, a0, a2, args[3], args[4], 0);
        }
        break;

    OP_32_64(extract):
        tgen_extract(s, args[0], args[1], args[2], args[3]);
        break;

    case INDEX_op_clz_i64:
        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
        break;

    case INDEX_op_ctpop_i32:
        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    case INDEX_op_ctpop_i64:
        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
        break;

    case INDEX_op_mb:
        /* The host memory model is quite strong, we simply need to
           serialize the instruction stream.  */
        if (args[0] & TCG_MO_ST_LD) {
            /* fast-bcr-serialization facility (45) is present */
            tcg_out_insn(s, RR, BCR, 14, 0);
        }
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}
2754
2755static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2756                            TCGReg dst, TCGReg src)
2757{
2758    if (is_general_reg(src)) {
2759        /* Replicate general register into two MO_64. */
2760        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2761        if (vece == MO_64) {
2762            return true;
2763        }
2764        src = dst;
2765    }
2766
2767    /*
2768     * Recall that the "standard" integer, within a vector, is the
2769     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2770     */
2771    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2772    return true;
2773}
2774
2775static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2776                             TCGReg dst, TCGReg base, intptr_t offset)
2777{
2778    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2779    return true;
2780}
2781
2782static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2783                             TCGReg dst, int64_t val)
2784{
2785    int i, mask, msb, lsb;
2786
2787    /* Look for int16_t elements.  */
2788    if (vece <= MO_16 ||
2789        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2790        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2791        return;
2792    }
2793
2794    /* Look for bit masks.  */
2795    if (vece == MO_32) {
2796        if (risbg_mask((int32_t)val)) {
2797            /* Handle wraparound by swapping msb and lsb.  */
2798            if ((val & 0x80000001u) == 0x80000001u) {
2799                msb = 32 - ctz32(~val);
2800                lsb = clz32(~val) - 1;
2801            } else {
2802                msb = clz32(val);
2803                lsb = 31 - ctz32(val);
2804            }
2805            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2806            return;
2807        }
2808    } else {
2809        if (risbg_mask(val)) {
2810            /* Handle wraparound by swapping msb and lsb.  */
2811            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2812                /* Handle wraparound by swapping msb and lsb.  */
2813                msb = 64 - ctz64(~val);
2814                lsb = clz64(~val) - 1;
2815            } else {
2816                msb = clz64(val);
2817                lsb = 63 - ctz64(val);
2818            }
2819            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2820            return;
2821        }
2822    }
2823
2824    /* Look for all bytes 0x00 or 0xff.  */
2825    for (i = mask = 0; i < 8; i++) {
2826        uint8_t byte = val >> (i * 8);
2827        if (byte == 0xff) {
2828            mask |= 1 << i;
2829        } else if (byte != 0) {
2830            break;
2831        }
2832    }
2833    if (i == 8) {
2834        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2835        return;
2836    }
2837
2838    /* Otherwise, stuff it in the constant pool.  */
2839    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2840    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2841    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2842}
2843
2844static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2845                           unsigned vecl, unsigned vece,
2846                           const TCGArg args[TCG_MAX_OP_ARGS],
2847                           const int const_args[TCG_MAX_OP_ARGS])
2848{
2849    TCGType type = vecl + TCG_TYPE_V64;
2850    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2851
2852    switch (opc) {
2853    case INDEX_op_ld_vec:
2854        tcg_out_ld(s, type, a0, a1, a2);
2855        break;
2856    case INDEX_op_st_vec:
2857        tcg_out_st(s, type, a0, a1, a2);
2858        break;
2859    case INDEX_op_dupm_vec:
2860        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2861        break;
2862
2863    case INDEX_op_abs_vec:
2864        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2865        break;
2866    case INDEX_op_neg_vec:
2867        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2868        break;
2869    case INDEX_op_not_vec:
2870        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2871        break;
2872
2873    case INDEX_op_add_vec:
2874        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2875        break;
2876    case INDEX_op_sub_vec:
2877        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2878        break;
2879    case INDEX_op_and_vec:
2880        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2881        break;
2882    case INDEX_op_andc_vec:
2883        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2884        break;
2885    case INDEX_op_mul_vec:
2886        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2887        break;
2888    case INDEX_op_or_vec:
2889        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2890        break;
2891    case INDEX_op_orc_vec:
2892        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2893        break;
2894    case INDEX_op_xor_vec:
2895        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2896        break;
2897    case INDEX_op_nand_vec:
2898        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2899        break;
2900    case INDEX_op_nor_vec:
2901        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2902        break;
2903    case INDEX_op_eqv_vec:
2904        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2905        break;
2906
2907    case INDEX_op_shli_vec:
2908        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2909        break;
2910    case INDEX_op_shri_vec:
2911        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2912        break;
2913    case INDEX_op_sari_vec:
2914        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2915        break;
2916    case INDEX_op_rotli_vec:
2917        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2918        break;
2919    case INDEX_op_shls_vec:
2920        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2921        break;
2922    case INDEX_op_shrs_vec:
2923        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2924        break;
2925    case INDEX_op_sars_vec:
2926        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2927        break;
2928    case INDEX_op_rotls_vec:
2929        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2930        break;
2931    case INDEX_op_shlv_vec:
2932        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2933        break;
2934    case INDEX_op_shrv_vec:
2935        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2936        break;
2937    case INDEX_op_sarv_vec:
2938        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2939        break;
2940    case INDEX_op_rotlv_vec:
2941        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2942        break;
2943
2944    case INDEX_op_smin_vec:
2945        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2946        break;
2947    case INDEX_op_smax_vec:
2948        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2949        break;
2950    case INDEX_op_umin_vec:
2951        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2952        break;
2953    case INDEX_op_umax_vec:
2954        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2955        break;
2956
2957    case INDEX_op_bitsel_vec:
2958        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2959        break;
2960
2961    case INDEX_op_cmp_vec:
2962        switch ((TCGCond)args[3]) {
2963        case TCG_COND_EQ:
2964            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2965            break;
2966        case TCG_COND_GT:
2967            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2968            break;
2969        case TCG_COND_GTU:
2970            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2971            break;
2972        default:
2973            g_assert_not_reached();
2974        }
2975        break;
2976
2977    case INDEX_op_s390_vuph_vec:
2978        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2979        break;
2980    case INDEX_op_s390_vupl_vec:
2981        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2982        break;
2983    case INDEX_op_s390_vpks_vec:
2984        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2985        break;
2986
2987    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2988    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2989    default:
2990        g_assert_not_reached();
2991    }
2992}
2993
2994int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2995{
2996    switch (opc) {
2997    case INDEX_op_abs_vec:
2998    case INDEX_op_add_vec:
2999    case INDEX_op_and_vec:
3000    case INDEX_op_andc_vec:
3001    case INDEX_op_bitsel_vec:
3002    case INDEX_op_eqv_vec:
3003    case INDEX_op_nand_vec:
3004    case INDEX_op_neg_vec:
3005    case INDEX_op_nor_vec:
3006    case INDEX_op_not_vec:
3007    case INDEX_op_or_vec:
3008    case INDEX_op_orc_vec:
3009    case INDEX_op_rotli_vec:
3010    case INDEX_op_rotls_vec:
3011    case INDEX_op_rotlv_vec:
3012    case INDEX_op_sari_vec:
3013    case INDEX_op_sars_vec:
3014    case INDEX_op_sarv_vec:
3015    case INDEX_op_shli_vec:
3016    case INDEX_op_shls_vec:
3017    case INDEX_op_shlv_vec:
3018    case INDEX_op_shri_vec:
3019    case INDEX_op_shrs_vec:
3020    case INDEX_op_shrv_vec:
3021    case INDEX_op_smax_vec:
3022    case INDEX_op_smin_vec:
3023    case INDEX_op_sub_vec:
3024    case INDEX_op_umax_vec:
3025    case INDEX_op_umin_vec:
3026    case INDEX_op_xor_vec:
3027        return 1;
3028    case INDEX_op_cmp_vec:
3029    case INDEX_op_cmpsel_vec:
3030    case INDEX_op_rotrv_vec:
3031        return -1;
3032    case INDEX_op_mul_vec:
3033        return vece < MO_64;
3034    case INDEX_op_ssadd_vec:
3035    case INDEX_op_sssub_vec:
3036        return vece < MO_64 ? -1 : 0;
3037    default:
3038        return 0;
3039    }
3040}
3041
3042static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
3043                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3044{
3045    bool need_swap = false, need_inv = false;
3046
3047    switch (cond) {
3048    case TCG_COND_EQ:
3049    case TCG_COND_GT:
3050    case TCG_COND_GTU:
3051        break;
3052    case TCG_COND_NE:
3053    case TCG_COND_LE:
3054    case TCG_COND_LEU:
3055        need_inv = true;
3056        break;
3057    case TCG_COND_LT:
3058    case TCG_COND_LTU:
3059        need_swap = true;
3060        break;
3061    case TCG_COND_GE:
3062    case TCG_COND_GEU:
3063        need_swap = need_inv = true;
3064        break;
3065    default:
3066        g_assert_not_reached();
3067    }
3068
3069    if (need_inv) {
3070        cond = tcg_invert_cond(cond);
3071    }
3072    if (need_swap) {
3073        TCGv_vec t1;
3074        t1 = v1, v1 = v2, v2 = t1;
3075        cond = tcg_swap_cond(cond);
3076    }
3077
3078    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3079              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3080
3081    return need_inv;
3082}
3083
3084static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3085                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3086{
3087    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
3088        tcg_gen_not_vec(vece, v0, v0);
3089    }
3090}
3091
3092static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
3093                              TCGv_vec c1, TCGv_vec c2,
3094                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
3095{
3096    TCGv_vec t = tcg_temp_new_vec(type);
3097
3098    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
3099        /* Invert the sense of the compare by swapping arguments.  */
3100        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3101    } else {
3102        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3103    }
3104    tcg_temp_free_vec(t);
3105}
3106
3107static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3108                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3109{
3110    TCGv_vec h1 = tcg_temp_new_vec(type);
3111    TCGv_vec h2 = tcg_temp_new_vec(type);
3112    TCGv_vec l1 = tcg_temp_new_vec(type);
3113    TCGv_vec l2 = tcg_temp_new_vec(type);
3114
3115    tcg_debug_assert (vece < MO_64);
3116
3117    /* Unpack with sign-extension. */
3118    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3119              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3120    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3121              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3122
3123    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3124              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3125    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3126              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3127
3128    /* Arithmetic on a wider element size. */
3129    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3130              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3131    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3132              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3133
3134    /* Pack with saturation. */
3135    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3136              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3137
3138    tcg_temp_free_vec(h1);
3139    tcg_temp_free_vec(h2);
3140    tcg_temp_free_vec(l1);
3141    tcg_temp_free_vec(l2);
3142}
3143
3144void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3145                       TCGArg a0, ...)
3146{
3147    va_list va;
3148    TCGv_vec v0, v1, v2, v3, v4, t0;
3149
3150    va_start(va, a0);
3151    v0 = temp_tcgv_vec(arg_temp(a0));
3152    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3153    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3154
3155    switch (opc) {
3156    case INDEX_op_cmp_vec:
3157        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3158        break;
3159
3160    case INDEX_op_cmpsel_vec:
3161        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3162        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3163        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3164        break;
3165
3166    case INDEX_op_rotrv_vec:
3167        t0 = tcg_temp_new_vec(type);
3168        tcg_gen_neg_vec(vece, t0, v2);
3169        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3170        tcg_temp_free_vec(t0);
3171        break;
3172
3173    case INDEX_op_ssadd_vec:
3174        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3175        break;
3176    case INDEX_op_sssub_vec:
3177        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3178        break;
3179
3180    default:
3181        g_assert_not_reached();
3182    }
3183    va_end(va);
3184}
3185
3186static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3187{
3188    switch (op) {
3189    case INDEX_op_goto_ptr:
3190        return C_O0_I1(r);
3191
3192    case INDEX_op_ld8u_i32:
3193    case INDEX_op_ld8u_i64:
3194    case INDEX_op_ld8s_i32:
3195    case INDEX_op_ld8s_i64:
3196    case INDEX_op_ld16u_i32:
3197    case INDEX_op_ld16u_i64:
3198    case INDEX_op_ld16s_i32:
3199    case INDEX_op_ld16s_i64:
3200    case INDEX_op_ld_i32:
3201    case INDEX_op_ld32u_i64:
3202    case INDEX_op_ld32s_i64:
3203    case INDEX_op_ld_i64:
3204        return C_O1_I1(r, r);
3205
3206    case INDEX_op_st8_i32:
3207    case INDEX_op_st8_i64:
3208    case INDEX_op_st16_i32:
3209    case INDEX_op_st16_i64:
3210    case INDEX_op_st_i32:
3211    case INDEX_op_st32_i64:
3212    case INDEX_op_st_i64:
3213        return C_O0_I2(r, r);
3214
3215    case INDEX_op_add_i32:
3216    case INDEX_op_add_i64:
3217    case INDEX_op_shl_i64:
3218    case INDEX_op_shr_i64:
3219    case INDEX_op_sar_i64:
3220    case INDEX_op_rotl_i32:
3221    case INDEX_op_rotl_i64:
3222    case INDEX_op_rotr_i32:
3223    case INDEX_op_rotr_i64:
3224    case INDEX_op_setcond_i32:
3225    case INDEX_op_negsetcond_i32:
3226        return C_O1_I2(r, r, ri);
3227    case INDEX_op_setcond_i64:
3228    case INDEX_op_negsetcond_i64:
3229        return C_O1_I2(r, r, rC);
3230
3231    case INDEX_op_clz_i64:
3232        return C_O1_I2(r, r, rI);
3233
3234    case INDEX_op_sub_i32:
3235    case INDEX_op_sub_i64:
3236    case INDEX_op_and_i32:
3237    case INDEX_op_or_i32:
3238    case INDEX_op_xor_i32:
3239        return C_O1_I2(r, r, ri);
3240    case INDEX_op_and_i64:
3241        return C_O1_I2(r, r, rNKR);
3242    case INDEX_op_or_i64:
3243    case INDEX_op_xor_i64:
3244        return C_O1_I2(r, r, rK);
3245
3246    case INDEX_op_andc_i32:
3247    case INDEX_op_orc_i32:
3248    case INDEX_op_eqv_i32:
3249        return C_O1_I2(r, r, ri);
3250    case INDEX_op_andc_i64:
3251        return C_O1_I2(r, r, rKR);
3252    case INDEX_op_orc_i64:
3253    case INDEX_op_eqv_i64:
3254        return C_O1_I2(r, r, rNK);
3255
3256    case INDEX_op_nand_i32:
3257    case INDEX_op_nand_i64:
3258    case INDEX_op_nor_i32:
3259    case INDEX_op_nor_i64:
3260        return C_O1_I2(r, r, r);
3261
3262    case INDEX_op_mul_i32:
3263        return (HAVE_FACILITY(MISC_INSN_EXT2)
3264                ? C_O1_I2(r, r, ri)
3265                : C_O1_I2(r, 0, ri));
3266    case INDEX_op_mul_i64:
3267        return (HAVE_FACILITY(MISC_INSN_EXT2)
3268                ? C_O1_I2(r, r, rJ)
3269                : C_O1_I2(r, 0, rJ));
3270
3271    case INDEX_op_shl_i32:
3272    case INDEX_op_shr_i32:
3273    case INDEX_op_sar_i32:
3274        return C_O1_I2(r, r, ri);
3275
3276    case INDEX_op_brcond_i32:
3277        return C_O0_I2(r, ri);
3278    case INDEX_op_brcond_i64:
3279        return C_O0_I2(r, rC);
3280
3281    case INDEX_op_bswap16_i32:
3282    case INDEX_op_bswap16_i64:
3283    case INDEX_op_bswap32_i32:
3284    case INDEX_op_bswap32_i64:
3285    case INDEX_op_bswap64_i64:
3286    case INDEX_op_neg_i32:
3287    case INDEX_op_neg_i64:
3288    case INDEX_op_not_i32:
3289    case INDEX_op_not_i64:
3290    case INDEX_op_ext8s_i32:
3291    case INDEX_op_ext8s_i64:
3292    case INDEX_op_ext8u_i32:
3293    case INDEX_op_ext8u_i64:
3294    case INDEX_op_ext16s_i32:
3295    case INDEX_op_ext16s_i64:
3296    case INDEX_op_ext16u_i32:
3297    case INDEX_op_ext16u_i64:
3298    case INDEX_op_ext32s_i64:
3299    case INDEX_op_ext32u_i64:
3300    case INDEX_op_ext_i32_i64:
3301    case INDEX_op_extu_i32_i64:
3302    case INDEX_op_extract_i32:
3303    case INDEX_op_extract_i64:
3304    case INDEX_op_ctpop_i32:
3305    case INDEX_op_ctpop_i64:
3306        return C_O1_I1(r, r);
3307
3308    case INDEX_op_qemu_ld_a32_i32:
3309    case INDEX_op_qemu_ld_a64_i32:
3310    case INDEX_op_qemu_ld_a32_i64:
3311    case INDEX_op_qemu_ld_a64_i64:
3312        return C_O1_I1(r, r);
3313    case INDEX_op_qemu_st_a32_i64:
3314    case INDEX_op_qemu_st_a64_i64:
3315    case INDEX_op_qemu_st_a32_i32:
3316    case INDEX_op_qemu_st_a64_i32:
3317        return C_O0_I2(r, r);
3318    case INDEX_op_qemu_ld_a32_i128:
3319    case INDEX_op_qemu_ld_a64_i128:
3320        return C_O2_I1(o, m, r);
3321    case INDEX_op_qemu_st_a32_i128:
3322    case INDEX_op_qemu_st_a64_i128:
3323        return C_O0_I3(o, m, r);
3324
3325    case INDEX_op_deposit_i32:
3326    case INDEX_op_deposit_i64:
3327        return C_O1_I2(r, rZ, r);
3328
3329    case INDEX_op_movcond_i32:
3330        return C_O1_I4(r, r, ri, rI, r);
3331    case INDEX_op_movcond_i64:
3332        return C_O1_I4(r, r, rC, rI, r);
3333
3334    case INDEX_op_div2_i32:
3335    case INDEX_op_div2_i64:
3336    case INDEX_op_divu2_i32:
3337    case INDEX_op_divu2_i64:
3338        return C_O2_I3(o, m, 0, 1, r);
3339
3340    case INDEX_op_mulu2_i64:
3341        return C_O2_I2(o, m, 0, r);
3342    case INDEX_op_muls2_i64:
3343        return C_O2_I2(o, m, r, r);
3344
3345    case INDEX_op_add2_i32:
3346    case INDEX_op_sub2_i32:
3347        return C_N1_O1_I4(r, r, 0, 1, ri, r);
3348
3349    case INDEX_op_add2_i64:
3350    case INDEX_op_sub2_i64:
3351        return C_N1_O1_I4(r, r, 0, 1, rJU, r);
3352
3353    case INDEX_op_st_vec:
3354        return C_O0_I2(v, r);
3355    case INDEX_op_ld_vec:
3356    case INDEX_op_dupm_vec:
3357        return C_O1_I1(v, r);
3358    case INDEX_op_dup_vec:
3359        return C_O1_I1(v, vr);
3360    case INDEX_op_abs_vec:
3361    case INDEX_op_neg_vec:
3362    case INDEX_op_not_vec:
3363    case INDEX_op_rotli_vec:
3364    case INDEX_op_sari_vec:
3365    case INDEX_op_shli_vec:
3366    case INDEX_op_shri_vec:
3367    case INDEX_op_s390_vuph_vec:
3368    case INDEX_op_s390_vupl_vec:
3369        return C_O1_I1(v, v);
3370    case INDEX_op_add_vec:
3371    case INDEX_op_sub_vec:
3372    case INDEX_op_and_vec:
3373    case INDEX_op_andc_vec:
3374    case INDEX_op_or_vec:
3375    case INDEX_op_orc_vec:
3376    case INDEX_op_xor_vec:
3377    case INDEX_op_nand_vec:
3378    case INDEX_op_nor_vec:
3379    case INDEX_op_eqv_vec:
3380    case INDEX_op_cmp_vec:
3381    case INDEX_op_mul_vec:
3382    case INDEX_op_rotlv_vec:
3383    case INDEX_op_rotrv_vec:
3384    case INDEX_op_shlv_vec:
3385    case INDEX_op_shrv_vec:
3386    case INDEX_op_sarv_vec:
3387    case INDEX_op_smax_vec:
3388    case INDEX_op_smin_vec:
3389    case INDEX_op_umax_vec:
3390    case INDEX_op_umin_vec:
3391    case INDEX_op_s390_vpks_vec:
3392        return C_O1_I2(v, v, v);
3393    case INDEX_op_rotls_vec:
3394    case INDEX_op_shls_vec:
3395    case INDEX_op_shrs_vec:
3396    case INDEX_op_sars_vec:
3397        return C_O1_I2(v, v, r);
3398    case INDEX_op_bitsel_vec:
3399        return C_O1_I3(v, v, v, v);
3400
3401    default:
3402        g_assert_not_reached();
3403    }
3404}
3405
/*
 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.
 * Some distros have fixed this up locally, others have not.
 *
 * Supply HWCAP_S390_VXRS ourselves when the system headers do not
 * define it; query_s390_facilities() tests this bit in AT_HWCAP.
 */
#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048
#endif
3413
3414static void query_s390_facilities(void)
3415{
3416    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3417    const char *which;
3418
3419    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3420       is present on all 64-bit systems, but let's check for it anyway.  */
3421    if (hwcap & HWCAP_S390_STFLE) {
3422        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3423        register void *r1 __asm__("1") = s390_facilities;
3424
3425        /* stfle 0(%r1) */
3426        asm volatile(".word 0xb2b0,0x1000"
3427                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3428    }
3429
3430    /*
3431     * Use of vector registers requires os support beyond the facility bit.
3432     * If the kernel does not advertise support, disable the facility bits.
3433     * There is nothing else we currently care about in the 3rd word, so
3434     * disable VECTOR with one store.
3435     */
3436    if (!(hwcap & HWCAP_S390_VXRS)) {
3437        s390_facilities[2] = 0;
3438    }
3439
3440    /*
3441     * Minimum supported cpu revision is z196.
3442     * Check for all required facilities.
3443     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3444     */
3445    if (!HAVE_FACILITY(LONG_DISP)) {
3446        which = "long-displacement";
3447        goto fail;
3448    }
3449    if (!HAVE_FACILITY(EXT_IMM)) {
3450        which = "extended-immediate";
3451        goto fail;
3452    }
3453    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3454        which = "general-instructions-extension";
3455        goto fail;
3456    }
3457    /*
3458     * Facility 45 is a big bin that contains: distinct-operands,
3459     * fast-BCR-serialization, high-word, population-count,
3460     * interlocked-access-1, and load/store-on-condition-1
3461     */
3462    if (!HAVE_FACILITY(45)) {
3463        which = "45";
3464        goto fail;
3465    }
3466    return;
3467
3468 fail:
3469    error_report("%s: missing required facility %s", __func__, which);
3470    exit(EXIT_FAILURE);
3471}
3472
3473static void tcg_target_init(TCGContext *s)
3474{
3475    query_s390_facilities();
3476
3477    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3478    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3479    if (HAVE_FACILITY(VECTOR)) {
3480        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3481        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3482    }
3483
3484    tcg_target_call_clobber_regs = 0;
3485    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3486    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3487    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3488    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3489    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3490    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3491    /* The r6 register is technically call-saved, but it's also a parameter
3492       register, so it can get killed by setup for the qemu_st helper.  */
3493    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3494    /* The return register can be considered call-clobbered.  */
3495    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3496
3497    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3498    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3499    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3500    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3501    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3502    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3503    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3504    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3505    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3506    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3507    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3508    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3509    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3510    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3511    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3512    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3513    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3514    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3515    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3516    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3517    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3518    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3519    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3520    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3521
3522    s->reserved_regs = 0;
3523    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3524    /* XXX many insns can't be used with R0, so we better avoid it for now */
3525    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3526    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3527}
3528
/*
 * Number of bytes the prologue subtracts from %r15: the call stack
 * offset, the static call-argument area, and the CPU temp buffer.
 * The result is cast to int so that it can be negated for AGHI.
 */
#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
                           + TCG_STATIC_CALL_ARGS_SIZE           \
                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3532
3533static void tcg_target_qemu_prologue(TCGContext *s)
3534{
3535    /* stmg %r6,%r15,48(%r15) (save registers) */
3536    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3537
3538    /* aghi %r15,-frame_size */
3539    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3540
3541    tcg_set_frame(s, TCG_REG_CALL_STACK,
3542                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3543                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3544
3545    if (!tcg_use_softmmu && guest_base >= 0x80000) {
3546        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3547        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3548    }
3549
3550    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3551
3552    /* br %r3 (go to TB) */
3553    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3554
3555    /*
3556     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3557     * and fall through to the rest of the epilogue.
3558     */
3559    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3560    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3561
3562    /* TB epilogue */
3563    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3564
3565    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3566    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3567                 FRAME_SIZE + 48);
3568
3569    /* br %r14 (return) */
3570    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3571}
3572
/*
 * Hook for emitting code at the start of a translation block.
 * This backend emits nothing here.
 */
static void tcg_out_tb_start(TCGContext *s)
{
    /* nothing to do */
}
3577
3578static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3579{
3580    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3581}
3582
/*
 * Layout of the unwind information handed to tcg_register_jit_int():
 * the common header followed by the backend's FDE instructions.
 */
typedef struct {
    DebugFrameHeader h;
    /* DW_CFA_def_cfa: opcode, register, and a 2-byte uleb128 offset.  */
    uint8_t fde_def_cfa[4];
    /* Nine two-byte DW_CFA_offset entries, one each for %r6-%r14.  */
    uint8_t fde_reg_ofs[18];
} DebugFrame;
3588
/*
 * We're expecting a 2 byte uleb128 encoded value: debug_frame stores
 * FRAME_SIZE as two 7-bit groups, so it must be below 1 << 14.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* ELF machine identifier for this host. */
#define ELF_HOST_MACHINE  EM_S390
3593
/*
 * Unwind description of the frame built by tcg_target_qemu_prologue():
 * the CFA is %r15 + FRAME_SIZE, and %r6-%r14 are recorded at the
 * offsets where the prologue's STMG stored them.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 8,                /* sleb128 8 */
    .h.cie.return_column = TCG_REG_R14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    /*
     * Each pair is (0x80 | register), then the offset in units of
     * data_align (8 bytes); the byte offset is given in the comment.
     */
    .fde_reg_ofs = {
        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
    }
};
3622
/*
 * Register the code buffer @buf of @buf_size bytes, passing this
 * backend's debug_frame as the accompanying unwind information.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3627