1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*---------------------------------------------------------------*/
4 /*--- begin                              guest_s390_helpers.c ---*/
5 /*---------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright IBM Corp. 2010-2017
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Contributed by Florian Krohm */
32 
33 #include "libvex_basictypes.h"
34 #include "libvex_emnote.h"
35 #include "libvex_guest_s390x.h"
36 #include "libvex_ir.h"
37 #include "libvex.h"
38 #include "libvex_s390x_common.h"
39 
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "guest_generic_bb_to_IR.h"
43 #include "guest_s390_defs.h"
44 #include "s390_defs.h"               /* S390_BFP_ROUND_xyzzy */
45 
46 void
LibVEX_GuestS390X_initialise(VexGuestS390XState * state)47 LibVEX_GuestS390X_initialise(VexGuestS390XState *state)
48 {
49 /*------------------------------------------------------------*/
50 /*--- Initialise ar registers                              ---*/
51 /*------------------------------------------------------------*/
52 
53    state->guest_a0 = 0;
54    state->guest_a1 = 0;
55    state->guest_a2 = 0;
56    state->guest_a3 = 0;
57    state->guest_a4 = 0;
58    state->guest_a5 = 0;
59    state->guest_a6 = 0;
60    state->guest_a7 = 0;
61    state->guest_a8 = 0;
62    state->guest_a9 = 0;
63    state->guest_a10 = 0;
64    state->guest_a11 = 0;
65    state->guest_a12 = 0;
66    state->guest_a13 = 0;
67    state->guest_a14 = 0;
68    state->guest_a15 = 0;
69 
70 /*------------------------------------------------------------*/
71 /*--- Initialise vr registers                             ---*/
72 /*------------------------------------------------------------*/
73 
74 #define VRZERO(vr) \
75    do { \
76       vr.w64[0] = vr.w64[1] = 0ULL; \
77    } while(0);
78 
79    VRZERO(state->guest_v0)
80    VRZERO(state->guest_v1)
81    VRZERO(state->guest_v2)
82    VRZERO(state->guest_v3)
83    VRZERO(state->guest_v4)
84    VRZERO(state->guest_v5)
85    VRZERO(state->guest_v6)
86    VRZERO(state->guest_v7)
87    VRZERO(state->guest_v8)
88    VRZERO(state->guest_v9)
89    VRZERO(state->guest_v10)
90    VRZERO(state->guest_v11)
91    VRZERO(state->guest_v12)
92    VRZERO(state->guest_v13)
93    VRZERO(state->guest_v14)
94    VRZERO(state->guest_v15)
95    VRZERO(state->guest_v16)
96    VRZERO(state->guest_v17)
97    VRZERO(state->guest_v18)
98    VRZERO(state->guest_v19)
99    VRZERO(state->guest_v20)
100    VRZERO(state->guest_v21)
101    VRZERO(state->guest_v22)
102    VRZERO(state->guest_v23)
103    VRZERO(state->guest_v24)
104    VRZERO(state->guest_v25)
105    VRZERO(state->guest_v26)
106    VRZERO(state->guest_v27)
107    VRZERO(state->guest_v28)
108    VRZERO(state->guest_v29)
109    VRZERO(state->guest_v30)
110    VRZERO(state->guest_v31)
111 
112 #undef VRZERO
113 /*------------------------------------------------------------*/
114 /*--- Initialise gpr registers                             ---*/
115 /*------------------------------------------------------------*/
116 
117    state->guest_r0 = 0;
118    state->guest_r1 = 0;
119    state->guest_r2 = 0;
120    state->guest_r3 = 0;
121    state->guest_r4 = 0;
122    state->guest_r5 = 0;
123    state->guest_r6 = 0;
124    state->guest_r7 = 0;
125    state->guest_r8 = 0;
126    state->guest_r9 = 0;
127    state->guest_r10 = 0;
128    state->guest_r11 = 0;
129    state->guest_r12 = 0;
130    state->guest_r13 = 0;
131    state->guest_r14 = 0;
132    state->guest_r15 = 0;
133 
134 /*------------------------------------------------------------*/
135 /*--- Initialise S390 miscellaneous registers              ---*/
136 /*------------------------------------------------------------*/
137 
138    state->guest_counter = 0;
139    state->guest_fpc = 0;
140    state->guest_IA = 0;
141 
142 /*------------------------------------------------------------*/
143 /*--- Initialise S390 pseudo registers                     ---*/
144 /*------------------------------------------------------------*/
145 
146    state->guest_SYSNO = 0;
147 
148 /*------------------------------------------------------------*/
149 /*--- Initialise generic pseudo registers                  ---*/
150 /*------------------------------------------------------------*/
151 
152    state->guest_NRADDR = 0;
153    state->guest_CMSTART = 0;
154    state->guest_CMLEN = 0;
155    state->guest_IP_AT_SYSCALL = 0;
156    state->guest_EMNOTE = EmNote_NONE;
157    state->host_EvC_COUNTER = 0;
158    state->host_EvC_FAILADDR = 0;
159 
160 /*------------------------------------------------------------*/
161 /*--- Initialise thunk                                     ---*/
162 /*------------------------------------------------------------*/
163 
164    state->guest_CC_OP = 0;
165    state->guest_CC_DEP1 = 0;
166    state->guest_CC_DEP2 = 0;
167    state->guest_CC_NDEP = 0;
168 
169    __builtin_memset(state->padding, 0x0, sizeof(state->padding));
170 }
171 
172 
173 /* Figure out if any part of the guest state contained in minoff
174    .. maxoff requires precise memory exceptions.  If in doubt return
175    True (but this generates significantly slower code).  */
176 Bool
guest_s390x_state_requires_precise_mem_exns(Int minoff,Int maxoff,VexRegisterUpdates pxControl)177 guest_s390x_state_requires_precise_mem_exns (
178    Int minoff, Int maxoff, VexRegisterUpdates pxControl
179 )
180 {
181    Int lr_min = S390X_GUEST_OFFSET(guest_LR);
182    Int lr_max = lr_min + 8 - 1;
183    Int sp_min = S390X_GUEST_OFFSET(guest_SP);
184    Int sp_max = sp_min + 8 - 1;
185    Int fp_min = S390X_GUEST_OFFSET(guest_FP);
186    Int fp_max = fp_min + 8 - 1;
187    Int ia_min = S390X_GUEST_OFFSET(guest_IA);
188    Int ia_max = ia_min + 8 - 1;
189 
190    if (maxoff < sp_min || minoff > sp_max) {
191       /* No overlap with SP */
192       if (pxControl == VexRegUpdSpAtMemAccess)
193          return False; // We only need to check stack pointer.
194    } else {
195       return True;
196    }
197 
198    if (maxoff < lr_min || minoff > lr_max) {
199       /* No overlap with LR */
200    } else {
201       return True;
202    }
203 
204    if (maxoff < fp_min || minoff > fp_max) {
205       /* No overlap with FP */
206    } else {
207       return True;
208    }
209 
210    if (maxoff < ia_min || minoff > ia_max) {
211       /* No overlap with IA */
212    } else {
213       return True;
214    }
215 
216    return False;
217 }
218 
219 
220 #define ALWAYSDEFD(field)                             \
221     { S390X_GUEST_OFFSET(field),            \
222       (sizeof ((VexGuestS390XState*)0)->field) }
223 
224 VexGuestLayout s390xGuest_layout = {
225 
226    /* Total size of the guest state, in bytes. */
227    .total_sizeB = sizeof(VexGuestS390XState),
228 
229    /* Describe the stack pointer. */
230    .offset_SP = S390X_GUEST_OFFSET(guest_SP),
231    .sizeof_SP = 8,
232 
233    /* Describe the frame pointer. */
234    .offset_FP = S390X_GUEST_OFFSET(guest_FP),
235    .sizeof_FP = 8,
236 
237    /* Describe the instruction pointer. */
238    .offset_IP = S390X_GUEST_OFFSET(guest_IA),
239    .sizeof_IP = 8,
240 
241    /* Describe any sections to be regarded by Memcheck as
242       'always-defined'. */
243    .n_alwaysDefd = 9,
244 
245    /* Flags thunk: OP and NDEP are always defined, whereas DEP1
246       and DEP2 have to be tracked.  See detailed comment in
247       gdefs.h on meaning of thunk fields. */
248    .alwaysDefd = {
249       /*  0 */ ALWAYSDEFD(guest_CC_OP),     /* generic */
250       /*  1 */ ALWAYSDEFD(guest_CC_NDEP),   /* generic */
251       /*  2 */ ALWAYSDEFD(guest_EMNOTE),    /* generic */
252       /*  3 */ ALWAYSDEFD(guest_CMSTART),   /* generic */
253       /*  4 */ ALWAYSDEFD(guest_CMLEN),     /* generic */
254       /*  5 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* generic */
255       /*  6 */ ALWAYSDEFD(guest_IA),        /* control reg */
256       /*  7 */ ALWAYSDEFD(guest_fpc),       /* control reg */
257       /*  8 */ ALWAYSDEFD(guest_counter),   /* internal usage register */
258    }
259 };
260 
261 /*------------------------------------------------------------*/
262 /*--- Dirty helper for EXecute                             ---*/
263 /*------------------------------------------------------------*/
264 void
s390x_dirtyhelper_EX(ULong torun)265 s390x_dirtyhelper_EX(ULong torun)
266 {
267    last_execute_target = torun;
268 }
269 
270 
271 /*------------------------------------------------------------*/
272 /*--- Dirty helper for Clock instructions                  ---*/
273 /*------------------------------------------------------------*/
274 #if defined(VGA_s390x)
275 ULong
s390x_dirtyhelper_STCK(ULong * addr)276 s390x_dirtyhelper_STCK(ULong *addr)
277 {
278    UInt cc;
279 
280    asm volatile("stck %0\n"
281                 "ipm %1\n"
282                 "srl %1,28\n"
283                 : "+Q" (*addr), "=d" (cc) : : "cc");
284    return cc;
285 }
286 
287 ULong
s390x_dirtyhelper_STCKE(ULong * addr)288 s390x_dirtyhelper_STCKE(ULong *addr)
289 {
290    UInt cc;
291 
292    asm volatile("stcke %0\n"
293                 "ipm %1\n"
294                 "srl %1,28\n"
295                 : "+Q" (*addr), "=d" (cc) : : "cc");
296    return cc;
297 }
298 
/* Execute the host instruction with opcode 0xb27c (emitted via .insn)
   with *ADDR as its storage operand and return the condition code the
   instruction produced. */
ULong
s390x_dirtyhelper_STCKF(ULong *addr)
{
   UInt condition_code;

   /* ipm/srl move the condition code into the low bits of the
      32-bit result register. */
   asm volatile(".insn s,0xb27c0000,%0\n"
                "ipm %1\n"
                "srl %1,28\n"
                : "+Q" (*addr), "=d" (condition_code)
                :
                : "cc");

   return condition_code;
}
309 #else
/* Versions of the clock helpers that are built when VGA_s390x is not
   defined.  Each one leaves *ADDR untouched and returns 3. */
ULong
s390x_dirtyhelper_STCK(ULong *addr)
{
   (void)addr;
   return 3;
}

ULong
s390x_dirtyhelper_STCKF(ULong *addr)
{
   (void)addr;
   return 3;
}

ULong
s390x_dirtyhelper_STCKE(ULong *addr)
{
   (void)addr;
   return 3;
}
313 #endif /* VGA_s390x */
314 
315 /*------------------------------------------------------------*/
316 /*--- Dirty helper for Store Facility instruction          ---*/
317 /*------------------------------------------------------------*/
318 #if defined(VGA_s390x)
/* Set or clear one bit in a facility list.

   ADDR   start of the list, an array of doublewords
   BITNO  number of the bit; bit 0 is the most significant bit of
          ADDR[0], bit 64 the most significant bit of ADDR[1], etc.
   VALUE  1 sets the bit, any other value clears it

   No bounds checking is done: ADDR[BITNO / 64] must be writable. */
static void
s390_set_facility_bit(ULong *addr, UInt bitno, UInt value)
{
   ULong *dword    = addr + bitno / 64;
   ULong  selector = (ULong)1 << (63 - bitno % 64);

   if (value == 1)
      *dword |= selector;
   else
      *dword &= ~selector;
}
334 
335 ULong
s390x_dirtyhelper_STFLE(VexGuestS390XState * guest_state,ULong * addr)336 s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
337 {
338    ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i;
339    register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF;  /* r0[56:63] */
340 
341    /* We cannot store more than S390_NUM_FACILITY_DW
342       (and it makes not much sense to do so anyhow) */
343    if (reg0 > S390_NUM_FACILITY_DW - 1)
344       reg0 = S390_NUM_FACILITY_DW - 1;
345 
346    num_dw = reg0 + 1;  /* number of double words written */
347 
348    asm volatile(" .insn s,0xb2b00000,%0\n"   /* stfle */
349                 "ipm    %2\n"
350                 "srl    %2,28\n"
351                 : "=m" (hoststfle), "+d"(reg0), "=d"(cc) : : "cc", "memory");
352 
353    /* Update guest register 0  with what STFLE set r0 to */
354    guest_state->guest_r0 = reg0;
355 
356    /* Set default: VM facilities = host facilities */
357    for (i = 0; i < num_dw; ++i)
358       addr[i] = hoststfle[i];
359 
360    /* Now adjust the VM facilities according to what the VM supports */
361    s390_set_facility_bit(addr, S390_FAC_LDISP,  1);
362    s390_set_facility_bit(addr, S390_FAC_EIMM,   1);
363    s390_set_facility_bit(addr, S390_FAC_ETF2,   1);
364    s390_set_facility_bit(addr, S390_FAC_ETF3,   1);
365    s390_set_facility_bit(addr, S390_FAC_GIE,    1);
366    s390_set_facility_bit(addr, S390_FAC_EXEXT,  1);
367    s390_set_facility_bit(addr, S390_FAC_HIGHW,  1);
368    s390_set_facility_bit(addr, S390_FAC_LSC2,   1);
369 
370    s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0);
371    s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0);
372    s390_set_facility_bit(addr, S390_FAC_XCPUT,  0);
373    s390_set_facility_bit(addr, S390_FAC_MSA,    0);
374    s390_set_facility_bit(addr, S390_FAC_PENH,   0);
375    s390_set_facility_bit(addr, S390_FAC_DFP,    0);
376    s390_set_facility_bit(addr, S390_FAC_PFPO,   0);
377    s390_set_facility_bit(addr, S390_FAC_DFPZC,  0);
378    s390_set_facility_bit(addr, S390_FAC_MISC,   0);
379    s390_set_facility_bit(addr, S390_FAC_CTREXE, 0);
380    s390_set_facility_bit(addr, S390_FAC_TREXE,  0);
381    s390_set_facility_bit(addr, S390_FAC_MSA4,   0);
382 
383    return cc;
384 }
385 
386 #else
387 
388 ULong
s390x_dirtyhelper_STFLE(VexGuestS390XState * guest_state,ULong * addr)389 s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
390 {
391    return 3;
392 }
393 #endif /* VGA_s390x */
394 
395 /*------------------------------------------------------------*/
396 /*--- Dirty helper for the "convert unicode" insn family.  ---*/
397 /*------------------------------------------------------------*/
/* Store the NUM_BYTES least significant bytes of DATA at ADDRESS, most
   significant of those bytes first.  NUM_BYTES must be in the range
   1 .. 4. */
void
s390x_dirtyhelper_CUxy(UChar *address, ULong data, ULong num_bytes)
{
   vassert(num_bytes >= 1 && num_bytes <= 4);

   /* Fill the destination back to front: the lowest byte of DATA goes
      to the last position, the next one to the position before it, and
      so on until the first position has been written. */
   UChar *slot = address + num_bytes;

   while (slot != address) {
      *--slot = data & 0xff;
      data >>= 8;
   }
}
412 
413 
414 /*------------------------------------------------------------*/
415 /*--- Clean helper for CU21.                               ---*/
416 /*------------------------------------------------------------*/
417 
418 /* The function performs a CU21 operation. It returns three things
419    encoded in an ULong value:
420    - the converted bytes (at most 4)
421    - the number of converted bytes
422    - an indication whether LOW_SURROGATE, if any, is invalid
423 
424    64      48                16           8                       0
425     +-------+-----------------+-----------+-----------------------+
426     |  0x0  | converted bytes | num_bytes | invalid_low_surrogate |
427     +-------+-----------------+-----------+-----------------------+
428 */
/* Convert one UTF-16 unit (plus LOW_SURROGATE, if SRCVAL is a high
   surrogate) to UTF-8.  Only the low 16 bits of SRCVAL are looked at.

   Result layout: converted bytes from bit 16 upwards, the number of
   converted bytes in bits 8..15, and in bits 0..7 a flag that is 1 when
   a low surrogate was required but LOW_SURROGATE is not one. */
ULong
s390_do_cu21(UInt srcval, UInt low_surrogate)
{
   ULong utf8;                       /* converted bytes, right-aligned */
   UInt  utf8_len;
   UInt  bad_low_surrogate = 0;

   srcval &= 0xffff;

   if (srcval <= 0x007f) {
      /* One byte, value unchanged */
      utf8_len = 1;
      utf8     = srcval;
   } else if (srcval <= 0x07ff) {
      /* Two bytes, left to right: lead, trail */
      UInt lead  = 0xc0 | (srcval >> 6);
      UInt trail = 0x80 | (srcval & 0x3f);

      utf8_len = 2;
      utf8     = (lead << 8) | trail;
   } else if (srcval < 0xd800 || srcval > 0xdbff) {
      /* Three bytes: 0800 - d7ff and dc00 - ffff */
      UInt lead   = 0xe0 | (srcval >> 12);
      UInt middle = 0x80 | ((srcval >> 6) & 0x3f);
      UInt trail  = 0x80 | (srcval & 0x3f);

      utf8_len = 3;
      utf8     = (lead << 16) | (middle << 8) | trail;
   } else {
      /* d800 - dbff: a high surrogate, combined with LOW_SURROGATE
         into four bytes */
      UInt high  = srcval;
      UInt uvwxy = ((high >> 6) & 0xf) + 1;                   /* abcd + 1 */
      UInt byte1 = 0xf0 | (uvwxy >> 2);                       /* uvw */
      UInt byte2 = 0x80 | ((uvwxy & 0x3) << 4)                /* xy */
                        | ((high >> 2) & 0xf);                /* efgh */
      UInt byte3 = 0x80 | ((high & 0x3) << 4)                 /* ij */
                        | ((low_surrogate >> 6) & 0xf);       /* klmn */
      UInt byte4 = 0x80 | (low_surrogate & 0x3f);

      utf8_len = 4;
      utf8     = (byte1 << 24) | (byte2 << 16) | (byte3 << 8) | byte4;

      bad_low_surrogate = (low_surrogate & 0xfc00) != 0xdc00;
   }

   return (utf8 << 16) | (utf8_len << 8) | bad_low_surrogate;
}
509 
510 
511 /*------------------------------------------------------------*/
512 /*--- Clean helper for CU24.                               ---*/
513 /*------------------------------------------------------------*/
514 
515 /* The function performs a CU24 operation. It returns two things
516    encoded in an ULong value:
517    - the 4 converted bytes
518    - an indication whether LOW_SURROGATE, if any, is invalid
519 
520    64     40                 8                       0
521     +------------------------+-----------------------+
522     |  0x0 | converted bytes | invalid_low_surrogate |
523     +------------------------+-----------------------+
524 */
/* Convert one UTF-16 unit (plus LOW_SURROGATE, if SRCVAL is a high
   surrogate) to UTF-32.  Only the low 16 bits of SRCVAL are looked at.

   Result layout: the converted value from bit 8 upwards, and in bits
   0..7 a flag that is 1 when a low surrogate was required but
   LOW_SURROGATE is not one. */
ULong
s390_do_cu24(UInt srcval, UInt low_surrogate)
{
   const UInt unit = srcval & 0xffff;

   /* Everything outside d800 - dbff is taken over unchanged. */
   if (unit < 0xd800 || unit > 0xdbff)
      return (ULong)unit << 8;

   /* d800 - dbff: a high surrogate, to be combined with LOW_SURROGATE */
   {
      const UInt  uvwxy     = ((unit >> 6) & 0xf) + 1;   /* abcd + 1 */
      const UInt  efghij    = unit & 0x3f;
      const UInt  klmnoprst = low_surrogate & 0x3ff;
      const UInt  bad_low   = (low_surrogate & 0xfc00) != 0xdc00;
      const ULong utf32     = (uvwxy << 16) | (efghij << 10) | klmnoprst;

      return (utf32 << 8) | bad_low;
   }
}
552 
553 
554 /*------------------------------------------------------------*/
555 /*--- Clean helper for CU42.                               ---*/
556 /*------------------------------------------------------------*/
557 
558 /* The function performs a CU42 operation. It returns three things
559    encoded in an ULong value:
560    - the converted bytes (at most 4)
561    - the number of converted bytes (2 or 4; 0 if invalid character)
562    - an indication whether the UTF-32 character is invalid
563 
564    64      48                16           8                   0
565     +-------+-----------------+-----------+-------------------+
566     |  0x0  | converted bytes | num_bytes | invalid_character |
567     +-------+-----------------+-----------+-------------------+
568 */
/* Convert the UTF-32 value SRCVAL to UTF-16.

   Result layout: converted bytes from bit 16 upwards, the number of
   converted bytes (2 or 4) in bits 8..15, and in bits 0..7 a flag that
   is 1 when SRCVAL cannot be converted.  In the latter case all other
   bits of the result are 0. */
ULong
s390_do_cu42(UInt srcval)
{
   /* d800 - dbff or 00110000 - ffffffff */
   if ((srcval >= 0xd800 && srcval <= 0xdbff) || srcval > 0x0010FFFF)
      return 1;

   if (srcval <= 0xffff) {
      /* A single 16-bit unit, value unchanged */
      return ((ULong)srcval << 16) | (2 << 8);
   }

   /* 00010000 - 0010ffff: a surrogate pair */
   {
      const UInt uvwxy  = srcval >> 16;
      const UInt abcd   = (uvwxy - 1) & 0xf;
      const UInt efghij = (srcval >> 10) & 0x3f;

      const UInt  high_surrogate = (0xd8 << 8) | (abcd << 6) | efghij;
      const UInt  low_surrogate  = (0xdc << 8) | (srcval & 0x3ff);
      const ULong pair           = (high_surrogate << 16) | low_surrogate;

      return (pair << 16) | (4 << 8);
   }
}
599 
600 
601 /*------------------------------------------------------------*/
602 /*--- Clean helper for CU41.                               ---*/
603 /*------------------------------------------------------------*/
604 
605 /* The function performs a CU41 operation. It returns three things
606    encoded in an ULong value:
607    - the converted bytes (at most 4)
608    - the number of converted bytes (1, 2, 3, or 4; 0 if invalid character)
609    - an indication whether the UTF-32 character is invalid
610 
611    64      48                16           8                   0
612     +-------+-----------------+-----------+-------------------+
613     |  0x0  | converted bytes | num_bytes | invalid_character |
614     +-------+-----------------+-----------+-------------------+
615 */
/* Convert the UTF-32 value SRCVAL to UTF-8.

   Result layout: converted bytes from bit 16 upwards, the number of
   converted bytes (1 .. 4) in bits 8..15, and in bits 0..7 a flag that
   is 1 when SRCVAL cannot be converted.  In the latter case all other
   bits of the result are 0. */
ULong
s390_do_cu41(UInt srcval)
{
   ULong utf8;
   UInt  utf8_len;

   /* d800 ... dbff or 00110000 ... ffffffff */
   if ((srcval >= 0xd800 && srcval <= 0xdbff) || srcval > 0x10ffff)
      return 1;

   if (srcval <= 0x7f) {
      utf8     = srcval;
      utf8_len = 1;
   } else if (srcval <= 0x7ff) {
      UInt lead  = 0xc0 | (srcval >> 6);           /* fghij */
      UInt trail = 0x80 | (srcval & 0x3f);         /* klmnop */

      utf8     = (lead << 8) | trail;
      utf8_len = 2;
   } else if (srcval <= 0xffff) {
      UInt lead   = 0xe0 | (srcval >> 12);         /* abcd */
      UInt middle = 0x80 | ((srcval >> 6) & 0x3f); /* efghij */
      UInt trail  = 0x80 | (srcval & 0x3f);        /* klmnop */

      utf8     = (lead << 16) | (middle << 8) | trail;
      utf8_len = 3;
   } else {
      /* 00010000 ... 0010ffff */
      UInt first  = 0xf0 | ((srcval >> 18) & 0x7);          /* uvw */
      UInt second = 0x80 | (((srcval >> 16) & 0x3) << 4)    /* xy */
                         | ((srcval >> 12) & 0xf);          /* efgh */
      UInt third  = 0x80 | ((srcval >> 6) & 0x3f);          /* ijklmn */
      UInt fourth = 0x80 | (srcval & 0x3f);                 /* opqrst */

      utf8     = (first << 24) | (second << 16) | (third << 8) | fourth;
      utf8_len = 4;
   }

   return (utf8 << 16) | (utf8_len << 8);
}
669 
670 
671 /*------------------------------------------------------------*/
672 /*--- Clean helpers for CU12.                              ---*/
673 /*------------------------------------------------------------*/
674 
675 /* The function looks at the first byte of an UTF-8 character and returns
676    two things encoded in an ULong value:
677 
678    - the number of bytes that need to be read
679    - an indication whether the UTF-8 character is invalid
680 
681    64      16           8                   0
682     +-------------------+-------------------+
683     |  0x0  | num_bytes | invalid_character |
684     +-------+-----------+-------------------+
685 */
/* Classify BYTE, the first byte of an UTF-8 character (must be <= 0xff).

   Returns 1 if BYTE cannot start a character; otherwise the number of
   bytes making up the character (1 .. 4), shifted left by 8.  When
   ETF3_AND_M3_IS_1 is nonzero, 0xc0, 0xc1 and 0xf5 .. 0xf7 are rejected
   in addition to the bytes that are rejected in any case. */
ULong
s390_do_cu12_cu14_helper1(UInt byte, UInt etf3_and_m3_is_1)
{
   vassert(byte <= 0xff);

   if (byte <= 0x7f)
      return 1 << 8;                  /* 00 .. 7f: 1 byte */

   if (byte <= 0xbf)
      return 1;                       /* 80 .. bf: invalid */

   if (byte <= 0xdf) {                /* c0 .. df: 2 bytes */
      if (etf3_and_m3_is_1 && byte <= 0xc1)
         return 1;
      return 2 << 8;
   }

   if (byte <= 0xef)
      return 3 << 8;                  /* e0 .. ef: 3 bytes */

   if (byte <= 0xf7) {                /* f0 .. f7: 4 bytes */
      if (etf3_and_m3_is_1 && byte >= 0xf5)
         return 1;
      return 4 << 8;
   }

   return 1;                          /* f8 .. ff: invalid */
}
707 
708 /* The function performs a CU12 or CU14 operation. BYTE1, BYTE2, etc are the
709    bytes as read from the input stream, left to right. BYTE1 is a valid
710    byte. The function returns three things encoded in an ULong value:
711 
712    - the converted bytes
713    - the number of converted bytes (2 or 4; 0 if invalid character)
   - an indication whether the UTF-8 character is invalid
715 
716    64      48                16           8                   0
717     +-------+-----------------+-----------+-------------------+
718     |  0x0  | converted bytes | num_bytes | invalid_character |
719     +-------+-----------------+-----------+-------------------+
720 */
/* Common worker for CU12 and CU14.

   BYTE1 .. BYTE4  the bytes of the UTF-8 character, left to right; only
                   as many as the character is long are looked at
   STUFF           bit 0: etf3_and_m3_is_1; remaining bits: number of
                   bytes of the UTF-8 character (at most 4)
   IS_CU12         nonzero: produce UTF-16, zero: produce UTF-32

   Result layout: converted bytes from bit 16 upwards, the number of
   converted bytes in bits 8..15 (always 4 for CU14) and in bits 0..7 a
   flag that is 1 when the UTF-8 character was found to be invalid.
   Bytes following BYTE1 are only validated when etf3_and_m3_is_1 is
   set. */
static ULong
s390_do_cu12_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
                          ULong stuff, Bool is_cu12)
{
   const UInt src_len = stuff >> 1;
   const UInt strict  = stuff & 0x1;    /* etf3_and_m3_is_1 */
   UInt  out_len = 0, invalid_character = 0;
   ULong converted = 0;

   vassert(src_len <= 4);

   /* Step 1: validate.  BYTE1 determines which values BYTE2 may take;
      all further bytes must lie in 80 .. bf.  A BYTE1 outside the
      ranges listed below is not checked at all. */
   if (strict) {
      UInt checked   = 0;
      UInt byte2_min = 0x80;
      UInt byte2_max = 0xbf;

      switch (src_len) {
      case 2:
         checked = 1;
         break;

      case 3:
         checked = (byte1 >= 0xe0 && byte1 <= 0xef);
         if (byte1 == 0xe0) byte2_min = 0xa0;
         if (byte1 == 0xed) byte2_max = 0x9f;
         break;

      case 4:
         checked = (byte1 >= 0xf0 && byte1 <= 0xf4);
         if (byte1 == 0xf0) byte2_min = 0x90;
         if (byte1 == 0xf4) byte2_max = 0x8f;
         break;
      }

      if (checked) {
         if (byte2 < byte2_min || byte2 > byte2_max)
            invalid_character = 1;
         if (src_len >= 3 && (byte3 < 0x80 || byte3 > 0xbf))
            invalid_character = 1;
         if (src_len == 4 && (byte4 < 0x80 || byte4 > 0xbf))
            invalid_character = 1;
      }
   }

   /* Step 2: convert, unless the character was rejected above. */
   if (! invalid_character) {
      switch (src_len) {
      case 1:
         out_len   = 2;
         converted = byte1;
         break;

      case 2:
         out_len   = 2;
         converted = ((byte1 & 0x1f) << 6)     /* fghij */
                     | (byte2 & 0x3f);         /* klmnop */
         break;

      case 3:
         out_len   = 2;
         converted = ((byte1 & 0xf) << 12)     /* abcd */
                     | ((byte2 & 0x3f) << 6)   /* efghij */
                     | (byte3 & 0x3f);         /* klmnop */
         break;

      case 4: {
         UInt uvwxy  = ((byte1 & 0x7) << 2) | ((byte2 >> 4) & 0x3);
         UInt efgh   = byte2 & 0xf;
         UInt ij     = (byte3 >> 4) & 0x3;
         UInt klmn   = byte3 & 0xf;
         UInt opqrst = byte4 & 0x3f;

         out_len = 4;

         if (is_cu12) {
            /* UTF-16: a surrogate pair */
            UInt abcd = (uvwxy - 1) & 0xf;
            UInt high_surrogate =
               (0xd8 << 8) | (abcd << 6) | (efgh << 2) | ij;
            UInt low_surrogate  = (0xdc << 8) | (klmn << 6) | opqrst;

            converted = (high_surrogate << 16) | low_surrogate;
         } else {
            /* UTF-32: the bits strung together */
            converted =
               (uvwxy << 16) | (efgh << 12) | (ij << 10) | (klmn << 6) | opqrst;
         }
         break;
      }
      }
   }

   if (! is_cu12) out_len = 4;   /* for CU14, by definition */

   return (converted << 16) | (out_len << 8) | invalid_character;
}
852 
853 ULong
s390_do_cu12_helper2(UInt byte1,UInt byte2,UInt byte3,UInt byte4,ULong stuff)854 s390_do_cu12_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
855                      ULong stuff)
856 {
857    return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
858                                     /* is_cu12 = */ 1);
859 }
860 
861 ULong
s390_do_cu14_helper2(UInt byte1,UInt byte2,UInt byte3,UInt byte4,ULong stuff)862 s390_do_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
863                      ULong stuff)
864 {
865    return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
866                                     /* is_cu12 = */ 0);
867 }
868 
869 
870 /*------------------------------------------------------------*/
871 /*--- Clean helper for "convert to binary".                ---*/
872 /*------------------------------------------------------------*/
873 #if defined(VGA_s390x)
874 UInt
s390_do_cvb(ULong decimal)875 s390_do_cvb(ULong decimal)
876 {
877    UInt binary;
878 
879    __asm__ volatile (
880         "cvb %[result],%[input]\n\t"
881           : [result] "=d"(binary)
882           : [input] "m"(decimal)
883    );
884 
885    return binary;
886 }
887 
888 #else
/* Version built when VGA_s390x is not defined: ignores DECIMAL and
   returns 0. */
UInt
s390_do_cvb(ULong decimal)
{
   (void)decimal;
   return 0;
}
890 #endif
891 
892 
893 /*------------------------------------------------------------*/
894 /*--- Clean helper for "convert to decimal".                ---*/
895 /*------------------------------------------------------------*/
896 #if defined(VGA_s390x)
897 ULong
s390_do_cvd(ULong binary_in)898 s390_do_cvd(ULong binary_in)
899 {
900    UInt binary = binary_in & 0xffffffffULL;
901    ULong decimal;
902 
903    __asm__ volatile (
904         "cvd %[input],%[result]\n\t"
905           : [result] "=m"(decimal)
906           : [input] "d"(binary)
907    );
908 
909    return decimal;
910 }
911 
912 #else
s390_do_cvd(ULong binary)913 ULong s390_do_cvd(ULong binary) { return 0; }
914 #endif
915 
916 /*------------------------------------------------------------*/
917 /*--- Clean helper for "Extract cache attribute".          ---*/
918 /*------------------------------------------------------------*/
919 #if defined(VGA_s390x)
920 ULong
s390_do_ecag(ULong op2addr)921 s390_do_ecag(ULong op2addr)
922 {
923    ULong result;
924 
925    __asm__ volatile(".insn rsy,0xEB000000004C,%[out],0,0(%[in])\n\t"
926                     : [out] "=d"(result)
927                     : [in] "d"(op2addr));
928    return result;
929 }
930 
931 #else
s390_do_ecag(ULong op2addr)932 ULong s390_do_ecag(ULong op2addr) { return 0; }
933 #endif
934 
935 /*------------------------------------------------------------*/
936 /*--- Clean helper for "Perform Floating Point Operation". ---*/
937 /*------------------------------------------------------------*/
938 #if defined(VGA_s390x)
939 UInt
s390_do_pfpo(UInt gpr0)940 s390_do_pfpo(UInt gpr0)
941 {
942    UChar rm;
943    UChar op1_ty, op2_ty;
944 
945    rm  = gpr0 & 0xf;
946    if (rm > 1 && rm < 8)
947       return EmFail_S390X_invalid_PFPO_rounding_mode;
948 
949    op1_ty = (gpr0 >> 16) & 0xff; // gpr0[40:47]
950    op2_ty = (gpr0 >> 8)  & 0xff; // gpr0[48:55]
951    /* Operand type must be BFP 32, 64, 128 or DFP 32, 64, 128
952       which correspond to 0x5, 0x6, 0x7, 0x8, 0x9, 0xa respectively.
953       Any other operand type value is unsupported */
954    if ((op1_ty == op2_ty) ||
955        (op1_ty < 0x5 || op1_ty > 0xa) ||
956        (op2_ty < 0x5 || op2_ty > 0xa))
957       return EmFail_S390X_invalid_PFPO_function;
958 
959    return EmNote_NONE;
960 }
961 #else
s390_do_pfpo(UInt gpr0)962 UInt s390_do_pfpo(UInt gpr0) { return 0; }
963 #endif
964 
965 /*------------------------------------------------------------*/
966 /*--- Helper for condition code.                           ---*/
967 /*------------------------------------------------------------*/
968 
/* Map an IRRoundingMode value onto the matching s390_bfp_round_t.
   Panics on any value other than the four handled below. */
#if defined(VGA_s390x)
static s390_bfp_round_t
decode_bfp_rounding_mode(UInt irrm)
{
   switch (irrm) {
   case Irrm_NEAREST:
      return S390_BFP_ROUND_NEAREST_EVEN;
   case Irrm_NegINF:
      return S390_BFP_ROUND_NEGINF;
   case Irrm_PosINF:
      return S390_BFP_ROUND_POSINF;
   case Irrm_ZERO:
      return S390_BFP_ROUND_ZERO;
   default:
      break;
   }
   vpanic("decode_bfp_rounding_mode");
}
#endif
983 
984 
/* Yield the condition code produced by the two-operand instruction
   OPCODE applied to CC_DEP1 and CC_DEP2.

   Both operands are placed in general registers; CC_DEP1 is a
   read-write operand, so the caller's variable is overwritten with
   whatever the instruction leaves in its first operand.  The condition
   code is fetched with IPM into a variable named `psw', which must exist
   in the expanding scope, and the macro's value is psw >> 28. */
#define S390_CC_FOR_BINARY(opcode,cc_dep1,cc_dep2) \
({ \
   __asm__ volatile ( \
        opcode " %[op1],%[op2]\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+d"(cc_dep1) \
                                   : [op2] "d"(cc_dep2) \
                                   : "cc");\
   psw >> 28;   /* cc */ \
})
994 
/* Yield the condition code of a subtract-with-borrow style instruction
   OPCODE applied to CC_DEP1 and CC_DEP2, with CC_NDEP supplying the
   incoming borrow.

   Side effects on the arguments: CC_DEP2 is overwritten with
   CC_DEP2 ^ CC_NDEP, and CC_DEP1 is a read-write asm operand.
   The asm first loads 1 into r0 and subtracts CC_NDEP from it to
   re-create the incoming condition code, then re-executes OPCODE and
   reads the resulting cc with IPM.  r0 is clobbered.  A variable named
   `psw' must exist in the expanding scope; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_TERNARY_SUBB(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \
      for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
	"lghi 0,1\n\t" \
	"sr 0,%[op3]\n\t" /* borrow to cc */ \
        opcode " %[op1],%[op2]\n\t" /* then redo the op */\
        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \
                                   : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \
                                   : "0", "cc");\
   psw >> 28;   /* cc */ \
})
1009 
/* Yield the condition code of an add-with-carry style instruction
   OPCODE applied to CC_DEP1 and CC_DEP2, with CC_NDEP supplying the
   incoming carry.

   Side effects on the arguments: CC_DEP2 is overwritten with
   CC_DEP2 ^ CC_NDEP, and CC_DEP1 is a read-write asm operand.
   The asm loads CC_NDEP into r0 and adds 0 to it to re-create the
   incoming condition code, then re-executes OPCODE and reads the
   resulting cc with IPM.  r0 is clobbered.  A variable named `psw' must
   exist in the expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_TERNARY_ADDC(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \
      for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
	"lgfr 0,%[op3]\n\t" /* first load cc_ndep */ \
	"aghi 0,0\n\t" /* and convert it into a cc */ \
        opcode " %[op1],%[op2]\n\t" /* then redo the op */\
        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \
                                   : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \
                                   : "0", "cc");\
   psw >> 28;   /* cc */ \
})
1024 
1025 
/* Yield the condition code set by the floating-point instruction OPCODE
   when applied to CC_DEP1.

   CC_DEP1 is passed in a floating-point register; the instruction's
   target is the hard-wired FPR 0, which is declared clobbered.  The cc
   is read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_RESULT(opcode,cc_dep1) \
({ \
   __asm__ volatile ( \
        opcode " 0,%[op]\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [op]  "f"(cc_dep1) \
                                   : "cc", "f0");\
   psw >> 28;   /* cc */ \
})
1035 
/* Yield the condition code set by LTXBR for the 128-bit value whose
   halves are HI and LO.

   HI is copied into FPR 4 and LO into FPR 6, forming the register pair
   that LTXBR reads; the result is written to the pair starting at
   FPR 0.  f0, f2, f4 and f6 are declared clobbered.  The cc is read with
   IPM into a variable named `psw' that must exist in the expanding
   scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_RESULT(hi,lo) \
({ \
   __asm__ volatile ( \
        "ldr   4,%[high]\n\t" \
        "ldr   6,%[low]\n\t" \
        "ltxbr 0,4\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [high] "f"(hi), [low] "f"(lo) \
                                   : "cc", "f0", "f2", "f4", "f6");\
   psw >> 28;   /* cc */ \
})
1047 
/* Yield the condition code set by the conversion instruction OPCODE
   applied to CC_DEP1 under a fixed rounding mode.

   ROUNDING_MODE is stringified straight into the assembly text, so it
   must be a literal token, not a run-time expression.  CC_DEP1 is passed
   in a floating-point register and the conversion result lands in the
   hard-wired GPR 0, which is declared clobbered.  The cc is read with
   IPM into a variable named `psw' that must exist in the expanding
   scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
        opcode " 0," #rounding_mode ",%[op]\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [op]  "f"(cc_dep1) \
                                   : "cc", "r0");\
   psw >> 28;   /* cc */ \
})
1057 
/* Yield the condition code of the BFP conversion OPCODE applied to
   CC_DEP1, using the IR rounding mode held in CC_DEP2.

   CC_DEP2 is decoded with decode_bfp_rounding_mode and the matching
   literal (4 = nearest-even, 5 = zero, 6 = +inf, 7 = -inf) is passed to
   S390_CC_FOR_BFP_CONVERT_AUX, which needs the mode as a compile-time
   token.  Any other decoded value panics. */
#define S390_CC_FOR_BFP_CONVERT(opcode,cc_dep1,cc_dep2)   \
({                                                        \
   UInt cc;                                               \
   switch (decode_bfp_rounding_mode(cc_dep2)) {           \
   case S390_BFP_ROUND_NEAREST_EVEN:                      \
      cc = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,4); \
      break;                                              \
   case S390_BFP_ROUND_ZERO:                              \
      cc = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,5); \
      break;                                              \
   case S390_BFP_ROUND_POSINF:                            \
      cc = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,6); \
      break;                                              \
   case S390_BFP_ROUND_NEGINF:                            \
      cc = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,7); \
      break;                                              \
   default:                                               \
      vpanic("unexpected bfp rounding mode");             \
   }                                                      \
   cc;                                                    \
})
1079 
/* Variant of S390_CC_FOR_BFP_CONVERT_AUX for instructions that are
   emitted through an `.insn'-style OPCODE string: the operands are
   appended as ",0,<op>,<rounding_mode>,0".

   ROUNDING_MODE is stringified into the assembly text and must be a
   literal token.  CC_DEP1 is passed in a floating-point register; GPR 0
   is the hard-wired target and is declared clobbered.  The cc is read
   with IPM into a variable named `psw' that must exist in the expanding
   scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
        opcode ",0,%[op]," #rounding_mode ",0\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [op]  "f"(cc_dep1) \
                                   : "cc", "r0");\
   psw >> 28;   /* cc */ \
})
1089 
/* Yield the condition code of the BFP conversion OPCODE (in the form
   expected by S390_CC_FOR_BFP_UCONVERT_AUX) applied to CC_DEP1, using
   the IR rounding mode held in CC_DEP2.

   CC_DEP2 is decoded with decode_bfp_rounding_mode and the matching
   literal (4 = nearest-even, 5 = zero, 6 = +inf, 7 = -inf) is passed on,
   because the AUX macro needs the mode as a compile-time token.  Any
   other decoded value panics. */
#define S390_CC_FOR_BFP_UCONVERT(opcode,cc_dep1,cc_dep2)   \
({                                                         \
   UInt cc;                                                \
   switch (decode_bfp_rounding_mode(cc_dep2)) {            \
   case S390_BFP_ROUND_NEAREST_EVEN:                       \
      cc = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,4); \
      break;                                               \
   case S390_BFP_ROUND_ZERO:                               \
      cc = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,5); \
      break;                                               \
   case S390_BFP_ROUND_POSINF:                             \
      cc = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,6); \
      break;                                               \
   case S390_BFP_ROUND_NEGINF:                             \
      cc = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,7); \
      break;                                               \
   default:                                                \
      vpanic("unexpected bfp rounding mode");              \
   }                                                       \
   cc;                                                     \
})
1111 
/* Yield the condition code set by the conversion instruction OPCODE
   applied to the 128-bit value (HI, LO) under a fixed rounding mode.

   HI is copied into FPR 4 and LO into FPR 6 to form the source register
   pair; the result lands in the hard-wired GPR 0.  r0, f4 and f6 are
   declared clobbered.  ROUNDING_MODE is stringified into the assembly
   text and must be a literal token.  The cc is read with IPM into a
   variable named `psw' that must exist in the expanding scope; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
        "ldr   4,%[high]\n\t" \
        "ldr   6,%[low]\n\t" \
        opcode " 0," #rounding_mode ",4\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [high] "f"(hi), [low] "f"(lo) \
                                   : "cc", "r0", "f4", "f6");\
   psw >> 28;   /* cc */ \
})
1123 
/* Yield the condition code of the 128-bit BFP conversion OPCODE applied
   to the value whose halves are CC_DEP1 (high) and CC_DEP2 (low), using
   the IR rounding mode held in CC_NDEP.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   CC_NDEP is decoded with decode_bfp_rounding_mode and the matching
   literal (4 = nearest-even, 5 = zero, 6 = +inf, 7 = -inf) is passed to
   S390_CC_FOR_BFP128_CONVERT_AUX.  Any other decoded value panics. */
#define S390_CC_FOR_BFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep)   \
({                                                                   \
   UInt cc;                                                          \
   /* Recover the original DEP2 value. See comment near              \
      s390_cc_thunk_put3 for rationale. */                           \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                      \
   switch (decode_bfp_rounding_mode(cc_ndep)) {                      \
   case S390_BFP_ROUND_NEAREST_EVEN:                                 \
      cc = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,4); \
      break;                                                         \
   case S390_BFP_ROUND_ZERO:                                         \
      cc = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,5); \
      break;                                                         \
   case S390_BFP_ROUND_POSINF:                                       \
      cc = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,6); \
      break;                                                         \
   case S390_BFP_ROUND_NEGINF:                                       \
      cc = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,7); \
      break;                                                         \
   default:                                                          \
      vpanic("unexpected bfp rounding mode");                        \
   }                                                                 \
   cc;                                                               \
})
1148 
/* Variant of S390_CC_FOR_BFP128_CONVERT_AUX for instructions emitted
   through an `.insn'-style OPCODE string: the operands are appended as
   ",0,4,<rounding_mode>,0".

   HI is copied into FPR 4 and LO into FPR 6 to form the source register
   pair; GPR 0 is the hard-wired target.  r0, f4 and f6 are declared
   clobbered.  ROUNDING_MODE is stringified into the assembly text and
   must be a literal token.  The cc is read with IPM into a variable
   named `psw' that must exist in the expanding scope; the macro's value
   is psw >> 28. */
#define S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
        "ldr   4,%[high]\n\t" \
        "ldr   6,%[low]\n\t" \
        opcode ",0,4," #rounding_mode ",0\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [high] "f"(hi), [low] "f"(lo) \
                                   : "cc", "r0", "f4", "f6");\
   psw >> 28;   /* cc */ \
})
1160 
/* Yield the condition code of the 128-bit BFP conversion OPCODE (in the
   form expected by S390_CC_FOR_BFP128_UCONVERT_AUX) applied to the value
   whose halves are CC_DEP1 (high) and CC_DEP2 (low), using the IR
   rounding mode held in CC_NDEP.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   CC_NDEP is decoded with decode_bfp_rounding_mode and the matching
   literal (4 = nearest-even, 5 = zero, 6 = +inf, 7 = -inf) is passed on.
   Any other decoded value panics. */
#define S390_CC_FOR_BFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep)   \
({                                                                    \
   UInt cc;                                                           \
   /* Recover the original DEP2 value. See comment near               \
      s390_cc_thunk_put3 for rationale. */                            \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                       \
   switch (decode_bfp_rounding_mode(cc_ndep)) {                       \
   case S390_BFP_ROUND_NEAREST_EVEN:                                  \
      cc = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,4); \
      break;                                                          \
   case S390_BFP_ROUND_ZERO:                                          \
      cc = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,5); \
      break;                                                          \
   case S390_BFP_ROUND_POSINF:                                        \
      cc = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,6); \
      break;                                                          \
   case S390_BFP_ROUND_NEGINF:                                        \
      cc = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,7); \
      break;                                                          \
   default:                                                           \
      vpanic("unexpected bfp rounding mode");                         \
   }                                                                  \
   cc;                                                                \
})
1185 
/* Yield the condition code set by the test-data-class instruction
   OPCODE for the value CC_DEP1 and the class operand CC_DEP2.

   CC_DEP1 is passed in a floating-point register.  CC_DEP2 is used as
   the base register of the address 0(<class>) and is therefore
   constrained with "a" so that it is never allocated to r0.  The cc is
   read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_TDC(opcode,cc_dep1,cc_dep2) \
({ \
   __asm__ volatile ( \
        opcode " %[value],0(%[class])\n\t" \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [value] "f"(cc_dep1), \
                                     [class] "a"(cc_dep2)  \
                                   : "cc");\
   psw >> 28;   /* cc */ \
})
1196 
/* Yield the condition code set by TCXB for the 128-bit value whose
   halves are CC_DEP1 (high) and CC_DEP2 (low), with CC_NDEP as the class
   operand.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   The halves are copied into FPR 4 and FPR 6 (both declared clobbered).
   CC_NDEP is used as the base register of the address 0(<class>) and is
   constrained with "a" so that it is never allocated to r0.  The cc is
   read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_TDC(cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* Recover the original DEP2 value. See comment near \
      s390_cc_thunk_put1f128Z for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
        "ldr  4,%[high]\n\t" \
        "ldr  6,%[low]\n\t" \
        "tcxb 4,0(%[class])\n\t" \
        "ipm  %[psw]\n\t"          : [psw] "=d"(psw) \
                                   : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
                                     [class] "a"(cc_ndep)  \
                                   : "cc", "f4", "f6");\
   psw >> 28;   /* cc */ \
})
1212 
/* Map an IRRoundingMode value onto the matching s390_dfp_round_t.
   Panics on any value other than the eight handled below. */
#if defined(VGA_s390x)
static s390_dfp_round_t
decode_dfp_rounding_mode(UInt irrm)
{
   switch (irrm) {
   case Irrm_NEAREST:              return S390_DFP_ROUND_NEAREST_EVEN_4;
   case Irrm_NegINF:               return S390_DFP_ROUND_NEGINF_7;
   case Irrm_PosINF:               return S390_DFP_ROUND_POSINF_6;
   case Irrm_ZERO:                 return S390_DFP_ROUND_ZERO_5;
   case Irrm_NEAREST_TIE_AWAY_0:   return S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1;
   case Irrm_PREPARE_SHORTER:      return S390_DFP_ROUND_PREPARE_SHORT_3;
   case Irrm_AWAY_FROM_ZERO:       return S390_DFP_ROUND_AWAY_0;
   case Irrm_NEAREST_TIE_TOWARD_0: return S390_DFP_ROUND_NEAREST_TIE_TOWARD_0;
   default:                        break;
   }
   vpanic("decode_dfp_rounding_mode");
}
#endif
1239 
/* Yield the condition code set by LTDTR (emitted via .insn) for the
   value CC_DEP1.

   CC_DEP1 is passed in a floating-point register; the instruction's
   target is the hard-wired FPR 0, which is declared clobbered.  The cc
   is read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP_RESULT(cc_dep1) \
({ \
   __asm__ volatile ( \
        ".insn rre, 0xb3d60000,0,%[op]\n\t"              /* LTDTR */ \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
                                   : [op]  "f"(cc_dep1) \
                                   : "cc", "f0"); \
   psw >> 28;   /* cc */ \
})
1249 
/* Yield the condition code set by LTXTR (emitted via .insn) for the
   128-bit value whose halves are HI and LO.

   HI is copied into FPR 4 and LO into FPR 6, forming the source register
   pair; the result is written to the pair starting at FPR 0.  f0, f2,
   f4 and f6 are declared clobbered.  The cc is read with IPM into a
   variable named `psw' that must exist in the expanding scope; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_RESULT(hi,lo) \
({ \
   __asm__ volatile ( \
        "ldr   4,%[high]\n\t"                                           \
        "ldr   6,%[low]\n\t"                                            \
        ".insn rre, 0xb3de0000,0,4\n\t"    /* LTXTR */                  \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw)                    \
                                   : [high] "f"(hi), [low] "f"(lo)      \
                                   : "cc", "f0", "f2", "f4", "f6");     \
   psw >> 28;   /* cc */                                                \
})
1261 
/* Yield the condition code set by the DFP test instruction OPCODE for
   the value CC_DEP1 and the class/group operand CC_DEP2.

   OPCODE is expected to be an `.insn'-style string: the operands are
   appended after a leading comma.  CC_DEP1 is passed in a floating-point
   register.  CC_DEP2 is used as the base register of the address
   0(<class>) and is constrained with "a" so that it is never allocated
   to r0.  The cc is read with IPM into a variable named `psw' that must
   exist in the expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP_TD(opcode,cc_dep1,cc_dep2)                      \
({                                                                      \
   __asm__ volatile (                                                   \
        opcode ",%[value],0(%[class])\n\t"                              \
        "ipm %[psw]\n\t"           : [psw] "=d"(psw)                    \
                                   : [value] "f"(cc_dep1),              \
                                     [class] "a"(cc_dep2)               \
                                   : "cc");                             \
   psw >> 28;   /* cc */                                                \
})
1272 
/* Yield the condition code set by the 128-bit DFP test instruction
   OPCODE for the value whose halves are CC_DEP1 (high) and CC_DEP2
   (low), with CC_NDEP as the class/group operand.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   OPCODE is expected to be an `.insn'-style string: the operands are
   appended after a leading comma.  The halves are copied into FPR 4 and
   FPR 6 (both declared clobbered).  CC_NDEP is used as the base register
   of the address 0(<class>) and is constrained with "a" so that it is
   never allocated to r0.  The cc is read with IPM into a variable named
   `psw' that must exist in the expanding scope; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_DFP128_TD(opcode,cc_dep1,cc_dep2,cc_ndep)           \
({                                                                      \
   /* Recover the original DEP2 value. See comment near                 \
      s390_cc_thunk_put1d128Z for rationale. */                         \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                         \
   __asm__ volatile (                                                   \
        "ldr  4,%[high]\n\t"                                            \
        "ldr  6,%[low]\n\t"                                             \
        opcode ",4,0(%[class])\n\t"                                     \
        "ipm  %[psw]\n\t"          : [psw] "=d"(psw)                    \
                                   : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
                                     [class] "a"(cc_ndep)               \
                                   : "cc", "f4", "f6");                 \
   psw >> 28;   /* cc */                                                \
})
1288 
/* Yield the condition code set by the DFP conversion instruction OPCODE
   applied to CC_DEP1 under a fixed rounding mode.

   OPCODE is expected to be an `.insn'-style string: the operands are
   appended as ",0,<op>,<rounding_mode>,0".  ROUNDING_MODE is stringified
   into the assembly text and must be a literal token.  CC_DEP1 is passed
   in a floating-point register; GPR 0 is the hard-wired target and is
   declared clobbered.  The cc is read with IPM into a variable named
   `psw' that must exist in the expanding scope; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode)       \
   ({                                                                   \
      __asm__ volatile (                                                \
                        opcode ",0,%[op]," #rounding_mode ",0\n\t"      \
                        "ipm %[psw]\n\t"           : [psw] "=d"(psw)    \
                        : [op] "f"(cc_dep1)                             \
                        : "cc", "r0");                                  \
      psw >> 28;   /* cc */                                             \
   })
1298 
/* Yield the condition code of the DFP conversion OPCODE applied to
   CC_DEP1, using the IR rounding mode held in CC_DEP2.

   CC_DEP2 is decoded with decode_dfp_rounding_mode.  Because
   S390_CC_FOR_DFP_CONVERT_AUX needs the mode as a compile-time token,
   each decoded value is dispatched to a literal: 1, 3, 4, 5, 6, 7, 13 or
   14.  The alias encodings (_12, _15, _8, _9, _10, _11) share the
   literal of their primary encoding.  Any other value panics. */
#define S390_CC_FOR_DFP_CONVERT(opcode,cc_dep1,cc_dep2)                 \
   ({                                                                   \
      UInt cc;                                                          \
      switch (decode_dfp_rounding_mode(cc_dep2)) {                      \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1:                         \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12:                        \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,1);            \
         break;                                                         \
      case S390_DFP_ROUND_PREPARE_SHORT_3:                              \
      case S390_DFP_ROUND_PREPARE_SHORT_15:                             \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,3);            \
         break;                                                         \
      case S390_DFP_ROUND_NEAREST_EVEN_4:                               \
      case S390_DFP_ROUND_NEAREST_EVEN_8:                               \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,4);            \
         break;                                                         \
      case S390_DFP_ROUND_ZERO_5:                                       \
      case S390_DFP_ROUND_ZERO_9:                                       \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,5);            \
         break;                                                         \
      case S390_DFP_ROUND_POSINF_6:                                     \
      case S390_DFP_ROUND_POSINF_10:                                    \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,6);            \
         break;                                                         \
      case S390_DFP_ROUND_NEGINF_7:                                     \
      case S390_DFP_ROUND_NEGINF_11:                                    \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,7);            \
         break;                                                         \
      case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0:                         \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,13);           \
         break;                                                         \
      case S390_DFP_ROUND_AWAY_0:                                       \
         cc = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,14);           \
         break;                                                         \
      default:                                                          \
         vpanic("unexpected dfp rounding mode");                        \
      }                                                                 \
      cc;                                                               \
   })
1338 
/* Yield the condition code set by the DFP conversion instruction OPCODE
   applied to CC_DEP1 under a fixed rounding mode.  The expansion is
   textually the same as S390_CC_FOR_DFP_CONVERT_AUX.

   OPCODE is expected to be an `.insn'-style string: the operands are
   appended as ",0,<op>,<rounding_mode>,0".  ROUNDING_MODE is stringified
   into the assembly text and must be a literal token.  CC_DEP1 is passed
   in a floating-point register; GPR 0 is the hard-wired target and is
   declared clobbered.  The cc is read with IPM into a variable named
   `psw' that must exist in the expanding scope; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode)      \
   ({                                                                   \
      __asm__ volatile (                                                \
                        opcode ",0,%[op]," #rounding_mode ",0\n\t"      \
                        "ipm %[psw]\n\t"           : [psw] "=d"(psw)    \
                        : [op] "f"(cc_dep1)                             \
                        : "cc", "r0");                                  \
      psw >> 28;   /* cc */                                             \
   })
1348 
/* Yield the condition code of the DFP conversion OPCODE applied to
   CC_DEP1, using the IR rounding mode held in CC_DEP2; dispatches to
   S390_CC_FOR_DFP_UCONVERT_AUX.

   CC_DEP2 is decoded with decode_dfp_rounding_mode.  Because the AUX
   macro needs the mode as a compile-time token, each decoded value is
   dispatched to a literal: 1, 3, 4, 5, 6, 7, 13 or 14.  The alias
   encodings (_12, _15, _8, _9, _10, _11) share the literal of their
   primary encoding.  Any other value panics. */
#define S390_CC_FOR_DFP_UCONVERT(opcode,cc_dep1,cc_dep2)                \
   ({                                                                   \
      UInt cc;                                                          \
      switch (decode_dfp_rounding_mode(cc_dep2)) {                      \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1:                         \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12:                        \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,1);           \
         break;                                                         \
      case S390_DFP_ROUND_PREPARE_SHORT_3:                              \
      case S390_DFP_ROUND_PREPARE_SHORT_15:                             \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,3);           \
         break;                                                         \
      case S390_DFP_ROUND_NEAREST_EVEN_4:                               \
      case S390_DFP_ROUND_NEAREST_EVEN_8:                               \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,4);           \
         break;                                                         \
      case S390_DFP_ROUND_ZERO_5:                                       \
      case S390_DFP_ROUND_ZERO_9:                                       \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,5);           \
         break;                                                         \
      case S390_DFP_ROUND_POSINF_6:                                     \
      case S390_DFP_ROUND_POSINF_10:                                    \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,6);           \
         break;                                                         \
      case S390_DFP_ROUND_NEGINF_7:                                     \
      case S390_DFP_ROUND_NEGINF_11:                                    \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,7);           \
         break;                                                         \
      case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0:                         \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,13);          \
         break;                                                         \
      case S390_DFP_ROUND_AWAY_0:                                       \
         cc = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,14);          \
         break;                                                         \
      default:                                                          \
         vpanic("unexpected dfp rounding mode");                        \
      }                                                                 \
      cc;                                                               \
   })
1388 
/* Yield the condition code set by the DFP conversion instruction OPCODE
   applied to the 128-bit value (HI, LO) under a fixed rounding mode.

   OPCODE is expected to be an `.insn'-style string: the operands are
   appended as ",0,4,<rounding_mode>,0".  HI is copied into FPR 4 and LO
   into FPR 6 to form the source register pair; GPR 0 is the hard-wired
   target.  r0, f4 and f6 are declared clobbered.  ROUNDING_MODE is
   stringified into the assembly text and must be a literal token.  The
   cc is read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode)      \
   ({                                                                   \
      __asm__ volatile (                                                \
                        "ldr   4,%[high]\n\t"                           \
                        "ldr   6,%[low]\n\t"                            \
                        opcode ",0,4," #rounding_mode ",0\n\t"          \
                        "ipm %[psw]\n\t"           : [psw] "=d"(psw)    \
                        : [high] "f"(hi), [low] "f"(lo)                 \
                        : "cc", "r0", "f4", "f6");                      \
      psw >> 28;   /* cc */                                             \
   })
1400 
/* Yield the condition code of the 128-bit DFP conversion OPCODE applied
   to the value whose halves are CC_DEP1 (high) and CC_DEP2 (low), using
   the IR rounding mode held in CC_NDEP.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   CC_NDEP is decoded with decode_dfp_rounding_mode.  Because
   S390_CC_FOR_DFP128_CONVERT_AUX needs the mode as a compile-time token,
   each decoded value is dispatched to a literal: 1, 3, 4, 5, 6, 7, 13 or
   14.  The alias encodings (_12, _15, _8, _9, _10, _11) share the
   literal of their primary encoding.  Any other value panics. */
#define S390_CC_FOR_DFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep)       \
   ({                                                                    \
      UInt cc;                                                           \
      /* Recover the original DEP2 value. See comment near               \
         s390_cc_thunk_put3 for rationale. */                            \
      cc_dep2 = cc_dep2 ^ cc_ndep;                                       \
      switch (decode_dfp_rounding_mode(cc_ndep)) {                       \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1:                          \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12:                         \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,1);  \
         break;                                                          \
      case S390_DFP_ROUND_PREPARE_SHORT_3:                               \
      case S390_DFP_ROUND_PREPARE_SHORT_15:                              \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,3);  \
         break;                                                          \
      case S390_DFP_ROUND_NEAREST_EVEN_4:                                \
      case S390_DFP_ROUND_NEAREST_EVEN_8:                                \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,4);  \
         break;                                                          \
      case S390_DFP_ROUND_ZERO_5:                                        \
      case S390_DFP_ROUND_ZERO_9:                                        \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,5);  \
         break;                                                          \
      case S390_DFP_ROUND_POSINF_6:                                      \
      case S390_DFP_ROUND_POSINF_10:                                     \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,6);  \
         break;                                                          \
      case S390_DFP_ROUND_NEGINF_7:                                      \
      case S390_DFP_ROUND_NEGINF_11:                                     \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,7);  \
         break;                                                          \
      case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0:                          \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,13); \
         break;                                                          \
      case S390_DFP_ROUND_AWAY_0:                                        \
         cc = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,14); \
         break;                                                          \
      default:                                                           \
         vpanic("unexpected dfp rounding mode");                         \
      }                                                                  \
      cc;                                                                \
   })
1443 
/* Yield the condition code set by the DFP conversion instruction OPCODE
   applied to the 128-bit value (HI, LO) under a fixed rounding mode.
   Apart from column alignment, the expansion is the same as
   S390_CC_FOR_DFP128_CONVERT_AUX.

   OPCODE is expected to be an `.insn'-style string: the operands are
   appended as ",0,4,<rounding_mode>,0".  HI is copied into FPR 4 and LO
   into FPR 6 to form the source register pair; GPR 0 is the hard-wired
   target.  r0, f4 and f6 are declared clobbered.  ROUNDING_MODE is
   stringified into the assembly text and must be a literal token.  The
   cc is read with IPM into a variable named `psw' that must exist in the
   expanding scope; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode)      \
   ({                                                                    \
      __asm__ volatile (                                                 \
                        "ldr   4,%[high]\n\t"                            \
                        "ldr   6,%[low]\n\t"                             \
                        opcode ",0,4," #rounding_mode ",0\n\t"           \
                        "ipm %[psw]\n\t"           : [psw] "=d"(psw)     \
                        : [high] "f"(hi), [low] "f"(lo)                  \
                        : "cc", "r0", "f4", "f6");                       \
      psw >> 28;   /* cc */                                              \
   })
1455 
/* Yield the condition code of the 128-bit DFP conversion OPCODE applied
   to the value whose halves are CC_DEP1 (high) and CC_DEP2 (low), using
   the IR rounding mode held in CC_NDEP; dispatches to
   S390_CC_FOR_DFP128_UCONVERT_AUX.

   Side effect: CC_DEP2 is overwritten with CC_DEP2 ^ CC_NDEP before use.
   CC_NDEP is decoded with decode_dfp_rounding_mode.  Because the AUX
   macro needs the mode as a compile-time token, each decoded value is
   dispatched to a literal: 1, 3, 4, 5, 6, 7, 13 or 14.  The alias
   encodings (_12, _15, _8, _9, _10, _11) share the literal of their
   primary encoding.  Any other value panics. */
#define S390_CC_FOR_DFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep)       \
   ({                                                                     \
      UInt cc;                                                            \
      /* Recover the original DEP2 value. See comment near                \
         s390_cc_thunk_put3 for rationale. */                             \
      cc_dep2 = cc_dep2 ^ cc_ndep;                                        \
      switch (decode_dfp_rounding_mode(cc_ndep)) {                        \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1:                           \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12:                          \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,1);  \
         break;                                                           \
      case S390_DFP_ROUND_PREPARE_SHORT_3:                                \
      case S390_DFP_ROUND_PREPARE_SHORT_15:                               \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,3);  \
         break;                                                           \
      case S390_DFP_ROUND_NEAREST_EVEN_4:                                 \
      case S390_DFP_ROUND_NEAREST_EVEN_8:                                 \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,4);  \
         break;                                                           \
      case S390_DFP_ROUND_ZERO_5:                                         \
      case S390_DFP_ROUND_ZERO_9:                                         \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,5);  \
         break;                                                           \
      case S390_DFP_ROUND_POSINF_6:                                       \
      case S390_DFP_ROUND_POSINF_10:                                      \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,6);  \
         break;                                                           \
      case S390_DFP_ROUND_NEGINF_7:                                       \
      case S390_DFP_ROUND_NEGINF_11:                                      \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,7);  \
         break;                                                           \
      case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0:                           \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,13); \
         break;                                                           \
      case S390_DFP_ROUND_AWAY_0:                                         \
         cc = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,14); \
         break;                                                           \
      default:                                                            \
         vpanic("unexpected dfp rounding mode");                          \
      }                                                                   \
      cc;                                                                 \
   })
1498 
1499 
1500 /* Return the value of the condition code from the supplied thunk parameters.
1501    This is not the value of the PSW. It is the value of the 2 CC bits within
1502    the PSW. The returned value is thusly in the interval [0:3]. */
1503 UInt
s390_calculate_cc(ULong cc_op,ULong cc_dep1,ULong cc_dep2,ULong cc_ndep)1504 s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep)
1505 {
1506 #if defined(VGA_s390x)
1507    UInt psw;
1508 
1509    switch (cc_op) {
1510 
1511    case S390_CC_OP_BITWISE:
1512       return S390_CC_FOR_BINARY("ogr", cc_dep1, (ULong)0);
1513 
1514    case S390_CC_OP_SIGNED_COMPARE:
1515       return S390_CC_FOR_BINARY("cgr", cc_dep1, cc_dep2);
1516 
1517    case S390_CC_OP_UNSIGNED_COMPARE:
1518       return S390_CC_FOR_BINARY("clgr", cc_dep1, cc_dep2);
1519 
1520    case S390_CC_OP_SIGNED_ADD_64:
1521       return S390_CC_FOR_BINARY("agr", cc_dep1, cc_dep2);
1522 
1523    case S390_CC_OP_SIGNED_ADD_32:
1524       return S390_CC_FOR_BINARY("ar", cc_dep1, cc_dep2);
1525 
1526    case S390_CC_OP_SIGNED_SUB_64:
1527       return S390_CC_FOR_BINARY("sgr", cc_dep1, cc_dep2);
1528 
1529    case S390_CC_OP_SIGNED_SUB_32:
1530       return S390_CC_FOR_BINARY("sr", cc_dep1, cc_dep2);
1531 
1532    case S390_CC_OP_UNSIGNED_ADD_64:
1533       return S390_CC_FOR_BINARY("algr", cc_dep1, cc_dep2);
1534 
1535    case S390_CC_OP_UNSIGNED_ADD_32:
1536       return S390_CC_FOR_BINARY("alr", cc_dep1, cc_dep2);
1537 
1538    case S390_CC_OP_UNSIGNED_ADDC_64:
1539       return S390_CC_FOR_TERNARY_ADDC("alcgr", cc_dep1, cc_dep2, cc_ndep);
1540 
1541    case S390_CC_OP_UNSIGNED_ADDC_32:
1542       return S390_CC_FOR_TERNARY_ADDC("alcr", cc_dep1, cc_dep2, cc_ndep);
1543 
1544    case S390_CC_OP_UNSIGNED_SUB_64:
1545       return S390_CC_FOR_BINARY("slgr", cc_dep1, cc_dep2);
1546 
1547    case S390_CC_OP_UNSIGNED_SUB_32:
1548       return S390_CC_FOR_BINARY("slr", cc_dep1, cc_dep2);
1549 
1550    case S390_CC_OP_UNSIGNED_SUBB_64:
1551       return S390_CC_FOR_TERNARY_SUBB("slbgr", cc_dep1, cc_dep2, cc_ndep);
1552 
1553    case S390_CC_OP_UNSIGNED_SUBB_32:
1554       return S390_CC_FOR_TERNARY_SUBB("slbr", cc_dep1, cc_dep2, cc_ndep);
1555 
1556    case S390_CC_OP_LOAD_AND_TEST:
1557       /* Like signed comparison with 0 */
1558       return S390_CC_FOR_BINARY("cgr", cc_dep1, (Long)0);
1559 
1560    case S390_CC_OP_LOAD_POSITIVE_32:
1561       __asm__ volatile (
1562            "lpr  %[result],%[op]\n\t"
1563            "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1564                                      : [op] "d"(cc_dep1)
1565                                      : "cc");
1566       return psw >> 28;   /* cc */
1567 
1568    case S390_CC_OP_LOAD_POSITIVE_64:
1569       __asm__ volatile (
1570            "lpgr %[result],%[op]\n\t"
1571            "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1572                                      : [op] "d"(cc_dep1)
1573                                      : "cc");
1574       return psw >> 28;   /* cc */
1575 
1576    case S390_CC_OP_TEST_UNDER_MASK_8: {
1577       UChar value  = cc_dep1;
1578       UChar mask   = cc_dep2;
1579 
1580       __asm__ volatile (
1581            "bras %%r2,1f\n\t"             /* %r2 = address of next insn */
1582            "tm %[value],0\n\t"            /* this is skipped, then EXecuted */
1583            "1: ex %[mask],0(%%r2)\n\t"    /* EXecute TM after modifying mask */
1584            "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1585                                         : [value] "m"(value), [mask] "a"(mask)
1586                                         : "r2", "cc");
1587       return psw >> 28;   /* cc */
1588    }
1589 
1590    case S390_CC_OP_TEST_UNDER_MASK_16: {
1591       /* Create a TMLL insn with the mask as given by cc_dep2 */
1592       UInt insn  = (0xA701u << 16) | cc_dep2;
1593       UInt value = cc_dep1;
1594 
1595       __asm__ volatile (
1596            "lr   1,%[value]\n\t"
1597            "lhi  2,0x10\n\t"
1598            "ex   2,%[insn]\n\t"
1599            "ipm  %[psw]\n\t"       : [psw] "=d"(psw)
1600                                    : [value] "d"(value), [insn] "m"(insn)
1601                                    : "r1", "r2", "cc");
1602       return psw >> 28;   /* cc */
1603    }
1604 
1605    case S390_CC_OP_SHIFT_LEFT_32:
1606       __asm__ volatile (
1607            "sla  %[op],0(%[amount])\n\t"
1608            "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [op] "+d"(cc_dep1)
1609                                         : [amount] "a"(cc_dep2)
1610                                         : "cc");
1611       return psw >> 28;   /* cc */
1612 
1613    case S390_CC_OP_SHIFT_LEFT_64: {
1614       Int high = (Int)(cc_dep1 >> 32);
1615       Int low  = (Int)(cc_dep1 & 0xFFFFFFFF);
1616 
1617       __asm__ volatile (
1618            "lr   2,%[high]\n\t"
1619            "lr   3,%[low]\n\t"
1620            "slda 2,0(%[amount])\n\t"
1621            "ipm %[psw]\n\t"             : [psw] "=d"(psw), [high] "+d"(high),
1622                                           [low] "+d"(low)
1623                                         : [amount] "a"(cc_dep2)
1624                                         : "cc", "r2", "r3");
1625       return psw >> 28;   /* cc */
1626    }
1627 
1628    case S390_CC_OP_INSERT_CHAR_MASK_32: {
1629       Int inserted = 0;
1630       Int msb = 0;
1631 
1632       if (cc_dep2 & 1) {
1633          inserted |= cc_dep1 & 0xff;
1634          msb = 0x80;
1635       }
1636       if (cc_dep2 & 2) {
1637          inserted |= cc_dep1 & 0xff00;
1638          msb = 0x8000;
1639       }
1640       if (cc_dep2 & 4) {
1641          inserted |= cc_dep1 & 0xff0000;
1642          msb = 0x800000;
1643       }
1644       if (cc_dep2 & 8) {
1645          inserted |= cc_dep1 & 0xff000000;
1646          msb = 0x80000000;
1647       }
1648 
1649       if (inserted & msb)  // MSB is 1
1650          return 1;
1651       if (inserted > 0)
1652          return 2;
1653       return 0;
1654    }
1655 
1656    case S390_CC_OP_BFP_RESULT_32:
1657       return S390_CC_FOR_BFP_RESULT("ltebr", cc_dep1);
1658 
1659    case S390_CC_OP_BFP_RESULT_64:
1660       return S390_CC_FOR_BFP_RESULT("ltdbr", cc_dep1);
1661 
1662    case S390_CC_OP_BFP_RESULT_128:
1663       return S390_CC_FOR_BFP128_RESULT(cc_dep1, cc_dep2);
1664 
1665    case S390_CC_OP_BFP_32_TO_INT_32:
1666       return S390_CC_FOR_BFP_CONVERT("cfebr", cc_dep1, cc_dep2);
1667 
1668    case S390_CC_OP_BFP_64_TO_INT_32:
1669       return S390_CC_FOR_BFP_CONVERT("cfdbr", cc_dep1, cc_dep2);
1670 
1671    case S390_CC_OP_BFP_128_TO_INT_32:
1672       return S390_CC_FOR_BFP128_CONVERT("cfxbr", cc_dep1, cc_dep2, cc_ndep);
1673 
1674    case S390_CC_OP_BFP_32_TO_INT_64:
1675       return S390_CC_FOR_BFP_CONVERT("cgebr", cc_dep1, cc_dep2);
1676 
1677    case S390_CC_OP_BFP_64_TO_INT_64:
1678       return S390_CC_FOR_BFP_CONVERT("cgdbr", cc_dep1, cc_dep2);
1679 
1680    case S390_CC_OP_BFP_128_TO_INT_64:
1681       return S390_CC_FOR_BFP128_CONVERT("cgxbr", cc_dep1, cc_dep2, cc_ndep);
1682 
1683    case S390_CC_OP_BFP_TDC_32:
1684       return S390_CC_FOR_BFP_TDC("tceb", cc_dep1, cc_dep2);
1685 
1686    case S390_CC_OP_BFP_TDC_64:
1687       return S390_CC_FOR_BFP_TDC("tcdb", cc_dep1, cc_dep2);
1688 
1689    case S390_CC_OP_BFP_TDC_128:
1690       return S390_CC_FOR_BFP128_TDC(cc_dep1, cc_dep2, cc_ndep);
1691 
1692    case S390_CC_OP_SET:
1693       return cc_dep1;
1694 
1695    case S390_CC_OP_BFP_32_TO_UINT_32:
1696       return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39c0000", cc_dep1, cc_dep2);
1697 
1698    case S390_CC_OP_BFP_64_TO_UINT_32:
1699       return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39d0000", cc_dep1, cc_dep2);
1700 
1701    case S390_CC_OP_BFP_128_TO_UINT_32:
1702       return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb39e0000", cc_dep1,
1703                                          cc_dep2, cc_ndep);
1704 
1705    case S390_CC_OP_BFP_32_TO_UINT_64:
1706       return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ac0000", cc_dep1, cc_dep2);
1707 
1708    case S390_CC_OP_BFP_64_TO_UINT_64:
1709       return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ad0000", cc_dep1, cc_dep2);
1710 
1711    case S390_CC_OP_BFP_128_TO_UINT_64:
1712       return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb3ae0000", cc_dep1,
1713                                          cc_dep2, cc_ndep);
1714 
1715    case S390_CC_OP_DFP_RESULT_64:
1716       return S390_CC_FOR_DFP_RESULT(cc_dep1);
1717 
1718    case S390_CC_OP_DFP_RESULT_128:
1719       return S390_CC_FOR_DFP128_RESULT(cc_dep1, cc_dep2);
1720 
1721    case S390_CC_OP_DFP_TDC_32:  /* TDCET */
1722       return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000050", cc_dep1, cc_dep2);
1723 
1724    case S390_CC_OP_DFP_TDC_64:  /* TDCDT */
1725       return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000054", cc_dep1, cc_dep2);
1726 
1727    case S390_CC_OP_DFP_TDC_128: /* TDCXT */
1728       return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000058", cc_dep1,
1729                                    cc_dep2, cc_ndep);
1730 
1731    case S390_CC_OP_DFP_TDG_32:  /* TDGET */
1732       return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000051", cc_dep1, cc_dep2);
1733 
1734    case S390_CC_OP_DFP_TDG_64:  /* TDGDT */
1735       return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000055", cc_dep1, cc_dep2);
1736 
1737    case S390_CC_OP_DFP_TDG_128: /* TDGXT */
1738       return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000059", cc_dep1,
1739                                    cc_dep2, cc_ndep);
1740 
1741    case S390_CC_OP_DFP_64_TO_INT_32: /* CFDTR */
1742       return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb9410000", cc_dep1, cc_dep2);
1743 
1744    case S390_CC_OP_DFP_128_TO_INT_32: /* CFXTR */
1745       return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb9490000", cc_dep1,
1746                                         cc_dep2, cc_ndep);
1747 
1748    case S390_CC_OP_DFP_64_TO_INT_64: /* CGDTR */
1749       return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb3e10000", cc_dep1, cc_dep2);
1750 
1751    case S390_CC_OP_DFP_128_TO_INT_64: /* CGXTR */
1752       return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb3e90000", cc_dep1,
1753                                         cc_dep2, cc_ndep);
1754 
1755    case S390_CC_OP_DFP_64_TO_UINT_32: /* CLFDTR */
1756       return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9430000", cc_dep1, cc_dep2);
1757 
1758    case S390_CC_OP_DFP_128_TO_UINT_32: /* CLFXTR */
1759       return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94b0000", cc_dep1,
1760                                          cc_dep2, cc_ndep);
1761 
1762    case S390_CC_OP_DFP_64_TO_UINT_64: /* CLGDTR */
1763       return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9420000", cc_dep1, cc_dep2);
1764 
1765    case S390_CC_OP_DFP_128_TO_UINT_64: /* CLGXTR */
1766       return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94a0000", cc_dep1,
1767                                          cc_dep2, cc_ndep);
1768 
1769    case S390_CC_OP_PFPO_32: {
1770       __asm__ volatile(
1771            "ler 4, %[cc_dep1]\n\t"      /* 32 bit FR move */
1772            "lr  0, %[cc_dep2]\n\t"      /* 32 bit GR move */
1773            ".short 0x010a\n\t"          /* PFPO */
1774            "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1775                                         : [cc_dep1] "f"(cc_dep1),
1776                                           [cc_dep2] "d"(cc_dep2)
1777                                         : "r0", "r1", "f4");
1778       return psw >> 28;  /* cc */
1779    }
1780 
1781    case S390_CC_OP_PFPO_64: {
1782       __asm__ volatile(
1783            "ldr 4, %[cc_dep1]\n\t"
1784            "lr  0, %[cc_dep2]\n\t"      /* 32 bit register move */
1785            ".short 0x010a\n\t"          /* PFPO */
1786            "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1787                                         : [cc_dep1] "f"(cc_dep1),
1788                                           [cc_dep2] "d"(cc_dep2)
1789                                         : "r0", "r1", "f4");
1790       return psw >> 28;  /* cc */
1791    }
1792 
1793    case S390_CC_OP_PFPO_128: {
1794       __asm__ volatile(
1795            "ldr 4,%[cc_dep1]\n\t"
1796            "ldr 6,%[cc_dep2]\n\t"
1797            "lr  0,%[cc_ndep]\n\t"       /* 32 bit register move */
1798            ".short 0x010a\n\t"          /* PFPO */
1799            "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1800                                         : [cc_dep1] "f"(cc_dep1),
1801                                           [cc_dep2] "f"(cc_dep2),
1802                                           [cc_ndep] "d"(cc_ndep)
1803                                         : "r0", "r1", "f0", "f2", "f4", "f6");
1804       return psw >> 28;  /* cc */
1805    }
1806 
1807    default:
1808       break;
1809    }
1810 #endif
1811    vpanic("s390_calculate_cc");
1812 }
1813 
1814 
1815 /* Note that this does *not* return a Boolean value. The result needs to be
1816    explicitly tested against zero. */
1817 UInt
s390_calculate_cond(ULong mask,ULong op,ULong dep1,ULong dep2,ULong ndep)1818 s390_calculate_cond(ULong mask, ULong op, ULong dep1, ULong dep2, ULong ndep)
1819 {
1820    UInt cc = s390_calculate_cc(op, dep1, dep2, ndep);
1821 
1822    return ((mask << cc) & 0x8);
1823 }
1824 
1825 /*------------------------------------------------------------*/
1826 /*--- spechelper for performance                           ---*/
1827 /*------------------------------------------------------------*/
1828 
1829 
/* Shorthands for constructing IR expressions: unary and binary operations
   and 64-, 32- and 8-bit constants. */
#define unop(opc,arg)         IRExpr_Unop((opc),(arg))
#define binop(opc,lhs,rhs)    IRExpr_Binop((opc),(lhs),(rhs))
#define mkU64(val)            IRExpr_Const(IRConst_U64(val))
#define mkU32(val)            IRExpr_Const(IRConst_U32(val))
#define mkU8(val)             IRExpr_Const(IRConst_U8(val))
1836 
1837 
1838 static inline Bool
isC64(const IRExpr * expr)1839 isC64(const IRExpr *expr)
1840 {
1841    return expr->tag == Iex_Const && expr->Iex.Const.con->tag == Ico_U64;
1842 }
1843 
1844 static inline Bool
isC64_exactly(const IRExpr * expr,ULong n)1845 isC64_exactly(const IRExpr *expr, ULong n)
1846 {
1847    return expr->tag == Iex_Const && expr->Iex.Const.con->tag == Ico_U64
1848           && expr->Iex.Const.con->Ico.U64 == n;
1849 }
1850 
1851 
1852 /* The returned expression is NULL if no specialization was found. In that
1853    case the helper function will be called. Otherwise, the expression has
1854    type Ity_I32 and a Boolean value. */
1855 IRExpr *
guest_s390x_spechelper(const HChar * function_name,IRExpr ** args,IRStmt ** precedingStmts,Int n_precedingStmts)1856 guest_s390x_spechelper(const HChar *function_name, IRExpr **args,
1857                        IRStmt **precedingStmts, Int n_precedingStmts)
1858 {
1859    UInt i, arity = 0;
1860 
1861    for (i = 0; args[i]; i++)
1862       arity++;
1863 
1864 #  if 0
1865    vex_printf("spec request:\n");
1866    vex_printf("   %s  ", function_name);
1867    for (i = 0; i < arity; i++) {
1868       vex_printf("  ");
1869       ppIRExpr(args[i]);
1870    }
1871    vex_printf("\n");
1872 #  endif
1873 
1874    /* --------- Specialising "s390_calculate_cond" --------- */
1875 
1876    if (vex_streq(function_name, "s390_calculate_cond")) {
1877       IRExpr *cond_expr, *cc_op_expr, *cc_dep1, *cc_dep2;
1878       ULong cond, cc_op;
1879 
1880       vassert(arity == 5);
1881 
1882       cond_expr  = args[0];
1883       cc_op_expr = args[1];
1884 
1885       /* The necessary requirement for all optimizations here is that the
1886          condition and the cc_op are constant. So check that upfront. */
1887       if (! isC64(cond_expr))  return NULL;
1888       if (! isC64(cc_op_expr)) return NULL;
1889 
1890       cond    = cond_expr->Iex.Const.con->Ico.U64;
1891       cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
1892 
1893       vassert(cond <= 15);
1894 
1895       /*
1896         +------+---+---+---+---+
1897         | cc   | 0 | 1 | 2 | 3 |
1898         | cond | 8 | 4 | 2 | 1 |
1899         +------+---+---+---+---+
1900       */
1901       cc_dep1 = args[2];
1902       cc_dep2 = args[3];
1903 
1904       /* S390_CC_OP_SIGNED_COMPARE */
1905       if (cc_op == S390_CC_OP_SIGNED_COMPARE) {
1906          /*
1907             cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1908             cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1909             cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1910 
1911             Because cc == 3 cannot occur the rightmost bit of cond is
1912             a don't care.
1913          */
1914          if (cond == 8 || cond == 8 + 1) {
1915             return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1916          }
1917          if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1918             return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1919          }
1920          if (cond == 4 || cond == 4 + 1) {
1921             if (isC64_exactly(cc_dep2, 0)) {
1922                /*     dep1 <signed 0
1923                   --> m.s.bit of dep1 == 1 */
1924                return unop(Iop_64to32,
1925                            binop(Iop_And64,
1926                                  binop(Iop_Shr64, cc_dep1, mkU8(63)),
1927                                  mkU64(1)));
1928             }
1929             return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1930          }
1931          if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1932             return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1933          }
1934          /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1935          if (cond == 2 || cond == 2 + 1) {
1936             /* If we ever need the counterpart of the bug387712 fix just
1937                below, then here is the place.  We'll need to give an
1938                alternative expression for the case "cc_dep2 <s 0".  From a
1939                bit of simple testing, I've yet to see any such cases,
1940                however. */
1941             return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1942          }
1943          if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1944             if (isC64_exactly(cc_dep2, 0)) {
1945                /*     0    <=signed dep1
1946                   --> dep1 >=signed 0
1947                   --> m.s.bit of dep1 == 0 */
1948                /* See bug 387712.  This is an old trick from gcc to extract
1949                   the most significant bit of a word. */
1950                return unop(Iop_64to32,
1951                            binop(Iop_Xor64,
1952                                  binop(Iop_Shr64, cc_dep1, mkU8(63)),
1953                                  mkU64(1)));
1954             }
1955             return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1956          }
1957          if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1958             return mkU32(1);
1959          }
1960          /* Remaining case */
1961          return mkU32(0);
1962       }
1963 
1964       /* S390_CC_OP_UNSIGNED_COMPARE */
1965       if (cc_op == S390_CC_OP_UNSIGNED_COMPARE) {
1966          /*
1967             cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1968             cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1969             cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1970 
1971             Because cc == 3 cannot occur the rightmost bit of cond is
1972             a don't care.
1973          */
1974          if (cond == 8 || cond == 8 + 1) {
1975             return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1976          }
1977          if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1978             return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1979          }
1980          if (cond == 4 || cond == 4 + 1) {
1981             return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1982          }
1983          if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1984             return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1985          }
1986          /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1987          if (cond == 2 || cond == 2 + 1) {
1988             return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1989          }
1990          if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1991             return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1992          }
1993          if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1994             return mkU32(1);
1995          }
1996          /* Remaining case */
1997          return mkU32(0);
1998       }
1999 
2000       /* S390_CC_OP_LOAD_AND_TEST */
2001       if (cc_op == S390_CC_OP_LOAD_AND_TEST) {
2002          /*
2003             cc == 0  --> cc_dep1 == 0   (cond == 8)
2004             cc == 1  --> cc_dep1 <  0   (cond == 4)
2005             cc == 2  --> cc_dep1 >  0   (cond == 2)
2006 
2007             Because cc == 3 cannot occur the rightmost bit of cond is
2008             a don't care.
2009          */
2010          if (cond == 8 || cond == 8 + 1) {
2011             return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
2012          }
2013          if (cond == 4 + 2 || cond == 4 + 2 + 1) {
2014             return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2015          }
2016          if (cond == 4 || cond == 4 + 1) {
2017              /* Special case cc_dep < 0. Only check the MSB to avoid bogus
2018                memcheck complaints due to gcc magic. Fixes 343802
2019              */
2020             return unop(Iop_64to32, binop(Iop_Shr64, cc_dep1, mkU8(63)));
2021          }
2022          if (cond == 8 + 4 || cond == 8 + 4 + 1) {
2023             return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, mkU64(0)));
2024          }
2025          /* cc_dep1 > 0  ---->  0 < cc_dep1 */
2026          if (cond == 2 || cond == 2 + 1) {
2027             return unop(Iop_1Uto32, binop(Iop_CmpLT64S, mkU64(0), cc_dep1));
2028          }
2029          if (cond == 8 + 2 || cond == 8 + 2 + 1) {
2030             /* Special case cc_dep >= 0. Only check the MSB to avoid bogus
2031                memcheck complaints due to gcc magic. Fixes 308427
2032              */
2033             return unop(Iop_64to32, binop(Iop_Xor64,
2034                                           binop(Iop_Shr64, cc_dep1, mkU8(63)),
2035                                           mkU64(1)));
2036          }
2037          if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
2038             return mkU32(1);
2039          }
2040          /* Remaining case */
2041          return mkU32(0);
2042       }
2043 
2044       /* S390_CC_OP_BITWISE */
2045       if (cc_op == S390_CC_OP_BITWISE) {
2046          /*
2047             cc_dep1 is the result of the boolean operation.
2048 
2049             cc == 0  --> cc_dep1 == 0   (cond == 8)
2050             cc == 1  --> cc_dep1 != 0   (cond == 4)
2051 
2052             Because cc == 2 and cc == 3 cannot occur the two rightmost bits of
2053             cond are don't cares. Therefore:
2054 
2055             cond == 00xx  -> always false
2056             cond == 01xx  -> not equal
2057             cond == 10xx  -> equal
2058             cond == 11xx  -> always true
2059          */
2060          if ((cond & (8 + 4)) == 8 + 4) {
2061             return mkU32(1);
2062          }
2063          if (cond & 8) {
2064             return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
2065          }
2066          if (cond & 4) {
2067             return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2068          }
2069          /* Remaining case */
2070          return mkU32(0);
2071       }
2072 
2073       /* S390_CC_OP_INSERT_CHAR_MASK_32
2074          Since the mask comes from an immediate field in the opcode, we
2075          expect the mask to be a constant here. That simplifies matters. */
2076       if (cc_op == S390_CC_OP_INSERT_CHAR_MASK_32) {
2077          ULong mask;
2078          UInt imask = 0, shift = 0;
2079          IRExpr *word;
2080 
2081          if (! isC64(cc_dep2)) goto missed;
2082 
2083          mask = cc_dep2->Iex.Const.con->Ico.U64;
2084 
2085          /* Extract the 32-bit value from the thunk */
2086 
2087          word = unop(Iop_64to32, cc_dep1);
2088 
2089          switch (mask) {
2090          case 0:  shift =  0; imask = 0x00000000; break;
2091          case 1:  shift = 24; imask = 0x000000FF; break;
2092          case 2:  shift = 16; imask = 0x0000FF00; break;
2093          case 3:  shift = 16; imask = 0x0000FFFF; break;
2094          case 4:  shift =  8; imask = 0x00FF0000; break;
2095          case 5:  shift =  8; imask = 0x00FF00FF; break;
2096          case 6:  shift =  8; imask = 0x00FFFF00; break;
2097          case 7:  shift =  8; imask = 0x00FFFFFF; break;
2098          case 8:  shift =  0; imask = 0xFF000000; break;
2099          case 9:  shift =  0; imask = 0xFF0000FF; break;
2100          case 10: shift =  0; imask = 0xFF00FF00; break;
2101          case 11: shift =  0; imask = 0xFF00FFFF; break;
2102          case 12: shift =  0; imask = 0xFFFF0000; break;
2103          case 13: shift =  0; imask = 0xFFFF00FF; break;
2104          case 14: shift =  0; imask = 0xFFFFFF00; break;
2105          case 15: shift =  0; imask = 0xFFFFFFFF; break;
2106          }
2107 
2108          /* Select the bits that were inserted */
2109          word = binop(Iop_And32, word, mkU32(imask));
2110 
2111          /* cc == 0  --> all inserted bits zero or mask == 0   (cond == 8)
2112             cc == 1  --> leftmost inserted bit is one          (cond == 4)
2113             cc == 2  --> leftmost inserted bit is zero and not (cond == 2)
2114                          all inserted bits are zero
2115 
2116             Because cc == 0,1,2 the rightmost bit of the mask is a don't care */
2117          if (cond == 8 || cond == 8 + 1) {
2118             return unop(Iop_1Uto32, binop(Iop_CmpEQ32, word, mkU32(0)));
2119          }
2120          if (cond == 4 + 2 || cond == 4 + 2 + 1) {
2121             return unop(Iop_1Uto32, binop(Iop_CmpNE32, word, mkU32(0)));
2122          }
2123 
2124          /* Sign extend */
2125          if (shift != 0) {
2126             word = binop(Iop_Sar32, binop(Iop_Shl32, word, mkU8(shift)),
2127                          mkU8(shift));
2128          }
2129 
2130          if (cond == 4 || cond == 4 + 1) {  /* word < 0 */
2131             return unop(Iop_1Uto32, binop(Iop_CmpLT32S, word, mkU32(0)));
2132          }
2133          if (cond == 2 || cond == 2 + 1) {  /* word > 0 */
2134             return unop(Iop_1Uto32, binop(Iop_CmpLT32S, mkU32(0), word));
2135          }
2136          if (cond == 8 + 4 || cond == 8 + 4 + 1) {
2137             return unop(Iop_1Uto32, binop(Iop_CmpLE32S, word, mkU32(0)));
2138          }
2139          if (cond == 8 + 2 || cond == 8 + 2 + 1) {
2140             return unop(Iop_1Uto32, binop(Iop_CmpLE32S, mkU32(0), word));
2141          }
2142          if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
2143             return mkU32(1);
2144          }
2145          /* Remaining case */
2146          return mkU32(0);
2147       }
2148 
2149       /* S390_CC_OP_TEST_UNDER_MASK_8
2150          Since the mask comes from an immediate field in the opcode, we
2151          expect the mask to be a constant here. That simplifies matters. */
2152       if (cc_op == S390_CC_OP_TEST_UNDER_MASK_8) {
2153          ULong mask16;
2154 
2155          if (! isC64(cc_dep2)) goto missed;
2156 
2157          mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2158 
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2161          if (mask16 == 0) {   /* cc == 0 */
2162             if (cond & 0x8) return mkU32(1);
2163             return mkU32(0);
2164          }
2165 
2166          /* cc == 2 is a don't care */
2167          if (cond == 8 || cond == 8 + 2) {
2168             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2169                                           binop(Iop_And64, cc_dep1, cc_dep2),
2170                                           mkU64(0)));
2171          }
2172          if (cond == 7 || cond == 7 - 2) {
2173             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2174                                           binop(Iop_And64, cc_dep1, cc_dep2),
2175                                           mkU64(0)));
2176          }
2177          if (cond == 1 || cond == 1 + 2) {
2178             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2179                                           binop(Iop_And64, cc_dep1, cc_dep2),
2180                                           cc_dep2));
2181          }
2182          if (cond == 14 || cond == 14 - 2) {  /* ! OVFL */
2183             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2184                                           binop(Iop_And64, cc_dep1, cc_dep2),
2185                                           cc_dep2));
2186          }
2187          goto missed;
2188       }
2189 
2190       /* S390_CC_OP_TEST_UNDER_MASK_16
2191          Since the mask comes from an immediate field in the opcode, we
2192          expect the mask to be a constant here. That simplifies matters. */
2193       if (cc_op == S390_CC_OP_TEST_UNDER_MASK_16) {
2194          ULong mask16;
2195          UInt msb;
2196 
2197          if (! isC64(cc_dep2)) goto missed;
2198 
2199          mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2200 
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2203          if (mask16 == 0) {   /* cc == 0 */
2204             if (cond & 0x8) return mkU32(1);
2205             return mkU32(0);
2206          }
2207 
2208          if (cond == 15) return mkU32(1);
2209 
2210          if (cond == 8) {
2211             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2212                                           binop(Iop_And64, cc_dep1, cc_dep2),
2213                                           mkU64(0)));
2214          }
2215          if (cond == 7) {
2216             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2217                                           binop(Iop_And64, cc_dep1, cc_dep2),
2218                                           mkU64(0)));
2219          }
2220          if (cond == 1) {
2221             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2222                                           binop(Iop_And64, cc_dep1, cc_dep2),
2223                                           mkU64(mask16)));
2224          }
2225          if (cond == 14) {  /* ! OVFL */
2226             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2227                                           binop(Iop_And64, cc_dep1, cc_dep2),
2228                                           mkU64(mask16)));
2229          }
2230 
2231          /* Find MSB in mask */
2232          msb = 0x8000;
2233          while (msb > mask16)
2234             msb >>= 1;
2235 
2236          if (cond == 2) {  /* cc == 2 */
2237             IRExpr *c1, *c2;
2238 
2239             /* (cc_dep & msb) != 0 && (cc_dep & mask16) != mask16 */
2240             c1 = binop(Iop_CmpNE64,
2241                        binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2242             c2 = binop(Iop_CmpNE64,
2243                        binop(Iop_And64, cc_dep1, cc_dep2),
2244                        mkU64(mask16));
2245             return binop(Iop_And32, unop(Iop_1Uto32, c1),
2246                          unop(Iop_1Uto32, c2));
2247          }
2248 
2249          if (cond == 4) {  /* cc == 1 */
2250             IRExpr *c1, *c2;
2251 
2252             /* (cc_dep & msb) == 0 && (cc_dep & mask16) != 0 */
2253             c1 = binop(Iop_CmpEQ64,
2254                        binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2255             c2 = binop(Iop_CmpNE64,
2256                        binop(Iop_And64, cc_dep1, cc_dep2),
2257                        mkU64(0));
2258             return binop(Iop_And32, unop(Iop_1Uto32, c1),
2259                          unop(Iop_1Uto32, c2));
2260          }
2261 
2262          if (cond == 11) {  /* cc == 0,2,3 */
2263             IRExpr *c1, *c2;
2264 
2265             c1 = binop(Iop_CmpNE64,
2266                        binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2267             c2 = binop(Iop_CmpEQ64,
2268                        binop(Iop_And64, cc_dep1, cc_dep2),
2269                        mkU64(0));
2270             return binop(Iop_Or32, unop(Iop_1Uto32, c1),
2271                          unop(Iop_1Uto32, c2));
2272          }
2273 
2274          if (cond == 3) {  /* cc == 2 || cc == 3 */
2275             return unop(Iop_1Uto32,
2276                         binop(Iop_CmpNE64,
2277                               binop(Iop_And64, cc_dep1, mkU64(msb)),
2278                               mkU64(0)));
2279          }
2280          if (cond == 12) { /* cc == 0 || cc == 1 */
2281             return unop(Iop_1Uto32,
2282                         binop(Iop_CmpEQ64,
2283                               binop(Iop_And64, cc_dep1, mkU64(msb)),
2284                               mkU64(0)));
2285          }
2286          if (cond == 13) { /* cc == 0 || cc == 1 || cc == 3 */
2287             IRExpr *c01, *c3;
2288 
2289             c01 = binop(Iop_CmpEQ64, binop(Iop_And64, cc_dep1, mkU64(msb)),
2290                         mkU64(0));
2291             c3 = binop(Iop_CmpEQ64, binop(Iop_And64, cc_dep1, cc_dep2),
2292                        mkU64(mask16));
2293             return binop(Iop_Or32, unop(Iop_1Uto32, c01),
2294                          unop(Iop_1Uto32, c3));
2295          }
2296          // fixs390: handle cond = 5,6,9,10 (the missing cases)
2297          // vex_printf("TUM mask = 0x%llx\n", mask16);
2298          goto missed;
2299       }
2300 
2301       /* S390_CC_OP_UNSIGNED_SUB_64/32 */
2302       if (cc_op == S390_CC_OP_UNSIGNED_SUB_64 ||
2303           cc_op == S390_CC_OP_UNSIGNED_SUB_32) {
2304          /*
2305             cc_dep1, cc_dep2 are the zero extended left and right operands
2306 
2307             cc == 1  --> result != 0, borrow    (cond == 4)
2308             cc == 2  --> result == 0, no borrow (cond == 2)
2309             cc == 3  --> result != 0, no borrow (cond == 1)
2310 
2311             cc = (cc_dep1 == cc_dep2) ? 2
2312                                       : (cc_dep1 > cc_dep2) ? 3 : 1;
2313 
2314             Because cc == 0 cannot occur the leftmost bit of cond is
2315             a don't care.
2316          */
2317          if (cond == 1 || cond == 1 + 8) {  /* cc == 3   op2 < op1 */
2318             return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
2319          }
2320          if (cond == 2 || cond == 2 + 8) {  /* cc == 2 */
2321             return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
2322          }
2323          if (cond == 4 || cond == 4 + 8) {  /* cc == 1 */
2324             return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
2325          }
2326          if (cond == 3 || cond == 3 + 8) {  /* cc == 2 || cc == 3 */
2327             return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
2328          }
2329          if (cond == 6 || cond == 6 + 8) {  /* cc == 2 || cc == 1 */
2330             return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
2331          }
2332 
2333          if (cond == 5 || cond == 5 + 8) {  /* cc == 3 || cc == 1 */
2334             return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
2335          }
2336          if (cond == 7 || cond == 7 + 8) {
2337             return mkU32(1);
2338          }
2339          /* Remaining case */
2340          return mkU32(0);
2341       }
2342 
2343       /* S390_CC_OP_UNSIGNED_ADD_64 */
2344       if (cc_op == S390_CC_OP_UNSIGNED_ADD_64) {
2345          /*
2346             cc_dep1, cc_dep2 are the zero extended left and right operands
2347 
2348             cc == 0  --> result == 0, no carry  (cond == 8)
2349             cc == 1  --> result != 0, no carry  (cond == 4)
2350             cc == 2  --> result == 0, carry     (cond == 2)
2351             cc == 3  --> result != 0, carry     (cond == 1)
2352          */
2353          if (cond == 8) { /* cc == 0 */
2354             /* Both inputs are 0 */
2355             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2356                                           binop(Iop_Or64, cc_dep1, cc_dep2),
2357                                           mkU64(0)));
2358          }
2359          if (cond == 7) { /* cc == 1,2,3 */
2360             /* Not both inputs are 0 */
2361             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2362                                           binop(Iop_Or64, cc_dep1, cc_dep2),
2363                                           mkU64(0)));
2364          }
2365          if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2366             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2367                                           binop(Iop_Add64, cc_dep1, cc_dep2),
2368                                           mkU64(0)));
2369          }
2370          if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2371             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2372                                           binop(Iop_Add64, cc_dep1, cc_dep2),
2373                                           mkU64(0)));
2374          }
2375          goto missed;
2376       }
2377 
2378       /* S390_CC_OP_UNSIGNED_ADD_32 */
2379       if (cc_op == S390_CC_OP_UNSIGNED_ADD_32) {
2380          /*
2381             cc_dep1, cc_dep2 are the zero extended left and right operands
2382 
2383             cc == 0  --> result == 0, no carry  (cond == 8)
2384             cc == 1  --> result != 0, no carry  (cond == 4)
2385             cc == 2  --> result == 0, carry     (cond == 2)
2386             cc == 3  --> result != 0, carry     (cond == 1)
2387          */
2388          if (cond == 8) { /* cc == 0 */
2389             /* Both inputs are 0 */
2390             return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2391                                           binop(Iop_Or64, cc_dep1, cc_dep2),
2392                                           mkU64(0)));
2393          }
2394          if (cond == 7) { /* cc == 1,2,3 */
2395             /* Not both inputs are 0 */
2396             return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2397                                           binop(Iop_Or64, cc_dep1, cc_dep2),
2398                                           mkU64(0)));
2399          }
2400          if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2401             return unop(Iop_1Uto32, binop(Iop_CmpEQ32,
2402                                           binop(Iop_Add32,
2403                                                 unop(Iop_64to32, cc_dep1),
2404                                                 unop(Iop_64to32, cc_dep2)),
2405                                           mkU32(0)));
2406          }
2407          if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2408             return unop(Iop_1Uto32, binop(Iop_CmpNE32,
2409                                           binop(Iop_Add32,
2410                                                 unop(Iop_64to32, cc_dep1),
2411                                                 unop(Iop_64to32, cc_dep2)),
2412                                           mkU32(0)));
2413          }
2414          goto missed;
2415       }
2416 
2417       /* S390_CC_OP_SET */
2418       if (cc_op == S390_CC_OP_SET) {
2419          /* cc_dep1 is the condition code
2420 
2421             Return 1, if ((cond << cc_dep1) & 0x8) != 0 */
2422 
2423         return unop(Iop_1Uto32,
2424                     binop(Iop_CmpNE64,
2425                           binop(Iop_And64,
2426                                 binop(Iop_Shl64, cond_expr,
2427                                       unop(Iop_64to8, cc_dep1)),
2428                                 mkU64(8)),
2429                           mkU64(0)));
2430       }
2431 
2432       goto missed;
2433    }
2434 
2435    /* --------- Specialising "s390_calculate_cc" --------- */
2436 
2437    if (vex_streq(function_name, "s390_calculate_cc")) {
2438       IRExpr *cc_op_expr, *cc_dep1;
2439       ULong cc_op;
2440 
2441       vassert(arity == 4);
2442 
2443       cc_op_expr = args[0];
2444 
2445       /* The necessary requirement for all optimizations here is that
2446          cc_op is constant. So check that upfront. */
2447       if (! isC64(cc_op_expr)) return NULL;
2448 
2449       cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
2450       cc_dep1 = args[1];
2451 
2452       if (cc_op == S390_CC_OP_BITWISE) {
2453          return unop(Iop_1Uto32,
2454                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2455       }
2456 
2457       if (cc_op == S390_CC_OP_SET) {
2458          return unop(Iop_64to32, cc_dep1);
2459       }
2460 
2461       goto missed;
2462    }
2463 
2464 missed:
2465    return NULL;
2466 }
2467 
2468 /*------------------------------------------------------------*/
2469 /*--- Dirty helper for vector instructions                 ---*/
2470 /*------------------------------------------------------------*/
2471 
2472 #if defined(VGA_s390x)
2473 ULong
s390x_dirtyhelper_vec_op(VexGuestS390XState * guest_state,const ULong serialized)2474 s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
2475                          const ULong serialized)
2476 {
2477    UInt psw;
2478    s390x_vec_op_details_t details;
2479    const s390x_vec_op_details_t* d = (const s390x_vec_op_details_t*) &details;
2480 
2481    details.serialized = serialized;
2482 
2483    vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST);
2484    static const UChar opcodes[][2] = {
2485       {0x00, 0x00}, /* invalid */
2486       {0xe7, 0x97}, /* VPKS */
2487       {0xe7, 0x95}, /* VPKLS */
2488       {0xe7, 0x82}, /* VFAE */
2489       {0xe7, 0x80}, /* VFEE */
2490       {0xe7, 0x81}, /* VFENE */
2491       {0xe7, 0x5c}, /* VISTR */
2492       {0xe7, 0x8a}, /* VSTRC */
2493       {0xe7, 0xf8}, /* VCEQ */
2494       {0xe7, 0xd8}, /* VTM */
2495       {0xe7, 0xb4}, /* VGFM */
2496       {0xe7, 0xbc}, /* VGFMA */
2497       {0xe7, 0xab}, /* VMAH */
2498       {0xe7, 0xa9}, /* VMALH */
2499       {0xe7, 0xfb}, /* VCH */
2500       {0xe7, 0xf9}, /* VCHL */
2501       {0xe7, 0xe8}, /* VFCE */
2502       {0xe7, 0xeb}, /* VFCH */
2503       {0xe7, 0xea}, /* VFCHE */
2504       {0xe7, 0x4a}  /* VFTCI */
2505    };
2506 
2507    union {
2508       struct {
2509         unsigned int op1 : 8;
2510         unsigned int v1  : 4;
2511         unsigned int v2  : 4;
2512         unsigned int v3  : 4;
2513         unsigned int     : 4;
2514         unsigned int m5  : 4;
2515         unsigned int     : 4;
2516         unsigned int m4  : 4;
2517         unsigned int rxb : 4;
2518         unsigned int op2 : 8;
2519       } VRR;
2520       struct {
2521         unsigned int op1 : 8;
2522         unsigned int v1  : 4;
2523         unsigned int v2  : 4;
2524         unsigned int v3  : 4;
2525         unsigned int m5  : 4;
2526         unsigned int m6  : 4;
2527         unsigned int     : 4;
2528         unsigned int v4  : 4;
2529         unsigned int rxb : 4;
2530         unsigned int op2 : 8;
2531       } VRRd;
2532       struct {
2533          UInt op1 : 8;
2534          UInt v1  : 4;
2535          UInt v2  : 4;
2536          UInt v3  : 4;
2537          UInt     : 4;
2538          UInt m6  : 4;
2539          UInt m5  : 4;
2540          UInt m4  : 4;
2541          UInt rxb : 4;
2542          UInt op2 : 8;
2543       } VRRc;
2544       struct {
2545          UInt op1 : 8;
2546          UInt v1  : 4;
2547          UInt v2  : 4;
2548          UInt i3  : 12;
2549          UInt m5  : 4;
2550          UInt m4  : 4;
2551          UInt rxb : 4;
2552          UInt op2 : 8;
2553       } VRIe;
2554       UChar bytes[6];
2555    } the_insn;
2556 
2557    the_insn.VRR.op1 = opcodes[d->op][0];
2558    the_insn.bytes[1] = the_insn.bytes[2]
2559       = the_insn.bytes[3] = the_insn.bytes[4] = 0;
2560    the_insn.VRR.op2 = opcodes[d->op][1];
2561 
2562    switch(d->op) {
2563    case S390_VEC_OP_VISTR:
2564       the_insn.VRR.v1 = 1;
2565       the_insn.VRR.v2 = 2;
2566       the_insn.VRR.rxb = 0b1100;
2567       the_insn.VRR.m4 = d->m4;
2568       the_insn.VRR.m5 = d->m5;
2569       break;
2570 
2571    case S390_VEC_OP_VTM:
2572       the_insn.VRR.v1 = 2;
2573       the_insn.VRR.v2 = 3;
2574       the_insn.VRR.rxb = 0b1100;
2575       break;
2576 
2577    case S390_VEC_OP_VPKS:
2578    case S390_VEC_OP_VPKLS:
2579    case S390_VEC_OP_VFAE:
2580    case S390_VEC_OP_VFEE:
2581    case S390_VEC_OP_VFENE:
2582    case S390_VEC_OP_VCEQ:
2583    case S390_VEC_OP_VGFM:
2584    case S390_VEC_OP_VCH:
2585    case S390_VEC_OP_VCHL:
2586       the_insn.VRR.v1 = 1;
2587       the_insn.VRR.v2 = 2;
2588       the_insn.VRR.v3 = 3;
2589       the_insn.VRR.rxb = 0b1110;
2590       the_insn.VRR.m4 = d->m4;
2591       the_insn.VRR.m5 = d->m5;
2592       break;
2593 
2594    case S390_VEC_OP_VSTRC:
2595    case S390_VEC_OP_VGFMA:
2596    case S390_VEC_OP_VMAH:
2597    case S390_VEC_OP_VMALH:
2598       the_insn.VRRd.v1 = 1;
2599       the_insn.VRRd.v2 = 2;
2600       the_insn.VRRd.v3 = 3;
2601       the_insn.VRRd.v4 = 4;
2602       the_insn.VRRd.rxb = 0b1111;
2603       the_insn.VRRd.m5 = d->m4;
2604       the_insn.VRRd.m6 = d->m5;
2605       break;
2606 
2607    case S390_VEC_OP_VFCE:
2608    case S390_VEC_OP_VFCH:
2609    case S390_VEC_OP_VFCHE:
2610       the_insn.VRRc.v1 = 1;
2611       the_insn.VRRc.v2 = 2;
2612       the_insn.VRRc.v3 = 3;
2613       the_insn.VRRc.rxb = 0b1110;
2614       the_insn.VRRc.m4 = d->m4;
2615       the_insn.VRRc.m5 = d->m5;
2616       the_insn.VRRc.m6 = d->m6;
2617       break;
2618 
2619    case S390_VEC_OP_VFTCI:
2620       the_insn.VRIe.v1 = 1;
2621       the_insn.VRIe.v2 = 2;
2622       the_insn.VRIe.rxb = 0b1100;
2623       the_insn.VRIe.i3 = d->i3;
2624       the_insn.VRIe.m4 = d->m4;
2625       the_insn.VRIe.m5 = d->m5;
2626       break;
2627 
2628    default:
2629       vex_printf("operation = %d\n", d->op);
2630       vpanic("s390x_dirtyhelper_vec_op: unknown operation");
2631    }
2632 
2633    const V128* guest_v = &(guest_state->guest_v0);
2634    __asm__ volatile (
2635       "lgr %%r10, %[arg1]\n"
2636       VL(2, 0, a, 000, 8)
2637       "lgr %%r10, %[arg2]\n"
2638       VL(3, 0, a, 000, 8)
2639       "lgr %%r10, %[arg3]\n"
2640       VL(4, 0, a, 000, 8)
2641       "ex %[zero], %[insn]\n"
2642 
2643       "cijne %[read_only], 0, return_cc\n"
2644       "lgr %%r10, %[res]\n"
2645       VST(1, 0, a, 000, 8)
2646 
2647       "return_cc: "
2648       "ipm %[psw]\n\t"
2649          : [psw] "=d" (psw)
2650 
2651          : [res]  "r" (&guest_v[d->v1]),
2652            [arg1] "r" (&guest_v[d->v2]),
2653            [arg2] "r" (&guest_v[d->v3]),
2654            [arg3] "r" (&guest_v[d->v4]),
2655 
2656            [zero] "r" (0ULL),
2657            [insn] "m" (the_insn),
2658            [read_only] "r" (d->read_only)
2659 
2660          : "cc", "r10", "v16", "v17", "v18", "v19"
2661       );
2662 
2663    return psw >> 28;   /* cc */
2664 }
2665 
2666 #else
2667 
2668 ULong
s390x_dirtyhelper_vec_op(VexGuestS390XState * guest_state,const ULong serialized)2669 s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
2670                          const ULong serialized)
2671 { return 0; }
2672 
2673 #endif
2674 
2675 /*-----------------------------------------------------------------*/
2676 /*--- Dirty helper for Perform Pseudorandom number instruction  ---*/
2677 /*-----------------------------------------------------------------*/
2678 
/* Dummy helper that exists only to indicate the load of the parameter
   block.  It is needed because a single dirty helper cannot have two
   memory side effects.  The body is intentionally empty.
 */
void s390x_dirtyhelper_PPNO_sha512_load_param_block( void )
{
}
2686 
2687 #if defined(VGA_s390x)
2688 
2689 /* IMPORTANT!
2690    We return here bit mask where only supported functions are set to one.
2691    If you implement new functions don't forget the supported array.
2692  */
2693 void
s390x_dirtyhelper_PPNO_query(VexGuestS390XState * guest_state,ULong r1,ULong r2)2694 s390x_dirtyhelper_PPNO_query(VexGuestS390XState *guest_state, ULong r1, ULong r2)
2695 {
2696    ULong supported[2] = {0x9000000000000000ULL, 0x0000000000000000ULL};
2697    ULong *result = (ULong*) guest_state->guest_r1;
2698 
2699    result[0] = supported[0];
2700    result[1] = supported[1];
2701 }
2702 
2703 ULong
s390x_dirtyhelper_PPNO_sha512(VexGuestS390XState * guest_state,ULong r1,ULong r2)2704 s390x_dirtyhelper_PPNO_sha512(VexGuestS390XState *guest_state, ULong r1, ULong r2)
2705 {
2706    ULong* op1 = (ULong*) (((ULong)(&guest_state->guest_r0)) + r1 * sizeof(ULong));
2707    ULong* op2 = (ULong*) (((ULong)(&guest_state->guest_r0)) + r2 * sizeof(ULong));
2708 
2709    register ULong reg0 asm("0") = guest_state->guest_r0;
2710    register ULong reg1 asm("1") = guest_state->guest_r1;
2711    register ULong reg2 asm("2") = op1[0];
2712    register ULong reg3 asm("3") = op1[1];
2713    register ULong reg4 asm("4") = op2[0];
2714    register ULong reg5 asm("5") = op2[1];
2715 
2716    ULong cc = 0;
2717    asm volatile(".insn rre, 0xb93c0000, %%r2, %%r4\n"
2718                 "ipm %[cc]\n"
2719                 "srl %[cc], 28\n"
2720                 : "+d"(reg0), "+d"(reg1),
2721                   "+d"(reg2), "+d"(reg3),
2722                   "+d"(reg4), "+d"(reg5),
2723                   [cc] "=d"(cc)
2724                 :
2725                 : "cc", "memory");
2726 
2727    return cc;
2728 }
2729 
2730 #else
2731 
2732 void
s390x_dirtyhelper_PPNO_query(VexGuestS390XState * guest_state,ULong r1,ULong r2)2733 s390x_dirtyhelper_PPNO_query(VexGuestS390XState *guest_state, ULong r1, ULong r2)
2734 {
2735 }
2736 
2737 ULong
s390x_dirtyhelper_PPNO_sha512(VexGuestS390XState * guest_state,ULong r1,ULong r2)2738 s390x_dirtyhelper_PPNO_sha512(VexGuestS390XState *guest_state, ULong r1, ULong r2)
2739 {
2740    return 0;
2741 }
2742 
2743 #endif /* VGA_s390x */
2744 /*---------------------------------------------------------------*/
2745 /*--- end                                guest_s390_helpers.c ---*/
2746 /*---------------------------------------------------------------*/
2747