1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "qemu/int128.h"
30 #include "qemu/atomic128.h"
31 #include "trace.h"
32 
33 #if !defined(CONFIG_USER_ONLY)
34 #include "hw/s390x/storage-keys.h"
35 #include "hw/boards.h"
36 #endif
37 
38 /*****************************************************************************/
39 /* Softmmu support */
40 
41 /* #define DEBUG_HELPER */
42 #ifdef DEBUG_HELPER
43 #define HELPER_LOG(x...) qemu_log(x)
44 #else
45 #define HELPER_LOG(x...)
46 #endif
47 
48 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
49 {
50     uint16_t pkm = env->cregs[3] >> 16;
51 
52     if (env->psw.mask & PSW_MASK_PSTATE) {
53         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
54         return pkm & (0x8000 >> psw_key);
55     }
56     return true;
57 }
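
/*
 * Worked example (illustrative): with env->cregs[3] = 0x40000000, the PKM is
 * 0x4000, so in problem state only PSW key 1 passes the test
 * (0x8000 >> 1 == 0x4000). Any other key, e.g. key 0 or key 9, selects a
 * zero bit in the PKM and is rejected.
 */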
58 
59 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
60                                    uint64_t src, uint32_t len)
61 {
62     if (!len || src == dest) {
63         return false;
64     }
65     /* Take care of wrapping at the end of address space. */
66     if (unlikely(wrap_address(env, src + len - 1) < src)) {
67         return dest > src || dest <= wrap_address(env, src + len - 1);
68     }
69     return dest > src && dest <= src + len - 1;
70 }
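
/*
 * Worked example (illustrative): for dest = 0x1004, src = 0x1000 and len = 16,
 * dest lies within (src, src + len - 1], so a left-to-right copy would read
 * bytes it has already overwritten: destructive. For dest = 0x0ff0,
 * src = 0x1000, len = 16, dest < src and the copy is safe. The wrap check
 * covers ranges crossing the end of the address space: in 64-bit mode,
 * src = 0xfffffffffffffff8 with len = 16 wraps to 0x7, so any dest <= 0x7
 * or dest > src also overlaps destructively.
 */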
71 
72 /* Trigger a SPECIFICATION exception if an address or a length is not
73    naturally aligned.  */
74 static inline void check_alignment(CPUS390XState *env, uint64_t v,
75                                    int wordsize, uintptr_t ra)
76 {
77     if (v % wordsize) {
78         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
79     }
80 }
81 
82 /* Load a value from memory according to its size.  */
83 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
84                                            int wordsize, uintptr_t ra)
85 {
86     switch (wordsize) {
87     case 1:
88         return cpu_ldub_data_ra(env, addr, ra);
89     case 2:
90         return cpu_lduw_data_ra(env, addr, ra);
91     default:
92         abort();
93     }
94 }
95 
96 /* Store a value to memory according to its size.  */
97 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
98                                       uint64_t value, int wordsize,
99                                       uintptr_t ra)
100 {
101     switch (wordsize) {
102     case 1:
103         cpu_stb_data_ra(env, addr, value, ra);
104         break;
105     case 2:
106         cpu_stw_data_ra(env, addr, value, ra);
107         break;
108     default:
109         abort();
110     }
111 }
112 
113 /* An access covers at most 4096 bytes and therefore at most two pages. */
114 typedef struct S390Access {
115     target_ulong vaddr1;
116     target_ulong vaddr2;
117     char *haddr1;
118     char *haddr2;
119     uint16_t size1;
120     uint16_t size2;
121     /*
122      * If we can't access the host page directly, we'll have to do I/O access
123      * via ld/st helpers. These are internal details, so we store the
124      * mmu idx to do the access here instead of passing it around in the
125      * helpers. Maybe we can get rid of the ld/st access one day, once we
126      * can handle TLB_NOTDIRTY differently. We don't expect these special
127      * accesses to trigger exceptions; only TLB_NOTDIRTY on LAP pages
128      * could trigger a new MMU translation, and it is very unlikely that
129      * the mapping changes in between such that we would trigger a fault.
130      */
131     int mmu_idx;
132 } S390Access;
133 
134 /*
135  * With nonfault=1, return the PGM_ exception that would have been injected
136  * into the guest; return 0 if no exception was detected.
137  *
138  * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
139  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
140  */
141 static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
142                              MMUAccessType access_type, int mmu_idx,
143                              bool nonfault, void **phost, uintptr_t ra)
144 {
145 #if defined(CONFIG_USER_ONLY)
146     return probe_access_flags(env, addr, access_type, mmu_idx,
147                               nonfault, phost, ra);
148 #else
149     int flags;
150 
151     /*
152      * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
153      * to detect if there was an exception during tlb_fill().
154      */
155     env->tlb_fill_exc = 0;
156     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
157                                ra);
158     if (env->tlb_fill_exc) {
159         return env->tlb_fill_exc;
160     }
161 
162     if (unlikely(flags & TLB_WATCHPOINT)) {
163         /* S390 does not presently use transaction attributes. */
164         cpu_check_watchpoint(env_cpu(env), addr, size,
165                              MEMTXATTRS_UNSPECIFIED,
166                              (access_type == MMU_DATA_STORE
167                               ? BP_MEM_WRITE : BP_MEM_READ), ra);
168     }
169     return 0;
170 #endif
171 }
172 
173 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
174                              bool nonfault, vaddr vaddr1, int size,
175                              MMUAccessType access_type,
176                              int mmu_idx, uintptr_t ra)
177 {
178     void *haddr1, *haddr2 = NULL;
179     int size1, size2, exc;
180     vaddr vaddr2 = 0;
181 
182     assert(size > 0 && size <= 4096);
183 
184     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
185     size2 = size - size1;
186 
187     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
188                             &haddr1, ra);
189     if (exc) {
190         return exc;
191     }
192     if (unlikely(size2)) {
193         /* The access crosses page boundaries. */
194         vaddr2 = wrap_address(env, vaddr1 + size1);
195         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
196                                 nonfault, &haddr2, ra);
197         if (exc) {
198             return exc;
199         }
200     }
201 
202     *access = (S390Access) {
203         .vaddr1 = vaddr1,
204         .vaddr2 = vaddr2,
205         .haddr1 = haddr1,
206         .haddr2 = haddr2,
207         .size1 = size1,
208         .size2 = size2,
209         .mmu_idx = mmu_idx
210     };
211     return 0;
212 }
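
/*
 * Worked example (illustrative, assuming 4k pages): -(vaddr1 |
 * TARGET_PAGE_MASK) is the distance from vaddr1 to the end of its page.
 * For vaddr1 = 0x1ff0 and size = 300:
 *   vaddr1 | TARGET_PAGE_MASK = 0xffff...fff0, so -(...) = 0x10 = 16
 *   size1 = MIN(300, 16) = 16, size2 = 284
 * and the second fragment starts at vaddr2 = 0x2000, the next page.
 */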
213 
214 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
215                                  MMUAccessType access_type, int mmu_idx,
216                                  uintptr_t ra)
217 {
218     S390Access ret;
219     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
220                                 access_type, mmu_idx, ra);
221     assert(!exc);
222     return ret;
223 }
224 
225 /* Helper to handle memset on a single page. */
226 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
227                              uint8_t byte, uint16_t size, int mmu_idx,
228                              uintptr_t ra)
229 {
230 #ifdef CONFIG_USER_ONLY
231     g_assert(haddr);
232     memset(haddr, byte, size);
233 #else
234     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
235     int i;
236 
237     if (likely(haddr)) {
238         memset(haddr, byte, size);
239     } else {
240         /*
241          * Do a single access and test if we can then get access to the
242          * page. This is especially relevant to speed up TLB_NOTDIRTY.
243          */
244         g_assert(size > 0);
245         cpu_stb_mmu(env, vaddr, byte, oi, ra);
246         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
247         if (likely(haddr)) {
248             memset(haddr + 1, byte, size - 1);
249         } else {
250             for (i = 1; i < size; i++) {
251                 cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
252             }
253         }
254     }
255 #endif
256 }
257 
258 static void access_memset(CPUS390XState *env, S390Access *desta,
259                           uint8_t byte, uintptr_t ra)
260 {
261 
262     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
263                      desta->mmu_idx, ra);
264     if (likely(!desta->size2)) {
265         return;
266     }
267     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
268                      desta->mmu_idx, ra);
269 }
270 
271 static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
272                                   int offset, int mmu_idx, uintptr_t ra)
273 {
274 #ifdef CONFIG_USER_ONLY
275     return ldub_p(*haddr + offset);
276 #else
277     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
278     uint8_t byte;
279 
280     if (likely(*haddr)) {
281         return ldub_p(*haddr + offset);
282     }
283     /*
284      * Do a single access and test if we can then get access to the
285      * page. This is especially relevant to speed up TLB_NOTDIRTY.
286      */
287     byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
288     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
289     return byte;
290 #endif
291 }
292 
293 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
294                                int offset, uintptr_t ra)
295 {
296     if (offset < access->size1) {
297         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
298                                   offset, access->mmu_idx, ra);
299     }
300     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
301                               offset - access->size1, access->mmu_idx, ra);
302 }
303 
304 static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
305                                int offset, uint8_t byte, int mmu_idx,
306                                uintptr_t ra)
307 {
308 #ifdef CONFIG_USER_ONLY
309     stb_p(*haddr + offset, byte);
310 #else
311     MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
312 
313     if (likely(*haddr)) {
314         stb_p(*haddr + offset, byte);
315         return;
316     }
317     /*
318      * Do a single access and test if we can then get access to the
319      * page. This is especially relevant to speed up TLB_NOTDIRTY.
320      */
321     cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
322     *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
323 #endif
324 }
325 
326 static void access_set_byte(CPUS390XState *env, S390Access *access,
327                             int offset, uint8_t byte, uintptr_t ra)
328 {
329     if (offset < access->size1) {
330         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
331                            access->mmu_idx, ra);
332     } else {
333         do_access_set_byte(env, access->vaddr2, &access->haddr2,
334                            offset - access->size1, byte, access->mmu_idx, ra);
335     }
336 }
337 
338 /*
339  * Move data with the same semantics as memmove() when the ranges don't
340  * overlap or when src > dest. Behavior is undefined on destructive overlaps.
341  */
342 static void access_memmove(CPUS390XState *env, S390Access *desta,
343                            S390Access *srca, uintptr_t ra)
344 {
345     int diff;
346 
347     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
348 
349     /* Fall back to slow access in case we don't have access to all host pages */
350     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
351                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
352         int i;
353 
354         for (i = 0; i < desta->size1 + desta->size2; i++) {
355             uint8_t byte = access_get_byte(env, srca, i, ra);
356 
357             access_set_byte(env, desta, i, byte, ra);
358         }
359         return;
360     }
361 
362     if (srca->size1 == desta->size1) {
363         memmove(desta->haddr1, srca->haddr1, srca->size1);
364         if (unlikely(srca->size2)) {
365             memmove(desta->haddr2, srca->haddr2, srca->size2);
366         }
367     } else if (srca->size1 < desta->size1) {
368         diff = desta->size1 - srca->size1;
369         memmove(desta->haddr1, srca->haddr1, srca->size1);
370         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
371         if (likely(desta->size2)) {
372             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
373         }
374     } else {
375         diff = srca->size1 - desta->size1;
376         memmove(desta->haddr1, srca->haddr1, desta->size1);
377         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
378         if (likely(srca->size2)) {
379             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
380         }
381     }
382 }
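
/*
 * Illustration of the mixed case above (srca->size1 < desta->size1): the
 * source splits earlier than the destination, so the destination's first
 * page receives srca->size1 bytes from the source's first page plus `diff`
 * bytes from the source's second page, and the rest of the source's second
 * page fills the destination's second page. E.g. for a 100-byte copy with
 * srca->size1 = 30 and desta->size1 = 70 (diff = 40):
 *   desta->haddr1[0..29]  <- srca->haddr1[0..29]
 *   desta->haddr1[30..69] <- srca->haddr2[0..39]
 *   desta->haddr2[0..29]  <- srca->haddr2[40..69]
 */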
383 
384 static int mmu_idx_from_as(uint8_t as)
385 {
386     switch (as) {
387     case AS_PRIMARY:
388         return MMU_PRIMARY_IDX;
389     case AS_SECONDARY:
390         return MMU_SECONDARY_IDX;
391     case AS_HOME:
392         return MMU_HOME_IDX;
393     default:
394         /* FIXME AS_ACCREG */
395         g_assert_not_reached();
396     }
397 }
398 
399 /* and on array */
400 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
401                              uint64_t src, uintptr_t ra)
402 {
403     const int mmu_idx = cpu_mmu_index(env, false);
404     S390Access srca1, srca2, desta;
405     uint32_t i;
406     uint8_t c = 0;
407 
408     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
409                __func__, l, dest, src);
410 
411     /* NC always processes one more byte than specified - maximum is 256 */
412     l++;
413 
414     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
415     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
416     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
417     for (i = 0; i < l; i++) {
418         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
419                           access_get_byte(env, &srca2, i, ra);
420 
421         c |= x;
422         access_set_byte(env, &desta, i, x, ra);
423     }
424     return c != 0;
425 }
426 
427 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
428                     uint64_t src)
429 {
430     return do_helper_nc(env, l, dest, src, GETPC());
431 }
432 
433 /* xor on array */
434 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
435                              uint64_t src, uintptr_t ra)
436 {
437     const int mmu_idx = cpu_mmu_index(env, false);
438     S390Access srca1, srca2, desta;
439     uint32_t i;
440     uint8_t c = 0;
441 
442     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
443                __func__, l, dest, src);
444 
445     /* XC always processes one more byte than specified - maximum is 256 */
446     l++;
447 
448     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
449     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
450     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
451 
452     /* xor with itself is the same as memset(0) */
453     if (src == dest) {
454         access_memset(env, &desta, 0, ra);
455         return 0;
456     }
457 
458     for (i = 0; i < l; i++) {
459         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
460                           access_get_byte(env, &srca2, i, ra);
461 
462         c |= x;
463         access_set_byte(env, &desta, i, x, ra);
464     }
465     return c != 0;
466 }
467 
468 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
469                     uint64_t src)
470 {
471     return do_helper_xc(env, l, dest, src, GETPC());
472 }
473 
474 /* or on array */
475 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
476                              uint64_t src, uintptr_t ra)
477 {
478     const int mmu_idx = cpu_mmu_index(env, false);
479     S390Access srca1, srca2, desta;
480     uint32_t i;
481     uint8_t c = 0;
482 
483     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
484                __func__, l, dest, src);
485 
486     /* OC always processes one more byte than specified - maximum is 256 */
487     l++;
488 
489     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
490     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
491     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
492     for (i = 0; i < l; i++) {
493         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
494                           access_get_byte(env, &srca2, i, ra);
495 
496         c |= x;
497         access_set_byte(env, &desta, i, x, ra);
498     }
499     return c != 0;
500 }
501 
502 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
503                     uint64_t src)
504 {
505     return do_helper_oc(env, l, dest, src, GETPC());
506 }
507 
508 /* memmove */
509 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
510                               uint64_t src, uintptr_t ra)
511 {
512     const int mmu_idx = cpu_mmu_index(env, false);
513     S390Access srca, desta;
514     uint32_t i;
515 
516     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
517                __func__, l, dest, src);
518 
519     /* MVC always copies one more byte than specified - maximum is 256 */
520     l++;
521 
522     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
523     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
524 
525     /*
526      * "When the operands overlap, the result is obtained as if the operands
527      * were processed one byte at a time". Only non-destructive overlaps
528      * behave like memmove().
529      */
530     if (dest == src + 1) {
531         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
532     } else if (!is_destructive_overlap(env, dest, src, l)) {
533         access_memmove(env, &desta, &srca, ra);
534     } else {
535         for (i = 0; i < l; i++) {
536             uint8_t byte = access_get_byte(env, &srca, i, ra);
537 
538             access_set_byte(env, &desta, i, byte, ra);
539         }
540     }
541 
542     return env->cc_op;
543 }
544 
545 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
546 {
547     do_helper_mvc(env, l, dest, src, GETPC());
548 }
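
/*
 * Worked example (illustrative): the dest == src + 1 special case above
 * implements the classic one-byte-at-a-time propagation idiom, e.g.
 *   MVC 1(255,R1),0(R1)
 * copies byte 0 into byte 1, byte 1 into byte 2, and so on, which is
 * equivalent to spreading the first byte across the whole operand -
 * hence the access_memset() shortcut.
 */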
549 
550 /* move inverse  */
551 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
552 {
553     const int mmu_idx = cpu_mmu_index(env, false);
554     S390Access srca, desta;
555     uintptr_t ra = GETPC();
556     int i;
557 
558     /* MVCIN always copies one more byte than specified - maximum is 256 */
559     l++;
560 
561     src = wrap_address(env, src - l + 1);
562     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
563     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
564     for (i = 0; i < l; i++) {
565         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
566 
567         access_set_byte(env, &desta, i, x, ra);
568     }
569 }
570 
571 /* move numerics  */
572 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
573 {
574     const int mmu_idx = cpu_mmu_index(env, false);
575     S390Access srca1, srca2, desta;
576     uintptr_t ra = GETPC();
577     int i;
578 
579     /* MVN always copies one more byte than specified - maximum is 256 */
580     l++;
581 
582     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
583     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
584     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
585     for (i = 0; i < l; i++) {
586         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
587                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
588 
589         access_set_byte(env, &desta, i, x, ra);
590     }
591 }
592 
593 /* move with offset  */
594 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
595 {
596     const int mmu_idx = cpu_mmu_index(env, false);
597     /* MVO always processes one more byte than specified - maximum is 16 */
598     const int len_dest = (l >> 4) + 1;
599     const int len_src = (l & 0xf) + 1;
600     uintptr_t ra = GETPC();
601     uint8_t byte_dest, byte_src;
602     S390Access srca, desta;
603     int i, j;
604 
605     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
606     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
607 
608     /* Handle rightmost byte */
609     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
610     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
611     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
612     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
613 
614     /* Process remaining bytes from right to left */
615     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
616         byte_dest = byte_src >> 4;
617         if (j >= 0) {
618             byte_src = access_get_byte(env, &srca, j, ra);
619         } else {
620             byte_src = 0;
621         }
622         byte_dest |= byte_src << 4;
623         access_set_byte(env, &desta, i, byte_dest, ra);
624     }
625 }
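
/*
 * Worked example (illustrative): MVO with a 3-byte first operand
 * 0xaa 0xbb 0x5c and a 2-byte second operand 0x12 0x34. The rightmost
 * destination nibble (the sign, 0xc) is preserved and the source digits
 * are shifted left by one nibble across it:
 *   dest[2] = (0x5c & 0x0f) | (0x34 << 4) = 0x4c
 *   dest[1] = (0x34 >> 4) | (0x12 << 4)   = 0x23  (truncated to 8 bits)
 *   dest[0] = (0x12 >> 4) | (0x00 << 4)   = 0x01
 * giving 0x01 0x23 0x4c.
 */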
626 
627 /* move zones  */
628 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
629 {
630     const int mmu_idx = cpu_mmu_index(env, false);
631     S390Access srca1, srca2, desta;
632     uintptr_t ra = GETPC();
633     int i;
634 
635     /* MVZ always copies one more byte than specified - maximum is 256 */
636     l++;
637 
638     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
639     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
640     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
641     for (i = 0; i < l; i++) {
642         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
643                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
644 
645         access_set_byte(env, &desta, i, x, ra);
646     }
647 }
648 
649 /* compare unsigned byte arrays */
650 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
651                               uint64_t s2, uintptr_t ra)
652 {
653     uint32_t i;
654     uint32_t cc = 0;
655 
656     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
657                __func__, l, s1, s2);
658 
659     for (i = 0; i <= l; i++) {
660         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
661         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
662         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
663         if (x < y) {
664             cc = 1;
665             break;
666         } else if (x > y) {
667             cc = 2;
668             break;
669         }
670     }
671 
672     HELPER_LOG("\n");
673     return cc;
674 }
675 
676 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
677 {
678     return do_helper_clc(env, l, s1, s2, GETPC());
679 }
680 
681 /* compare logical under mask */
682 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
683                      uint64_t addr)
684 {
685     uintptr_t ra = GETPC();
686     uint32_t cc = 0;
687 
688     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
689                mask, addr);
690 
691     while (mask) {
692         if (mask & 8) {
693             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
694             uint8_t r = extract32(r1, 24, 8);
695             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
696                        addr);
697             if (r < d) {
698                 cc = 1;
699                 break;
700             } else if (r > d) {
701                 cc = 2;
702                 break;
703             }
704             addr++;
705         }
706         mask = (mask << 1) & 0xf;
707         r1 <<= 8;
708     }
709 
710     HELPER_LOG("\n");
711     return cc;
712 }
713 
714 static inline uint64_t get_address(CPUS390XState *env, int reg)
715 {
716     return wrap_address(env, env->regs[reg]);
717 }
718 
719 /*
720  * Store the address to the given register, zeroing out unused leftmost
721  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
722  */
723 static inline void set_address_zero(CPUS390XState *env, int reg,
724                                     uint64_t address)
725 {
726     if (env->psw.mask & PSW_MASK_64) {
727         env->regs[reg] = address;
728     } else {
729         if (!(env->psw.mask & PSW_MASK_32)) {
730             address &= 0x00ffffff;
731         } else {
732             address &= 0x7fffffff;
733         }
734         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
735     }
736 }
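
/*
 * Worked example (illustrative): in 31-bit mode with env->regs[r] =
 * 0xdeadbeef_ffffffff, set_address_zero(env, r, 0x87654321) masks the
 * address to 31 bits (0x07654321) and deposits it into bit positions
 * 32-63, leaving the high half untouched: 0xdeadbeef_07654321. In 64-bit
 * mode the full address would be stored unchanged.
 */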
737 
738 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
739 {
740     if (env->psw.mask & PSW_MASK_64) {
741         /* 64-Bit mode */
742         env->regs[reg] = address;
743     } else {
744         if (!(env->psw.mask & PSW_MASK_32)) {
745             /* 24-Bit mode. According to the PoO it is implementation
746             dependent if bits 32-39 remain unchanged or are set to
747             zeros.  Choose the former so that the function can also be
748             used for TRT.  */
749             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
750         } else {
751             /* 31-Bit mode. According to the PoO it is implementation
752             dependent if bit 32 remains unchanged or is set to zero.
753             Choose the latter so that the function can also be used for
754             TRT.  */
755             address &= 0x7fffffff;
756             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
757         }
758     }
759 }
760 
761 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
762 {
763     if (!(env->psw.mask & PSW_MASK_64)) {
764         return (uint32_t)length;
765     }
766     return length;
767 }
768 
769 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
770 {
771     if (!(env->psw.mask & PSW_MASK_64)) {
772         /* 24-Bit and 31-Bit mode */
773         length &= 0x7fffffff;
774     }
775     return length;
776 }
777 
778 static inline uint64_t get_length(CPUS390XState *env, int reg)
779 {
780     return wrap_length31(env, env->regs[reg]);
781 }
782 
783 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
784 {
785     if (env->psw.mask & PSW_MASK_64) {
786         /* 64-Bit mode */
787         env->regs[reg] = length;
788     } else {
789         /* 24-Bit and 31-Bit mode */
790         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
791     }
792 }
793 
794 /* search string (c is byte to search, r2 is string, r1 end of string) */
795 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
796 {
797     uintptr_t ra = GETPC();
798     uint64_t end, str;
799     uint32_t len;
800     uint8_t v, c = env->regs[0];
801 
802     /* Bits 32-55 of R0 must be zero.  */
803     if (env->regs[0] & 0xffffff00u) {
804         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
805     }
806 
807     str = get_address(env, r2);
808     end = get_address(env, r1);
809 
810     /* Lest we fail to service interrupts in a timely manner, limit the
811        amount of work we're willing to do.  For now, let's cap at 8k.  */
812     for (len = 0; len < 0x2000; ++len) {
813         if (str + len == end) {
814             /* Character not found.  R1 & R2 are unmodified.  */
815             env->cc_op = 2;
816             return;
817         }
818         v = cpu_ldub_data_ra(env, str + len, ra);
819         if (v == c) {
820             /* Character found.  Set R1 to the location; R2 is unmodified.  */
821             env->cc_op = 1;
822             set_address(env, r1, str + len);
823             return;
824         }
825     }
826 
827     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
828     env->cc_op = 3;
829     set_address(env, r2, str + len);
830 }
831 
832 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
833 {
834     uintptr_t ra = GETPC();
835     uint32_t len;
836     uint16_t v, c = env->regs[0];
837     uint64_t end, str, adj_end;
838 
839     /* Bits 32-47 of R0 must be zero.  */
840     if (env->regs[0] & 0xffff0000u) {
841         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
842     }
843 
844     str = get_address(env, r2);
845     end = get_address(env, r1);
846 
847     /* If the LSB of the two addresses differ, use one extra byte.  */
848     adj_end = end + ((str ^ end) & 1);
849 
850     /* Lest we fail to service interrupts in a timely manner, limit the
851        amount of work we're willing to do.  For now, let's cap at 8k.  */
852     for (len = 0; len < 0x2000; len += 2) {
853         if (str + len == adj_end) {
854             /* End of input found.  */
855             env->cc_op = 2;
856             return;
857         }
858         v = cpu_lduw_data_ra(env, str + len, ra);
859         if (v == c) {
860             /* Character found.  Set R1 to the location; R2 is unmodified.  */
861             env->cc_op = 1;
862             set_address(env, r1, str + len);
863             return;
864         }
865     }
866 
867     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
868     env->cc_op = 3;
869     set_address(env, r2, str + len);
870 }
871 
872 /* unsigned string compare (c is string terminator) */
873 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
874 {
875     uintptr_t ra = GETPC();
876     uint32_t len;
877 
878     c = c & 0xff;
879     s1 = wrap_address(env, s1);
880     s2 = wrap_address(env, s2);
881 
882     /* Lest we fail to service interrupts in a timely manner, limit the
883        amount of work we're willing to do.  For now, let's cap at 8k.  */
884     for (len = 0; len < 0x2000; ++len) {
885         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
886         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
887         if (v1 == v2) {
888             if (v1 == c) {
889                 /* Equal.  CC=0, and don't advance the registers.  */
890                 env->cc_op = 0;
891                 env->retxl = s2;
892                 return s1;
893             }
894         } else {
895             /* Unequal.  CC={1,2}, and advance the registers.  Note that
896                the terminator need not be zero, but the string that contains
897                the terminator is by definition "low".  */
898             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
899             env->retxl = s2 + len;
900             return s1 + len;
901         }
902     }
903 
904     /* CPU-determined bytes equal; advance the registers.  */
905     env->cc_op = 3;
906     env->retxl = s2 + len;
907     return s1 + len;
908 }
909 
910 /* move page */
911 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
912 {
913     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
914     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
915     const int mmu_idx = cpu_mmu_index(env, false);
916     const bool f = extract64(r0, 11, 1);
917     const bool s = extract64(r0, 10, 1);
918     const bool cco = extract64(r0, 8, 1);
919     uintptr_t ra = GETPC();
920     S390Access srca, desta;
921     int exc;
922 
923     if ((f && s) || extract64(r0, 12, 4)) {
924         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
925     }
926 
927     /*
928      * We always manually handle exceptions such that we can properly store
929      * r1/r2 to the lowcore on page-translation exceptions.
930      *
931      * TODO: Access key handling
932      */
933     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
934                             MMU_DATA_LOAD, mmu_idx, ra);
935     if (exc) {
936         if (cco) {
937             return 2;
938         }
939         goto inject_exc;
940     }
941     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
942                             MMU_DATA_STORE, mmu_idx, ra);
943     if (exc) {
944         if (cco && exc != PGM_PROTECTION) {
945             return 1;
946         }
947         goto inject_exc;
948     }
949     access_memmove(env, &desta, &srca, ra);
950     return 0; /* data moved */
951 inject_exc:
952 #if !defined(CONFIG_USER_ONLY)
953     if (exc != PGM_ADDRESSING) {
954         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
955                  env->tlb_fill_tec);
956     }
957     if (exc == PGM_PAGE_TRANS) {
958         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
959                  r1 << 4 | r2);
960     }
961 #endif
962     tcg_s390_program_interrupt(env, exc, ra);
963 }
964 
965 /* string copy */
966 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
967 {
968     const int mmu_idx = cpu_mmu_index(env, false);
969     const uint64_t d = get_address(env, r1);
970     const uint64_t s = get_address(env, r2);
971     const uint8_t c = env->regs[0];
972     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
973     S390Access srca, desta;
974     uintptr_t ra = GETPC();
975     int i;
976 
977     if (env->regs[0] & 0xffffff00ull) {
978         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
979     }
980 
981     /*
982      * Our access should not exceed single pages, as we must not report access
983      * exceptions exceeding the actually copied range (which we don't know at
984      * this point). We might over-indicate watchpoints within the pages
985      * (if we ever care, we have to limit processing to a single byte).
986      */
987     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
988     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
989     for (i = 0; i < len; i++) {
990         const uint8_t v = access_get_byte(env, &srca, i, ra);
991 
992         access_set_byte(env, &desta, i, v, ra);
993         if (v == c) {
994             set_address_zero(env, r1, d + i);
995             return 1;
996         }
997     }
998     set_address_zero(env, r1, d + len);
999     set_address_zero(env, r2, s + len);
1000     return 3;
1001 }
1002 
1003 /* load access registers r1 to r3 from memory at a2 */
1004 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1005 {
1006     uintptr_t ra = GETPC();
1007     int i;
1008 
1009     if (a2 & 0x3) {
1010         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1011     }
1012 
1013     for (i = r1;; i = (i + 1) % 16) {
1014         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1015         a2 += 4;
1016 
1017         if (i == r3) {
1018             break;
1019         }
1020     }
1021 }
1022 
1023 /* store access registers r1 to r3 in memory at a2 */
1024 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1025 {
1026     uintptr_t ra = GETPC();
1027     int i;
1028 
1029     if (a2 & 0x3) {
1030         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1031     }
1032 
1033     for (i = r1;; i = (i + 1) % 16) {
1034         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1035         a2 += 4;
1036 
1037         if (i == r3) {
1038             break;
1039         }
1040     }
1041 }
1042 
1043 /* move long helper */
1044 static inline uint32_t do_mvcl(CPUS390XState *env,
1045                                uint64_t *dest, uint64_t *destlen,
1046                                uint64_t *src, uint64_t *srclen,
1047                                uint16_t pad, int wordsize, uintptr_t ra)
1048 {
1049     const int mmu_idx = cpu_mmu_index(env, false);
1050     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
1051     S390Access srca, desta;
1052     int i, cc;
1053 
1054     if (*destlen == *srclen) {
1055         cc = 0;
1056     } else if (*destlen < *srclen) {
1057         cc = 1;
1058     } else {
1059         cc = 2;
1060     }
1061 
1062     if (!*destlen) {
1063         return cc;
1064     }
1065 
1066     /*
1067      * Only perform one type of operation (move/pad) at a time.
1068      * Stay within single pages.
1069      */
1070     if (*srclen) {
1071         /* Copy the src array */
1072         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
1073         *destlen -= len;
1074         *srclen -= len;
1075         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
1076         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1077         access_memmove(env, &desta, &srca, ra);
1078         *src = wrap_address(env, *src + len);
1079         *dest = wrap_address(env, *dest + len);
1080     } else if (wordsize == 1) {
1081         /* Pad the remaining area */
1082         *destlen -= len;
1083         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1084         access_memset(env, &desta, pad, ra);
1085         *dest = wrap_address(env, *dest + len);
1086     } else {
1087         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
1088 
1089         /* The remaining length selects the padding byte. */
1090         for (i = 0; i < len; (*destlen)--, i++) {
1091             if (*destlen & 1) {
1092                 access_set_byte(env, &desta, i, pad, ra);
1093             } else {
1094                 access_set_byte(env, &desta, i, pad >> 8, ra);
1095             }
1096         }
1097         *dest = wrap_address(env, *dest + len);
1098     }
1099 
1100     return *destlen ? 3 : cc;
1101 }
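
/*
 * Illustration of the wordsize == 2 padding above (MVCLU): the 16-bit pad,
 * e.g. 0x0020 (a UTF-16 space), is written big-endian by parity of the
 * remaining destination length. With *destlen even, the next byte written
 * gets pad >> 8 (0x00); the following one, at odd *destlen, gets the low
 * byte (0x20), so the two-byte pad character repeats across the destination.
 */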
1102 
1103 /* move long */
1104 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1105 {
1106     const int mmu_idx = cpu_mmu_index(env, false);
1107     uintptr_t ra = GETPC();
1108     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1109     uint64_t dest = get_address(env, r1);
1110     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1111     uint64_t src = get_address(env, r2);
1112     uint8_t pad = env->regs[r2 + 1] >> 24;
1113     CPUState *cs = env_cpu(env);
1114     S390Access srca, desta;
1115     uint32_t cc, cur_len;
1116 
1117     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1118         cc = 3;
1119     } else if (srclen == destlen) {
1120         cc = 0;
1121     } else if (destlen < srclen) {
1122         cc = 1;
1123     } else {
1124         cc = 2;
1125     }
1126 
1127     /* We might have to zero-out some bits even if there was no action. */
1128     if (unlikely(!destlen || cc == 3)) {
1129         set_address_zero(env, r2, src);
1130         set_address_zero(env, r1, dest);
1131         return cc;
1132     } else if (!srclen) {
1133         set_address_zero(env, r2, src);
1134     }
1135 
1136     /*
1137      * Only perform one type of operation (move/pad) in one step.
1138      * Stay within single pages.
1139      */
1140     while (destlen) {
1141         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1142         if (!srclen) {
1143             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1144                                    ra);
1145             access_memset(env, &desta, pad, ra);
1146         } else {
1147             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1148 
1149             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1150                                   ra);
1151             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1152                                    ra);
1153             access_memmove(env, &desta, &srca, ra);
1154             src = wrap_address(env, src + cur_len);
1155             srclen -= cur_len;
1156             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1157             set_address_zero(env, r2, src);
1158         }
1159         dest = wrap_address(env, dest + cur_len);
1160         destlen -= cur_len;
1161         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1162         set_address_zero(env, r1, dest);
1163 
1164         /*
1165          * MVCL is interruptible. Return to the main loop if requested after
1166          * writing back all state to registers. If no interrupt will get
1167          * injected, we'll end up back in this handler and continue processing
1168          * the remaining parts.
1169          */
1170         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1171             cpu_loop_exit_restore(cs, ra);
1172         }
1173     }
1174     return cc;
1175 }
1176 
1177 /* move long extended */
1178 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1179                        uint32_t r3)
1180 {
1181     uintptr_t ra = GETPC();
1182     uint64_t destlen = get_length(env, r1 + 1);
1183     uint64_t dest = get_address(env, r1);
1184     uint64_t srclen = get_length(env, r3 + 1);
1185     uint64_t src = get_address(env, r3);
1186     uint8_t pad = a2;
1187     uint32_t cc;
1188 
1189     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1190 
1191     set_length(env, r1 + 1, destlen);
1192     set_length(env, r3 + 1, srclen);
1193     set_address(env, r1, dest);
1194     set_address(env, r3, src);
1195 
1196     return cc;
1197 }
1198 
1199 /* move long unicode */
1200 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1201                        uint32_t r3)
1202 {
1203     uintptr_t ra = GETPC();
1204     uint64_t destlen = get_length(env, r1 + 1);
1205     uint64_t dest = get_address(env, r1);
1206     uint64_t srclen = get_length(env, r3 + 1);
1207     uint64_t src = get_address(env, r3);
1208     uint16_t pad = a2;
1209     uint32_t cc;
1210 
1211     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1212 
1213     set_length(env, r1 + 1, destlen);
1214     set_length(env, r3 + 1, srclen);
1215     set_address(env, r1, dest);
1216     set_address(env, r3, src);
1217 
1218     return cc;
1219 }
1220 
1221 /* compare logical long helper */
1222 static inline uint32_t do_clcl(CPUS390XState *env,
1223                                uint64_t *src1, uint64_t *src1len,
1224                                uint64_t *src3, uint64_t *src3len,
1225                                uint16_t pad, uint64_t limit,
1226                                int wordsize, uintptr_t ra)
1227 {
1228     uint64_t len = MAX(*src1len, *src3len);
1229     uint32_t cc = 0;
1230 
1231     check_alignment(env, *src1len | *src3len, wordsize, ra);
1232 
1233     if (!len) {
1234         return cc;
1235     }
1236 
1237     /* Lest we fail to service interrupts in a timely manner, limit the
1238        amount of work we're willing to do.  */
1239     if (len > limit) {
1240         len = limit;
1241         cc = 3;
1242     }
1243 
1244     for (; len; len -= wordsize) {
1245         uint16_t v1 = pad;
1246         uint16_t v3 = pad;
1247 
1248         if (*src1len) {
1249             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1250         }
1251         if (*src3len) {
1252             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1253         }
1254 
1255         if (v1 != v3) {
1256             cc = (v1 < v3) ? 1 : 2;
1257             break;
1258         }
1259 
1260         if (*src1len) {
1261             *src1 += wordsize;
1262             *src1len -= wordsize;
1263         }
1264         if (*src3len) {
1265             *src3 += wordsize;
1266             *src3len -= wordsize;
1267         }
1268     }
1269 
1270     return cc;
1271 }
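
/*
 * Worked example (illustrative): CLCL comparing a 2-byte operand "AB"
 * against a 4-byte operand "ABAB" with pad = 0x41 ('A' in ASCII, for the
 * sake of the example). The first two iterations compare equal; afterwards
 * src1len is 0, so v1 takes the pad value: 'A' vs 'A' (equal), then
 * 'A' vs 'B' sets cc = 1, since the exhausted, padded operand is low.
 */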
1272 
1273 
1274 /* compare logical long */
1275 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1276 {
1277     uintptr_t ra = GETPC();
1278     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1279     uint64_t src1 = get_address(env, r1);
1280     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1281     uint64_t src3 = get_address(env, r2);
1282     uint8_t pad = env->regs[r2 + 1] >> 24;
1283     uint32_t cc;
1284 
1285     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1286 
1287     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1288     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1289     set_address(env, r1, src1);
1290     set_address(env, r2, src3);
1291 
1292     return cc;
1293 }
1294 
1295 /* compare logical long extended memcompare insn with padding */
1296 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1297                        uint32_t r3)
1298 {
1299     uintptr_t ra = GETPC();
1300     uint64_t src1len = get_length(env, r1 + 1);
1301     uint64_t src1 = get_address(env, r1);
1302     uint64_t src3len = get_length(env, r3 + 1);
1303     uint64_t src3 = get_address(env, r3);
1304     uint8_t pad = a2;
1305     uint32_t cc;
1306 
1307     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1308 
1309     set_length(env, r1 + 1, src1len);
1310     set_length(env, r3 + 1, src3len);
1311     set_address(env, r1, src1);
1312     set_address(env, r3, src3);
1313 
1314     return cc;
1315 }
1316 
1317 /* compare logical long unicode memcompare insn with padding */
1318 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1319                        uint32_t r3)
1320 {
1321     uintptr_t ra = GETPC();
1322     uint64_t src1len = get_length(env, r1 + 1);
1323     uint64_t src1 = get_address(env, r1);
1324     uint64_t src3len = get_length(env, r3 + 1);
1325     uint64_t src3 = get_address(env, r3);
1326     uint16_t pad = a2;
1327     uint32_t cc;
1328 
1329     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1330 
1331     set_length(env, r1 + 1, src1len);
1332     set_length(env, r3 + 1, src3len);
1333     set_address(env, r1, src1);
1334     set_address(env, r3, src3);
1335 
1336     return cc;
1337 }
1338 
1339 /* checksum */
1340 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1341                       uint64_t src, uint64_t src_len)
1342 {
1343     uintptr_t ra = GETPC();
1344     uint64_t max_len, len;
1345     uint64_t cksm = (uint32_t)r1;
1346 
1347     /* Lest we fail to service interrupts in a timely manner, limit the
1348        amount of work we're willing to do.  For now, let's cap at 8k.  */
1349     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1350 
1351     /* Process full words as available.  */
1352     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1353         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1354     }
1355 
1356     switch (max_len - len) {
1357     case 1:
1358         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1359         len += 1;
1360         break;
1361     case 2:
1362         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1363         len += 2;
1364         break;
1365     case 3:
1366         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1367         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1368         len += 3;
1369         break;
1370     }
1371 
1372     /* Fold the carry from the checksum.  Note that we can see carry-out
1373        during folding more than once (but probably not more than twice).  */
1374     while (cksm > 0xffffffffull) {
1375         cksm = (uint32_t)cksm + (cksm >> 32);
1376     }
1377 
1378     /* Indicate whether or not we've processed everything.  */
1379     env->cc_op = (len == src_len ? 0 : 3);
1380 
1381     /* Return both cksm and processed length.  */
1382     env->retxl = cksm;
1383     return len;
1384 }
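
/*
 * Worked example (illustrative) of the carry folding above: summing
 * 0xffffffff + 0x00000002 yields 0x1_00000001; one folding step gives
 * (uint32_t)0x00000001 + 0x1 = 0x00000002. A second step is only needed
 * if the fold itself carries out again.
 */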
1385 
1386 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1387 {
1388     uintptr_t ra = GETPC();
1389     int len_dest = len >> 4;
1390     int len_src = len & 0xf;
1391     uint8_t b;
1392 
1393     dest += len_dest;
1394     src += len_src;
1395 
1396     /* last byte is special, it only flips the nibbles */
1397     b = cpu_ldub_data_ra(env, src, ra);
1398     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1399     src--;
1400     len_src--;
1401 
1402     /* now pack every value */
1403     while (len_dest > 0) {
1404         b = 0;
1405 
1406         if (len_src >= 0) {
1407             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1408             src--;
1409             len_src--;
1410         }
1411         if (len_src >= 0) {
1412             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1413             src--;
1414             len_src--;
1415         }
1416 
1417         len_dest--;
1418         dest--;
1419         cpu_stb_data_ra(env, dest, b, ra);
1420     }
1421 }
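
/*
 * Worked example (illustrative): PACK of the 3-byte zoned decimal
 * 0xf1 0xf2 0xf3 into a 2-byte destination. The rightmost byte swaps its
 * nibbles (0xf3 -> 0x3f, putting the zone where the sign of a packed
 * number lives), then the remaining digits are packed two per byte:
 * dest = 0x12 0x3f.
 */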
1422 
1423 static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1424                            uint32_t srclen, int ssize, uintptr_t ra)
1425 {
1426     int i;
1427     /* The destination operand is always 16 bytes long.  */
1428     const int destlen = 16;
1429 
1430     /* The operands are processed from right to left.  */
1431     src += srclen - 1;
1432     dest += destlen - 1;
1433 
1434     for (i = 0; i < destlen; i++) {
1435         uint8_t b = 0;
1436 
1437         /* Start with a positive sign */
1438         if (i == 0) {
1439             b = 0xc;
1440         } else if (srclen > ssize) {
1441             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1442             src -= ssize;
1443             srclen -= ssize;
1444         }
1445 
1446         if (srclen > ssize) {
1447             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1448             src -= ssize;
1449             srclen -= ssize;
1450         }
1451 
1452         cpu_stb_data_ra(env, dest, b, ra);
1453         dest--;
1454     }
1455 }
1456 
1457 
1458 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1459                  uint32_t srclen)
1460 {
1461     do_pkau(env, dest, src, srclen, 1, GETPC());
1462 }
1463 
1464 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1465                  uint32_t srclen)
1466 {
1467     do_pkau(env, dest, src, srclen, 2, GETPC());
1468 }
1469 
1470 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1471                   uint64_t src)
1472 {
1473     uintptr_t ra = GETPC();
1474     int len_dest = len >> 4;
1475     int len_src = len & 0xf;
1476     uint8_t b;
1477     int second_nibble = 0;
1478 
1479     dest += len_dest;
1480     src += len_src;
1481 
1482     /* last byte is special, it only flips the nibbles */
1483     b = cpu_ldub_data_ra(env, src, ra);
1484     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1485     src--;
1486     len_src--;
1487 
1488     /* now pad every nibble with 0xf0 */
1489 
1490     while (len_dest > 0) {
1491         uint8_t cur_byte = 0;
1492 
1493         if (len_src > 0) {
1494             cur_byte = cpu_ldub_data_ra(env, src, ra);
1495         }
1496 
1497         len_dest--;
1498         dest--;
1499 
1500         /* only advance one nibble at a time */
1501         if (second_nibble) {
1502             cur_byte >>= 4;
1503             len_src--;
1504             src--;
1505         }
1506         second_nibble = !second_nibble;
1507 
1508         /* digit */
1509         cur_byte = (cur_byte & 0xf);
1510         /* zone bits */
1511         cur_byte |= 0xf0;
1512 
1513         cpu_stb_data_ra(env, dest, cur_byte, ra);
1514     }
1515 }
1516 
1517 static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1518                                  uint32_t destlen, int dsize, uint64_t src,
1519                                  uintptr_t ra)
1520 {
1521     int i;
1522     uint32_t cc;
1523     uint8_t b;
1524     /* The source operand is always 16 bytes long.  */
1525     const int srclen = 16;
1526 
1527     /* The operands are processed from right to left.  */
1528     src += srclen - 1;
1529     dest += destlen - dsize;
1530 
1531     /* Check for the sign.  */
1532     b = cpu_ldub_data_ra(env, src, ra);
1533     src--;
1534     switch (b & 0xf) {
1535     case 0xa:
1536     case 0xc:
1537     case 0xe ... 0xf:
1538         cc = 0;  /* plus */
1539         break;
1540     case 0xb:
1541     case 0xd:
1542         cc = 1;  /* minus */
1543         break;
1544     default:
1545     case 0x0 ... 0x9:
1546         cc = 3;  /* invalid */
1547         break;
1548     }
1549 
1550     /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1551     for (i = 0; i < destlen; i += dsize) {
1552         if (i == (31 * dsize)) {
1553             /* If length is 32/64 bytes, the leftmost byte is 0. */
1554             b = 0;
1555         } else if (i % (2 * dsize)) {
1556             b = cpu_ldub_data_ra(env, src, ra);
1557             src--;
1558         } else {
1559             b >>= 4;
1560         }
1561         cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1562         dest -= dsize;
1563     }
1564 
1565     return cc;
1566 }
1567 
1568 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1569                        uint64_t src)
1570 {
1571     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1572 }
1573 
1574 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1575                        uint64_t src)
1576 {
1577     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1578 }
1579 
1580 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1581 {
1582     uintptr_t ra = GETPC();
1583     uint32_t cc = 0;
1584     int i;
1585 
1586     for (i = 0; i < destlen; i++) {
1587         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1588         /* digit */
1589         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1590 
1591         if (i == (destlen - 1)) {
1592             /* sign */
1593             cc |= (b & 0xf) < 0xa ? 1 : 0;
1594         } else {
1595             /* digit */
1596             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1597         }
1598     }
1599 
1600     return cc;
1601 }
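
/*
 * Worked example (illustrative): TP on the 2-byte packed field 0x12 0x3c
 * returns cc 0 (all digits 0-9, valid sign 0xc). A corrupted field
 * 0xa2 0x3c returns cc 2 (invalid digit in the high nibble), and
 * 0x12 0x34 returns cc 1 (digit 0x4 is not a valid sign code).
 */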
1602 
1603 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1604                              uint64_t trans, uintptr_t ra)
1605 {
1606     uint32_t i;
1607 
1608     for (i = 0; i <= len; i++) {
1609         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1610         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1611         cpu_stb_data_ra(env, array + i, new_byte, ra);
1612     }
1613 
1614     return env->cc_op;
1615 }
1616 
1617 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1618                 uint64_t trans)
1619 {
1620     do_helper_tr(env, len, array, trans, GETPC());
1621 }
1622 
1623 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1624                      uint64_t len, uint64_t trans)
1625 {
1626     uintptr_t ra = GETPC();
1627     uint8_t end = env->regs[0] & 0xff;
1628     uint64_t l = len;
1629     uint64_t i;
1630     uint32_t cc = 0;
1631 
1632     if (!(env->psw.mask & PSW_MASK_64)) {
1633         array &= 0x7fffffff;
1634         l = (uint32_t)l;
1635     }
1636 
1637     /* Lest we fail to service interrupts in a timely manner, limit the
1638        amount of work we're willing to do.  For now, let's cap at 8k.  */
1639     if (l > 0x2000) {
1640         l = 0x2000;
1641         cc = 3;
1642     }
1643 
1644     for (i = 0; i < l; i++) {
1645         uint8_t byte, new_byte;
1646 
1647         byte = cpu_ldub_data_ra(env, array + i, ra);
1648 
1649         if (byte == end) {
1650             cc = 1;
1651             break;
1652         }
1653 
1654         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1655         cpu_stb_data_ra(env, array + i, new_byte, ra);
1656     }
1657 
1658     env->cc_op = cc;
1659     env->retxl = len - i;
1660     return array + i;
1661 }
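
     /*
      * Note on the cc = 3 path above: the updated address and remaining
      * length go back to the guest's register pair, so a guest that loops
      * on cc 3, as the architecture expects, resumes exactly where the
      * 8k cap stopped the helper.
      */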
1662 
1663 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1664                                      uint64_t array, uint64_t trans,
1665                                      int inc, uintptr_t ra)
1666 {
1667     int i;
1668 
1669     for (i = 0; i <= len; i++) {
1670         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1671         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1672 
1673         if (sbyte != 0) {
1674             set_address(env, 1, array + i * inc);
1675             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1676             return (i == len) ? 2 : 1;
1677         }
1678     }
1679 
1680     return 0;
1681 }
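
     /*
      * Illustrative TRT run: with a function-code table that is zero
      * except for trans[','] = 1, scanning "ab,cd" stops at the comma,
      * stores its address via general register 1, deposits 0x01 into the
      * low byte of general register 2, and returns cc 1 (cc 2 if the
      * first non-zero function byte sits at the very last position).
      */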
1682 
1683 static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1684                                   uint64_t array, uint64_t trans,
1685                                   uintptr_t ra)
1686 {
1687     return do_helper_trt(env, len, array, trans, 1, ra);
1688 }
1689 
1690 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1691                      uint64_t trans)
1692 {
1693     return do_helper_trt(env, len, array, trans, 1, GETPC());
1694 }
1695 
1696 static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1697                                    uint64_t array, uint64_t trans,
1698                                    uintptr_t ra)
1699 {
1700     return do_helper_trt(env, len, array, trans, -1, ra);
1701 }
1702 
1703 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1704                       uint64_t trans)
1705 {
1706     return do_helper_trt(env, len, array, trans, -1, GETPC());
1707 }
1708 
1709 /* Translate one/two to one/two */
1710 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1711                       uint32_t tst, uint32_t sizes)
1712 {
1713     uintptr_t ra = GETPC();
1714     int dsize = (sizes & 1) ? 1 : 2;
1715     int ssize = (sizes & 2) ? 1 : 2;
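         /*
          * Mapping implied by the decode above: TROO passes sizes = 3
          * (one-byte source and destination), TROT 2, TRTO 1, TRTT 0.
          */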
1716     uint64_t tbl = get_address(env, 1);
1717     uint64_t dst = get_address(env, r1);
1718     uint64_t len = get_length(env, r1 + 1);
1719     uint64_t src = get_address(env, r2);
1720     uint32_t cc = 3;
1721     int i;
1722 
1723     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1724        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1725        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1726     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1727         tbl &= -4096;
1728     } else {
1729         tbl &= -8;
1730     }
1731 
1732     check_alignment(env, len, ssize, ra);
1733 
1734     /* Lest we fail to service interrupts in a timely manner, limit the
1735        amount of work we're willing to do.  */
1736     for (i = 0; i < 0x2000; i++) {
1737         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1738         uint64_t tble = tbl + (sval * dsize);
1739         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1740         if (dval == tst) {
1741             cc = 1;
1742             break;
1743         }
1744         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1745 
1746         len -= ssize;
1747         src += ssize;
1748         dst += dsize;
1749 
1750         if (len == 0) {
1751             cc = 0;
1752             break;
1753         }
1754     }
1755 
1756     set_address(env, r1, dst);
1757     set_length(env, r1 + 1, len);
1758     set_address(env, r2, src);
1759 
1760     return cc;
1761 }
1762 
1763 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1764                   uint32_t r1, uint32_t r3)
1765 {
1766     uintptr_t ra = GETPC();
1767     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1768     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1769     Int128 oldv;
1770     uint64_t oldh, oldl;
1771     bool fail;
1772 
1773     check_alignment(env, addr, 16, ra);
1774 
1775     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1776     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1777 
1778     oldv = int128_make128(oldl, oldh);
1779     fail = !int128_eq(oldv, cmpv);
1780     if (fail) {
1781         newv = oldv;
1782     }
1783 
1784     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1785     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1786 
1787     env->cc_op = fail;
1788     env->regs[r1] = int128_gethi(oldv);
1789     env->regs[r1 + 1] = int128_getlo(oldv);
1790 }
1791 
1792 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1793                            uint32_t r1, uint32_t r3)
1794 {
1795     uintptr_t ra = GETPC();
1796     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1797     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1798     int mem_idx;
1799     MemOpIdx oi;
1800     Int128 oldv;
1801     bool fail;
1802 
1803     assert(HAVE_CMPXCHG128);
1804 
1805     mem_idx = cpu_mmu_index(env, false);
1806     oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1807     oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1808     fail = !int128_eq(oldv, cmpv);
1809 
1810     env->cc_op = fail;
1811     env->regs[r1] = int128_gethi(oldv);
1812     env->regs[r1 + 1] = int128_getlo(oldv);
1813 }
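
     /*
      * In both CDSG variants above, the even/odd halves of the register
      * pairs hold the high and low 64 bits respectively, which is why
      * int128_make128(), taking (lo, hi), is fed regs[rX + 1] first.
      */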
1814 
1815 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1816                         uint64_t a2, bool parallel)
1817 {
1818     uint32_t mem_idx = cpu_mmu_index(env, false);
1819     uintptr_t ra = GETPC();
1820     uint32_t fc = extract32(env->regs[0], 0, 8);
1821     uint32_t sc = extract32(env->regs[0], 8, 8);
1822     uint64_t pl = get_address(env, 1) & -16;
1823     uint64_t svh, svl;
1824     uint32_t cc;
1825 
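     /*
      * FC selects the compare-and-swap width (4 << fc bytes, so fc 0/1/2
      * means word/doubleword/quadword) and SC the store width (1 << sc
      * bytes); the alignment checks below use the same encoding.
      */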
1826     /* Sanity check the function code and storage characteristic.  */
1827     if (fc > 1 || sc > 3) {
1828         if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1829             goto spec_exception;
1830         }
1831         if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1832             goto spec_exception;
1833         }
1834     }
1835 
1836     /* Sanity check the alignments.  */
1837     if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1838         goto spec_exception;
1839     }
1840 
1841     /* Sanity check writability of the store address.  */
1842     probe_write(env, a2, 1 << sc, mem_idx, ra);
1843 
1844     /*
1845      * Note that the compare-and-swap is atomic, and the store is atomic,
1846      * but the complete operation is not.  Therefore we do not need to
1847      * assert serial context in order to implement this.  That said,
1848      * restart early if we can't support either operation that is supposed
1849      * to be atomic.
1850      */
1851     if (parallel) {
1852         uint32_t max = 2;
1853 #ifdef CONFIG_ATOMIC64
1854         max = 3;
1855 #endif
1856         if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1857             (HAVE_ATOMIC128  ? 0 : sc > max)) {
1858             cpu_loop_exit_atomic(env_cpu(env), ra);
1859         }
1860     }
1861 
1862     /* All loads happen before all stores.  For simplicity, load the entire
1863        store value area from the parameter list.  */
1864     svh = cpu_ldq_data_ra(env, pl + 16, ra);
1865     svl = cpu_ldq_data_ra(env, pl + 24, ra);
1866 
1867     switch (fc) {
1868     case 0:
1869         {
1870             uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1871             uint32_t cv = env->regs[r3];
1872             uint32_t ov;
1873 
1874             if (parallel) {
1875 #ifdef CONFIG_USER_ONLY
1876                 uint32_t *haddr = g2h(env_cpu(env), a1);
1877                 ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1878 #else
1879                 MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1880                 ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1881 #endif
1882             } else {
1883                 ov = cpu_ldl_data_ra(env, a1, ra);
1884                 cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1885             }
1886             cc = (ov != cv);
1887             env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1888         }
1889         break;
1890 
1891     case 1:
1892         {
1893             uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1894             uint64_t cv = env->regs[r3];
1895             uint64_t ov;
1896 
1897             if (parallel) {
1898 #ifdef CONFIG_ATOMIC64
1899                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1900                 ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1901 #else
1902                 /* Unreachable: the restart check above excluded this case.  */
1903                 g_assert_not_reached();
1904 #endif
1905             } else {
1906                 ov = cpu_ldq_data_ra(env, a1, ra);
1907                 cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1908             }
1909             cc = (ov != cv);
1910             env->regs[r3] = ov;
1911         }
1912         break;
1913 
1914     case 2:
1915         {
1916             uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1917             uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1918             Int128 nv = int128_make128(nvl, nvh);
1919             Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1920             Int128 ov;
1921 
1922             if (!parallel) {
1923                 uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1924                 uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1925 
1926                 ov = int128_make128(ol, oh);
1927                 cc = !int128_eq(ov, cv);
1928                 if (cc) {
1929                     nv = ov;
1930                 }
1931 
1932                 cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1933                 cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1934             } else if (HAVE_CMPXCHG128) {
1935                 MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1936                 ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1937                 cc = !int128_eq(ov, cv);
1938             } else {
1939                 /* Unreachable: the restart check above excluded this case.  */
1940                 g_assert_not_reached();
1941             }
1942 
1943             env->regs[r3 + 0] = int128_gethi(ov);
1944             env->regs[r3 + 1] = int128_getlo(ov);
1945         }
1946         break;
1947 
1948     default:
1949         g_assert_not_reached();
1950     }
1951 
1952     /* Store only if the comparison succeeded.  Note that above we use a pair
1953        of 64-bit big-endian loads, so for sc < 3 we must extract the value
1954        from the most-significant bits of svh.  */
1955     if (cc == 0) {
1956         switch (sc) {
1957         case 0:
1958             cpu_stb_data_ra(env, a2, svh >> 56, ra);
1959             break;
1960         case 1:
1961             cpu_stw_data_ra(env, a2, svh >> 48, ra);
1962             break;
1963         case 2:
1964             cpu_stl_data_ra(env, a2, svh >> 32, ra);
1965             break;
1966         case 3:
1967             cpu_stq_data_ra(env, a2, svh, ra);
1968             break;
1969         case 4:
1970             if (!parallel) {
1971                 cpu_stq_data_ra(env, a2 + 0, svh, ra);
1972                 cpu_stq_data_ra(env, a2 + 8, svl, ra);
1973             } else if (HAVE_ATOMIC128) {
1974                 MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1975                 Int128 sv = int128_make128(svl, svh);
1976                 cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1977             } else {
1978                 /* Unreachable: the restart check above excluded this case.  */
1979                 g_assert_not_reached();
1980             }
1981             break;
1982         default:
1983             g_assert_not_reached();
1984         }
1985     }
1986 
1987     return cc;
1988 
1989  spec_exception:
1990     tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1991 }
1992 
1993 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1994 {
1995     return do_csst(env, r3, a1, a2, false);
1996 }
1997 
1998 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1999                                uint64_t a2)
2000 {
2001     return do_csst(env, r3, a1, a2, true);
2002 }
2003 
2004 #if !defined(CONFIG_USER_ONLY)
2005 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2006 {
2007     uintptr_t ra = GETPC();
2008     bool PERchanged = false;
2009     uint64_t src = a2;
2010     uint32_t i;
2011 
2012     if (src & 0x7) {
2013         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2014     }
2015 
2016     for (i = r1;; i = (i + 1) % 16) {
2017         uint64_t val = cpu_ldq_data_ra(env, src, ra);
2018         if (env->cregs[i] != val && i >= 9 && i <= 11) {
2019             PERchanged = true;
2020         }
2021         env->cregs[i] = val;
2022         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2023                    i, src, val);
2024         src += sizeof(uint64_t);
2025 
2026         if (i == r3) {
2027             break;
2028         }
2029     }
2030 
2031     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2032         s390_cpu_recompute_watchpoints(env_cpu(env));
2033     }
2034 
2035     tlb_flush(env_cpu(env));
2036 }
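
     /*
      * The modulo-16 walk above implements the architected wraparound,
      * e.g. LCTLG 14,1,... loads control registers 14, 15, 0 and 1 from
      * four consecutive doublewords.
      */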
2037 
2038 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2039 {
2040     uintptr_t ra = GETPC();
2041     bool PERchanged = false;
2042     uint64_t src = a2;
2043     uint32_t i;
2044 
2045     if (src & 0x3) {
2046         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2047     }
2048 
2049     for (i = r1;; i = (i + 1) % 16) {
2050         uint32_t val = cpu_ldl_data_ra(env, src, ra);
2051         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2052             PERchanged = true;
2053         }
2054         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2055         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2056         src += sizeof(uint32_t);
2057 
2058         if (i == r3) {
2059             break;
2060         }
2061     }
2062 
2063     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2064         s390_cpu_recompute_watchpoints(env_cpu(env));
2065     }
2066 
2067     tlb_flush(env_cpu(env));
2068 }
2069 
2070 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2071 {
2072     uintptr_t ra = GETPC();
2073     uint64_t dest = a2;
2074     uint32_t i;
2075 
2076     if (dest & 0x7) {
2077         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2078     }
2079 
2080     for (i = r1;; i = (i + 1) % 16) {
2081         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2082         dest += sizeof(uint64_t);
2083 
2084         if (i == r3) {
2085             break;
2086         }
2087     }
2088 }
2089 
2090 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2091 {
2092     uintptr_t ra = GETPC();
2093     uint64_t dest = a2;
2094     uint32_t i;
2095 
2096     if (dest & 0x3) {
2097         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2098     }
2099 
2100     for (i = r1;; i = (i + 1) % 16) {
2101         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2102         dest += sizeof(uint32_t);
2103 
2104         if (i == r3) {
2105             break;
2106         }
2107     }
2108 }
2109 
2110 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2111 {
2112     uintptr_t ra = GETPC();
2113     int i;
2114 
2115     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2116 
2117     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2118         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2119     }
2120 
2121     return 0;
2122 }
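
     /*
      * Note that this implementation zeroes the block and then always
      * reports it as usable (cc 0); the architected "not usable" cc 1
      * case is never generated here.
      */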
2123 
2124 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2125 {
2126     S390CPU *cpu = env_archcpu(env);
2127     CPUState *cs = env_cpu(env);
2128 
2129     /*
2130      * TODO: we currently don't handle all access protection types
2131      * (including access-list and key-controlled) as well as AR mode.
2132      */
2133     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2134         /* Fetching permitted; storing permitted */
2135         return 0;
2136     }
2137 
2138     if (env->int_pgm_code == PGM_PROTECTION) {
2139         /* retry if reading is possible */
2140         cs->exception_index = -1;
2141         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2142             /* Fetching permitted; storing not permitted */
2143             return 1;
2144         }
2145     }
2146 
2147     switch (env->int_pgm_code) {
2148     case PGM_PROTECTION:
2149         /* Fetching not permitted; storing not permitted */
2150         cs->exception_index = -1;
2151         return 2;
2152     case PGM_ADDRESSING:
2153     case PGM_TRANS_SPEC:
2154         /* exceptions forwarded to the guest */
2155         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2156         return 0;
2157     }
2158 
2159     /* Translation not available */
2160     cs->exception_index = -1;
2161     return 3;
2162 }
2163 
2164 /* insert storage key extended */
2165 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2166 {
2167     static S390SKeysState *ss;
2168     static S390SKeysClass *skeyclass;
2169     uint64_t addr = wrap_address(env, r2);
2170     uint8_t key;
2171     int rc;
2172 
2173     addr = mmu_real2abs(env, addr);
2174     if (!mmu_absolute_addr_valid(addr, false)) {
2175         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2176     }
2177 
2178     if (unlikely(!ss)) {
2179         ss = s390_get_skeys_device();
2180         skeyclass = S390_SKEYS_GET_CLASS(ss);
2181         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2182             tlb_flush_all_cpus_synced(env_cpu(env));
2183         }
2184     }
2185 
2186     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2187     if (rc) {
2188         trace_get_skeys_nonzero(rc);
2189         return 0;
2190     }
2191     return key;
2192 }
2193 
2194 /* set storage key extended */
2195 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2196 {
2197     static S390SKeysState *ss;
2198     static S390SKeysClass *skeyclass;
2199     uint64_t addr = wrap_address(env, r2);
2200     uint8_t key;
2201     int rc;
2202 
2203     addr = mmu_real2abs(env, addr);
2204     if (!mmu_absolute_addr_valid(addr, false)) {
2205         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2206     }
2207 
2208     if (unlikely(!ss)) {
2209         ss = s390_get_skeys_device();
2210         skeyclass = S390_SKEYS_GET_CLASS(ss);
2211         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2212             tlb_flush_all_cpus_synced(env_cpu(env));
2213         }
2214     }
2215 
2216     key = r1 & 0xfe;
2217     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2218     if (rc) {
2219         trace_set_skeys_nonzero(rc);
2220     }
2221     /*
2222      * As we can only flush by virtual address and not all the entries
2223      * that point to a physical address, we have to flush the whole TLB.
2224      */
2225     tlb_flush_all_cpus_synced(env_cpu(env));
2226 }
2227 
2228 /* reset reference bit extended */
2229 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2230 {
2231     uint64_t addr = wrap_address(env, r2);
2232     static S390SKeysState *ss;
2233     static S390SKeysClass *skeyclass;
2234     uint8_t re, key;
2235     int rc;
2236 
2237     addr = mmu_real2abs(env, addr);
2238     if (!mmu_absolute_addr_valid(addr, false)) {
2239         tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2240     }
2241 
2242     if (unlikely(!ss)) {
2243         ss = s390_get_skeys_device();
2244         skeyclass = S390_SKEYS_GET_CLASS(ss);
2245         if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2246             tlb_flush_all_cpus_synced(env_cpu(env));
2247         }
2248     }
2249 
2250     rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2251     if (rc) {
2252         trace_get_skeys_nonzero(rc);
2253         return 0;
2254     }
2255 
2256     re = key & (SK_R | SK_C);
2257     key &= ~SK_R;
2258 
2259     rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2260     if (rc) {
2261         trace_set_skeys_nonzero(rc);
2262         return 0;
2263     }
2264     /*
2265      * As we can only flush by virtual address and not all the entries
2266      * that point to a physical address, we have to flush the whole TLB.
2267      */
2268     tlb_flush_all_cpus_synced(env_cpu(env));
2269 
2270     /*
2271      * cc
2272      *
2273      * 0  Reference bit zero; change bit zero
2274      * 1  Reference bit zero; change bit one
2275      * 2  Reference bit one; change bit zero
2276      * 3  Reference bit one; change bit one
2277      */
2278 
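     /* Assuming the usual SK_R = 0x04 and SK_C = 0x02 bit definitions,
        re >> 1 places R in bit 1 and C in bit 0, matching the table
        above. */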
2279     return re >> 1;
2280 }
2281 
2282 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2283 {
2284     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2285     S390Access srca, desta;
2286     uintptr_t ra = GETPC();
2287     int cc = 0;
2288 
2289     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2290                __func__, l, a1, a2);
2291 
2292     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2293         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2294         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2295     }
2296 
2297     l = wrap_length32(env, l);
2298     if (l > 256) {
2299         /* max 256 */
2300         l = 256;
2301         cc = 3;
2302     } else if (!l) {
2303         return cc;
2304     }
2305 
2306     /* TODO: Access key handling */
2307     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2308     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2309     access_memmove(env, &desta, &srca, ra);
2310     return cc;
2311 }
2312 
2313 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2314 {
2315     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2316     S390Access srca, desta;
2317     uintptr_t ra = GETPC();
2318     int cc = 0;
2319 
2320     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2321                __func__, l, a1, a2);
2322 
2323     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2324         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2325         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2326     }
2327 
2328     l = wrap_length32(env, l);
2329     if (l > 256) {
2330         /* max 256 */
2331         l = 256;
2332         cc = 3;
2333     } else if (!l) {
2334         return cc;
2335     }
2336 
2337     /* TODO: Access key handling */
2338     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2339     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2340     access_memmove(env, &desta, &srca, ra);
2341     return cc;
2342 }
2343 
2344 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2345 {
2346     CPUState *cs = env_cpu(env);
2347     const uintptr_t ra = GETPC();
2348     uint64_t table, entry, raddr;
2349     uint16_t entries, i, index = 0;
2350 
2351     if (r2 & 0xff000) {
2352         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2353     }
2354 
2355     if (!(r2 & 0x800)) {
2356         /* invalidation-and-clearing operation */
2357         table = r1 & ASCE_ORIGIN;
2358         entries = (r2 & 0x7ff) + 1;
2359 
2360         switch (r1 & ASCE_TYPE_MASK) {
2361         case ASCE_TYPE_REGION1:
2362             index = (r2 >> 53) & 0x7ff;
2363             break;
2364         case ASCE_TYPE_REGION2:
2365             index = (r2 >> 42) & 0x7ff;
2366             break;
2367         case ASCE_TYPE_REGION3:
2368             index = (r2 >> 31) & 0x7ff;
2369             break;
2370         case ASCE_TYPE_SEGMENT:
2371             index = (r2 >> 20) & 0x7ff;
2372             break;
2373         }
2374         for (i = 0; i < entries; i++) {
2375             /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2376             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2377             entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2378             if (!(entry & REGION_ENTRY_I)) {
2379                 /* we are allowed to not store if already invalid */
2380                 entry |= REGION_ENTRY_I;
2381                 cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2382             }
2383         }
2384     }
2385 
2386     /* We simply flush the complete TLB; therefore we can ignore r3. */
2387     if (m4 & 1) {
2388         tlb_flush(cs);
2389     } else {
2390         tlb_flush_all_cpus_synced(cs);
2391     }
2392 }
2393 
2394 /* invalidate pte */
2395 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2396                   uint32_t m4)
2397 {
2398     CPUState *cs = env_cpu(env);
2399     const uintptr_t ra = GETPC();
2400     uint64_t page = vaddr & TARGET_PAGE_MASK;
2401     uint64_t pte_addr, pte;
2402 
2403     /* Compute the page table entry address */
2404     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2405     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2406 
2407     /* Mark the page table entry as invalid */
2408     pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2409     pte |= PAGE_ENTRY_I;
2410     cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2411 
2412     /* XXX we exploit the fact that Linux passes the exact virtual
2413        address here - it's not obliged to! */
2414     if (m4 & 1) {
2415         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2416             tlb_flush_page(cs, page);
2417             /* XXX 31-bit hack */
2418             tlb_flush_page(cs, page ^ 0x80000000);
2419         } else {
2420             /* looks like we don't have a valid virtual address */
2421             tlb_flush(cs);
2422         }
2423     } else {
2424         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2425             tlb_flush_page_all_cpus_synced(cs, page);
2426             /* XXX 31-bit hack */
2427             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2428         } else {
2429             /* looks like we don't have a valid virtual address */
2430             tlb_flush_all_cpus_synced(cs);
2431         }
2432     }
2433 }
2434 
2435 /* flush local tlb */
2436 void HELPER(ptlb)(CPUS390XState *env)
2437 {
2438     tlb_flush(env_cpu(env));
2439 }
2440 
2441 /* flush global tlb */
2442 void HELPER(purge)(CPUS390XState *env)
2443 {
2444     tlb_flush_all_cpus_synced(env_cpu(env));
2445 }
2446 
2447 /* load real address */
2448 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2449 {
2450     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2451     uint64_t ret, tec;
2452     int flags, exc, cc;
2453 
2454     /* XXX incomplete - has more corner cases */
2455     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2456         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2457     }
2458 
2459     exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2460     if (exc) {
2461         cc = 3;
2462         ret = exc | 0x80000000;
2463     } else {
2464         cc = 0;
2465         ret |= addr & ~TARGET_PAGE_MASK;
2466     }
2467 
2468     env->cc_op = cc;
2469     return ret;
2470 }
2471 #endif
2472 
2473 /* load pair from quadword */
2474 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2475 {
2476     uintptr_t ra = GETPC();
2477     uint64_t hi, lo;
2478 
2479     check_alignment(env, addr, 16, ra);
2480     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2481     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2482 
2483     env->retxl = lo;
2484     return hi;
2485 }
2486 
2487 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2488 {
2489     uintptr_t ra = GETPC();
2490     uint64_t hi, lo;
2491     int mem_idx;
2492     MemOpIdx oi;
2493     Int128 v;
2494 
2495     assert(HAVE_ATOMIC128);
2496 
2497     mem_idx = cpu_mmu_index(env, false);
2498     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2499     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2500     hi = int128_gethi(v);
2501     lo = int128_getlo(v);
2502 
2503     env->retxl = lo;
2504     return hi;
2505 }
2506 
2507 /* store pair to quadword */
2508 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2509                   uint64_t low, uint64_t high)
2510 {
2511     uintptr_t ra = GETPC();
2512 
2513     check_alignment(env, addr, 16, ra);
2514     cpu_stq_data_ra(env, addr + 0, high, ra);
2515     cpu_stq_data_ra(env, addr + 8, low, ra);
2516 }
2517 
2518 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2519                            uint64_t low, uint64_t high)
2520 {
2521     uintptr_t ra = GETPC();
2522     int mem_idx;
2523     MemOpIdx oi;
2524     Int128 v;
2525 
2526     assert(HAVE_ATOMIC128);
2527 
2528     mem_idx = cpu_mmu_index(env, false);
2529     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2530     v = int128_make128(low, high);
2531     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2532 }
2533 
2534 /* Execute instruction.  This instruction executes an insn modified with
2535    the contents of r1.  It does not change the executed instruction in memory;
2536    it does not change the program counter.
2537 
2538    Perform this by recording the modified instruction in env->ex_value.
2539    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2540 */
2541 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2542 {
2543     uint64_t insn = cpu_lduw_code(env, addr);
2544     uint8_t opc = insn >> 8;
2545 
2546     /* Or in the contents of R1[56:63].  */
2547     insn |= r1 & 0xff;
2548 
2549     /* Load the rest of the instruction.  */
2550     insn <<= 48;
2551     switch (get_ilen(opc)) {
2552     case 2:
2553         break;
2554     case 4:
2555         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2556         break;
2557     case 6:
2558         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2559         break;
2560     default:
2561         g_assert_not_reached();
2562     }
2563 
2564     /* The very most common cases can be sped up by avoiding a new TB.  */
2565     if ((opc & 0xf0) == 0xd0) {
2566         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2567                                       uint64_t, uintptr_t);
2568         static const dx_helper dx[16] = {
2569             [0x0] = do_helper_trt_bkwd,
2570             [0x2] = do_helper_mvc,
2571             [0x4] = do_helper_nc,
2572             [0x5] = do_helper_clc,
2573             [0x6] = do_helper_oc,
2574             [0x7] = do_helper_xc,
2575             [0xc] = do_helper_tr,
2576             [0xd] = do_helper_trt_fwd,
2577         };
2578         dx_helper helper = dx[opc & 0xf];
2579 
2580         if (helper) {
2581             uint32_t l = extract64(insn, 48, 8);
2582             uint32_t b1 = extract64(insn, 44, 4);
2583             uint32_t d1 = extract64(insn, 32, 12);
2584             uint32_t b2 = extract64(insn, 28, 4);
2585             uint32_t d2 = extract64(insn, 16, 12);
2586             uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2587             uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2588 
2589             env->cc_op = helper(env, l, a1, a2, 0);
2590             env->psw.addr += ilen;
2591             return;
2592         }
2593     } else if (opc == 0x0a) {
2594         env->int_svc_code = extract64(insn, 48, 8);
2595         env->int_svc_ilen = ilen;
2596         helper_exception(env, EXCP_SVC);
2597         g_assert_not_reached();
2598     }
2599 
2600     /* Record the insn we want to execute as well as the ilen to use
2601        during the execution of the target insn.  This will also ensure
2602        that ex_value is non-zero, which flags that we are in a state
2603        that requires such execution.  */
2604     env->ex_value = insn | ilen;
2605 }
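
     /*
      * Illustrative fast-path walk-through: EXECUTE of "mvc 0(1,b1),0(b2)"
      * with bits 56..63 of r1 equal to 0x0f ORs the MVC length code up to
      * 0x0f, so do_helper_mvc() runs immediately with l = 15 (a 16-byte
      * move) and no new translation block is built.
      */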
2606 
2607 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2608                        uint64_t len)
2609 {
2610     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2611     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2612     const uint64_t r0 = env->regs[0];
2613     const uintptr_t ra = GETPC();
2614     uint8_t dest_key, dest_as, dest_k, dest_a;
2615     uint8_t src_key, src_as, src_k, src_a;
2616     uint64_t val;
2617     int cc = 0;
2618 
2619     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2620                __func__, dest, src, len);
2621 
2622     if (!(env->psw.mask & PSW_MASK_DAT)) {
2623         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2624     }
2625 
2626     /* OAC (operand access control) for the first operand -> dest */
2627     val = (r0 & 0xffff0000ULL) >> 16;
2628     dest_key = (val >> 12) & 0xf;
2629     dest_as = (val >> 6) & 0x3;
2630     dest_k = (val >> 1) & 0x1;
2631     dest_a = val & 0x1;
2632 
2633     /* OAC (operand access control) for the second operand -> src */
2634     val = (r0 & 0x0000ffffULL);
2635     src_key = (val >> 12) & 0xf;
2636     src_as = (val >> 6) & 0x3;
2637     src_k = (val >> 1) & 0x1;
2638     src_a = val & 0x1;
2639 
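     /*
      * OAC layout sketch, per the decode above: each 16-bit half of r0 is
      * key<<12 | as<<6 | k<<1 | a.  E.g. the illustrative value
      * r0 = 0x00830083 requests, for both operands, access key 0 and the
      * secondary address space (0b10) with both validity bits set.
      */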
2640     if (!dest_k) {
2641         dest_key = psw_key;
2642     }
2643     if (!src_k) {
2644         src_key = psw_key;
2645     }
2646     if (!dest_a) {
2647         dest_as = psw_as;
2648     }
2649     if (!src_a) {
2650         src_as = psw_as;
2651     }
2652 
2653     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2654         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2655     }
2656     if (!(env->cregs[0] & CR0_SECONDARY) &&
2657         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2658         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2659     }
2660     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2661         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2662     }
2663 
2664     len = wrap_length32(env, len);
2665     if (len > 4096) {
2666         cc = 3;
2667         len = 4096;
2668     }
2669 
2670     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2671     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2672         (env->psw.mask & PSW_MASK_PSTATE)) {
2673         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2674                       __func__);
2675         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2676     }
2677 
2678     /* FIXME: Access using correct keys and AR-mode */
2679     if (len) {
2680         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2681                                          mmu_idx_from_as(src_as), ra);
2682         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2683                                           mmu_idx_from_as(dest_as), ra);
2684 
2685         access_memmove(env, &desta, &srca, ra);
2686     }
2687 
2688     return cc;
2689 }
2690 
2691 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2692    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2693    value >= 0 indicates failure, and the CC value to be returned.  */
2694 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2695                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2696                                  uint32_t *ochar, uint32_t *olen);
2697 
2698 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2699    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2700    indicates failure, and the CC value to be returned.  */
2701 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2702                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2703                                  uint32_t *olen);
2704 
2705 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2706                        bool enh_check, uintptr_t ra,
2707                        uint32_t *ochar, uint32_t *olen)
2708 {
2709     uint8_t s0, s1, s2, s3;
2710     uint32_t c, l;
2711 
2712     if (ilen < 1) {
2713         return 0;
2714     }
2715     s0 = cpu_ldub_data_ra(env, addr, ra);
2716     if (s0 <= 0x7f) {
2717         /* one byte character */
2718         l = 1;
2719         c = s0;
2720     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2721         /* invalid character */
2722         return 2;
2723     } else if (s0 <= 0xdf) {
2724         /* two byte character */
2725         l = 2;
2726         if (ilen < 2) {
2727             return 0;
2728         }
2729         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2730         c = s0 & 0x1f;
2731         c = (c << 6) | (s1 & 0x3f);
2732         if (enh_check && (s1 & 0xc0) != 0x80) {
2733             return 2;
2734         }
2735     } else if (s0 <= 0xef) {
2736         /* three byte character */
2737         l = 3;
2738         if (ilen < 3) {
2739             return 0;
2740         }
2741         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2742         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2743         c = s0 & 0x0f;
2744         c = (c << 6) | (s1 & 0x3f);
2745         c = (c << 6) | (s2 & 0x3f);
2746         /* Fold the byte-by-byte range descriptions in the PoO into
2747            tests against the complete value.  This disallows overlong
2748            encodings (those that could be shorter) and the UTF-16 surrogates.  */
2749         if (enh_check
2750             && ((s1 & 0xc0) != 0x80
2751                 || (s2 & 0xc0) != 0x80
2752                 || c < 0x800
2753                 || (c >= 0xd800 && c <= 0xdfff))) {
2754             return 2;
2755         }
2756     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2757         /* four byte character */
2758         l = 4;
2759         if (ilen < 4) {
2760             return 0;
2761         }
2762         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2763         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2764         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2765         c = s0 & 0x07;
2766         c = (c << 6) | (s1 & 0x3f);
2767         c = (c << 6) | (s2 & 0x3f);
2768         c = (c << 6) | (s3 & 0x3f);
2769         /* See above.  */
2770         if (enh_check
2771             && ((s1 & 0xc0) != 0x80
2772                 || (s2 & 0xc0) != 0x80
2773                 || (s3 & 0xc0) != 0x80
2774                 || c < 0x010000
2775                 || c > 0x10ffff)) {
2776             return 2;
2777         }
2778     } else {
2779         /* invalid character */
2780         return 2;
2781     }
2782 
2783     *ochar = c;
2784     *olen = l;
2785     return -1;
2786 }
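
     /*
      * Example decode (illustrative): the sequence 0xc3 0xa9 ("é", U+00E9)
      * takes the two-byte branch, giving c = (0x03 << 6) | 0x29 = 0xe9 and
      * l = 2; with enh_check, the overlong encoding 0xc0 0x80 is rejected
      * up front by the s0 <= 0xc1 test.
      */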
2787 
2788 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2789                         bool enh_check, uintptr_t ra,
2790                         uint32_t *ochar, uint32_t *olen)
2791 {
2792     uint16_t s0, s1;
2793     uint32_t c, l;
2794 
2795     if (ilen < 2) {
2796         return 0;
2797     }
2798     s0 = cpu_lduw_data_ra(env, addr, ra);
2799     if ((s0 & 0xfc00) != 0xd800) {
2800         /* one word character */
2801         l = 2;
2802         c = s0;
2803     } else {
2804         /* two word character */
2805         l = 4;
2806         if (ilen < 4) {
2807             return 0;
2808         }
2809         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2810         c = extract32(s0, 6, 4) + 1;
2811         c = (c << 6) | (s0 & 0x3f);
2812         c = (c << 10) | (s1 & 0x3ff);
2813         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2814             /* invalid surrogate character */
2815             return 2;
2816         }
2817     }
2818 
2819     *ochar = c;
2820     *olen = l;
2821     return -1;
2822 }
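
     /*
      * Example: the surrogate pair 0xd800 0xdc00 decodes to c = 0x10000;
      * extract32(s0, 6, 4) + 1 undoes the "uuuuu - 1" bias of the high
      * surrogate, mirroring the deposit32() in encode_utf16() below.
      */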
2823 
2824 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2825                         bool enh_check, uintptr_t ra,
2826                         uint32_t *ochar, uint32_t *olen)
2827 {
2828     uint32_t c;
2829 
2830     if (ilen < 4) {
2831         return 0;
2832     }
2833     c = cpu_ldl_data_ra(env, addr, ra);
2834     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2835         /* invalid unicode character */
2836         return 2;
2837     }
2838 
2839     *ochar = c;
2840     *olen = 4;
2841     return -1;
2842 }
2843 
2844 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2845                        uintptr_t ra, uint32_t c, uint32_t *olen)
2846 {
2847     uint8_t d[4];
2848     uint32_t l, i;
2849 
2850     if (c <= 0x7f) {
2851         /* one byte character */
2852         l = 1;
2853         d[0] = c;
2854     } else if (c <= 0x7ff) {
2855         /* two byte character */
2856         l = 2;
2857         d[1] = 0x80 | extract32(c, 0, 6);
2858         d[0] = 0xc0 | extract32(c, 6, 5);
2859     } else if (c <= 0xffff) {
2860         /* three byte character */
2861         l = 3;
2862         d[2] = 0x80 | extract32(c, 0, 6);
2863         d[1] = 0x80 | extract32(c, 6, 6);
2864         d[0] = 0xe0 | extract32(c, 12, 4);
2865     } else {
2866         /* four byte character */
2867         l = 4;
2868         d[3] = 0x80 | extract32(c, 0, 6);
2869         d[2] = 0x80 | extract32(c, 6, 6);
2870         d[1] = 0x80 | extract32(c, 12, 6);
2871         d[0] = 0xf0 | extract32(c, 18, 3);
2872     }
2873 
2874     if (ilen < l) {
2875         return 1;
2876     }
2877     for (i = 0; i < l; ++i) {
2878         cpu_stb_data_ra(env, addr + i, d[i], ra);
2879     }
2880 
2881     *olen = l;
2882     return -1;
2883 }
2884 
2885 static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2886                         uintptr_t ra, uint32_t c, uint32_t *olen)
2887 {
2888     uint16_t d0, d1;
2889 
2890     if (c <= 0xffff) {
2891         /* one word character */
2892         if (ilen < 2) {
2893             return 1;
2894         }
2895         cpu_stw_data_ra(env, addr, c, ra);
2896         *olen = 2;
2897     } else {
2898         /* two word character */
2899         if (ilen < 4) {
2900             return 1;
2901         }
2902         d1 = 0xdc00 | extract32(c, 0, 10);
2903         d0 = 0xd800 | extract32(c, 10, 6);
2904         d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2905         cpu_stw_data_ra(env, addr + 0, d0, ra);
2906         cpu_stw_data_ra(env, addr + 2, d1, ra);
2907         *olen = 4;
2908     }
2909 
2910     return -1;
2911 }
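
     /* E.g. c = 0x1f600 encodes to the pair 0xd83d 0xde00 (illustrative). */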
2912 
2913 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2914                         uintptr_t ra, uint32_t c, uint32_t *olen)
2915 {
2916     if (ilen < 4) {
2917         return 1;
2918     }
2919     cpu_stl_data_ra(env, addr, c, ra);
2920     *olen = 4;
2921     return -1;
2922 }
2923 
2924 static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2925                                        uint32_t r2, uint32_t m3, uintptr_t ra,
2926                                        decode_unicode_fn decode,
2927                                        encode_unicode_fn encode)
2928 {
2929     uint64_t dst = get_address(env, r1);
2930     uint64_t dlen = get_length(env, r1 + 1);
2931     uint64_t src = get_address(env, r2);
2932     uint64_t slen = get_length(env, r2 + 1);
2933     bool enh_check = m3 & 1;
2934     int cc, i;
2935 
2936     /* Lest we fail to service interrupts in a timely manner, limit the
2937        amount of work we're willing to do.  For now, let's cap at 256.  */
2938     for (i = 0; i < 256; ++i) {
2939         uint32_t c, ilen, olen;
2940 
2941         cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2942         if (unlikely(cc >= 0)) {
2943             break;
2944         }
2945         cc = encode(env, dst, dlen, ra, c, &olen);
2946         if (unlikely(cc >= 0)) {
2947             break;
2948         }
2949 
2950         src += ilen;
2951         slen -= ilen;
2952         dst += olen;
2953         dlen -= olen;
2954         cc = 3;
2955     }
2956 
2957     set_address(env, r1, dst);
2958     set_length(env, r1 + 1, dlen);
2959     set_address(env, r2, src);
2960     set_length(env, r2 + 1, slen);
2961 
2962     return cc;
2963 }
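
     /*
      * As with TRE above, returning cc 3 once the 256-character cap is hit
      * leaves both register pairs updated, so the guest re-executes the
      * CUxx instruction to resume the conversion.
      */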
2964 
2965 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2966 {
2967     return convert_unicode(env, r1, r2, m3, GETPC(),
2968                            decode_utf8, encode_utf16);
2969 }
2970 
2971 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2972 {
2973     return convert_unicode(env, r1, r2, m3, GETPC(),
2974                            decode_utf8, encode_utf32);
2975 }
2976 
2977 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2978 {
2979     return convert_unicode(env, r1, r2, m3, GETPC(),
2980                            decode_utf16, encode_utf8);
2981 }
2982 
2983 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2984 {
2985     return convert_unicode(env, r1, r2, m3, GETPC(),
2986                            decode_utf16, encode_utf32);
2987 }
2988 
2989 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2990 {
2991     return convert_unicode(env, r1, r2, m3, GETPC(),
2992                            decode_utf32, encode_utf8);
2993 }
2994 
2995 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2996 {
2997     return convert_unicode(env, r1, r2, m3, GETPC(),
2998                            decode_utf32, encode_utf16);
2999 }
3000 
3001 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3002                         uintptr_t ra)
3003 {
3004     /* test the actual access, not just any access to the page, as LAP covers only specific addresses */
3005     while (len) {
3006         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3007         const uint64_t curlen = MIN(pagelen, len);
3008 
3009         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3010         addr = wrap_address(env, addr + curlen);
3011         len -= curlen;
3012     }
3013 }
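
     /*
      * E.g. (illustrative): probing 12 bytes that start 4 bytes before a
      * page boundary issues two probe_write() calls, one for the 4 bytes
      * on the first page and one for the remaining 8 on the next.
      */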
3014 
3015 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3016 {
3017     probe_write_access(env, addr, len, GETPC());
3018 }
3019