xref: /qemu/target/s390x/tcg/mem_helper.c (revision f47b6eab)
1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "qemu/log.h"
23 #include "cpu.h"
24 #include "s390x-internal.h"
25 #include "tcg_s390x.h"
26 #include "exec/helper-proto.h"
27 #include "exec/exec-all.h"
28 #include "exec/cpu_ldst.h"
29 #include "hw/core/tcg-cpu-ops.h"
30 #include "qemu/int128.h"
31 #include "qemu/atomic128.h"
32 #include "trace.h"
33 
34 #if !defined(CONFIG_USER_ONLY)
35 #include "hw/s390x/storage-keys.h"
36 #include "hw/boards.h"
37 #endif
38 
39 #ifdef CONFIG_USER_ONLY
40 # define user_or_likely(X)    true
41 #else
42 # define user_or_likely(X)    likely(X)
43 #endif
44 
45 /*****************************************************************************/
46 /* Softmmu support */
47 
48 /* #define DEBUG_HELPER */
49 #ifdef DEBUG_HELPER
50 #define HELPER_LOG(x...) qemu_log(x)
51 #else
52 #define HELPER_LOG(x...)
53 #endif
54 
55 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
56 {
57     uint16_t pkm = env->cregs[3] >> 16;
58 
59     if (env->psw.mask & PSW_MASK_PSTATE) {
60         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
61         return pkm & (0x8000 >> psw_key);
62     }
63     return true;
64 }
65 
66 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
67                                    uint64_t src, uint32_t len)
68 {
69     if (!len || src == dest) {
70         return false;
71     }
72     /* Take care of wrapping at the end of address space. */
73     if (unlikely(wrap_address(env, src + len - 1) < src)) {
74         return dest > src || dest <= wrap_address(env, src + len - 1);
75     }
76     return dest > src && dest <= src + len - 1;
77 }
78 
79 /* Trigger a SPECIFICATION exception if an address or a length is not
80    naturally aligned.  */
81 static inline void check_alignment(CPUS390XState *env, uint64_t v,
82                                    int wordsize, uintptr_t ra)
83 {
84     if (v % wordsize) {
85         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
86     }
87 }
88 
89 /* Load a value from memory according to its size.  */
90 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
91                                            int wordsize, uintptr_t ra)
92 {
93     switch (wordsize) {
94     case 1:
95         return cpu_ldub_data_ra(env, addr, ra);
96     case 2:
97         return cpu_lduw_data_ra(env, addr, ra);
98     default:
99         abort();
100     }
101 }
102 
103 /* Store a to memory according to its size.  */
104 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
105                                       uint64_t value, int wordsize,
106                                       uintptr_t ra)
107 {
108     switch (wordsize) {
109     case 1:
110         cpu_stb_data_ra(env, addr, value, ra);
111         break;
112     case 2:
113         cpu_stw_data_ra(env, addr, value, ra);
114         break;
115     default:
116         abort();
117     }
118 }
119 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest address of the first fragment */
    target_ulong vaddr2;   /* guest address of the second fragment, if any */
    void *haddr1;          /* host address of fragment 1; NULL -> use helpers */
    void *haddr2;          /* host address of fragment 2; NULL -> use helpers */
    uint16_t size1;        /* bytes in the first fragment */
    uint16_t size2;        /* bytes in the second fragment (0: single page) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
136 
/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    /* Resolve the host address (if any) into *phost. */
    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        /* The tlb_fill hook recorded the exception type for us. */
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}
175 
176 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
177                              bool nonfault, vaddr vaddr1, int size,
178                              MMUAccessType access_type,
179                              int mmu_idx, uintptr_t ra)
180 {
181     int size1, size2, exc;
182 
183     assert(size > 0 && size <= 4096);
184 
185     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
186     size2 = size - size1;
187 
188     memset(access, 0, sizeof(*access));
189     access->vaddr1 = vaddr1;
190     access->size1 = size1;
191     access->size2 = size2;
192     access->mmu_idx = mmu_idx;
193 
194     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
195                             &access->haddr1, ra);
196     if (unlikely(exc)) {
197         return exc;
198     }
199     if (unlikely(size2)) {
200         /* The access crosses page boundaries. */
201         vaddr vaddr2 = wrap_address(env, vaddr1 + size1);
202 
203         access->vaddr2 = vaddr2;
204         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
205                                 nonfault, &access->haddr2, ra);
206         if (unlikely(exc)) {
207             return exc;
208         }
209     }
210     return 0;
211 }
212 
213 static inline void access_prepare(S390Access *ret, CPUS390XState *env,
214                                   vaddr vaddr, int size,
215                                   MMUAccessType access_type, int mmu_idx,
216                                   uintptr_t ra)
217 {
218     int exc = access_prepare_nf(ret, env, false, vaddr, size,
219                                 access_type, mmu_idx, ra);
220     assert(!exc);
221 }
222 
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only always has a direct host mapping. */
    memset(haddr, byte, size);
#else
    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /* No direct host mapping (e.g. MMIO): store byte by byte. */
        MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
        for (int i = 0; i < size; i++) {
            cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
        }
    }
#endif
}
241 
242 static void access_memset(CPUS390XState *env, S390Access *desta,
243                           uint8_t byte, uintptr_t ra)
244 {
245 
246     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
247                      desta->mmu_idx, ra);
248     if (likely(!desta->size2)) {
249         return;
250     }
251     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
252                      desta->mmu_idx, ra);
253 }
254 
255 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
256                                int offset, uintptr_t ra)
257 {
258     target_ulong vaddr = access->vaddr1;
259     void *haddr = access->haddr1;
260 
261     if (unlikely(offset >= access->size1)) {
262         offset -= access->size1;
263         vaddr = access->vaddr2;
264         haddr = access->haddr2;
265     }
266 
267     if (user_or_likely(haddr)) {
268         return ldub_p(haddr + offset);
269     } else {
270         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
271         return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
272     }
273 }
274 
275 static void access_set_byte(CPUS390XState *env, S390Access *access,
276                             int offset, uint8_t byte, uintptr_t ra)
277 {
278     target_ulong vaddr = access->vaddr1;
279     void *haddr = access->haddr1;
280 
281     if (unlikely(offset >= access->size1)) {
282         offset -= access->size1;
283         vaddr = access->vaddr2;
284         haddr = access->haddr2;
285     }
286 
287     if (user_or_likely(haddr)) {
288         stb_p(haddr + offset, byte);
289     } else {
290         MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
291         cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
292     }
293 }
294 
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;
    int diff;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /*
     * Source and destination can each span up to two host pages whose
     * split points generally differ, so up to three memmove() calls are
     * needed.  @diff selects the case by comparing first-fragment sizes.
     */
    diff = desta->size1 - srca->size1;
    if (likely(diff == 0)) {
        /* Both operands split at the same offset. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (diff > 0) {
        /* The destination's first fragment is the longer one. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* The source's first fragment is the longer one. */
        diff = -diff;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
341 
342 static int mmu_idx_from_as(uint8_t as)
343 {
344     switch (as) {
345     case AS_PRIMARY:
346         return MMU_PRIMARY_IDX;
347     case AS_SECONDARY:
348         return MMU_SECONDARY_IDX;
349     case AS_HOME:
350         return MMU_HOME_IDX;
351     default:
352         /* FIXME AS_ACCREG */
353         g_assert_not_reached();
354     }
355 }
356 
357 /* and on array */
358 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
359                              uint64_t src, uintptr_t ra)
360 {
361     const int mmu_idx = cpu_mmu_index(env, false);
362     S390Access srca1, srca2, desta;
363     uint32_t i;
364     uint8_t c = 0;
365 
366     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
367                __func__, l, dest, src);
368 
369     /* NC always processes one more byte than specified - maximum is 256 */
370     l++;
371 
372     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
373     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
374     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
375     for (i = 0; i < l; i++) {
376         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
377                           access_get_byte(env, &srca2, i, ra);
378 
379         c |= x;
380         access_set_byte(env, &desta, i, x, ra);
381     }
382     return c != 0;
383 }
384 
385 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
386                     uint64_t src)
387 {
388     return do_helper_nc(env, l, dest, src, GETPC());
389 }
390 
391 /* xor on array */
392 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
393                              uint64_t src, uintptr_t ra)
394 {
395     const int mmu_idx = cpu_mmu_index(env, false);
396     S390Access srca1, srca2, desta;
397     uint32_t i;
398     uint8_t c = 0;
399 
400     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
401                __func__, l, dest, src);
402 
403     /* XC always processes one more byte than specified - maximum is 256 */
404     l++;
405 
406     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
407     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
408     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
409 
410     /* xor with itself is the same as memset(0) */
411     if (src == dest) {
412         access_memset(env, &desta, 0, ra);
413         return 0;
414     }
415 
416     for (i = 0; i < l; i++) {
417         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
418                           access_get_byte(env, &srca2, i, ra);
419 
420         c |= x;
421         access_set_byte(env, &desta, i, x, ra);
422     }
423     return c != 0;
424 }
425 
426 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
427                     uint64_t src)
428 {
429     return do_helper_xc(env, l, dest, src, GETPC());
430 }
431 
432 /* or on array */
433 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
434                              uint64_t src, uintptr_t ra)
435 {
436     const int mmu_idx = cpu_mmu_index(env, false);
437     S390Access srca1, srca2, desta;
438     uint32_t i;
439     uint8_t c = 0;
440 
441     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442                __func__, l, dest, src);
443 
444     /* OC always processes one more byte than specified - maximum is 256 */
445     l++;
446 
447     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450     for (i = 0; i < l; i++) {
451         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
452                           access_get_byte(env, &srca2, i, ra);
453 
454         c |= x;
455         access_set_byte(env, &desta, i, x, ra);
456     }
457     return c != 0;
458 }
459 
460 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
461                     uint64_t src)
462 {
463     return do_helper_oc(env, l, dest, src, GETPC());
464 }
465 
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* One-byte shift: byte-at-a-time copy propagates byte 0 everywhere. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: emulate strict byte-at-a-time semantics. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* MVC leaves the condition code unchanged; return the current one. */
    return env->cc_op;
}
502 
503 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
504 {
505     do_helper_mvc(env, l, dest, src, GETPC());
506 }
507 
508 /* move right to left */
509 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
510 {
511     const int mmu_idx = cpu_mmu_index(env, false);
512     const uint64_t ra = GETPC();
513     S390Access srca, desta;
514     int32_t i;
515 
516     /* MVCRL always copies one more byte than specified - maximum is 256 */
517     l &= 0xff;
518     l++;
519 
520     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
521     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
522 
523     for (i = l - 1; i >= 0; i--) {
524         uint8_t byte = access_get_byte(env, &srca, i, ra);
525         access_set_byte(env, &desta, i, byte, ra);
526     }
527 }
528 
529 /* move inverse  */
530 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
531 {
532     const int mmu_idx = cpu_mmu_index(env, false);
533     S390Access srca, desta;
534     uintptr_t ra = GETPC();
535     int i;
536 
537     /* MVCIN always copies one more byte than specified - maximum is 256 */
538     l++;
539 
540     src = wrap_address(env, src - l + 1);
541     access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
542     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
543     for (i = 0; i < l; i++) {
544         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
545 
546         access_set_byte(env, &desta, i, x, ra);
547     }
548 }
549 
550 /* move numerics  */
551 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
552 {
553     const int mmu_idx = cpu_mmu_index(env, false);
554     S390Access srca1, srca2, desta;
555     uintptr_t ra = GETPC();
556     int i;
557 
558     /* MVN always copies one more byte than specified - maximum is 256 */
559     l++;
560 
561     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
562     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
563     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
564     for (i = 0; i < l; i++) {
565         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
566                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
567 
568         access_set_byte(env, &desta, i, x, ra);
569     }
570 }
571 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep the low nibble of dest, shift in src. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry over the remaining high nibble of the previous src byte. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;   /* source exhausted: pad with zero nibbles */
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
605 
606 /* move zones  */
607 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
608 {
609     const int mmu_idx = cpu_mmu_index(env, false);
610     S390Access srca1, srca2, desta;
611     uintptr_t ra = GETPC();
612     int i;
613 
614     /* MVZ always copies one more byte than specified - maximum is 256 */
615     l++;
616 
617     access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
618     access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
619     access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
620     for (i = 0; i < l; i++) {
621         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
622                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
623 
624         access_set_byte(env, &desta, i, x, ra);
625     }
626 }
627 
628 /* compare unsigned byte arrays */
629 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
630                               uint64_t s2, uintptr_t ra)
631 {
632     uint32_t i;
633     uint32_t cc = 0;
634 
635     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
636                __func__, l, s1, s2);
637 
638     for (i = 0; i <= l; i++) {
639         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
640         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
641         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
642         if (x < y) {
643             cc = 1;
644             break;
645         } else if (x > y) {
646             cc = 2;
647             break;
648         }
649     }
650 
651     HELPER_LOG("\n");
652     return cc;
653 }
654 
655 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
656 {
657     return do_helper_clc(env, l, s1, s2, GETPC());
658 }
659 
660 /* compare logical under mask */
661 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
662                      uint64_t addr)
663 {
664     uintptr_t ra = GETPC();
665     uint32_t cc = 0;
666 
667     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
668                mask, addr);
669 
670     while (mask) {
671         if (mask & 8) {
672             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
673             uint8_t r = extract32(r1, 24, 8);
674             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
675                        addr);
676             if (r < d) {
677                 cc = 1;
678                 break;
679             } else if (r > d) {
680                 cc = 2;
681                 break;
682             }
683             addr++;
684         }
685         mask = (mask << 1) & 0xf;
686         r1 <<= 8;
687     }
688 
689     HELPER_LOG("\n");
690     return cc;
691 }
692 
693 static inline uint64_t get_address(CPUS390XState *env, int reg)
694 {
695     return wrap_address(env, env->regs[reg]);
696 }
697 
698 /*
699  * Store the address to the given register, zeroing out unused leftmost
700  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
701  */
702 static inline void set_address_zero(CPUS390XState *env, int reg,
703                                     uint64_t address)
704 {
705     if (env->psw.mask & PSW_MASK_64) {
706         env->regs[reg] = address;
707     } else {
708         if (!(env->psw.mask & PSW_MASK_32)) {
709             address &= 0x00ffffff;
710         } else {
711             address &= 0x7fffffff;
712         }
713         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
714     }
715 }
716 
717 static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
718 {
719     if (env->psw.mask & PSW_MASK_64) {
720         /* 64-Bit mode */
721         env->regs[reg] = address;
722     } else {
723         if (!(env->psw.mask & PSW_MASK_32)) {
724             /* 24-Bit mode. According to the PoO it is implementation
725             dependent if bits 32-39 remain unchanged or are set to
726             zeros.  Choose the former so that the function can also be
727             used for TRT.  */
728             env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
729         } else {
730             /* 31-Bit mode. According to the PoO it is implementation
731             dependent if bit 32 remains unchanged or is set to zero.
732             Choose the latter so that the function can also be used for
733             TRT.  */
734             address &= 0x7fffffff;
735             env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
736         }
737     }
738 }
739 
740 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
741 {
742     if (!(env->psw.mask & PSW_MASK_64)) {
743         return (uint32_t)length;
744     }
745     return length;
746 }
747 
748 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
749 {
750     if (!(env->psw.mask & PSW_MASK_64)) {
751         /* 24-Bit and 31-Bit mode */
752         length &= 0x7fffffff;
753     }
754     return length;
755 }
756 
757 static inline uint64_t get_length(CPUS390XState *env, int reg)
758 {
759     return wrap_length31(env, env->regs[reg]);
760 }
761 
762 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
763 {
764     if (env->psw.mask & PSW_MASK_64) {
765         /* 64-Bit mode */
766         env->regs[reg] = length;
767     } else {
768         /* 24-Bit and 31-Bit mode */
769         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
770     }
771 }
772 
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];    /* search character: low byte of R0 */

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
810 
/* search string unicode: like SRST, but searching 2-byte units */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];   /* search character: low 2 bytes of R0 */
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
850 
/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of the terminator register is used. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                /* The packed pair carries the new (s2, s1) register values. */
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}
885 
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    /* cco: report inaccessible pages via the condition code, don't fault. */
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* F and S are mutually exclusive; bits 12-15 of r0 must be zero. */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            /* cc 2: source page inaccessible */
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            /* cc 1: destination page inaccessible (protection still faults) */
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    /* Store the translation-exception code to the lowcore. */
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    /* For page-translation exceptions, also record which operand faulted. */
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
940 
/* string copy */
/*
 * MVST: copy the string at the r2 address to the r1 address, up to and
 * including the terminator byte held in the low byte of r0.  Processing
 * is capped at the nearer of the two next page boundaries; cc 3 asks the
 * caller to re-execute for the remainder.
 *
 * Returns cc 1 when the terminator was copied, cc 3 otherwise.
 */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    /* Bytes remaining until the first page crossing of either operand.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 32..55 of r0 must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: r1 points at it, r2 is unchanged.  */
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached; advance both addresses and request redo.  */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
978 
979 /* load access registers r1 to r3 from memory at a2 */
980 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
981 {
982     uintptr_t ra = GETPC();
983     int i;
984 
985     if (a2 & 0x3) {
986         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
987     }
988 
989     for (i = r1;; i = (i + 1) % 16) {
990         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
991         a2 += 4;
992 
993         if (i == r3) {
994             break;
995         }
996     }
997 }
998 
999 /* store access registers r1 to r3 in memory at a2 */
1000 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1001 {
1002     uintptr_t ra = GETPC();
1003     int i;
1004 
1005     if (a2 & 0x3) {
1006         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1007     }
1008 
1009     for (i = r1;; i = (i + 1) % 16) {
1010         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1011         a2 += 4;
1012 
1013         if (i == r3) {
1014             break;
1015         }
1016     }
1017 }
1018 
/* move long helper */
/*
 * Perform one page-bounded step of MVCLE/MVCLU: either copy from the
 * source or pad the destination, never both in the same call.  All of
 * dest/destlen/src/srclen are updated in place so the caller can write
 * them back to the register pairs.  wordsize is 1 (MVCLE, 1-byte pad)
 * or 2 (MVCLU, 2-byte pad).
 *
 * Returns cc 3 while destination bytes remain, else 0/1/2 comparing the
 * original lengths (equal / dest shorter / dest longer).
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Bytes until the destination's next page boundary.  */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* MVCLU: alternate the two pad bytes, driven by *destlen parity.  */
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1078 
/* move long */
/*
 * MVCL: move up to 2^24-1 bytes from the r2 to the r1 operand, padding
 * the destination with the byte from bits 32..39 of r2+1 once the
 * source is exhausted.  Unlike MVCLE this loops to completion here,
 * but writes the register state back after every page-sized chunk so
 * the instruction can be interrupted and resumed.
 *
 * Returns cc 0/1/2 from the length comparison, or 3 on destructive
 * overlap (in which case nothing is moved).
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also limit to the source's next page boundary.  */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1152 
1153 /* move long extended */
1154 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1155                        uint32_t r3)
1156 {
1157     uintptr_t ra = GETPC();
1158     uint64_t destlen = get_length(env, r1 + 1);
1159     uint64_t dest = get_address(env, r1);
1160     uint64_t srclen = get_length(env, r3 + 1);
1161     uint64_t src = get_address(env, r3);
1162     uint8_t pad = a2;
1163     uint32_t cc;
1164 
1165     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1166 
1167     set_length(env, r1 + 1, destlen);
1168     set_length(env, r3 + 1, srclen);
1169     set_address(env, r1, dest);
1170     set_address(env, r3, src);
1171 
1172     return cc;
1173 }
1174 
1175 /* move long unicode */
1176 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1177                        uint32_t r3)
1178 {
1179     uintptr_t ra = GETPC();
1180     uint64_t destlen = get_length(env, r1 + 1);
1181     uint64_t dest = get_address(env, r1);
1182     uint64_t srclen = get_length(env, r3 + 1);
1183     uint64_t src = get_address(env, r3);
1184     uint16_t pad = a2;
1185     uint32_t cc;
1186 
1187     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1188 
1189     set_length(env, r1 + 1, destlen);
1190     set_length(env, r3 + 1, srclen);
1191     set_address(env, r1, dest);
1192     set_address(env, r3, src);
1193 
1194     return cc;
1195 }
1196 
/* compare logical long helper */
/*
 * Shared body of CLCL/CLCLE/CLCLU: compare the two operands element by
 * element (wordsize 1 or 2), extending the shorter operand with 'pad'.
 * Addresses and lengths are updated in place up to (not including) the
 * first mismatch.  'limit' caps the number of bytes processed per call
 * so pending interrupts can be serviced; hitting the cap yields cc 3.
 *
 * Returns cc 0 (equal), 1 (operand 1 low), 2 (operand 1 high), or 3
 * (limit reached, caller should re-execute).
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* Exhausted operands compare as the padding character.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have data left.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1248 
1249 
1250 /* compare logical long */
1251 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1252 {
1253     uintptr_t ra = GETPC();
1254     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1255     uint64_t src1 = get_address(env, r1);
1256     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1257     uint64_t src3 = get_address(env, r2);
1258     uint8_t pad = env->regs[r2 + 1] >> 24;
1259     uint32_t cc;
1260 
1261     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1262 
1263     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1264     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1265     set_address(env, r1, src1);
1266     set_address(env, r2, src3);
1267 
1268     return cc;
1269 }
1270 
1271 /* compare logical long extended memcompare insn with padding */
1272 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1273                        uint32_t r3)
1274 {
1275     uintptr_t ra = GETPC();
1276     uint64_t src1len = get_length(env, r1 + 1);
1277     uint64_t src1 = get_address(env, r1);
1278     uint64_t src3len = get_length(env, r3 + 1);
1279     uint64_t src3 = get_address(env, r3);
1280     uint8_t pad = a2;
1281     uint32_t cc;
1282 
1283     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1284 
1285     set_length(env, r1 + 1, src1len);
1286     set_length(env, r3 + 1, src3len);
1287     set_address(env, r1, src1);
1288     set_address(env, r3, src3);
1289 
1290     return cc;
1291 }
1292 
1293 /* compare logical long unicode memcompare insn with padding */
1294 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1295                        uint32_t r3)
1296 {
1297     uintptr_t ra = GETPC();
1298     uint64_t src1len = get_length(env, r1 + 1);
1299     uint64_t src1 = get_address(env, r1);
1300     uint64_t src3len = get_length(env, r3 + 1);
1301     uint64_t src3 = get_address(env, r3);
1302     uint16_t pad = a2;
1303     uint32_t cc = 0;
1304 
1305     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1306 
1307     set_length(env, r1 + 1, src1len);
1308     set_length(env, r3 + 1, src3len);
1309     set_address(env, r1, src1);
1310     set_address(env, r3, src3);
1311 
1312     return cc;
1313 }
1314 
/* checksum */
/*
 * CKSM: accumulate a 32-bit ones'-complement-style checksum over up to
 * 8k bytes of the source, starting from the low 32 bits of r1.
 * cc_op is set to 0 when the whole operand was consumed, 3 otherwise
 * (caller re-executes).  Returns {checksum, bytes processed}.
 */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* A 1..3 byte tail is treated as a word padded with zeros on the right. */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}
1360 
/*
 * PACK: convert the zoned-decimal source into packed decimal at dest,
 * processing right to left.  'len' encodes both operand lengths:
 * bits 4..7 hold len_dest, bits 0..3 hold len_src (each is the operand
 * length minus one).
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble from the next source byte, if any remains.  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble from the byte after that; zero-filled otherwise.  */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1397 
/*
 * Shared body of PKA/PKU: pack the low nibble of every source element
 * (ssize 1 for ASCII, 2 for Unicode) into a 16-byte packed-decimal
 * field at dest, right to left, forcing a positive sign (0xc) into the
 * rightmost nibble.  Excess destination digits are zero-filled.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble from the next element, if any remains.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1431 
1432 
/* PKA: pack ASCII, i.e. 1-byte source elements.  */
void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}
1438 
/* PKU: pack Unicode, i.e. 2-byte source elements.  */
void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}
1444 
/*
 * UNPK: convert the packed-decimal source into zoned decimal at dest,
 * right to left.  'len' encodes both operand lengths: bits 4..7 hold
 * len_dest, bits 0..3 hold len_src (each the operand length minus one).
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte comes next.  */
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1491 
/*
 * Shared body of UNPKA/UNPKU: unpack the 16-byte packed-decimal source
 * into destlen bytes of dsize-wide elements (1 for ASCII, 2 for
 * Unicode) at dest, right to left, adding the 0x30 zone to each digit.
 * The sign nibble is consumed but not stored; it only determines cc.
 *
 * Returns cc 0 (plus), 1 (minus) or 3 (invalid sign code).
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd element: fetch the next source byte (low nibble first). */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even element: use the high nibble of the previous byte.  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1542 
/* UNPKA: unpack to ASCII, i.e. 1-byte destination elements.  */
uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}
1548 
/* UNPKU: unpack to Unicode, i.e. 2-byte destination elements.  */
uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}
1554 
1555 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1556 {
1557     uintptr_t ra = GETPC();
1558     uint32_t cc = 0;
1559     int i;
1560 
1561     for (i = 0; i < destlen; i++) {
1562         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1563         /* digit */
1564         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1565 
1566         if (i == (destlen - 1)) {
1567             /* sign */
1568             cc |= (b & 0xf) < 0xa ? 1 : 0;
1569         } else {
1570             /* digit */
1571             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1572         }
1573     }
1574 
1575     return cc;
1576 }
1577 
1578 static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1579                              uint64_t trans, uintptr_t ra)
1580 {
1581     uint32_t i;
1582 
1583     for (i = 0; i <= len; i++) {
1584         uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1585         uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1586         cpu_stb_data_ra(env, array + i, new_byte, ra);
1587     }
1588 
1589     return env->cc_op;
1590 }
1591 
/* TR: translate 'len' + 1 bytes at 'array' through the table at 'trans'.  */
void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}
1597 
/*
 * TRE: translate bytes at 'array' through the table at 'trans' until
 * either 'len' bytes are done or the test byte (low byte of r0) is
 * found.  Sets cc_op: 0 done, 1 test byte hit, 3 capped at 8k (caller
 * re-executes).  Returns {remaining length, updated address}.
 */
Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
                   uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* In 31/24-bit mode, address and length are truncated accordingly.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* The test byte ends processing before it is translated.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    return int128_make128(len - i, array + i);
}
1636 
1637 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1638                                      uint64_t array, uint64_t trans,
1639                                      int inc, uintptr_t ra)
1640 {
1641     int i;
1642 
1643     for (i = 0; i <= len; i++) {
1644         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1645         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1646 
1647         if (sbyte != 0) {
1648             set_address(env, 1, array + i * inc);
1649             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1650             return (i == len) ? 2 : 1;
1651         }
1652     }
1653 
1654     return 0;
1655 }
1656 
/* Forward-scanning variant of do_helper_trt, for use as a callback.  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
1663 
/* TRT: translate and test, scanning forward.  */
uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}
1669 
/* Backward-scanning variant of do_helper_trt, for use as a callback.  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1676 
/* TRTR: translate and test reverse, scanning backward.  */
uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}
1682 
/* Translate one/two to one/two */
/*
 * TROO/TROT/TRTO/TRTT: translate ssize-wide source elements through the
 * table at GR1 into dsize-wide destination elements, stopping at the
 * test character 'tst' or after at most 8k iterations.  'sizes' encodes
 * the element widths: bit 0 set -> dsize 1, bit 1 set -> ssize 1.
 *
 * Returns cc 0 (length exhausted), 1 (test character hit) or 3 (capped,
 * caller re-executes).  r1/r1+1/r2 are updated with the progress made.
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* Stop before storing when the translated value is the test char. */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1736 
/*
 * CSST (compare and swap and store): atomically compare-and-swap the
 * first operand at a1, and if the comparison succeeded, store a value
 * from the parameter list (addressed by GR1) at a2.  The swap width is
 * 4 << fc bytes (fc from GR0 bits 0..7), the store width is 1 << sc
 * bytes (sc from GR0 bits 8..15).
 *
 * Each of the two operations is individually atomic, but the combined
 * operation is not.  'parallel' distinguishes the MTTCG fast path from
 * the serialized fallback.  Returns cc 0 on successful compare, 1 on
 * mismatch.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
    MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
    MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
    MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /*
     * All loads happen before all stores.  For simplicity, load the entire
     * store value area from the parameter list.
     */
    svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
    svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);

    switch (fc) {
    case 0:
        /* 4-byte compare and swap.  */
        {
            uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
            } else {
                ov = cpu_ldl_mmu(env, a1, oi4, ra);
                cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
            }
            cc = (ov != cv);
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 8-byte compare and swap.  */
        {
            uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_mmu(env, a1, oi8, ra);
                cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 16-byte compare and swap, using the even/odd pair r3/r3+1.  */
        {
            Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                ov = cpu_ld16_mmu(env, a1, oi16, ra);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }
                cpu_st16_mmu(env, a1, nv, oi16, ra);
            } else if (HAVE_CMPXCHG128) {
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
            break;
        case 1:
            cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
            break;
        case 2:
            cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
            break;
        case 3:
            cpu_stq_mmu(env, a2, svh, oi8, ra);
            break;
        case 4:
            cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
1896 
1897 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1898 {
1899     return do_csst(env, r3, a1, a2, false);
1900 }
1901 
1902 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1903                                uint64_t a2)
1904 {
1905     return do_csst(env, r3, a1, a2, true);
1906 }
1907 
1908 #if !defined(CONFIG_USER_ONLY)
/* LOAD CONTROL (64-bit): load control registers r1..r3 (wrapping modulo
   16) from consecutive doublewords at a2.  */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be doubleword aligned.  */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which control PER.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    /* Re-evaluate watchpoints if the PER registers changed while PER
       is enabled in the PSW.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation: flush the TLB.  */
    tlb_flush(env_cpu(env));
}
1941 
/* LOAD CONTROL (32-bit): load the low halves of control registers
   r1..r3 (wrapping modulo 16) from consecutive words at a2; the high
   halves are preserved.  */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be word aligned.  */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which control PER.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        /* Only the low 32 bits of the control register are replaced.  */
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    /* Re-evaluate watchpoints if the PER registers changed while PER
       is enabled in the PSW.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control registers affect address translation: flush the TLB.  */
    tlb_flush(env_cpu(env));
}
1973 
1974 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1975 {
1976     uintptr_t ra = GETPC();
1977     uint64_t dest = a2;
1978     uint32_t i;
1979 
1980     if (dest & 0x7) {
1981         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1982     }
1983 
1984     for (i = r1;; i = (i + 1) % 16) {
1985         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1986         dest += sizeof(uint64_t);
1987 
1988         if (i == r3) {
1989             break;
1990         }
1991     }
1992 }
1993 
1994 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1995 {
1996     uintptr_t ra = GETPC();
1997     uint64_t dest = a2;
1998     uint32_t i;
1999 
2000     if (dest & 0x3) {
2001         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2002     }
2003 
2004     for (i = r1;; i = (i + 1) % 16) {
2005         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2006         dest += sizeof(uint32_t);
2007 
2008         if (i == r3) {
2009             break;
2010         }
2011     }
2012 }
2013 
2014 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2015 {
2016     uintptr_t ra = GETPC();
2017     int i;
2018 
2019     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2020 
2021     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2022         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2023     }
2024 
2025     return 0;
2026 }
2027 
/* TEST PROTECTION: probe whether a1 may be stored to and/or fetched
   from, returning a condition code (0..3) instead of faulting.  */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write probe failed; presumably it recorded the program
       interrupt code in env->int_pgm_code and queued an exception
       which must be suppressed here (NOTE(review): confirm).  */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2067 
/* insert storage key extended: return the storage key for the page
   containing the real address in r2 (0 if the backend fails).  */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* The storage-keys device is looked up once and cached.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears to handle keys becoming
               enabled for the first time — confirm enable_skeys()
               return semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }
    return key;
}
2097 
/* set storage key extended: set the storage key for the page containing
   the real address in r2 from the low byte of r1.  */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* The storage-keys device is looked up once and cached.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears to handle keys becoming
               enabled for the first time — confirm enable_skeys()
               return semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* The low bit of the key byte is masked off.  */
    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2131 
/* reset reference bit extended: clear the reference bit of the storage
   key for the page at real address r2, returning a cc derived from the
   previous reference and change bits.  */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* The storage-keys device is looked up once and cached.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* r2 is a real address; convert to absolute and validate.  */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* NOTE(review): flush appears to handle keys becoming
               enabled for the first time — confirm enable_skeys()
               return semantics.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember the old R and C bits, then clear only R.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2185 
/* MOVE TO SECONDARY: copy up to 256 bytes from the primary space (a2)
   to the secondary space (a1), subject to the PSW key in bits 4-7 of
   KEY.  Returns cc 0, or cc 3 if the length was truncated to 256.  */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT and secondary-space control must be on, and the PSW must not
       already be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The supplied access key must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2220 
/* MOVE TO PRIMARY: copy up to 256 bytes from the secondary space (a2)
   to the primary space (a1), subject to the PSW key in bits 4-7 of
   KEY.  Returns cc 0, or cc 3 if the length was truncated to 256.  */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
                      uint64_t key)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* DAT and secondary-space control must be on, and the PSW must not
       already be in home-space or access-register mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* The supplied access key must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }
    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2254 
/* INVALIDATE DAT TABLE ENTRY: optionally mark a range of region or
   segment table entries invalid, then flush the TLB(s).  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Reserved bits of r2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Select the table-index field of r2 matching the table type.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* local clearing: flush only this CPU */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2304 
/* invalidate pte: mark the page table entry for VADDR (relative to the
   page-table origin PTO) invalid and flush matching TLB entries.  */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* local clearing: flush only this CPU's TLB */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* flush on all CPUs */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2345 
2346 /* flush local tlb */
2347 void HELPER(ptlb)(CPUS390XState *env)
2348 {
2349     tlb_flush(env_cpu(env));
2350 }
2351 
2352 /* flush global tlb */
2353 void HELPER(purge)(CPUS390XState *env)
2354 {
2355     tlb_flush_all_cpus_synced(env_cpu(env));
2356 }
2357 
/* load real address: translate ADDR and return the real address, or an
   exception indication, setting env->cc_op accordingly.  */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        /* Translation failed: cc 3; the low 32 bits of the result hold
           the exception code with bit 0x80000000 set, while the high
           half of r1 is preserved.  */
        cc = 3;
        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
    } else {
        /* Success: combine the translated frame with the page offset.  */
        cc = 0;
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2382 #endif
2383 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn;
    uint8_t opc;

    /* EXECUTE targets must be at even addresses.  */
    if (addr & 1) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    insn = cpu_lduw_code(env, addr);
    opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* Storage-to-storage insns in the 0xd0..0xdf range: dispatch
           straight to the out-of-line helper instead of translating.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Crack the length and base+displacement operand fields.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: raise the exception with the (possibly modified) code.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
    env->ex_target = addr;
}
2465 
/* MOVE WITH OPTIONAL SPECIFICATIONS: move up to 4096 bytes between the
   address spaces / access keys described by the OAC fields of r0.
   Returns cc 0, or cc 3 if the length was truncated to 4096.  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* DAT must be enabled.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* OAC fields whose validity bit is clear default to PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Problem state may not explicitly select the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* The secondary space requires CR0's secondary-space control.  */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* Both keys must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca, desta;

        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
                       mmu_idx_from_as(src_as), ra);
        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
                       mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2551 
2552 /* Decode a Unicode character.  A return value < 0 indicates success, storing
2553    the UTF-32 result into OCHAR and the input length into OLEN.  A return
2554    value >= 0 indicates failure, and the CC value to be returned.  */
2555 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2556                                  uint64_t ilen, bool enh_check, uintptr_t ra,
2557                                  uint32_t *ochar, uint32_t *olen);
2558 
2559 /* Encode a Unicode character.  A return value < 0 indicates success, storing
2560    the bytes into ADDR and the output length into OLEN.  A return value >= 0
2561    indicates failure, and the CC value to be returned.  */
2562 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2563                                  uint64_t ilen, uintptr_t ra, uint32_t c,
2564                                  uint32_t *olen);
2565 
2566 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2567                        bool enh_check, uintptr_t ra,
2568                        uint32_t *ochar, uint32_t *olen)
2569 {
2570     uint8_t s0, s1, s2, s3;
2571     uint32_t c, l;
2572 
2573     if (ilen < 1) {
2574         return 0;
2575     }
2576     s0 = cpu_ldub_data_ra(env, addr, ra);
2577     if (s0 <= 0x7f) {
2578         /* one byte character */
2579         l = 1;
2580         c = s0;
2581     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2582         /* invalid character */
2583         return 2;
2584     } else if (s0 <= 0xdf) {
2585         /* two byte character */
2586         l = 2;
2587         if (ilen < 2) {
2588             return 0;
2589         }
2590         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2591         c = s0 & 0x1f;
2592         c = (c << 6) | (s1 & 0x3f);
2593         if (enh_check && (s1 & 0xc0) != 0x80) {
2594             return 2;
2595         }
2596     } else if (s0 <= 0xef) {
2597         /* three byte character */
2598         l = 3;
2599         if (ilen < 3) {
2600             return 0;
2601         }
2602         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2603         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2604         c = s0 & 0x0f;
2605         c = (c << 6) | (s1 & 0x3f);
2606         c = (c << 6) | (s2 & 0x3f);
2607         /* Fold the byte-by-byte range descriptions in the PoO into
2608            tests against the complete value.  It disallows encodings
2609            that could be smaller, and the UTF-16 surrogates.  */
2610         if (enh_check
2611             && ((s1 & 0xc0) != 0x80
2612                 || (s2 & 0xc0) != 0x80
2613                 || c < 0x1000
2614                 || (c >= 0xd800 && c <= 0xdfff))) {
2615             return 2;
2616         }
2617     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2618         /* four byte character */
2619         l = 4;
2620         if (ilen < 4) {
2621             return 0;
2622         }
2623         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2624         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2625         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2626         c = s0 & 0x07;
2627         c = (c << 6) | (s1 & 0x3f);
2628         c = (c << 6) | (s2 & 0x3f);
2629         c = (c << 6) | (s3 & 0x3f);
2630         /* See above.  */
2631         if (enh_check
2632             && ((s1 & 0xc0) != 0x80
2633                 || (s2 & 0xc0) != 0x80
2634                 || (s3 & 0xc0) != 0x80
2635                 || c < 0x010000
2636                 || c > 0x10ffff)) {
2637             return 2;
2638         }
2639     } else {
2640         /* invalid character */
2641         return 2;
2642     }
2643 
2644     *ochar = c;
2645     *olen = l;
2646     return -1;
2647 }
2648 
2649 static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2650                         bool enh_check, uintptr_t ra,
2651                         uint32_t *ochar, uint32_t *olen)
2652 {
2653     uint16_t s0, s1;
2654     uint32_t c, l;
2655 
2656     if (ilen < 2) {
2657         return 0;
2658     }
2659     s0 = cpu_lduw_data_ra(env, addr, ra);
2660     if ((s0 & 0xfc00) != 0xd800) {
2661         /* one word character */
2662         l = 2;
2663         c = s0;
2664     } else {
2665         /* two word character */
2666         l = 4;
2667         if (ilen < 4) {
2668             return 0;
2669         }
2670         s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2671         c = extract32(s0, 6, 4) + 1;
2672         c = (c << 6) | (s0 & 0x3f);
2673         c = (c << 10) | (s1 & 0x3ff);
2674         if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2675             /* invalid surrogate character */
2676             return 2;
2677         }
2678     }
2679 
2680     *ochar = c;
2681     *olen = l;
2682     return -1;
2683 }
2684 
2685 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2686                         bool enh_check, uintptr_t ra,
2687                         uint32_t *ochar, uint32_t *olen)
2688 {
2689     uint32_t c;
2690 
2691     if (ilen < 4) {
2692         return 0;
2693     }
2694     c = cpu_ldl_data_ra(env, addr, ra);
2695     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2696         /* invalid unicode character */
2697         return 2;
2698     }
2699 
2700     *ochar = c;
2701     *olen = 4;
2702     return -1;
2703 }
2704 
2705 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2706                        uintptr_t ra, uint32_t c, uint32_t *olen)
2707 {
2708     uint8_t d[4];
2709     uint32_t l, i;
2710 
2711     if (c <= 0x7f) {
2712         /* one byte character */
2713         l = 1;
2714         d[0] = c;
2715     } else if (c <= 0x7ff) {
2716         /* two byte character */
2717         l = 2;
2718         d[1] = 0x80 | extract32(c, 0, 6);
2719         d[0] = 0xc0 | extract32(c, 6, 5);
2720     } else if (c <= 0xffff) {
2721         /* three byte character */
2722         l = 3;
2723         d[2] = 0x80 | extract32(c, 0, 6);
2724         d[1] = 0x80 | extract32(c, 6, 6);
2725         d[0] = 0xe0 | extract32(c, 12, 4);
2726     } else {
2727         /* four byte character */
2728         l = 4;
2729         d[3] = 0x80 | extract32(c, 0, 6);
2730         d[2] = 0x80 | extract32(c, 6, 6);
2731         d[1] = 0x80 | extract32(c, 12, 6);
2732         d[0] = 0xf0 | extract32(c, 18, 3);
2733     }
2734 
2735     if (ilen < l) {
2736         return 1;
2737     }
2738     for (i = 0; i < l; ++i) {
2739         cpu_stb_data_ra(env, addr + i, d[i], ra);
2740     }
2741 
2742     *olen = l;
2743     return -1;
2744 }
2745 
/* Encode character C as UTF-16 at ADDR.  Returns < 0 on success with
   the number of bytes written in *OLEN, or 1 (the CC) if fewer than
   the required bytes remain in the destination.  */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character */
        if (ilen < 4) {
            return 1;
        }
        /* Build the surrogate pair: bits 0-9 go to the low surrogate,
           bits 10-15 to the high surrogate, with (plane - 1) deposited
           into its bits 6-9 (deposit32 truncates to 4 bits, which also
           bounds out-of-range input).  */
        d1 = 0xdc00 | extract32(c, 0, 10);
        d0 = 0xd800 | extract32(c, 10, 6);
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
2773 
2774 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2775                         uintptr_t ra, uint32_t c, uint32_t *olen)
2776 {
2777     if (ilen < 4) {
2778         return 1;
2779     }
2780     cpu_stl_data_ra(env, addr, c, ra);
2781     *olen = 4;
2782     return -1;
2783 }
2784 
/* Common loop of the CONVERT UTF helpers: decode characters from the
   r2 operand and re-encode them into the r1 operand, advancing the
   address/length register pairs as work progresses.  Returns the CC
   produced by DECODE (0: source exhausted, 2: invalid input) or by
   ENCODE (1: destination exhausted), or 3 when the per-call iteration
   cap is reached (partial, CPU-determined completion).  */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;    /* well-formedness checking enabled */
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* One character converted: consume source, advance dest.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;
    }

    /* Write the updated operands back to the register pairs.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2825 
/* CU12: convert UTF-8 to UTF-16.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2831 
/* CU14: convert UTF-8 to UTF-32.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2837 
/* CU21: convert UTF-16 to UTF-8.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2843 
/* CU24: convert UTF-16 to UTF-32.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2849 
/* CU41: convert UTF-32 to UTF-8.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2855 
/* CU42: convert UTF-32 to UTF-16.  GETPC() must be taken here, in the
   outermost helper frame, for correct unwinding on faults.  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2861 
2862 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2863                         uintptr_t ra)
2864 {
2865     /* test the actual access, not just any access to the page due to LAP */
2866     while (len) {
2867         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2868         const uint64_t curlen = MIN(pagelen, len);
2869 
2870         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2871         addr = wrap_address(env, addr + curlen);
2872         len -= curlen;
2873     }
2874 }
2875 
/* TCG helper entry point: GETPC() must be captured here, in the helper
   itself, so faults in the probe unwind to the guest instruction.  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
2880