1 /*
2  *  S/390 memory access helper routines
3  *
4  *  Copyright (c) 2009 Ulrich Hecht
5  *  Copyright (c) 2009 Alexander Graf
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 
31 #if !defined(CONFIG_USER_ONLY)
32 #include "hw/s390x/storage-keys.h"
33 #endif
34 
35 /*****************************************************************************/
36 /* Softmmu support */
37 
/*
 * Debug logging for the helpers in this file.  Uncomment the define below
 * to forward HELPER_LOG() to qemu_log(); otherwise HELPER_LOG() expands to
 * nothing and its arguments are not evaluated.
 */
/* #define DEBUG_HELPER */
#ifndef DEBUG_HELPER
#define HELPER_LOG(...)
#else
#define HELPER_LOG(...) qemu_log(__VA_ARGS__)
#endif
44 
/*
 * Check whether @psw_key may be used in the current CPU state.
 *
 * The 16-bit PSW-key mask (PKM) is taken from bits 16-31 of cregs[3].  When
 * PSW_MASK_PSTATE is set in the PSW mask, the key is only valid if its bit
 * is set in the PKM; the leftmost PKM bit corresponds to key 0 and the
 * rightmost one to key 15.  When PSW_MASK_PSTATE is clear, every key is
 * valid.
 *
 * @psw_key must be in the range 0..15.
 */
static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /*
         * The PKM is 16 bits wide, so the probe bit has to start at 0x8000.
         * Starting at 0x80 would only ever test the low eight PKM bits:
         * keys 8..15 would always be rejected and keys 0..7 would be
         * checked against the wrong bits.
         */
        return (pkm & (0x8000 >> psw_key)) != 0;
    }
    return true;
}
55 
/*
 * Tell whether storing @len bytes at @dest, processed left to right, would
 * overwrite bytes of the source range starting at @src before they have
 * been read.  That is the case when @dest lies behind @src but still within
 * the source range.  An empty range or identical addresses never overlap
 * destructively.
 */
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    uint64_t src_last;

    if (len == 0 || dest == src) {
        return false;
    }

    src_last = wrap_address(env, src + len - 1);
    if (unlikely(src_last < src)) {
        /*
         * The source range wraps around the end of the address space, so
         * it consists of the part behind src plus the part up to the
         * wrapped last byte.
         */
        return dest > src || dest <= src_last;
    }
    return dest > src && dest <= src + len - 1;
}
68 
69 /* Trigger a SPECIFICATION exception if an address or a length is not
70    naturally aligned.  */
check_alignment(CPUS390XState * env,uint64_t v,int wordsize,uintptr_t ra)71 static inline void check_alignment(CPUS390XState *env, uint64_t v,
72                                    int wordsize, uintptr_t ra)
73 {
74     if (v % wordsize) {
75         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
76     }
77 }
78 
79 /* Load a value from memory according to its size.  */
cpu_ldusize_data_ra(CPUS390XState * env,uint64_t addr,int wordsize,uintptr_t ra)80 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
81                                            int wordsize, uintptr_t ra)
82 {
83     switch (wordsize) {
84     case 1:
85         return cpu_ldub_data_ra(env, addr, ra);
86     case 2:
87         return cpu_lduw_data_ra(env, addr, ra);
88     default:
89         abort();
90     }
91 }
92 
93 /* Store a to memory according to its size.  */
cpu_stsize_data_ra(CPUS390XState * env,uint64_t addr,uint64_t value,int wordsize,uintptr_t ra)94 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
95                                       uint64_t value, int wordsize,
96                                       uintptr_t ra)
97 {
98     switch (wordsize) {
99     case 1:
100         cpu_stb_data_ra(env, addr, value, ra);
101         break;
102     case 2:
103         cpu_stw_data_ra(env, addr, value, ra);
104         break;
105     default:
106         abort();
107     }
108 }
109 
110 /* An access covers at most 4096 bytes and therefore at most two pages. */
111 typedef struct S390Access {
112     target_ulong vaddr1;
113     target_ulong vaddr2;
114     char *haddr1;
115     char *haddr2;
116     uint16_t size1;
117     uint16_t size2;
118     /*
119      * If we can't access the host page directly, we'll have to do I/O access
120      * via ld/st helpers. These are internal details, so we store the
121      * mmu idx to do the access here instead of passing it around in the
122      * helpers. Maybe, one day we can get rid of ld/st access - once we can
123      * handle TLB_NOTDIRTY differently. We don't expect these special accesses
124      * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
125      * pages, we might trigger a new MMU translation - very unlikely that
126      * the mapping changes in between and we would trigger a fault.
127      */
128     int mmu_idx;
129 } S390Access;
130 
access_prepare(CPUS390XState * env,vaddr vaddr,int size,MMUAccessType access_type,int mmu_idx,uintptr_t ra)131 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
132                                  MMUAccessType access_type, int mmu_idx,
133                                  uintptr_t ra)
134 {
135     S390Access access = {
136         .vaddr1 = vaddr,
137         .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
138         .mmu_idx = mmu_idx,
139     };
140 
141     g_assert(size > 0 && size <= 4096);
142     access.haddr1 = probe_access(env, access.vaddr1, access.size1, access_type,
143                                  mmu_idx, ra);
144 
145     if (unlikely(access.size1 != size)) {
146         /* The access crosses page boundaries. */
147         access.vaddr2 = wrap_address(env, vaddr + access.size1);
148         access.size2 = size - access.size1;
149         access.haddr2 = probe_access(env, access.vaddr2, access.size2,
150                                      access_type, mmu_idx, ra);
151     }
152     return access;
153 }
154 
155 /* Helper to handle memset on a single page. */
do_access_memset(CPUS390XState * env,vaddr vaddr,char * haddr,uint8_t byte,uint16_t size,int mmu_idx,uintptr_t ra)156 static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
157                              uint8_t byte, uint16_t size, int mmu_idx,
158                              uintptr_t ra)
159 {
160 #ifdef CONFIG_USER_ONLY
161     g_assert(haddr);
162     memset(haddr, byte, size);
163 #else
164     TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
165     int i;
166 
167     if (likely(haddr)) {
168         memset(haddr, byte, size);
169     } else {
170         /*
171          * Do a single access and test if we can then get access to the
172          * page. This is especially relevant to speed up TLB_NOTDIRTY.
173          */
174         g_assert(size > 0);
175         helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
176         haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
177         if (likely(haddr)) {
178             memset(haddr + 1, byte, size - 1);
179         } else {
180             for (i = 1; i < size; i++) {
181                 helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
182             }
183         }
184     }
185 #endif
186 }
187 
/*
 * Fill the whole prepared access @desta with @byte: always the first part,
 * and the second part only if the access has one.
 */
static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (unlikely(desta->size2)) {
        do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
                         desta->size2, desta->mmu_idx, ra);
    }
}
200 
/*
 * Read the byte at @offset within one part of an access.  @vaddr is the
 * guest start address of that part and *@haddr its host address.  If
 * *@haddr is NULL (not possible in user-only builds), the byte is loaded
 * via the ld helper and *@haddr is refreshed afterwards.
 */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t val;

    if (unlikely(!*haddr)) {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        val = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
        *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
        return val;
    }
    return ldub_p(*haddr + offset);
#endif
}
222 
/*
 * Read the byte at @offset (relative to the start of the whole access) from
 * the prepared access, picking the first or second part as appropriate.
 */
static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset >= access->size1) {
        /* The byte lives in the second part; rebase the offset. */
        return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                                  offset - access->size1, access->mmu_idx,
                                  ra);
    }
    return do_access_get_byte(env, access->vaddr1, &access->haddr1, offset,
                              access->mmu_idx, ra);
}
233 
/*
 * Write @byte at @offset within one part of an access.  @vaddr is the guest
 * start address of that part and *@haddr its host address.  If *@haddr is
 * NULL (not possible in user-only builds), the byte is stored via the st
 * helper and *@haddr is refreshed afterwards.
 */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (unlikely(!*haddr)) {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
        *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
    } else {
        stb_p(*haddr + offset, byte);
    }
#endif
}
255 
/*
 * Write @byte at @offset (relative to the start of the whole access) into
 * the prepared access, picking the first or second part as appropriate.
 */
static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset >= access->size1) {
        /* The byte lives in the second part; rebase the offset. */
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
        return;
    }
    do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                       access->mmu_idx, ra);
}
267 
268 /*
269  * Move data with the same semantics as memmove() in case ranges don't overlap
270  * or src > dest. Undefined behavior on destructive overlaps.
271  */
access_memmove(CPUS390XState * env,S390Access * desta,S390Access * srca,uintptr_t ra)272 static void access_memmove(CPUS390XState *env, S390Access *desta,
273                            S390Access *srca, uintptr_t ra)
274 {
275     int diff;
276 
277     g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
278 
279     /* Fallback to slow access in case we don't have access to all host pages */
280     if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
281                  !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
282         int i;
283 
284         for (i = 0; i < desta->size1 + desta->size2; i++) {
285             uint8_t byte = access_get_byte(env, srca, i, ra);
286 
287             access_set_byte(env, desta, i, byte, ra);
288         }
289         return;
290     }
291 
292     if (srca->size1 == desta->size1) {
293         memmove(desta->haddr1, srca->haddr1, srca->size1);
294         if (unlikely(srca->size2)) {
295             memmove(desta->haddr2, srca->haddr2, srca->size2);
296         }
297     } else if (srca->size1 < desta->size1) {
298         diff = desta->size1 - srca->size1;
299         memmove(desta->haddr1, srca->haddr1, srca->size1);
300         memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
301         if (likely(desta->size2)) {
302             memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
303         }
304     } else {
305         diff = srca->size1 - desta->size1;
306         memmove(desta->haddr1, srca->haddr1, desta->size1);
307         memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
308         if (likely(srca->size2)) {
309             memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
310         }
311     }
312 }
313 
mmu_idx_from_as(uint8_t as)314 static int mmu_idx_from_as(uint8_t as)
315 {
316     switch (as) {
317     case AS_PRIMARY:
318         return MMU_PRIMARY_IDX;
319     case AS_SECONDARY:
320         return MMU_SECONDARY_IDX;
321     case AS_HOME:
322         return MMU_HOME_IDX;
323     default:
324         /* FIXME AS_ACCREG */
325         g_assert_not_reached();
326     }
327 }
328 
329 /* and on array */
do_helper_nc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)330 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
331                              uint64_t src, uintptr_t ra)
332 {
333     const int mmu_idx = cpu_mmu_index(env, false);
334     S390Access srca1, srca2, desta;
335     uint32_t i;
336     uint8_t c = 0;
337 
338     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
339                __func__, l, dest, src);
340 
341     /* NC always processes one more byte than specified - maximum is 256 */
342     l++;
343 
344     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
345     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
346     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
347     for (i = 0; i < l; i++) {
348         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
349                           access_get_byte(env, &srca2, i, ra);
350 
351         c |= x;
352         access_set_byte(env, &desta, i, x, ra);
353     }
354     return c != 0;
355 }
356 
HELPER(nc)357 uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
358                     uint64_t src)
359 {
360     return do_helper_nc(env, l, dest, src, GETPC());
361 }
362 
363 /* xor on array */
do_helper_xc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)364 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
365                              uint64_t src, uintptr_t ra)
366 {
367     const int mmu_idx = cpu_mmu_index(env, false);
368     S390Access srca1, srca2, desta;
369     uint32_t i;
370     uint8_t c = 0;
371 
372     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
373                __func__, l, dest, src);
374 
375     /* XC always processes one more byte than specified - maximum is 256 */
376     l++;
377 
378     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
379     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
380     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
381 
382     /* xor with itself is the same as memset(0) */
383     if (src == dest) {
384         access_memset(env, &desta, 0, ra);
385         return 0;
386     }
387 
388     for (i = 0; i < l; i++) {
389         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
390                           access_get_byte(env, &srca2, i, ra);
391 
392         c |= x;
393         access_set_byte(env, &desta, i, x, ra);
394     }
395     return c != 0;
396 }
397 
HELPER(xc)398 uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
399                     uint64_t src)
400 {
401     return do_helper_xc(env, l, dest, src, GETPC());
402 }
403 
404 /* or on array */
do_helper_oc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)405 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
406                              uint64_t src, uintptr_t ra)
407 {
408     const int mmu_idx = cpu_mmu_index(env, false);
409     S390Access srca1, srca2, desta;
410     uint32_t i;
411     uint8_t c = 0;
412 
413     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
414                __func__, l, dest, src);
415 
416     /* OC always processes one more byte than specified - maximum is 256 */
417     l++;
418 
419     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
420     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
421     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
422     for (i = 0; i < l; i++) {
423         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
424                           access_get_byte(env, &srca2, i, ra);
425 
426         c |= x;
427         access_set_byte(env, &desta, i, x, ra);
428     }
429     return c != 0;
430 }
431 
HELPER(oc)432 uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
433                     uint64_t src)
434 {
435     return do_helper_oc(env, l, dest, src, GETPC());
436 }
437 
438 /* memmove */
do_helper_mvc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)439 static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
440                               uint64_t src, uintptr_t ra)
441 {
442     const int mmu_idx = cpu_mmu_index(env, false);
443     S390Access srca, desta;
444     uint32_t i;
445 
446     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
447                __func__, l, dest, src);
448 
449     /* MVC always copies one more byte than specified - maximum is 256 */
450     l++;
451 
452     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
453     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
454 
455     /*
456      * "When the operands overlap, the result is obtained as if the operands
457      * were processed one byte at a time". Only non-destructive overlaps
458      * behave like memmove().
459      */
460     if (dest == src + 1) {
461         access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
462     } else if (!is_destructive_overlap(env, dest, src, l)) {
463         access_memmove(env, &desta, &srca, ra);
464     } else {
465         for (i = 0; i < l; i++) {
466             uint8_t byte = access_get_byte(env, &srca, i, ra);
467 
468             access_set_byte(env, &desta, i, byte, ra);
469         }
470     }
471 
472     return env->cc_op;
473 }
474 
HELPER(mvc)475 void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
476 {
477     do_helper_mvc(env, l, dest, src, GETPC());
478 }
479 
480 /* move inverse  */
HELPER(mvcin)481 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
482 {
483     const int mmu_idx = cpu_mmu_index(env, false);
484     S390Access srca, desta;
485     uintptr_t ra = GETPC();
486     int i;
487 
488     /* MVCIN always copies one more byte than specified - maximum is 256 */
489     l++;
490 
491     src = wrap_address(env, src - l + 1);
492     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
493     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
494     for (i = 0; i < l; i++) {
495         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
496 
497         access_set_byte(env, &desta, i, x, ra);
498     }
499 }
500 
501 /* move numerics  */
HELPER(mvn)502 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
503 {
504     const int mmu_idx = cpu_mmu_index(env, false);
505     S390Access srca1, srca2, desta;
506     uintptr_t ra = GETPC();
507     int i;
508 
509     /* MVN always copies one more byte than specified - maximum is 256 */
510     l++;
511 
512     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
513     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
514     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
515     for (i = 0; i < l; i++) {
516         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
517                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
518 
519         access_set_byte(env, &desta, i, x, ra);
520     }
521 }
522 
523 /* move with offset  */
HELPER(mvo)524 void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
525 {
526     const int mmu_idx = cpu_mmu_index(env, false);
527     /* MVO always processes one more byte than specified - maximum is 16 */
528     const int len_dest = (l >> 4) + 1;
529     const int len_src = (l & 0xf) + 1;
530     uintptr_t ra = GETPC();
531     uint8_t byte_dest, byte_src;
532     S390Access srca, desta;
533     int i, j;
534 
535     srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
536     desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
537 
538     /* Handle rightmost byte */
539     byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
540     byte_src = access_get_byte(env, &srca, len_src - 1, ra);
541     byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
542     access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
543 
544     /* Process remaining bytes from right to left */
545     for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
546         byte_dest = byte_src >> 4;
547         if (j >= 0) {
548             byte_src = access_get_byte(env, &srca, j, ra);
549         } else {
550             byte_src = 0;
551         }
552         byte_dest |= byte_src << 4;
553         access_set_byte(env, &desta, i, byte_dest, ra);
554     }
555 }
556 
557 /* move zones  */
HELPER(mvz)558 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
559 {
560     const int mmu_idx = cpu_mmu_index(env, false);
561     S390Access srca1, srca2, desta;
562     uintptr_t ra = GETPC();
563     int i;
564 
565     /* MVZ always copies one more byte than specified - maximum is 256 */
566     l++;
567 
568     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
569     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
570     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
571     for (i = 0; i < l; i++) {
572         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
573                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
574 
575         access_set_byte(env, &desta, i, x, ra);
576     }
577 }
578 
579 /* compare unsigned byte arrays */
do_helper_clc(CPUS390XState * env,uint32_t l,uint64_t s1,uint64_t s2,uintptr_t ra)580 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
581                               uint64_t s2, uintptr_t ra)
582 {
583     uint32_t i;
584     uint32_t cc = 0;
585 
586     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
587                __func__, l, s1, s2);
588 
589     for (i = 0; i <= l; i++) {
590         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
591         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
592         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
593         if (x < y) {
594             cc = 1;
595             break;
596         } else if (x > y) {
597             cc = 2;
598             break;
599         }
600     }
601 
602     HELPER_LOG("\n");
603     return cc;
604 }
605 
HELPER(clc)606 uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
607 {
608     return do_helper_clc(env, l, s1, s2, GETPC());
609 }
610 
611 /* compare logical under mask */
HELPER(clm)612 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
613                      uint64_t addr)
614 {
615     uintptr_t ra = GETPC();
616     uint32_t cc = 0;
617 
618     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
619                mask, addr);
620 
621     while (mask) {
622         if (mask & 8) {
623             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
624             uint8_t r = extract32(r1, 24, 8);
625             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
626                        addr);
627             if (r < d) {
628                 cc = 1;
629                 break;
630             } else if (r > d) {
631                 cc = 2;
632                 break;
633             }
634             addr++;
635         }
636         mask = (mask << 1) & 0xf;
637         r1 <<= 8;
638     }
639 
640     HELPER_LOG("\n");
641     return cc;
642 }
643 
/* Read general register @reg and pass its value through wrap_address(). */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    const uint64_t raw = env->regs[reg];

    return wrap_address(env, raw);
}
648 
649 /*
650  * Store the address to the given register, zeroing out unused leftmost
651  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
652  */
set_address_zero(CPUS390XState * env,int reg,uint64_t address)653 static inline void set_address_zero(CPUS390XState *env, int reg,
654                                     uint64_t address)
655 {
656     if (env->psw.mask & PSW_MASK_64) {
657         env->regs[reg] = address;
658     } else {
659         if (!(env->psw.mask & PSW_MASK_32)) {
660             address &= 0x00ffffff;
661         } else {
662             address &= 0x7fffffff;
663         }
664         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
665     }
666 }
667 
/*
 * Store @address into register @reg according to the current addressing
 * mode.  In 64-bit mode the whole register is replaced; in 24-bit mode only
 * the low 24 bits are replaced; in 31-bit mode the low 32 bits are replaced
 * with the 31-bit address.
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
        return;
    }

    if (env->psw.mask & PSW_MASK_32) {
        /*
         * 31-Bit mode. According to the PoO it is implementation dependent
         * if bit 32 remains unchanged or is set to zero.  Choose the latter
         * so that the function can also be used for TRT.
         */
        address &= 0x7fffffff;
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        return;
    }

    /*
     * 24-Bit mode. According to the PoO it is implementation dependent if
     * bits 32-39 remain unchanged or are set to zeros.  Choose the former
     * so that the function can also be used for TRT.
     */
    env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
}
690 
/*
 * Return @length unchanged in 64-bit mode; otherwise truncate it to its low
 * 32 bits.
 */
static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    return (env->psw.mask & PSW_MASK_64) ? length : (uint32_t)length;
}
698 
/*
 * Return @length unchanged in 64-bit mode; in 24-bit and 31-bit mode
 * truncate it to its low 31 bits.
 */
static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        return length;
    }
    /* 24-Bit and 31-Bit mode */
    return length & 0x7fffffff;
}
707 
/* Read general register @reg as a length, wrapped by wrap_length31(). */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    const uint64_t raw = env->regs[reg];

    return wrap_length31(env, raw);
}
712 
/*
 * Store @length into register @reg.  In 64-bit mode the whole register is
 * replaced; in 24-bit and 31-bit mode only its low 32 bits are.
 */
static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
        return;
    }
    /* 64-Bit mode */
    env->regs[reg] = length;
}
723 
724 /* search string (c is byte to search, r2 is string, r1 end of string) */
HELPER(srst)725 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
726 {
727     uintptr_t ra = GETPC();
728     uint64_t end, str;
729     uint32_t len;
730     uint8_t v, c = env->regs[0];
731 
732     /* Bits 32-55 must contain all 0.  */
733     if (env->regs[0] & 0xffffff00u) {
734         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
735     }
736 
737     str = get_address(env, r2);
738     end = get_address(env, r1);
739 
740     /* Lest we fail to service interrupts in a timely manner, limit the
741        amount of work we're willing to do.  For now, let's cap at 8k.  */
742     for (len = 0; len < 0x2000; ++len) {
743         if (str + len == end) {
744             /* Character not found.  R1 & R2 are unmodified.  */
745             env->cc_op = 2;
746             return;
747         }
748         v = cpu_ldub_data_ra(env, str + len, ra);
749         if (v == c) {
750             /* Character found.  Set R1 to the location; R2 is unmodified.  */
751             env->cc_op = 1;
752             set_address(env, r1, str + len);
753             return;
754         }
755     }
756 
757     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
758     env->cc_op = 3;
759     set_address(env, r2, str + len);
760 }
761 
HELPER(srstu)762 void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
763 {
764     uintptr_t ra = GETPC();
765     uint32_t len;
766     uint16_t v, c = env->regs[0];
767     uint64_t end, str, adj_end;
768 
769     /* Bits 32-47 of R0 must be zero.  */
770     if (env->regs[0] & 0xffff0000u) {
771         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
772     }
773 
774     str = get_address(env, r2);
775     end = get_address(env, r1);
776 
777     /* If the LSB of the two addresses differ, use one extra byte.  */
778     adj_end = end + ((str ^ end) & 1);
779 
780     /* Lest we fail to service interrupts in a timely manner, limit the
781        amount of work we're willing to do.  For now, let's cap at 8k.  */
782     for (len = 0; len < 0x2000; len += 2) {
783         if (str + len == adj_end) {
784             /* End of input found.  */
785             env->cc_op = 2;
786             return;
787         }
788         v = cpu_lduw_data_ra(env, str + len, ra);
789         if (v == c) {
790             /* Character found.  Set R1 to the location; R2 is unmodified.  */
791             env->cc_op = 1;
792             set_address(env, r1, str + len);
793             return;
794         }
795     }
796 
797     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
798     env->cc_op = 3;
799     set_address(env, r2, str + len);
800 }
801 
802 /* unsigned string compare (c is string terminator) */
HELPER(clst)803 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
804 {
805     uintptr_t ra = GETPC();
806     uint32_t len;
807 
808     c = c & 0xff;
809     s1 = wrap_address(env, s1);
810     s2 = wrap_address(env, s2);
811 
812     /* Lest we fail to service interrupts in a timely manner, limit the
813        amount of work we're willing to do.  For now, let's cap at 8k.  */
814     for (len = 0; len < 0x2000; ++len) {
815         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
816         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
817         if (v1 == v2) {
818             if (v1 == c) {
819                 /* Equal.  CC=0, and don't advance the registers.  */
820                 env->cc_op = 0;
821                 env->retxl = s2;
822                 return s1;
823             }
824         } else {
825             /* Unequal.  CC={1,2}, and advance the registers.  Note that
826                the terminator need not be zero, but the string that contains
827                the terminator is by definition "low".  */
828             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
829             env->retxl = s2 + len;
830             return s1 + len;
831         }
832     }
833 
834     /* CPU-determined bytes equal; advance the registers.  */
835     env->cc_op = 3;
836     env->retxl = s2 + len;
837     return s1 + len;
838 }
839 
840 /* move page */
HELPER(mvpg)841 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
842 {
843     const int mmu_idx = cpu_mmu_index(env, false);
844     const bool f = extract64(r0, 11, 1);
845     const bool s = extract64(r0, 10, 1);
846     uintptr_t ra = GETPC();
847     S390Access srca, desta;
848 
849     if ((f && s) || extract64(r0, 12, 4)) {
850         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
851     }
852 
853     r1 = wrap_address(env, r1 & TARGET_PAGE_MASK);
854     r2 = wrap_address(env, r2 & TARGET_PAGE_MASK);
855 
856     /*
857      * TODO:
858      * - Access key handling
859      * - CC-option with surpression of page-translation exceptions
860      * - Store r1/r2 register identifiers at real location 162
861      */
862     srca = access_prepare(env, r2, TARGET_PAGE_SIZE, MMU_DATA_LOAD, mmu_idx,
863                           ra);
864     desta = access_prepare(env, r1, TARGET_PAGE_SIZE, MMU_DATA_STORE, mmu_idx,
865                            ra);
866     access_memmove(env, &desta, &srca, ra);
867     return 0; /* data moved */
868 }
869 
870 /* string copy */
HELPER(mvst)871 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
872 {
873     const int mmu_idx = cpu_mmu_index(env, false);
874     const uint64_t d = get_address(env, r1);
875     const uint64_t s = get_address(env, r2);
876     const uint8_t c = env->regs[0];
877     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
878     S390Access srca, desta;
879     uintptr_t ra = GETPC();
880     int i;
881 
882     if (env->regs[0] & 0xffffff00ull) {
883         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
884     }
885 
886     /*
887      * Our access should not exceed single pages, as we must not report access
888      * exceptions exceeding the actually copied range (which we don't know at
889      * this point). We might over-indicate watchpoints within the pages
890      * (if we ever care, we have to limit processing to a single byte).
891      */
892     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
893     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
894     for (i = 0; i < len; i++) {
895         const uint8_t v = access_get_byte(env, &srca, i, ra);
896 
897         access_set_byte(env, &desta, i, v, ra);
898         if (v == c) {
899             set_address_zero(env, r1, d + i);
900             return 1;
901         }
902     }
903     set_address_zero(env, r1, d + len);
904     set_address_zero(env, r2, s + len);
905     return 3;
906 }
907 
908 /* load access registers r1 to r3 from memory at a2 */
HELPER(lam)909 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
910 {
911     uintptr_t ra = GETPC();
912     int i;
913 
914     if (a2 & 0x3) {
915         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
916     }
917 
918     for (i = r1;; i = (i + 1) % 16) {
919         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
920         a2 += 4;
921 
922         if (i == r3) {
923             break;
924         }
925     }
926 }
927 
928 /* store access registers r1 to r3 in memory at a2 */
HELPER(stam)929 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
930 {
931     uintptr_t ra = GETPC();
932     int i;
933 
934     if (a2 & 0x3) {
935         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
936     }
937 
938     for (i = r1;; i = (i + 1) % 16) {
939         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
940         a2 += 4;
941 
942         if (i == r3) {
943             break;
944         }
945     }
946 }
947 
948 /* move long helper */
do_mvcl(CPUS390XState * env,uint64_t * dest,uint64_t * destlen,uint64_t * src,uint64_t * srclen,uint16_t pad,int wordsize,uintptr_t ra)949 static inline uint32_t do_mvcl(CPUS390XState *env,
950                                uint64_t *dest, uint64_t *destlen,
951                                uint64_t *src, uint64_t *srclen,
952                                uint16_t pad, int wordsize, uintptr_t ra)
953 {
954     const int mmu_idx = cpu_mmu_index(env, false);
955     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
956     S390Access srca, desta;
957     int i, cc;
958 
959     if (*destlen == *srclen) {
960         cc = 0;
961     } else if (*destlen < *srclen) {
962         cc = 1;
963     } else {
964         cc = 2;
965     }
966 
967     if (!*destlen) {
968         return cc;
969     }
970 
971     /*
972      * Only perform one type of type of operation (move/pad) at a time.
973      * Stay within single pages.
974      */
975     if (*srclen) {
976         /* Copy the src array */
977         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
978         *destlen -= len;
979         *srclen -= len;
980         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
981         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
982         access_memmove(env, &desta, &srca, ra);
983         *src = wrap_address(env, *src + len);
984         *dest = wrap_address(env, *dest + len);
985     } else if (wordsize == 1) {
986         /* Pad the remaining area */
987         *destlen -= len;
988         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
989         access_memset(env, &desta, pad, ra);
990         *dest = wrap_address(env, *dest + len);
991     } else {
992         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
993 
994         /* The remaining length selects the padding byte. */
995         for (i = 0; i < len; (*destlen)--, i++) {
996             if (*destlen & 1) {
997                 access_set_byte(env, &desta, i, pad, ra);
998             } else {
999                 access_set_byte(env, &desta, i, pad >> 8, ra);
1000             }
1001         }
1002         *dest = wrap_address(env, *dest + len);
1003     }
1004 
1005     return *destlen ? 3 : cc;
1006 }
1007 
1008 /* move long */
HELPER(mvcl)1009 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1010 {
1011     const int mmu_idx = cpu_mmu_index(env, false);
1012     uintptr_t ra = GETPC();
1013     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
1014     uint64_t dest = get_address(env, r1);
1015     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
1016     uint64_t src = get_address(env, r2);
1017     uint8_t pad = env->regs[r2 + 1] >> 24;
1018     CPUState *cs = env_cpu(env);
1019     S390Access srca, desta;
1020     uint32_t cc, cur_len;
1021 
1022     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
1023         cc = 3;
1024     } else if (srclen == destlen) {
1025         cc = 0;
1026     } else if (destlen < srclen) {
1027         cc = 1;
1028     } else {
1029         cc = 2;
1030     }
1031 
1032     /* We might have to zero-out some bits even if there was no action. */
1033     if (unlikely(!destlen || cc == 3)) {
1034         set_address_zero(env, r2, src);
1035         set_address_zero(env, r1, dest);
1036         return cc;
1037     } else if (!srclen) {
1038         set_address_zero(env, r2, src);
1039     }
1040 
1041     /*
1042      * Only perform one type of type of operation (move/pad) in one step.
1043      * Stay within single pages.
1044      */
1045     while (destlen) {
1046         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
1047         if (!srclen) {
1048             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1049                                    ra);
1050             access_memset(env, &desta, pad, ra);
1051         } else {
1052             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
1053 
1054             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
1055                                   ra);
1056             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
1057                                    ra);
1058             access_memmove(env, &desta, &srca, ra);
1059             src = wrap_address(env, src + cur_len);
1060             srclen -= cur_len;
1061             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
1062             set_address_zero(env, r2, src);
1063         }
1064         dest = wrap_address(env, dest + cur_len);
1065         destlen -= cur_len;
1066         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
1067         set_address_zero(env, r1, dest);
1068 
1069         /*
1070          * MVCL is interruptible. Return to the main loop if requested after
1071          * writing back all state to registers. If no interrupt will get
1072          * injected, we'll end up back in this handler and continue processing
1073          * the remaining parts.
1074          */
1075         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
1076             cpu_loop_exit_restore(cs, ra);
1077         }
1078     }
1079     return cc;
1080 }
1081 
1082 /* move long extended */
HELPER(mvcle)1083 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1084                        uint32_t r3)
1085 {
1086     uintptr_t ra = GETPC();
1087     uint64_t destlen = get_length(env, r1 + 1);
1088     uint64_t dest = get_address(env, r1);
1089     uint64_t srclen = get_length(env, r3 + 1);
1090     uint64_t src = get_address(env, r3);
1091     uint8_t pad = a2;
1092     uint32_t cc;
1093 
1094     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1095 
1096     set_length(env, r1 + 1, destlen);
1097     set_length(env, r3 + 1, srclen);
1098     set_address(env, r1, dest);
1099     set_address(env, r3, src);
1100 
1101     return cc;
1102 }
1103 
1104 /* move long unicode */
HELPER(mvclu)1105 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1106                        uint32_t r3)
1107 {
1108     uintptr_t ra = GETPC();
1109     uint64_t destlen = get_length(env, r1 + 1);
1110     uint64_t dest = get_address(env, r1);
1111     uint64_t srclen = get_length(env, r3 + 1);
1112     uint64_t src = get_address(env, r3);
1113     uint16_t pad = a2;
1114     uint32_t cc;
1115 
1116     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1117 
1118     set_length(env, r1 + 1, destlen);
1119     set_length(env, r3 + 1, srclen);
1120     set_address(env, r1, dest);
1121     set_address(env, r3, src);
1122 
1123     return cc;
1124 }
1125 
1126 /* compare logical long helper */
do_clcl(CPUS390XState * env,uint64_t * src1,uint64_t * src1len,uint64_t * src3,uint64_t * src3len,uint16_t pad,uint64_t limit,int wordsize,uintptr_t ra)1127 static inline uint32_t do_clcl(CPUS390XState *env,
1128                                uint64_t *src1, uint64_t *src1len,
1129                                uint64_t *src3, uint64_t *src3len,
1130                                uint16_t pad, uint64_t limit,
1131                                int wordsize, uintptr_t ra)
1132 {
1133     uint64_t len = MAX(*src1len, *src3len);
1134     uint32_t cc = 0;
1135 
1136     check_alignment(env, *src1len | *src3len, wordsize, ra);
1137 
1138     if (!len) {
1139         return cc;
1140     }
1141 
1142     /* Lest we fail to service interrupts in a timely manner, limit the
1143        amount of work we're willing to do.  */
1144     if (len > limit) {
1145         len = limit;
1146         cc = 3;
1147     }
1148 
1149     for (; len; len -= wordsize) {
1150         uint16_t v1 = pad;
1151         uint16_t v3 = pad;
1152 
1153         if (*src1len) {
1154             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1155         }
1156         if (*src3len) {
1157             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1158         }
1159 
1160         if (v1 != v3) {
1161             cc = (v1 < v3) ? 1 : 2;
1162             break;
1163         }
1164 
1165         if (*src1len) {
1166             *src1 += wordsize;
1167             *src1len -= wordsize;
1168         }
1169         if (*src3len) {
1170             *src3 += wordsize;
1171             *src3len -= wordsize;
1172         }
1173     }
1174 
1175     return cc;
1176 }
1177 
1178 
1179 /* compare logical long */
HELPER(clcl)1180 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1181 {
1182     uintptr_t ra = GETPC();
1183     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1184     uint64_t src1 = get_address(env, r1);
1185     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1186     uint64_t src3 = get_address(env, r2);
1187     uint8_t pad = env->regs[r2 + 1] >> 24;
1188     uint32_t cc;
1189 
1190     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1191 
1192     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1193     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1194     set_address(env, r1, src1);
1195     set_address(env, r2, src3);
1196 
1197     return cc;
1198 }
1199 
1200 /* compare logical long extended memcompare insn with padding */
HELPER(clcle)1201 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1202                        uint32_t r3)
1203 {
1204     uintptr_t ra = GETPC();
1205     uint64_t src1len = get_length(env, r1 + 1);
1206     uint64_t src1 = get_address(env, r1);
1207     uint64_t src3len = get_length(env, r3 + 1);
1208     uint64_t src3 = get_address(env, r3);
1209     uint8_t pad = a2;
1210     uint32_t cc;
1211 
1212     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1213 
1214     set_length(env, r1 + 1, src1len);
1215     set_length(env, r3 + 1, src3len);
1216     set_address(env, r1, src1);
1217     set_address(env, r3, src3);
1218 
1219     return cc;
1220 }
1221 
1222 /* compare logical long unicode memcompare insn with padding */
HELPER(clclu)1223 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1224                        uint32_t r3)
1225 {
1226     uintptr_t ra = GETPC();
1227     uint64_t src1len = get_length(env, r1 + 1);
1228     uint64_t src1 = get_address(env, r1);
1229     uint64_t src3len = get_length(env, r3 + 1);
1230     uint64_t src3 = get_address(env, r3);
1231     uint16_t pad = a2;
1232     uint32_t cc = 0;
1233 
1234     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1235 
1236     set_length(env, r1 + 1, src1len);
1237     set_length(env, r3 + 1, src3len);
1238     set_address(env, r1, src1);
1239     set_address(env, r3, src3);
1240 
1241     return cc;
1242 }
1243 
1244 /* checksum */
HELPER(cksm)1245 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1246                       uint64_t src, uint64_t src_len)
1247 {
1248     uintptr_t ra = GETPC();
1249     uint64_t max_len, len;
1250     uint64_t cksm = (uint32_t)r1;
1251 
1252     /* Lest we fail to service interrupts in a timely manner, limit the
1253        amount of work we're willing to do.  For now, let's cap at 8k.  */
1254     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1255 
1256     /* Process full words as available.  */
1257     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1258         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1259     }
1260 
1261     switch (max_len - len) {
1262     case 1:
1263         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1264         len += 1;
1265         break;
1266     case 2:
1267         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1268         len += 2;
1269         break;
1270     case 3:
1271         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1272         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1273         len += 3;
1274         break;
1275     }
1276 
1277     /* Fold the carry from the checksum.  Note that we can see carry-out
1278        during folding more than once (but probably not more than twice).  */
1279     while (cksm > 0xffffffffull) {
1280         cksm = (uint32_t)cksm + (cksm >> 32);
1281     }
1282 
1283     /* Indicate whether or not we've processed everything.  */
1284     env->cc_op = (len == src_len ? 0 : 3);
1285 
1286     /* Return both cksm and processed length.  */
1287     env->retxl = cksm;
1288     return len;
1289 }
1290 
HELPER(pack)1291 void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1292 {
1293     uintptr_t ra = GETPC();
1294     int len_dest = len >> 4;
1295     int len_src = len & 0xf;
1296     uint8_t b;
1297 
1298     dest += len_dest;
1299     src += len_src;
1300 
1301     /* last byte is special, it only flips the nibbles */
1302     b = cpu_ldub_data_ra(env, src, ra);
1303     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1304     src--;
1305     len_src--;
1306 
1307     /* now pack every value */
1308     while (len_dest > 0) {
1309         b = 0;
1310 
1311         if (len_src >= 0) {
1312             b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1313             src--;
1314             len_src--;
1315         }
1316         if (len_src >= 0) {
1317             b |= cpu_ldub_data_ra(env, src, ra) << 4;
1318             src--;
1319             len_src--;
1320         }
1321 
1322         len_dest--;
1323         dest--;
1324         cpu_stb_data_ra(env, dest, b, ra);
1325     }
1326 }
1327 
/*
 * Common worker for HELPER(pka) and HELPER(pku): pack the low nibble of
 * each source character into a 16-byte destination, right to left, with a
 * positive sign (0xc) in the rightmost nibble.
 *
 * dest   - address of the 16-byte destination
 * src    - address of the first source byte
 * srclen - length of the source; the code positions src on byte
 *          srclen - 1, so this is taken to be a byte count
 *          (NOTE(review): confirm against the translator)
 * ssize  - size in bytes of one source character (the callers in this file
 *          pass 1 and 2)
 *
 * Destination nibbles for which no source character is left are zero.  The
 * destination has room for 31 digits, so any source characters beyond that
 * are never read.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /*
         * A character is available as long as at least ssize bytes remain.
         * The comparison must be ">=": with ">" the leftmost character of
         * the source was never packed.
         */
        if (i == 0) {
            /* Start with a positive sign */
            b = 0xc;
        } else if (srclen >= ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen >= ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1361 
1362 
HELPER(pka)1363 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1364                  uint32_t srclen)
1365 {
1366     do_pkau(env, dest, src, srclen, 1, GETPC());
1367 }
1368 
HELPER(pku)1369 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1370                  uint32_t srclen)
1371 {
1372     do_pkau(env, dest, src, srclen, 2, GETPC());
1373 }
1374 
HELPER(unpk)1375 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1376                   uint64_t src)
1377 {
1378     uintptr_t ra = GETPC();
1379     int len_dest = len >> 4;
1380     int len_src = len & 0xf;
1381     uint8_t b;
1382     int second_nibble = 0;
1383 
1384     dest += len_dest;
1385     src += len_src;
1386 
1387     /* last byte is special, it only flips the nibbles */
1388     b = cpu_ldub_data_ra(env, src, ra);
1389     cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1390     src--;
1391     len_src--;
1392 
1393     /* now pad every nibble with 0xf0 */
1394 
1395     while (len_dest > 0) {
1396         uint8_t cur_byte = 0;
1397 
1398         if (len_src > 0) {
1399             cur_byte = cpu_ldub_data_ra(env, src, ra);
1400         }
1401 
1402         len_dest--;
1403         dest--;
1404 
1405         /* only advance one nibble at a time */
1406         if (second_nibble) {
1407             cur_byte >>= 4;
1408             len_src--;
1409             src--;
1410         }
1411         second_nibble = !second_nibble;
1412 
1413         /* digit */
1414         cur_byte = (cur_byte & 0xf);
1415         /* zone bits */
1416         cur_byte |= 0xf0;
1417 
1418         cpu_stb_data_ra(env, dest, cur_byte, ra);
1419     }
1420 }
1421 
/*
 * Common worker for HELPER(unpka) and HELPER(unpku): expand the digits of a
 * 16-byte source into destination characters of dsize bytes each, every
 * one holding 0x30 plus the digit, working from right to left.
 *
 * dest    - address of the first destination byte
 * destlen - length of the destination in bytes
 * dsize   - size in bytes of one destination character
 * src     - address of the 16-byte source
 *
 * Returns 0 for a plus sign (0xa, 0xc, 0xe, 0xf), 1 for a minus sign (0xb,
 * 0xd) and 3 for an invalid sign (0x0-0x9) in the rightmost source nibble.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;
    uint8_t b, sign;
    uint32_t cc;
    int digit;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* The rightmost nibble is the sign; it decides the result. */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    sign = b & 0xf;
    if (sign < 0xa) {
        cc = 3;  /* invalid */
    } else if (sign == 0xb || sign == 0xd) {
        cc = 1;  /* minus */
    } else {
        cc = 0;  /* plus */
    }

    /*
     * Emit one character per digit.  Even digits are the high nibble of
     * the byte already held in b, odd digits need the next source byte.
     */
    for (digit = 0; digit * dsize < destlen; digit++) {
        if (digit == 31) {
            /* If length is 32/64 bytes, the leftmost character is 0. */
            b = 0;
        } else if (digit & 1) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1472 
HELPER(unpka)1473 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1474                        uint64_t src)
1475 {
1476     return do_unpkau(env, dest, destlen, 1, src, GETPC());
1477 }
1478 
HELPER(unpku)1479 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1480                        uint64_t src)
1481 {
1482     return do_unpkau(env, dest, destlen, 2, src, GETPC());
1483 }
1484 
HELPER(tp)1485 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1486 {
1487     uintptr_t ra = GETPC();
1488     uint32_t cc = 0;
1489     int i;
1490 
1491     for (i = 0; i < destlen; i++) {
1492         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1493         /* digit */
1494         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1495 
1496         if (i == (destlen - 1)) {
1497             /* sign */
1498             cc |= (b & 0xf) < 0xa ? 1 : 0;
1499         } else {
1500             /* digit */
1501             cc |= (b & 0xf) > 0x9 ? 2 : 0;
1502         }
1503     }
1504 
1505     return cc;
1506 }
1507 
/*
 * Translate len + 1 bytes at array in place: each byte is replaced by the
 * byte found at trans + <byte value>.  Returns env->cc_op unmodified.
 */
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i = 0;

    /* len is the count minus one, so at least one byte is translated. */
    do {
        const uint64_t addr = array + i;
        const uint8_t in = cpu_ldub_data_ra(env, addr, ra);
        const uint8_t out = cpu_ldub_data_ra(env, trans + in, ra);

        cpu_stb_data_ra(env, addr, out, ra);
    } while (++i <= len);

    return env->cc_op;
}
1521 
HELPER(tr)1522 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1523                 uint64_t trans)
1524 {
1525     do_helper_tr(env, len, array, trans, GETPC());
1526 }
1527 
HELPER(tre)1528 uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1529                      uint64_t len, uint64_t trans)
1530 {
1531     uintptr_t ra = GETPC();
1532     uint8_t end = env->regs[0] & 0xff;
1533     uint64_t l = len;
1534     uint64_t i;
1535     uint32_t cc = 0;
1536 
1537     if (!(env->psw.mask & PSW_MASK_64)) {
1538         array &= 0x7fffffff;
1539         l = (uint32_t)l;
1540     }
1541 
1542     /* Lest we fail to service interrupts in a timely manner, limit the
1543        amount of work we're willing to do.  For now, let's cap at 8k.  */
1544     if (l > 0x2000) {
1545         l = 0x2000;
1546         cc = 3;
1547     }
1548 
1549     for (i = 0; i < l; i++) {
1550         uint8_t byte, new_byte;
1551 
1552         byte = cpu_ldub_data_ra(env, array + i, ra);
1553 
1554         if (byte == end) {
1555             cc = 1;
1556             break;
1557         }
1558 
1559         new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1560         cpu_stb_data_ra(env, array + i, new_byte, ra);
1561     }
1562 
1563     env->cc_op = cc;
1564     env->retxl = len - i;
1565     return array + i;
1566 }
1567 
do_helper_trt(CPUS390XState * env,int len,uint64_t array,uint64_t trans,int inc,uintptr_t ra)1568 static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1569                                      uint64_t array, uint64_t trans,
1570                                      int inc, uintptr_t ra)
1571 {
1572     int i;
1573 
1574     for (i = 0; i <= len; i++) {
1575         uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1576         uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1577 
1578         if (sbyte != 0) {
1579             set_address(env, 1, array + i * inc);
1580             env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1581             return (i == len) ? 2 : 1;
1582         }
1583     }
1584 
1585     return 0;
1586 }
1587 
/* do_helper_trt() scanning towards higher addresses (step +1). */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    const int step = 1;

    return do_helper_trt(env, len, array, trans, step, ra);
}
1594 
HELPER(trt)1595 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1596                      uint64_t trans)
1597 {
1598     return do_helper_trt(env, len, array, trans, 1, GETPC());
1599 }
1600 
/* do_helper_trt() scanning towards lower addresses (step -1). */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    const int step = -1;

    return do_helper_trt(env, len, array, trans, step, ra);
}
1607 
HELPER(trtr)1608 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1609                       uint64_t trans)
1610 {
1611     return do_helper_trt(env, len, array, trans, -1, GETPC());
1612 }
1613 
1614 /* Translate one/two to one/two */
HELPER(trXX)1615 uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1616                       uint32_t tst, uint32_t sizes)
1617 {
1618     uintptr_t ra = GETPC();
1619     int dsize = (sizes & 1) ? 1 : 2;
1620     int ssize = (sizes & 2) ? 1 : 2;
1621     uint64_t tbl = get_address(env, 1);
1622     uint64_t dst = get_address(env, r1);
1623     uint64_t len = get_length(env, r1 + 1);
1624     uint64_t src = get_address(env, r2);
1625     uint32_t cc = 3;
1626     int i;
1627 
1628     /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1629        the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1630        the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1631     if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1632         tbl &= -4096;
1633     } else {
1634         tbl &= -8;
1635     }
1636 
1637     check_alignment(env, len, ssize, ra);
1638 
1639     /* Lest we fail to service interrupts in a timely manner, */
1640     /* limit the amount of work we're willing to do.   */
1641     for (i = 0; i < 0x2000; i++) {
1642         uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1643         uint64_t tble = tbl + (sval * dsize);
1644         uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1645         if (dval == tst) {
1646             cc = 1;
1647             break;
1648         }
1649         cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1650 
1651         len -= ssize;
1652         src += ssize;
1653         dst += dsize;
1654 
1655         if (len == 0) {
1656             cc = 0;
1657             break;
1658         }
1659     }
1660 
1661     set_address(env, r1, dst);
1662     set_length(env, r1 + 1, len);
1663     set_address(env, r2, src);
1664 
1665     return cc;
1666 }
1667 
HELPER(cdsg)1668 void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1669                   uint32_t r1, uint32_t r3)
1670 {
1671     uintptr_t ra = GETPC();
1672     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1673     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1674     Int128 oldv;
1675     uint64_t oldh, oldl;
1676     bool fail;
1677 
1678     check_alignment(env, addr, 16, ra);
1679 
1680     oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1681     oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1682 
1683     oldv = int128_make128(oldl, oldh);
1684     fail = !int128_eq(oldv, cmpv);
1685     if (fail) {
1686         newv = oldv;
1687     }
1688 
1689     cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1690     cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1691 
1692     env->cc_op = fail;
1693     env->regs[r1] = int128_gethi(oldv);
1694     env->regs[r1 + 1] = int128_getlo(oldv);
1695 }
1696 
HELPER(cdsg_parallel)1697 void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1698                            uint32_t r1, uint32_t r3)
1699 {
1700     uintptr_t ra = GETPC();
1701     Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1702     Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1703     int mem_idx;
1704     TCGMemOpIdx oi;
1705     Int128 oldv;
1706     bool fail;
1707 
1708     assert(HAVE_CMPXCHG128);
1709 
1710     mem_idx = cpu_mmu_index(env, false);
1711     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1712     oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1713     fail = !int128_eq(oldv, cmpv);
1714 
1715     env->cc_op = fail;
1716     env->regs[r1] = int128_gethi(oldv);
1717     env->regs[r1 + 1] = int128_getlo(oldv);
1718 }
1719 
/*
 * COMPARE AND SWAP AND STORE: common body of the csst helpers.
 *
 * The function code (FC) and the storage characteristic (SC) are read from
 * the two low-order bytes of general register 0.  The parameter list
 * address is obtained with get_address(env, 1) and rounded down to a
 * 16-byte boundary; the replacement value is loaded from its start and the
 * store value from offset 16.
 *
 * @r3: register (FC 0 and 1) or even/odd register pair (FC 2) that holds
 *      the compare value and receives the old contents of the first operand.
 * @a1: address of the compare-and-swap operand, 4 << FC bytes wide.
 * @a2: address of the store operand, 1 << SC bytes wide.
 * @parallel: true to use the atomic helpers for the swap and the store,
 *            false to use plain load/store sequences.
 *
 * Returns the condition code: 0 when the comparison succeeded (swap and
 * store were performed), 1 otherwise.  An invalid FC/SC or a misaligned
 * operand raises a specification exception.
 *
 * NOTE(review): GETPC() is evaluated here and not in the HELPER() wrappers;
 * presumably this relies on the function being inlined or tail-called from
 * them - confirm.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /*
     * Sanity check the alignments: a1 to 4 << fc bytes, a2 to 1 << sc bytes.
     * Plain masks are used instead of extract32() because the latter must
     * not be asked for a zero-length field, which sc == 0 would do.
     */
    if ((a1 & ((1u << (fc + 2)) - 1)) || (a2 & ((1u << sc) - 1))) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128  ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores.  For simplicity, load the entire
       store value area from the parameter list.  */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        {
            /* 32-bit operand: compare value is the low word of r3.  */
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            /* Return the old value in the same (low) word the compare
               value was taken from; the high word stays untouched.  */
            env->regs[r3] = deposit64(env->regs[r3], 0, 32, ov);
        }
        break;

    case 1:
        {
            /* 64-bit operand: the whole of r3 is the compare value.  */
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
# ifdef CONFIG_USER_ONLY
                uint64_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
# else
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
# endif
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        {
            /* 128-bit operand: r3 holds the high half, r3 + 1 the low.  */
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
1902 
HELPER(csst)1903 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1904 {
1905     return do_csst(env, r3, a1, a2, false);
1906 }
1907 
HELPER(csst_parallel)1908 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1909                                uint64_t a2)
1910 {
1911     return do_csst(env, r3, a1, a2, true);
1912 }
1913 
1914 #if !defined(CONFIG_USER_ONLY)
HELPER(lctlg)1915 void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1916 {
1917     uintptr_t ra = GETPC();
1918     bool PERchanged = false;
1919     uint64_t src = a2;
1920     uint32_t i;
1921 
1922     if (src & 0x7) {
1923         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1924     }
1925 
1926     for (i = r1;; i = (i + 1) % 16) {
1927         uint64_t val = cpu_ldq_data_ra(env, src, ra);
1928         if (env->cregs[i] != val && i >= 9 && i <= 11) {
1929             PERchanged = true;
1930         }
1931         env->cregs[i] = val;
1932         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1933                    i, src, val);
1934         src += sizeof(uint64_t);
1935 
1936         if (i == r3) {
1937             break;
1938         }
1939     }
1940 
1941     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1942         s390_cpu_recompute_watchpoints(env_cpu(env));
1943     }
1944 
1945     tlb_flush(env_cpu(env));
1946 }
1947 
HELPER(lctl)1948 void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1949 {
1950     uintptr_t ra = GETPC();
1951     bool PERchanged = false;
1952     uint64_t src = a2;
1953     uint32_t i;
1954 
1955     if (src & 0x3) {
1956         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1957     }
1958 
1959     for (i = r1;; i = (i + 1) % 16) {
1960         uint32_t val = cpu_ldl_data_ra(env, src, ra);
1961         if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1962             PERchanged = true;
1963         }
1964         env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1965         HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1966         src += sizeof(uint32_t);
1967 
1968         if (i == r3) {
1969             break;
1970         }
1971     }
1972 
1973     if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1974         s390_cpu_recompute_watchpoints(env_cpu(env));
1975     }
1976 
1977     tlb_flush(env_cpu(env));
1978 }
1979 
HELPER(stctg)1980 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1981 {
1982     uintptr_t ra = GETPC();
1983     uint64_t dest = a2;
1984     uint32_t i;
1985 
1986     if (dest & 0x7) {
1987         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1988     }
1989 
1990     for (i = r1;; i = (i + 1) % 16) {
1991         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1992         dest += sizeof(uint64_t);
1993 
1994         if (i == r3) {
1995             break;
1996         }
1997     }
1998 }
1999 
HELPER(stctl)2000 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2001 {
2002     uintptr_t ra = GETPC();
2003     uint64_t dest = a2;
2004     uint32_t i;
2005 
2006     if (dest & 0x3) {
2007         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2008     }
2009 
2010     for (i = r1;; i = (i + 1) % 16) {
2011         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2012         dest += sizeof(uint32_t);
2013 
2014         if (i == r3) {
2015             break;
2016         }
2017     }
2018 }
2019 
HELPER(testblock)2020 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2021 {
2022     uintptr_t ra = GETPC();
2023     int i;
2024 
2025     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2026 
2027     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2028         cpu_stq_real_ra(env, real_addr + i, 0, ra);
2029     }
2030 
2031     return 0;
2032 }
2033 
HELPER(tprot)2034 uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2035 {
2036     S390CPU *cpu = env_archcpu(env);
2037     CPUState *cs = env_cpu(env);
2038 
2039     /*
2040      * TODO: we currently don't handle all access protection types
2041      * (including access-list and key-controlled) as well as AR mode.
2042      */
2043     if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2044         /* Fetching permitted; storing permitted */
2045         return 0;
2046     }
2047 
2048     if (env->int_pgm_code == PGM_PROTECTION) {
2049         /* retry if reading is possible */
2050         cs->exception_index = -1;
2051         if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2052             /* Fetching permitted; storing not permitted */
2053             return 1;
2054         }
2055     }
2056 
2057     switch (env->int_pgm_code) {
2058     case PGM_PROTECTION:
2059         /* Fetching not permitted; storing not permitted */
2060         cs->exception_index = -1;
2061         return 2;
2062     case PGM_ADDRESSING:
2063     case PGM_TRANS_SPEC:
2064         /* exceptions forwarded to the guest */
2065         s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2066         return 0;
2067     }
2068 
2069     /* Translation not available */
2070     cs->exception_index = -1;
2071     return 3;
2072 }
2073 
2074 /* insert storage key extended */
HELPER(iske)2075 uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2076 {
2077     static S390SKeysState *ss;
2078     static S390SKeysClass *skeyclass;
2079     uint64_t addr = wrap_address(env, r2);
2080     uint8_t key;
2081 
2082     if (addr > ram_size) {
2083         return 0;
2084     }
2085 
2086     if (unlikely(!ss)) {
2087         ss = s390_get_skeys_device();
2088         skeyclass = S390_SKEYS_GET_CLASS(ss);
2089     }
2090 
2091     if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2092         return 0;
2093     }
2094     return key;
2095 }
2096 
2097 /* set storage key extended */
HELPER(sske)2098 void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2099 {
2100     static S390SKeysState *ss;
2101     static S390SKeysClass *skeyclass;
2102     uint64_t addr = wrap_address(env, r2);
2103     uint8_t key;
2104 
2105     if (addr > ram_size) {
2106         return;
2107     }
2108 
2109     if (unlikely(!ss)) {
2110         ss = s390_get_skeys_device();
2111         skeyclass = S390_SKEYS_GET_CLASS(ss);
2112     }
2113 
2114     key = (uint8_t) r1;
2115     skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2116    /*
2117     * As we can only flush by virtual address and not all the entries
2118     * that point to a physical address we have to flush the whole TLB.
2119     */
2120     tlb_flush_all_cpus_synced(env_cpu(env));
2121 }
2122 
2123 /* reset reference bit extended */
HELPER(rrbe)2124 uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2125 {
2126     static S390SKeysState *ss;
2127     static S390SKeysClass *skeyclass;
2128     uint8_t re, key;
2129 
2130     if (r2 > ram_size) {
2131         return 0;
2132     }
2133 
2134     if (unlikely(!ss)) {
2135         ss = s390_get_skeys_device();
2136         skeyclass = S390_SKEYS_GET_CLASS(ss);
2137     }
2138 
2139     if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2140         return 0;
2141     }
2142 
2143     re = key & (SK_R | SK_C);
2144     key &= ~SK_R;
2145 
2146     if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2147         return 0;
2148     }
2149    /*
2150     * As we can only flush by virtual address and not all the entries
2151     * that point to a physical address we have to flush the whole TLB.
2152     */
2153     tlb_flush_all_cpus_synced(env_cpu(env));
2154 
2155     /*
2156      * cc
2157      *
2158      * 0  Reference bit zero; change bit zero
2159      * 1  Reference bit zero; change bit one
2160      * 2  Reference bit one; change bit zero
2161      * 3  Reference bit one; change bit one
2162      */
2163 
2164     return re >> 1;
2165 }
2166 
HELPER(mvcs)2167 uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2168 {
2169     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2170     S390Access srca, desta;
2171     uintptr_t ra = GETPC();
2172     int cc = 0;
2173 
2174     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2175                __func__, l, a1, a2);
2176 
2177     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2178         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2179         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2180     }
2181 
2182     l = wrap_length32(env, l);
2183     if (l > 256) {
2184         /* max 256 */
2185         l = 256;
2186         cc = 3;
2187     } else if (!l) {
2188         return cc;
2189     }
2190 
2191     /* TODO: Access key handling */
2192     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2193     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2194     access_memmove(env, &desta, &srca, ra);
2195     return cc;
2196 }
2197 
HELPER(mvcp)2198 uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2199 {
2200     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2201     S390Access srca, desta;
2202     uintptr_t ra = GETPC();
2203     int cc = 0;
2204 
2205     HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2206                __func__, l, a1, a2);
2207 
2208     if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2209         psw_as == AS_HOME || psw_as == AS_ACCREG) {
2210         s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2211     }
2212 
2213     l = wrap_length32(env, l);
2214     if (l > 256) {
2215         /* max 256 */
2216         l = 256;
2217         cc = 3;
2218     } else if (!l) {
2219         return cc;
2220     }
2221 
2222     /* TODO: Access key handling */
2223     srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2224     desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2225     access_memmove(env, &desta, &srca, ra);
2226     return cc;
2227 }
2228 
HELPER(idte)2229 void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2230 {
2231     CPUState *cs = env_cpu(env);
2232     const uintptr_t ra = GETPC();
2233     uint64_t table, entry, raddr;
2234     uint16_t entries, i, index = 0;
2235 
2236     if (r2 & 0xff000) {
2237         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2238     }
2239 
2240     if (!(r2 & 0x800)) {
2241         /* invalidation-and-clearing operation */
2242         table = r1 & ASCE_ORIGIN;
2243         entries = (r2 & 0x7ff) + 1;
2244 
2245         switch (r1 & ASCE_TYPE_MASK) {
2246         case ASCE_TYPE_REGION1:
2247             index = (r2 >> 53) & 0x7ff;
2248             break;
2249         case ASCE_TYPE_REGION2:
2250             index = (r2 >> 42) & 0x7ff;
2251             break;
2252         case ASCE_TYPE_REGION3:
2253             index = (r2 >> 31) & 0x7ff;
2254             break;
2255         case ASCE_TYPE_SEGMENT:
2256             index = (r2 >> 20) & 0x7ff;
2257             break;
2258         }
2259         for (i = 0; i < entries; i++) {
2260             /* addresses are not wrapped in 24/31bit mode but table index is */
2261             raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2262             entry = cpu_ldq_real_ra(env, raddr, ra);
2263             if (!(entry & REGION_ENTRY_I)) {
2264                 /* we are allowed to not store if already invalid */
2265                 entry |= REGION_ENTRY_I;
2266                 cpu_stq_real_ra(env, raddr, entry, ra);
2267             }
2268         }
2269     }
2270 
2271     /* We simply flush the complete tlb, therefore we can ignore r3. */
2272     if (m4 & 1) {
2273         tlb_flush(cs);
2274     } else {
2275         tlb_flush_all_cpus_synced(cs);
2276     }
2277 }
2278 
2279 /* invalidate pte */
HELPER(ipte)2280 void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2281                   uint32_t m4)
2282 {
2283     CPUState *cs = env_cpu(env);
2284     const uintptr_t ra = GETPC();
2285     uint64_t page = vaddr & TARGET_PAGE_MASK;
2286     uint64_t pte_addr, pte;
2287 
2288     /* Compute the page table entry address */
2289     pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2290     pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2291 
2292     /* Mark the page table entry as invalid */
2293     pte = cpu_ldq_real_ra(env, pte_addr, ra);
2294     pte |= PAGE_ENTRY_I;
2295     cpu_stq_real_ra(env, pte_addr, pte, ra);
2296 
2297     /* XXX we exploit the fact that Linux passes the exact virtual
2298        address here - it's not obliged to! */
2299     if (m4 & 1) {
2300         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2301             tlb_flush_page(cs, page);
2302             /* XXX 31-bit hack */
2303             tlb_flush_page(cs, page ^ 0x80000000);
2304         } else {
2305             /* looks like we don't have a valid virtual address */
2306             tlb_flush(cs);
2307         }
2308     } else {
2309         if (vaddr & ~VADDR_PAGE_TX_MASK) {
2310             tlb_flush_page_all_cpus_synced(cs, page);
2311             /* XXX 31-bit hack */
2312             tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2313         } else {
2314             /* looks like we don't have a valid virtual address */
2315             tlb_flush_all_cpus_synced(cs);
2316         }
2317     }
2318 }
2319 
2320 /* flush local tlb */
HELPER(ptlb)2321 void HELPER(ptlb)(CPUS390XState *env)
2322 {
2323     tlb_flush(env_cpu(env));
2324 }
2325 
2326 /* flush global tlb */
HELPER(purge)2327 void HELPER(purge)(CPUS390XState *env)
2328 {
2329     tlb_flush_all_cpus_synced(env_cpu(env));
2330 }
2331 
2332 /* load using real address */
HELPER(lura)2333 uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr)
2334 {
2335     return cpu_ldl_real_ra(env, wrap_address(env, addr), GETPC());
2336 }
2337 
HELPER(lurag)2338 uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr)
2339 {
2340     return cpu_ldq_real_ra(env, wrap_address(env, addr), GETPC());
2341 }
2342 
2343 /* store using real address */
HELPER(stura)2344 void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1)
2345 {
2346     cpu_stl_real_ra(env, wrap_address(env, addr), (uint32_t)v1, GETPC());
2347 
2348     if ((env->psw.mask & PSW_MASK_PER) &&
2349         (env->cregs[9] & PER_CR9_EVENT_STORE) &&
2350         (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
2351         /* PSW is saved just before calling the helper.  */
2352         env->per_address = env->psw.addr;
2353         env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
2354     }
2355 }
2356 
HELPER(sturg)2357 void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1)
2358 {
2359     cpu_stq_real_ra(env, wrap_address(env, addr), v1, GETPC());
2360 
2361     if ((env->psw.mask & PSW_MASK_PER) &&
2362         (env->cregs[9] & PER_CR9_EVENT_STORE) &&
2363         (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
2364         /* PSW is saved just before calling the helper.  */
2365         env->per_address = env->psw.addr;
2366         env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
2367     }
2368 }
2369 
2370 /* load real address */
HELPER(lra)2371 uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2372 {
2373     uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2374     uint64_t ret, tec;
2375     int flags, exc, cc;
2376 
2377     /* XXX incomplete - has more corner cases */
2378     if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2379         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2380     }
2381 
2382     exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2383     if (exc) {
2384         cc = 3;
2385         ret = exc | 0x80000000;
2386     } else {
2387         cc = 0;
2388         ret |= addr & ~TARGET_PAGE_MASK;
2389     }
2390 
2391     env->cc_op = cc;
2392     return ret;
2393 }
2394 #endif
2395 
2396 /* load pair from quadword */
HELPER(lpq)2397 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2398 {
2399     uintptr_t ra = GETPC();
2400     uint64_t hi, lo;
2401 
2402     check_alignment(env, addr, 16, ra);
2403     hi = cpu_ldq_data_ra(env, addr + 0, ra);
2404     lo = cpu_ldq_data_ra(env, addr + 8, ra);
2405 
2406     env->retxl = lo;
2407     return hi;
2408 }
2409 
HELPER(lpq_parallel)2410 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2411 {
2412     uintptr_t ra = GETPC();
2413     uint64_t hi, lo;
2414     int mem_idx;
2415     TCGMemOpIdx oi;
2416     Int128 v;
2417 
2418     assert(HAVE_ATOMIC128);
2419 
2420     mem_idx = cpu_mmu_index(env, false);
2421     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2422     v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
2423     hi = int128_gethi(v);
2424     lo = int128_getlo(v);
2425 
2426     env->retxl = lo;
2427     return hi;
2428 }
2429 
2430 /* store pair to quadword */
HELPER(stpq)2431 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2432                   uint64_t low, uint64_t high)
2433 {
2434     uintptr_t ra = GETPC();
2435 
2436     check_alignment(env, addr, 16, ra);
2437     cpu_stq_data_ra(env, addr + 0, high, ra);
2438     cpu_stq_data_ra(env, addr + 8, low, ra);
2439 }
2440 
HELPER(stpq_parallel)2441 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2442                            uint64_t low, uint64_t high)
2443 {
2444     uintptr_t ra = GETPC();
2445     int mem_idx;
2446     TCGMemOpIdx oi;
2447     Int128 v;
2448 
2449     assert(HAVE_ATOMIC128);
2450 
2451     mem_idx = cpu_mmu_index(env, false);
2452     oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2453     v = int128_make128(low, high);
2454     helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
2455 }
2456 
2457 /* Execute instruction.  This instruction executes an insn modified with
2458    the contents of r1.  It does not change the executed instruction in memory;
2459    it does not change the program counter.
2460 
2461    Perform this by recording the modified instruction in env->ex_value.
2462    This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2463 */
HELPER(ex)2464 void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2465 {
2466     uint64_t insn = cpu_lduw_code(env, addr);
2467     uint8_t opc = insn >> 8;
2468 
2469     /* Or in the contents of R1[56:63].  */
2470     insn |= r1 & 0xff;
2471 
2472     /* Load the rest of the instruction.  */
2473     insn <<= 48;
2474     switch (get_ilen(opc)) {
2475     case 2:
2476         break;
2477     case 4:
2478         insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2479         break;
2480     case 6:
2481         insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2482         break;
2483     default:
2484         g_assert_not_reached();
2485     }
2486 
2487     /* The very most common cases can be sped up by avoiding a new TB.  */
2488     if ((opc & 0xf0) == 0xd0) {
2489         typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2490                                       uint64_t, uintptr_t);
2491         static const dx_helper dx[16] = {
2492             [0x0] = do_helper_trt_bkwd,
2493             [0x2] = do_helper_mvc,
2494             [0x4] = do_helper_nc,
2495             [0x5] = do_helper_clc,
2496             [0x6] = do_helper_oc,
2497             [0x7] = do_helper_xc,
2498             [0xc] = do_helper_tr,
2499             [0xd] = do_helper_trt_fwd,
2500         };
2501         dx_helper helper = dx[opc & 0xf];
2502 
2503         if (helper) {
2504             uint32_t l = extract64(insn, 48, 8);
2505             uint32_t b1 = extract64(insn, 44, 4);
2506             uint32_t d1 = extract64(insn, 32, 12);
2507             uint32_t b2 = extract64(insn, 28, 4);
2508             uint32_t d2 = extract64(insn, 16, 12);
2509             uint64_t a1 = wrap_address(env, env->regs[b1] + d1);
2510             uint64_t a2 = wrap_address(env, env->regs[b2] + d2);
2511 
2512             env->cc_op = helper(env, l, a1, a2, 0);
2513             env->psw.addr += ilen;
2514             return;
2515         }
2516     } else if (opc == 0x0a) {
2517         env->int_svc_code = extract64(insn, 48, 8);
2518         env->int_svc_ilen = ilen;
2519         helper_exception(env, EXCP_SVC);
2520         g_assert_not_reached();
2521     }
2522 
2523     /* Record the insn we want to execute as well as the ilen to use
2524        during the execution of the target insn.  This will also ensure
2525        that ex_value is non-zero, which flags that we are in a state
2526        that requires such execution.  */
2527     env->ex_value = insn | ilen;
2528 }
2529 
HELPER(mvcos)2530 uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2531                        uint64_t len)
2532 {
2533     const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2534     const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2535     const uint64_t r0 = env->regs[0];
2536     const uintptr_t ra = GETPC();
2537     uint8_t dest_key, dest_as, dest_k, dest_a;
2538     uint8_t src_key, src_as, src_k, src_a;
2539     uint64_t val;
2540     int cc = 0;
2541 
2542     HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2543                __func__, dest, src, len);
2544 
2545     if (!(env->psw.mask & PSW_MASK_DAT)) {
2546         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2547     }
2548 
2549     /* OAC (operand access control) for the first operand -> dest */
2550     val = (r0 & 0xffff0000ULL) >> 16;
2551     dest_key = (val >> 12) & 0xf;
2552     dest_as = (val >> 6) & 0x3;
2553     dest_k = (val >> 1) & 0x1;
2554     dest_a = val & 0x1;
2555 
2556     /* OAC (operand access control) for the second operand -> src */
2557     val = (r0 & 0x0000ffffULL);
2558     src_key = (val >> 12) & 0xf;
2559     src_as = (val >> 6) & 0x3;
2560     src_k = (val >> 1) & 0x1;
2561     src_a = val & 0x1;
2562 
2563     if (!dest_k) {
2564         dest_key = psw_key;
2565     }
2566     if (!src_k) {
2567         src_key = psw_key;
2568     }
2569     if (!dest_a) {
2570         dest_as = psw_as;
2571     }
2572     if (!src_a) {
2573         src_as = psw_as;
2574     }
2575 
2576     if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2577         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2578     }
2579     if (!(env->cregs[0] & CR0_SECONDARY) &&
2580         (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2581         tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2582     }
2583     if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2584         tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2585     }
2586 
2587     len = wrap_length32(env, len);
2588     if (len > 4096) {
2589         cc = 3;
2590         len = 4096;
2591     }
2592 
2593     /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2594     if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2595         (env->psw.mask & PSW_MASK_PSTATE)) {
2596         qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2597                       __func__);
2598         tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2599     }
2600 
2601     /* FIXME: Access using correct keys and AR-mode */
2602     if (len) {
2603         S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2604                                          mmu_idx_from_as(src_as), ra);
2605         S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2606                                           mmu_idx_from_as(dest_as), ra);
2607 
2608         access_memmove(env, &desta, &srca, ra);
2609     }
2610 
2611     return cc;
2612 }
2613 
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned.

   ADDR is the guest address of the character, ILEN the number of input
   bytes available there and RA the host return address used for the guest
   memory accesses.  ENH_CHECK requests additional validity checks on the
   input.  As an example of the failure values, decode_utf8() returns 0
   when ILEN is too short for the character and 2 when the character is
   invalid.  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);
2620 
/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned.

   C is the character to encode and RA the host return address used for the
   guest memory accesses.  NOTE(review): ILEN is presumably the number of
   bytes that may be written at ADDR - confirm against the encoders.  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2627 
/*
 * Decode one UTF-8 character from guest memory.
 *
 * @addr: guest address of the first byte.
 * @ilen: number of input bytes available at @addr.
 * @enh_check: when true, additionally reject malformed continuation bytes,
 *             encodings that could be shorter, UTF-16 surrogates and values
 *             above 0x10ffff.
 * @ra: host return address used for the guest memory accesses.
 * @ochar: receives the decoded character on success.
 * @olen: receives the number of input bytes consumed on success.
 *
 * Returns -1 on success, 0 when @ilen is too short for the character and
 * 2 when the character is invalid.
 */
static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       bool enh_check, uintptr_t ra,
                       uint32_t *ochar, uint32_t *olen)
{
    uint8_t s0, s1, s2, s3;
    uint32_t c, l;

    if (ilen < 1) {
        return 0;
    }
    s0 = cpu_ldub_data_ra(env, addr, ra);
    if (s0 <= 0x7f) {
        /* one byte character */
        l = 1;
        c = s0;
    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
        /* invalid character */
        return 2;
    } else if (s0 <= 0xdf) {
        /* two byte character */
        l = 2;
        if (ilen < 2) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        c = s0 & 0x1f;
        c = (c << 6) | (s1 & 0x3f);
        if (enh_check && (s1 & 0xc0) != 0x80) {
            return 2;
        }
    } else if (s0 <= 0xef) {
        /* three byte character */
        l = 3;
        if (ilen < 3) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        c = s0 & 0x0f;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        /* Fold the byte-by-byte range descriptions in the PoO into
           tests against the complete value.  It disallows encodings
           that could be smaller, and the UTF-16 surrogates.  Values
           below 0x800 fit into two bytes, so 0x800 is the smallest
           legitimate three byte character.  */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || c < 0x800
                || (c >= 0xd800 && c <= 0xdfff))) {
            return 2;
        }
    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
        /* four byte character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
        c = s0 & 0x07;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        c = (c << 6) | (s3 & 0x3f);
        /* As above: reject encodings that could be smaller and values
           beyond the Unicode range.  */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || (s3 & 0xc0) != 0x80
                || c < 0x010000
                || c > 0x10ffff)) {
            return 2;
        }
    } else {
        /* invalid character */
        return 2;
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2710 
/*
 * Decode a single UTF-16 character from guest memory at @addr, where
 * @ilen source bytes remain.
 *
 * Returns -1 on success with the code point in *ochar and the number of
 * bytes consumed (2 or 4) in *olen.  Returns 0 if @ilen is too short for
 * the character and 2 if, with @enh_check set, the second unit of a pair
 * is not in the 0xdc00..0xdfff range.
 */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t first, second;

    if (ilen < 2) {
        return 0;
    }

    first = cpu_lduw_data_ra(env, addr, ra);
    if ((first & 0xfc00) != 0xd800) {
        /* Anything outside 0xd800..0xdbff stands for itself.  */
        *ochar = first;
        *olen = 2;
        return -1;
    }

    /* The first unit opens a pair, so a second unit is required.  */
    if (ilen < 4) {
        return 0;
    }
    second = cpu_lduw_data_ra(env, addr + 2, ra);
    if (enh_check && (second & 0xfc00) != 0xdc00) {
        /* invalid surrogate character */
        return 2;
    }

    /* Bits 6..9 of the first unit hold (bits 16..20 of the result) - 1;
       its low 6 bits and the low 10 bits of the second unit follow.  */
    *ochar = ((extract32(first, 6, 4) + 1) << 16)
             | ((uint32_t)(first & 0x3f) << 10)
             | (second & 0x3ff);
    *olen = 4;
    return -1;
}
2746 
/*
 * Decode a single UTF-32 character from guest memory at @addr, where
 * @ilen source bytes remain.  @enh_check is accepted for signature
 * compatibility with the other decoders but is not consulted.
 *
 * Returns -1 on success (code point in *ochar, *olen set to 4), 0 if
 * fewer than four bytes remain, and 2 if the value lies in
 * 0xd800..0xdbff or exceeds 0x10ffff.
 */
static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t value;

    if (ilen < 4) {
        return 0;
    }

    value = cpu_ldl_data_ra(env, addr, ra);
    if (value > 0x10ffff || (value >= 0xd800 && value <= 0xdbff)) {
        /* invalid unicode character */
        return 2;
    }

    *olen = 4;
    *ochar = value;
    return -1;
}
2766 
/*
 * Encode code point @c as UTF-8 into guest memory at @addr.
 *
 * @ilen is the number of bytes still available at the destination.
 * Returns -1 on success with the number of bytes written in *olen, or 1
 * (without storing anything) if the encoding does not fit in @ilen.
 */
static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint8_t bytes[4];
    uint32_t count, pos;

    /* The magnitude of the code point selects the encoded length.  */
    if (c <= 0x7f) {
        count = 1;
    } else if (c <= 0x7ff) {
        count = 2;
    } else if (c <= 0xffff) {
        count = 3;
    } else {
        count = 4;
    }

    if (ilen < count) {
        return 1;
    }

    switch (count) {
    case 1:
        /* one byte character */
        bytes[0] = c;
        break;
    case 2:
        /* two byte character */
        bytes[0] = 0xc0 | ((c >> 6) & 0x1f);
        bytes[1] = 0x80 | (c & 0x3f);
        break;
    case 3:
        /* three byte character */
        bytes[0] = 0xe0 | ((c >> 12) & 0x0f);
        bytes[1] = 0x80 | ((c >> 6) & 0x3f);
        bytes[2] = 0x80 | (c & 0x3f);
        break;
    default:
        /* four byte character */
        bytes[0] = 0xf0 | ((c >> 18) & 0x07);
        bytes[1] = 0x80 | ((c >> 12) & 0x3f);
        bytes[2] = 0x80 | ((c >> 6) & 0x3f);
        bytes[3] = 0x80 | (c & 0x3f);
        break;
    }

    /* Store in ascending address order, lead byte first.  */
    for (pos = 0; pos < count; pos++) {
        cpu_stb_data_ra(env, addr + pos, bytes[pos], ra);
    }

    *olen = count;
    return -1;
}
2807 
/*
 * Encode code point @c as UTF-16 into guest memory at @addr.
 *
 * @ilen is the number of bytes still available at the destination.
 * Returns -1 on success with the number of bytes written (2 or 4) in
 * *olen, or 1 (without storing anything) if the encoding does not fit.
 */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    if (c > 0xffff) {
        /* two word character */
        uint32_t upper = (c >> 16) & 0x1f;
        uint16_t lead, trail;

        if (ilen < 4) {
            return 1;
        }
        /* Lead unit: 0xd800 marker, (bits 16..20 of c) - 1 truncated to
           four bits in positions 6..9, then bits 10..15 of c.  */
        lead = 0xd800 | (((upper - 1) & 0xf) << 6) | ((c >> 10) & 0x3f);
        /* Trail unit: 0xdc00 marker plus the low ten bits of c.  */
        trail = 0xdc00 | (c & 0x3ff);

        cpu_stw_data_ra(env, addr + 0, lead, ra);
        cpu_stw_data_ra(env, addr + 2, trail, ra);
        *olen = 4;
        return -1;
    }

    /* one word character */
    if (ilen < 2) {
        return 1;
    }
    cpu_stw_data_ra(env, addr, c, ra);
    *olen = 2;
    return -1;
}
2835 
/*
 * Store code point @c as one 32-bit value into guest memory at @addr.
 *
 * Returns 1 without storing if fewer than four destination bytes (@ilen)
 * remain; otherwise stores the value, sets *olen to 4 and returns -1.
 */
static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    if (ilen >= 4) {
        cpu_stl_data_ra(env, addr, c, ra);
        *olen = 4;
        return -1;
    }
    return 1;
}
2846 
/*
 * Common driver for the Unicode conversion helpers.
 *
 * The destination address/length come from registers r1 and r1 + 1, the
 * source address/length from r2 and r2 + 1.  Characters are pulled from
 * the source with @decode and written to the destination with @encode
 * until one of them returns a non-negative value or the work limit is
 * reached; the four registers are then updated to reflect the characters
 * that were completely converted.
 *
 * Returns the non-negative value that stopped the loop, or 3 if the work
 * limit was reached first.
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    /* Result if the budget below runs out before either side stops us.  */
    int cc = 3;
    int budget;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (budget = 256; budget > 0; --budget) {
        uint32_t ch, consumed, produced;
        int rc;

        rc = decode(env, src, slen, enh_check, ra, &ch, &consumed);
        if (unlikely(rc >= 0)) {
            cc = rc;
            break;
        }
        rc = encode(env, dst, dlen, ra, ch, &produced);
        if (unlikely(rc >= 0)) {
            cc = rc;
            break;
        }

        /* Only commit progress once the character is fully converted.  */
        src += consumed;
        slen -= consumed;
        dst += produced;
        dlen -= produced;
    }

    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2887 
HELPER(cu12)2888 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2889 {
2890     return convert_unicode(env, r1, r2, m3, GETPC(),
2891                            decode_utf8, encode_utf16);
2892 }
2893 
HELPER(cu14)2894 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2895 {
2896     return convert_unicode(env, r1, r2, m3, GETPC(),
2897                            decode_utf8, encode_utf32);
2898 }
2899 
HELPER(cu21)2900 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2901 {
2902     return convert_unicode(env, r1, r2, m3, GETPC(),
2903                            decode_utf16, encode_utf8);
2904 }
2905 
HELPER(cu24)2906 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2907 {
2908     return convert_unicode(env, r1, r2, m3, GETPC(),
2909                            decode_utf16, encode_utf32);
2910 }
2911 
HELPER(cu41)2912 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2913 {
2914     return convert_unicode(env, r1, r2, m3, GETPC(),
2915                            decode_utf32, encode_utf8);
2916 }
2917 
HELPER(cu42)2918 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2919 {
2920     return convert_unicode(env, r1, r2, m3, GETPC(),
2921                            decode_utf32, encode_utf16);
2922 }
2923 
/*
 * Probe @len bytes starting at @addr for write access, one page-bounded
 * piece at a time, wrapping the address between pieces.
 */
void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
                        uintptr_t ra)
{
    uint64_t cur = addr;
    uint64_t remaining = len;

    /* test the actual access, not just any access to the page due to LAP */
    while (remaining != 0) {
        /* Bytes from cur up to the end of its page (assuming
           TARGET_PAGE_MASK has every bit above the page offset set),
           clamped to what is still left to probe.  */
        uint64_t chunk = -(cur | TARGET_PAGE_MASK);

        if (remaining < chunk) {
            chunk = remaining;
        }

        probe_write(env, cur, chunk, cpu_mmu_index(env, false), ra);
        remaining -= chunk;
        cur = wrap_address(env, cur + chunk);
    }
}
2937 
HELPER(probe_write_access)2938 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2939 {
2940     probe_write_access(env, addr, len, GETPC());
2941 }
2942