1 /*
2 * S/390 memory access helper routines
3 *
4 * Copyright (c) 2009 Ulrich Hecht
5 * Copyright (c) 2009 Alexander Graf
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "qemu/osdep.h"
22 #include "cpu.h"
23 #include "s390x-internal.h"
24 #include "tcg_s390x.h"
25 #include "exec/helper-proto.h"
26 #include "exec/exec-all.h"
27 #include "exec/cpu_ldst.h"
28 #include "qemu/int128.h"
29 #include "qemu/atomic128.h"
30 #include "trace.h"
31
32 #if !defined(CONFIG_USER_ONLY)
33 #include "hw/s390x/storage-keys.h"
34 #include "hw/boards.h"
35 #endif
36
37 /*****************************************************************************/
38 /* Softmmu support */
39
40 /* #define DEBUG_HELPER */
41 #ifdef DEBUG_HELPER
42 #define HELPER_LOG(x...) qemu_log(x)
43 #else
44 #define HELPER_LOG(x...)
45 #endif
46
psw_key_valid(CPUS390XState * env,uint8_t psw_key)47 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
48 {
49 uint16_t pkm = env->cregs[3] >> 16;
50
51 if (env->psw.mask & PSW_MASK_PSTATE) {
52 /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
53 return pkm & (0x80 >> psw_key);
54 }
55 return true;
56 }
57
is_destructive_overlap(CPUS390XState * env,uint64_t dest,uint64_t src,uint32_t len)58 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
59 uint64_t src, uint32_t len)
60 {
61 if (!len || src == dest) {
62 return false;
63 }
64 /* Take care of wrapping at the end of address space. */
65 if (unlikely(wrap_address(env, src + len - 1) < src)) {
66 return dest > src || dest <= wrap_address(env, src + len - 1);
67 }
68 return dest > src && dest <= src + len - 1;
69 }
70
71 /* Trigger a SPECIFICATION exception if an address or a length is not
72 naturally aligned. */
check_alignment(CPUS390XState * env,uint64_t v,int wordsize,uintptr_t ra)73 static inline void check_alignment(CPUS390XState *env, uint64_t v,
74 int wordsize, uintptr_t ra)
75 {
76 if (v % wordsize) {
77 tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
78 }
79 }
80
81 /* Load a value from memory according to its size. */
cpu_ldusize_data_ra(CPUS390XState * env,uint64_t addr,int wordsize,uintptr_t ra)82 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
83 int wordsize, uintptr_t ra)
84 {
85 switch (wordsize) {
86 case 1:
87 return cpu_ldub_data_ra(env, addr, ra);
88 case 2:
89 return cpu_lduw_data_ra(env, addr, ra);
90 default:
91 abort();
92 }
93 }
94
95 /* Store a to memory according to its size. */
cpu_stsize_data_ra(CPUS390XState * env,uint64_t addr,uint64_t value,int wordsize,uintptr_t ra)96 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
97 uint64_t value, int wordsize,
98 uintptr_t ra)
99 {
100 switch (wordsize) {
101 case 1:
102 cpu_stb_data_ra(env, addr, value, ra);
103 break;
104 case 2:
105 cpu_stw_data_ra(env, addr, value, ra);
106 break;
107 default:
108 abort();
109 }
110 }
111
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;   /* guest virtual address of the first page part */
    target_ulong vaddr2;   /* guest virtual address of the second part (if size2) */
    char *haddr1;          /* host address for part 1; NULL -> use ld/st helpers */
    char *haddr2;          /* host address for part 2; NULL -> use ld/st helpers */
    uint16_t size1;        /* bytes accessed on the first page */
    uint16_t size2;        /* bytes accessed on the second page; 0 if single page */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
132
133 /*
134 * With nonfault=1, return the PGM_ exception that would have been injected
135 * into the guest; return 0 if no exception was detected.
136 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
138 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
139 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
#if defined(CONFIG_USER_ONLY)
    /* User-only: probe_access_flags() itself yields the PGM_ exception. */
    return probe_access_flags(env, addr, access_type, mmu_idx,
                              nonfault, phost, ra);
#else
    int flags;

    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        /* tlb_fill() recorded an exception; hand the PGM_ code back. */
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
    return 0;
#endif
}
171
/*
 * Populate *access for an access of @size bytes starting at @vaddr1,
 * splitting it across the (at most two) pages it touches and probing
 * each part. With @nonfault, return the PGM_ exception code instead of
 * raising it; returns 0 on success.
 */
static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

    /*
     * Bytes that fit on the first page; the remainder spills onto a second
     * page. (The original joined these statements with a stray comma
     * operator; use two proper statements.)
     */
    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}
212
access_prepare(CPUS390XState * env,vaddr vaddr,int size,MMUAccessType access_type,int mmu_idx,uintptr_t ra)213 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
214 MMUAccessType access_type, int mmu_idx,
215 uintptr_t ra)
216 {
217 S390Access ret;
218 int exc = access_prepare_nf(&ret, env, false, vaddr, size,
219 access_type, mmu_idx, ra);
220 assert(!exc);
221 return ret;
222 }
223
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: the host mapping is always available. */
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        /* Direct access to the host page. */
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            /* The first store made the page accessible: finish via memset. */
            memset(haddr + 1, byte, size - 1);
        } else {
            /* Still no host mapping: fall back to byte-wise stores. */
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}
256
/* Fill the whole access range (both page parts) with @byte. */
static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (unlikely(desta->size2)) {
        /* The access crosses a page boundary: also fill the second part. */
        do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
                         desta->size2, desta->mmu_idx, ra);
    }
}
269
/*
 * Fetch one byte from a single page part. Prefer the cached host mapping
 * (*haddr); otherwise go through the ld/st helpers and try to cache the
 * host address for subsequent bytes.
 */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}
291
access_get_byte(CPUS390XState * env,S390Access * access,int offset,uintptr_t ra)292 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
293 int offset, uintptr_t ra)
294 {
295 if (offset < access->size1) {
296 return do_access_get_byte(env, access->vaddr1, &access->haddr1,
297 offset, access->mmu_idx, ra);
298 }
299 return do_access_get_byte(env, access->vaddr2, &access->haddr2,
300 offset - access->size1, access->mmu_idx, ra);
301 }
302
/*
 * Store one byte to a single page part. Prefer the cached host mapping
 * (*haddr); otherwise go through the ld/st helpers and try to cache the
 * host address for subsequent bytes.
 */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}
324
/* Store @byte at @offset within the prepared access range. */
static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset >= access->size1) {
        /* The byte lives on the second page part. */
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
        return;
    }
    do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                       access->mmu_idx, ra);
}
336
337 /*
338 * Move data with the same semantics as memmove() in case ranges don't overlap
339 * or src > dest. Undefined behavior on destructive overlaps.
340 */
access_memmove(CPUS390XState * env,S390Access * desta,S390Access * srca,uintptr_t ra)341 static void access_memmove(CPUS390XState *env, S390Access *desta,
342 S390Access *srca, uintptr_t ra)
343 {
344 int diff;
345
346 g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
347
348 /* Fallback to slow access in case we don't have access to all host pages */
349 if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
350 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
351 int i;
352
353 for (i = 0; i < desta->size1 + desta->size2; i++) {
354 uint8_t byte = access_get_byte(env, srca, i, ra);
355
356 access_set_byte(env, desta, i, byte, ra);
357 }
358 return;
359 }
360
361 if (srca->size1 == desta->size1) {
362 memmove(desta->haddr1, srca->haddr1, srca->size1);
363 if (unlikely(srca->size2)) {
364 memmove(desta->haddr2, srca->haddr2, srca->size2);
365 }
366 } else if (srca->size1 < desta->size1) {
367 diff = desta->size1 - srca->size1;
368 memmove(desta->haddr1, srca->haddr1, srca->size1);
369 memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
370 if (likely(desta->size2)) {
371 memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
372 }
373 } else {
374 diff = srca->size1 - desta->size1;
375 memmove(desta->haddr1, srca->haddr1, desta->size1);
376 memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
377 if (likely(srca->size2)) {
378 memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
379 }
380 }
381 }
382
/* Translate an address-space indication into the matching MMU index. */
static int mmu_idx_from_as(uint8_t as)
{
    if (as == AS_PRIMARY) {
        return MMU_PRIMARY_IDX;
    }
    if (as == AS_SECONDARY) {
        return MMU_SECONDARY_IDX;
    }
    if (as == AS_HOME) {
        return MMU_HOME_IDX;
    }
    /* FIXME AS_ACCREG */
    g_assert_not_reached();
}
397
398 /* and on array */
do_helper_nc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)399 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
400 uint64_t src, uintptr_t ra)
401 {
402 const int mmu_idx = cpu_mmu_index(env, false);
403 S390Access srca1, srca2, desta;
404 uint32_t i;
405 uint8_t c = 0;
406
407 HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
408 __func__, l, dest, src);
409
410 /* NC always processes one more byte than specified - maximum is 256 */
411 l++;
412
413 srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
414 srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
415 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
416 for (i = 0; i < l; i++) {
417 const uint8_t x = access_get_byte(env, &srca1, i, ra) &
418 access_get_byte(env, &srca2, i, ra);
419
420 c |= x;
421 access_set_byte(env, &desta, i, x, ra);
422 }
423 return c != 0;
424 }
425
/*
 * AND (NC) helper entry point. GETPC() is taken here, in the outermost
 * helper, so the return address refers to the TCG-generated caller.
 */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
431
432 /* xor on array */
do_helper_xc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)433 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
434 uint64_t src, uintptr_t ra)
435 {
436 const int mmu_idx = cpu_mmu_index(env, false);
437 S390Access srca1, srca2, desta;
438 uint32_t i;
439 uint8_t c = 0;
440
441 HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
442 __func__, l, dest, src);
443
444 /* XC always processes one more byte than specified - maximum is 256 */
445 l++;
446
447 srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
448 srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
449 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
450
451 /* xor with itself is the same as memset(0) */
452 if (src == dest) {
453 access_memset(env, &desta, 0, ra);
454 return 0;
455 }
456
457 for (i = 0; i < l; i++) {
458 const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
459 access_get_byte(env, &srca2, i, ra);
460
461 c |= x;
462 access_set_byte(env, &desta, i, x, ra);
463 }
464 return c != 0;
465 }
466
/*
 * XOR (XC) helper entry point. GETPC() is taken here, in the outermost
 * helper, so the return address refers to the TCG-generated caller.
 */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
472
473 /* or on array */
do_helper_oc(CPUS390XState * env,uint32_t l,uint64_t dest,uint64_t src,uintptr_t ra)474 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
475 uint64_t src, uintptr_t ra)
476 {
477 const int mmu_idx = cpu_mmu_index(env, false);
478 S390Access srca1, srca2, desta;
479 uint32_t i;
480 uint8_t c = 0;
481
482 HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
483 __func__, l, dest, src);
484
485 /* OC always processes one more byte than specified - maximum is 256 */
486 l++;
487
488 srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
489 srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
490 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
491 for (i = 0; i < l; i++) {
492 const uint8_t x = access_get_byte(env, &srca1, i, ra) |
493 access_get_byte(env, &srca2, i, ra);
494
495 c |= x;
496 access_set_byte(env, &desta, i, x, ra);
497 }
498 return c != 0;
499 }
500
/*
 * OR (OC) helper entry point. GETPC() is taken here, in the outermost
 * helper, so the return address refers to the TCG-generated caller.
 */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
506
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* This overlap propagates byte 0 forward: equivalent to a memset. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: copy byte by byte, low to high. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}
543
/*
 * MOVE (MVC) helper entry point. GETPC() is taken here, in the outermost
 * helper, so the return address refers to the TCG-generated caller.
 */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
548
549 /* move inverse */
HELPER(mvcin)550 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
551 {
552 const int mmu_idx = cpu_mmu_index(env, false);
553 S390Access srca, desta;
554 uintptr_t ra = GETPC();
555 int i;
556
557 /* MVCIN always copies one more byte than specified - maximum is 256 */
558 l++;
559
560 src = wrap_address(env, src - l + 1);
561 srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
562 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
563 for (i = 0; i < l; i++) {
564 const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
565
566 access_set_byte(env, &desta, i, x, ra);
567 }
568 }
569
570 /* move numerics */
HELPER(mvn)571 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
572 {
573 const int mmu_idx = cpu_mmu_index(env, false);
574 S390Access srca1, srca2, desta;
575 uintptr_t ra = GETPC();
576 int i;
577
578 /* MVN always copies one more byte than specified - maximum is 256 */
579 l++;
580
581 srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
582 srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
583 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
584 for (i = 0; i < l; i++) {
585 const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
586 (access_get_byte(env, &srca2, i, ra) & 0xf0);
587
588 access_set_byte(env, &desta, i, x, ra);
589 }
590 }
591
/* move with offset */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    /* The low nibble of the rightmost destination byte is preserved. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the high nibble of the previous source byte downward... */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zero nibbles. */
            byte_src = 0;
        }
        /* ...and pair it with the low nibble of the next source byte. */
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
625
626 /* move zones */
HELPER(mvz)627 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
628 {
629 const int mmu_idx = cpu_mmu_index(env, false);
630 S390Access srca1, srca2, desta;
631 uintptr_t ra = GETPC();
632 int i;
633
634 /* MVZ always copies one more byte than specified - maximum is 256 */
635 l++;
636
637 srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
638 srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
639 desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
640 for (i = 0; i < l; i++) {
641 const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
642 (access_get_byte(env, &srca2, i, ra) & 0x0f);
643
644 access_set_byte(env, &desta, i, x, ra);
645 }
646 }
647
648 /* compare unsigned byte arrays */
do_helper_clc(CPUS390XState * env,uint32_t l,uint64_t s1,uint64_t s2,uintptr_t ra)649 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
650 uint64_t s2, uintptr_t ra)
651 {
652 uint32_t i;
653 uint32_t cc = 0;
654
655 HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
656 __func__, l, s1, s2);
657
658 for (i = 0; i <= l; i++) {
659 uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
660 uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
661 HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
662 if (x < y) {
663 cc = 1;
664 break;
665 } else if (x > y) {
666 cc = 2;
667 break;
668 }
669 }
670
671 HELPER_LOG("\n");
672 return cc;
673 }
674
/*
 * COMPARE LOGICAL (CLC) helper entry point. GETPC() is taken here, in the
 * outermost helper, so the return address refers to the TCG-generated caller.
 */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
679
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    /*
     * Walk the 4-bit mask from its leftmost bit; for each set bit compare
     * the corresponding register byte (kept in the top byte of r1) with
     * the next memory byte.
     */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Only consume a memory byte when the mask bit was set. */
            addr++;
        }
        /* Shift to the next mask bit and the next register byte. */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
712
/* Read a register as an address, wrapped for the current addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
717
718 /*
719 * Store the address to the given register, zeroing out unused leftmost
720 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
721 */
set_address_zero(CPUS390XState * env,int reg,uint64_t address)722 static inline void set_address_zero(CPUS390XState *env, int reg,
723 uint64_t address)
724 {
725 if (env->psw.mask & PSW_MASK_64) {
726 env->regs[reg] = address;
727 } else {
728 if (!(env->psw.mask & PSW_MASK_32)) {
729 address &= 0x00ffffff;
730 } else {
731 address &= 0x7fffffff;
732 }
733 env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
734 }
735 }
736
/* Store an address to a register, honoring the current addressing mode. */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
        return;
    }
    if (env->psw.mask & PSW_MASK_32) {
        /* 31-Bit mode. According to the PoO it is implementation
           dependent if bit 32 remains unchanged or is set to zero.
           Choose the latter so that the function can also be used for
           TRT. */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32,
                                   address & 0x7fffffff);
    } else {
        /* 24-Bit mode. According to the PoO it is implementation
           dependent if bits 32-39 remain unchanged or are set to
           zeros. Choose the former so that the function can also be
           used for TRT. */
        env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
    }
}
759
/* Truncate a length to 32 bits unless running in 64-bit mode. */
static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    return (env->psw.mask & PSW_MASK_64) ? length : (uint32_t)length;
}
767
/* Truncate a length to 31 bits unless running in 64-bit mode. */
static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        return length;
    }
    /* 24-Bit and 31-Bit mode */
    return length & 0x7fffffff;
}
776
/* Read a length from a register, 31-bit wrapped outside 64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
781
/* Write a length back to a register, honoring the addressing mode. */
static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
        return;
    }
    /* 24-Bit and 31-Bit mode: only the low register word is replaced. */
    env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
}
792
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0. */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified. */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified. */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process. */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
830
/* search string unicode: like SRST, but compares 16-bit characters. */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero. */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte. */
    /* This makes the 2-byte stride below land exactly on the end address. */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified. */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process. */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
870
/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of c is the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k. */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers. */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low". */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers. */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}
908
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Control bits from R0 — see PoO, MOVE PAGE. */
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    /* F and S are mutually exclusive; bits 12-15 must be zero. */
    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            /* Condition-code option: report source inaccessible via CC 2. */
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            /* CC option: destination inaccessible (not protection) -> CC 1. */
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
 inject_exc:
#if !defined(CONFIG_USER_ONLY)
    /* Store the translation-exception code to the lowcore ourselves. */
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        /* Record which operand (r1/r2) caused the page-translation fault. */
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}
963
/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    /* Process at most up to the nearest page boundary of either operand. */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 32-55 of R0 must be zero. */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            /* Terminator copied: R1 points at it; CC 1. */
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    /* Page boundary reached: advance both registers; CC 3 (retry). */
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1001
1002 /* load access registers r1 to r3 from memory at a2 */
HELPER(lam)1003 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1004 {
1005 uintptr_t ra = GETPC();
1006 int i;
1007
1008 if (a2 & 0x3) {
1009 tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1010 }
1011
1012 for (i = r1;; i = (i + 1) % 16) {
1013 env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1014 a2 += 4;
1015
1016 if (i == r3) {
1017 break;
1018 }
1019 }
1020 }
1021
1022 /* store access registers r1 to r3 in memory at a2 */
HELPER(stam)1023 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1024 {
1025 uintptr_t ra = GETPC();
1026 int i;
1027
1028 if (a2 & 0x3) {
1029 tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1030 }
1031
1032 for (i = r1;; i = (i + 1) % 16) {
1033 cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1034 a2 += 4;
1035
1036 if (i == r3) {
1037 break;
1038 }
1039 }
1040 }
1041
1042 /* move long helper */
/*
 * One step of MVCLE/MVCLU: move or pad (never both) at most one page worth
 * of data, updating *dest/*destlen/*src/*srclen in place.  Returns the
 * condition code: 0/1/2 from the initial length comparison, or 3 when
 * destination bytes remain and the caller must re-execute.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Never cross a destination page boundary in a single step. */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* cc reflects the length relation on entry, not the copy outcome. */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* wordsize == 2: pad with a two-byte character (MVCLU). */
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1101
1102 /* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    /* Lengths are the low 24 bits of the odd registers of each pair. */
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    /* The padding byte sits directly above the 24-bit source length. */
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* cc 3: destructive overlap, no movement; else compare the lengths. */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the rest of the destination. */
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            /* Architecturally visible state must be updated every step. */
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1175
1176 /* move long extended */
HELPER(mvcle)1177 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1178 uint32_t r3)
1179 {
1180 uintptr_t ra = GETPC();
1181 uint64_t destlen = get_length(env, r1 + 1);
1182 uint64_t dest = get_address(env, r1);
1183 uint64_t srclen = get_length(env, r3 + 1);
1184 uint64_t src = get_address(env, r3);
1185 uint8_t pad = a2;
1186 uint32_t cc;
1187
1188 cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1189
1190 set_length(env, r1 + 1, destlen);
1191 set_length(env, r3 + 1, srclen);
1192 set_address(env, r1, dest);
1193 set_address(env, r3, src);
1194
1195 return cc;
1196 }
1197
1198 /* move long unicode */
HELPER(mvclu)1199 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1200 uint32_t r3)
1201 {
1202 uintptr_t ra = GETPC();
1203 uint64_t destlen = get_length(env, r1 + 1);
1204 uint64_t dest = get_address(env, r1);
1205 uint64_t srclen = get_length(env, r3 + 1);
1206 uint64_t src = get_address(env, r3);
1207 uint16_t pad = a2;
1208 uint32_t cc;
1209
1210 cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1211
1212 set_length(env, r1 + 1, destlen);
1213 set_length(env, r3 + 1, srclen);
1214 set_address(env, r1, dest);
1215 set_address(env, r3, src);
1216
1217 return cc;
1218 }
1219
1220 /* compare logical long helper */
/*
 * Core of CLCL/CLCLE/CLCLU: compare two operands element-wise (element
 * width @wordsize bytes), substituting @pad for elements of whichever
 * operand is exhausted.  Addresses and lengths are updated in place so
 * the caller can write them back to registers.  Returns cc 0 (equal),
 * 1 (first low), 2 (first high), or 3 (work limit @limit reached).
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    /* Compare until the longer operand (plus padding) is consumed. */
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the element size. */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand reads as the padding character. */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance operands that still have data left. */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1271
1272
1273 /* compare logical long */
HELPER(clcl)1274 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1275 {
1276 uintptr_t ra = GETPC();
1277 uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1278 uint64_t src1 = get_address(env, r1);
1279 uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1280 uint64_t src3 = get_address(env, r2);
1281 uint8_t pad = env->regs[r2 + 1] >> 24;
1282 uint32_t cc;
1283
1284 cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1285
1286 env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1287 env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1288 set_address(env, r1, src1);
1289 set_address(env, r2, src3);
1290
1291 return cc;
1292 }
1293
1294 /* compare logical long extended memcompare insn with padding */
HELPER(clcle)1295 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1296 uint32_t r3)
1297 {
1298 uintptr_t ra = GETPC();
1299 uint64_t src1len = get_length(env, r1 + 1);
1300 uint64_t src1 = get_address(env, r1);
1301 uint64_t src3len = get_length(env, r3 + 1);
1302 uint64_t src3 = get_address(env, r3);
1303 uint8_t pad = a2;
1304 uint32_t cc;
1305
1306 cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1307
1308 set_length(env, r1 + 1, src1len);
1309 set_length(env, r3 + 1, src3len);
1310 set_address(env, r1, src1);
1311 set_address(env, r3, src3);
1312
1313 return cc;
1314 }
1315
1316 /* compare logical long unicode memcompare insn with padding */
HELPER(clclu)1317 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1318 uint32_t r3)
1319 {
1320 uintptr_t ra = GETPC();
1321 uint64_t src1len = get_length(env, r1 + 1);
1322 uint64_t src1 = get_address(env, r1);
1323 uint64_t src3len = get_length(env, r3 + 1);
1324 uint64_t src3 = get_address(env, r3);
1325 uint16_t pad = a2;
1326 uint32_t cc = 0;
1327
1328 cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1329
1330 set_length(env, r1 + 1, src1len);
1331 set_length(env, r3 + 1, src3len);
1332 set_address(env, r1, src1);
1333 set_address(env, r3, src3);
1334
1335 return cc;
1336 }
1337
1338 /* checksum */
/*
 * CHECKSUM: 32-bit ones'-complement-style accumulation over up to 8k
 * bytes of @src.  Returns the number of bytes processed; the folded
 * checksum is passed back via env->retxl.
 */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* Accumulate in 64 bits; carries are folded back in afterwards. */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available. */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Trailing 1-3 bytes are treated as a word padded with zeros. */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum. Note that we can see carry-out
       during folding more than once (but probably not more than twice). */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything. */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length. */
    env->retxl = cksm;
    return len;
}
1384
/*
 * PACK: convert zoned decimal at @src into packed decimal at @dest.
 * @len encodes the operand lengths as in the instruction: destination
 * length-1 in bits 4-7 (len >> 4), source length-1 in bits 0-3.
 * Both operands are processed right to left.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Start at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Low nibble of the packed byte: digit from the next source byte. */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble: digit from the byte after that; else stays zero. */
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1421
/*
 * Core of PKA/PKU: pack @srclen bytes of ASCII (@ssize == 1) or Unicode
 * (@ssize == 2) decimal digits from @src into a 16-byte packed-decimal
 * field at @dest.  Only the low nibble of each source character is used.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long. */
    const int destlen = 16;

    /* The operands are processed from right to left. */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            /* Rightmost byte: sign code 0xc in the low nibble. */
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble: next digit, or zero once the source is exhausted. */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1455
1456
HELPER(pka)1457 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1458 uint32_t srclen)
1459 {
1460 do_pkau(env, dest, src, srclen, 1, GETPC());
1461 }
1462
HELPER(pku)1463 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1464 uint32_t srclen)
1465 {
1466 do_pkau(env, dest, src, srclen, 2, GETPC());
1467 }
1468
/*
 * UNPK: convert packed decimal at @src into zoned decimal at @dest.
 * @len encodes destination length-1 (bits 4-7) and source length-1
 * (bits 0-3).  Processed right to left, one nibble per output byte.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks whether the next output digit is the source's high nibble. */
    int second_nibble = 0;

    /* Start at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Exhausted source supplies zero digits. */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1515
/*
 * Core of UNPKA/UNPKU: unpack a 16-byte packed-decimal field at @src
 * into @destlen bytes of ASCII (@dsize == 1) or Unicode (@dsize == 2)
 * digits at @dest.  Returns the condition code derived from the sign
 * nibble: 0 plus, 1 minus, 3 invalid sign.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long. */
    const int srclen = 16;

    /* The operands are processed from right to left. */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign. */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0; /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1; /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3; /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Odd step: reuse the high nibble of the byte read last. */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1566
HELPER(unpka)1567 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1568 uint64_t src)
1569 {
1570 return do_unpkau(env, dest, destlen, 1, src, GETPC());
1571 }
1572
HELPER(unpku)1573 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1574 uint64_t src)
1575 {
1576 return do_unpkau(env, dest, destlen, 2, src, GETPC());
1577 }
1578
HELPER(tp)1579 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1580 {
1581 uintptr_t ra = GETPC();
1582 uint32_t cc = 0;
1583 int i;
1584
1585 for (i = 0; i < destlen; i++) {
1586 uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1587 /* digit */
1588 cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1589
1590 if (i == (destlen - 1)) {
1591 /* sign */
1592 cc |= (b & 0xf) < 0xa ? 1 : 0;
1593 } else {
1594 /* digit */
1595 cc |= (b & 0xf) > 0x9 ? 2 : 0;
1596 }
1597 }
1598
1599 return cc;
1600 }
1601
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t idx = 0;

    /* TRANSLATE: replace each of len+1 bytes via the 256-byte table. */
    while (idx <= len) {
        uint8_t orig_byte = cpu_ldub_data_ra(env, array + idx, ra);
        uint8_t xlat_byte = cpu_ldub_data_ra(env, trans + orig_byte, ra);

        cpu_stb_data_ra(env, array + idx, xlat_byte, ra);
        idx++;
    }

    /* The condition code is left untouched. */
    return env->cc_op;
}
1615
HELPER(tr)1616 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1617 uint64_t trans)
1618 {
1619 do_helper_tr(env, len, array, trans, GETPC());
1620 }
1621
/*
 * TRANSLATE EXTENDED: translate bytes of @array via the table at @trans
 * until the test byte (low byte of r0) is found or @len is exhausted.
 * Returns the updated array address; the residual length goes to
 * env->retxl and the condition code to env->cc_op.
 */
uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* In 31-bit mode, truncate the address and the length accordingly. */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do. For now, let's cap at 8k. */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* cc 1: test byte found before the operand was exhausted. */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}
1661
/*
 * Core of TRT/TRTR: scan len+1 bytes of @array (forward for @inc == 1,
 * backward for @inc == -1); for each byte, look up the function byte in
 * the 256-byte table at @trans.  On the first nonzero function byte,
 * r1 receives the operand address and the low byte of r2 the function
 * byte.  Returns cc 0 (all zero), 1 (hit before the end), 2 (hit on the
 * last byte).
 */
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        if (sbyte != 0) {
            /* Architectural side effects happen only on a hit. */
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    return 0;
}
1681
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    /* Forward scan (TRT semantics). */
    return do_helper_trt(env, len, array, trans, +1, ra);
}
1688
HELPER(trt)1689 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1690 uint64_t trans)
1691 {
1692 return do_helper_trt(env, len, array, trans, 1, GETPC());
1693 }
1694
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    /* Backward scan (TRTR semantics). */
    return do_helper_trt(env, len, array, trans, -1, ra);
}
1701
HELPER(trtr)1702 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1703 uint64_t trans)
1704 {
1705 return do_helper_trt(env, len, array, trans, -1, GETPC());
1706 }
1707
1708 /* Translate one/two to one/two */
/*
 * TROO/TROT/TRTO/TRTT: translate one/two-byte characters at r2 into
 * one/two-byte characters at r1 via the table addressed by r1's implicit
 * companion register 1, stopping on the test character @tst.  @sizes
 * selects the widths (bit 0: dest is 1 byte, bit 1: source is 1 byte).
 * Returns cc 0 (done), 1 (test char hit), 3 (work limit hit).
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored. For TROO, TROT, it's
       the low 3 bits (double-word aligned). For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH). */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source character size. */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do. */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            /* Test character found before storing it. */
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the advanced addresses and the residual length back. */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1761
/*
 * COMPARE DOUBLE AND SWAP (128-bit), non-parallel path: compare the
 * quadword at @addr with the r1/r1+1 pair; if equal, store the r3/r3+1
 * pair.  Either way the old value lands in r1/r1+1 and cc_op records
 * the (in)equality.  Non-atomic: only valid outside MTTCG races.
 */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    uint64_t oldh, oldl;
    bool fail;

    /* The second operand must be quadword aligned. */
    check_alignment(env, addr, 16, ra);

    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
    oldl = cpu_ldq_data_ra(env, addr + 8, ra);

    oldv = int128_make128(oldl, oldh);
    fail = !int128_eq(oldv, cmpv);
    if (fail) {
        /* On mismatch, store back the old value unchanged. */
        newv = oldv;
    }

    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1790
/*
 * COMPARE DOUBLE AND SWAP (128-bit), parallel path: uses the host's
 * 128-bit cmpxchg (alignment enforced via MO_ALIGN).  Only reachable
 * when HAVE_CMPXCHG128; the translator falls back to helper_cdsg
 * otherwise.
 */
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    int mem_idx;
    MemOpIdx oi;
    Int128 oldv;
    bool fail;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
    fail = !int128_eq(oldv, cmpv);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1813
/*
 * COMPARE AND SWAP AND STORE: compare-and-swap at @a1 (width selected
 * by function code fc: 0/1/2 -> 4/8/16 bytes, from bits 0-7 of r0) and,
 * if the swap succeeded, store a value from the parameter list (address
 * in register 1) to @a2 (width 1 << sc bytes, sc from bits 8-15 of r0).
 * Each sub-operation is individually atomic in the parallel case, but
 * the combination is not.  Returns the condition code.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    /* The parameter list is 16-byte aligned; low bits are ignored. */
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic. */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments. */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address. */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not. Therefore we do not need to
     * assert serial context in order to implement this. That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128 ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores. For simplicity, load the entire
       store value area from the parameter list. */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        /* 32-bit compare-and-swap. */
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(env_cpu(env), a1);
                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            /* The old value goes into the high half of r3. */
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 64-bit compare-and-swap. */
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 128-bit compare-and-swap; r3 must be an even register pair. */
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded. Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh. */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above. */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
1991
HELPER(csst)1992 uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1993 {
1994 return do_csst(env, r3, a1, a2, false);
1995 }
1996
HELPER(csst_parallel)1997 uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1998 uint64_t a2)
1999 {
2000 return do_csst(env, r3, a1, a2, true);
2001 }
2002
2003 #if !defined(CONFIG_USER_ONLY)
/*
 * LOAD CONTROL (64-bit): load control registers r1 through r3 (wrapping
 * at 15) from doubleword-aligned storage at @a2.  Recomputes PER
 * watchpoints if cr9-cr11 changed while PER is active, and flushes the
 * TLB since translation controls may have changed.
 */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand address must be doubleword aligned. */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* cr9-cr11 hold the PER control fields. */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    tlb_flush(env_cpu(env));
}
2036
/*
 * LOAD CONTROL (32-bit): load the low halves of control registers r1
 * through r3 (wrapping at 15) from word-aligned storage at @a2; the
 * high halves are preserved.  PER watchpoint recomputation and TLB
 * flush as for lctlg.
 */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand address must be word aligned. */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* cr9-cr11 hold the PER control fields. */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        /* Only the low 32 bits are replaced. */
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    tlb_flush(env_cpu(env));
}
2068
HELPER(stctg)2069 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2070 {
2071 uintptr_t ra = GETPC();
2072 uint64_t dest = a2;
2073 uint32_t i;
2074
2075 if (dest & 0x7) {
2076 tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2077 }
2078
2079 for (i = r1;; i = (i + 1) % 16) {
2080 cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2081 dest += sizeof(uint64_t);
2082
2083 if (i == r3) {
2084 break;
2085 }
2086 }
2087 }
2088
HELPER(stctl)2089 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2090 {
2091 uintptr_t ra = GETPC();
2092 uint64_t dest = a2;
2093 uint32_t i;
2094
2095 if (dest & 0x3) {
2096 tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2097 }
2098
2099 for (i = r1;; i = (i + 1) % 16) {
2100 cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2101 dest += sizeof(uint32_t);
2102
2103 if (i == r3) {
2104 break;
2105 }
2106 }
2107 }
2108
HELPER(testblock)2109 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2110 {
2111 uintptr_t ra = GETPC();
2112 int i;
2113
2114 real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2115
2116 for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2117 cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2118 }
2119
2120 return 0;
2121 }
2122
/*
 * TPROT: Test Protection.  Probe whether address a1 may be stored into
 * and/or fetched from, returning the condition code instead of raising
 * protection exceptions.  a2 carries the access key in its low-order
 * bits -- currently ignored (see TODO below); verify against PoO.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /*
     * The probe above queued a program interrupt; inspect it and clear
     * the pending exception so the CC is returned instead.
     */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2162
2163 /* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Lazily resolved skeys device, cached across calls (same pattern
       as sske/rrbe below). */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* The operand is a real address; convert and validate it. */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* Flush remote TLBs when storage keys just became active. */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        /* Backend failure: trace it and return a zero key. */
        trace_get_skeys_nonzero(rc);
        return 0;
    }
    return key;
}
2192
2193 /* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Lazily resolved skeys device, cached across calls (same pattern
       as iske/rrbe). */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    /* The operand is a real address; convert and validate it. */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* Flush remote TLBs when storage keys just became active. */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    /* Only bits 0-6 of the key byte are stored; bit 7 is dropped. */
    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2226
2227 /* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Lazily resolved skeys device, cached across calls (same pattern
       as iske/sske). */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    /* The operand is a real address; convert and validate it. */
    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* Flush remote TLBs when storage keys just became active. */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember old R and C, then clear the reference bit. */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
    /*
     * As we can only flush by virtual address and not all the entries
     * that point to a physical address we have to flush the whole TLB.
     */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2280
/*
 * MVCS: Move to Secondary.  Copy up to 256 bytes from the primary
 * address space (a2) to the secondary address space (a1).  Returns
 * cc 3 if the requested length was truncated to 256, else cc 0.
 */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control, and a usable ASC mode. */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        /* Zero length: no access at all. */
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2311
/*
 * MVCP: Move to Primary.  Copy up to 256 bytes from the secondary
 * address space (a2) to the primary address space (a1).  Returns
 * cc 3 if the requested length was truncated to 256, else cc 0.
 */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary-space control, and a usable ASC mode. */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        /* Zero length: no access at all. */
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
2342
/*
 * IDTE: Invalidate DAT Table Entry.  r1 holds the table origin and type,
 * r2 holds the effective address (selecting the starting index) plus an
 * additional-entry count in its low bits; m4 bit 0 selects local-only
 * TLB clearing.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 44-51 of r2 must be zero. */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Pick the table index field matching the designated level. */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2392
2393 /* invalidate pte */
/*
 * IPTE: Invalidate Page Table Entry.  pto is the page-table origin,
 * vaddr the virtual address whose PTE is invalidated; m4 bit 0 selects
 * local-only TLB clearing.
 */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* Local clearing only. */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* Broadcast clearing on all CPUs. */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2433
2434 /* flush local tlb */
HELPER(ptlb)2435 void HELPER(ptlb)(CPUS390XState *env)
2436 {
2437 tlb_flush(env_cpu(env));
2438 }
2439
2440 /* flush global tlb */
HELPER(purge)2441 void HELPER(purge)(CPUS390XState *env)
2442 {
2443 tlb_flush_all_cpus_synced(env_cpu(env));
2444 }
2445
2446 /* load real address */
/*
 * LRA: Load Real Address.  Translate addr using the current ASC mode
 * and return the real address with the byte offset merged back in.
 * On translation failure, cc 3 is set and the exception code (with bit
 * 32 set) is returned instead.  env->cc_op carries the condition code.
 */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        cc = 3;
        /* Return the exception code with bit 32 flagging the failure. */
        ret = exc | 0x80000000;
    } else {
        cc = 0;
        /* Re-attach the in-page offset to the translated frame. */
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
2470 #endif
2471
2472 /* load pair from quadword */
HELPER(lpq)2473 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2474 {
2475 uintptr_t ra = GETPC();
2476 uint64_t hi, lo;
2477
2478 check_alignment(env, addr, 16, ra);
2479 hi = cpu_ldq_data_ra(env, addr + 0, ra);
2480 lo = cpu_ldq_data_ra(env, addr + 8, ra);
2481
2482 env->retxl = lo;
2483 return hi;
2484 }
2485
HELPER(lpq_parallel)2486 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2487 {
2488 uintptr_t ra = GETPC();
2489 uint64_t hi, lo;
2490 int mem_idx;
2491 MemOpIdx oi;
2492 Int128 v;
2493
2494 assert(HAVE_ATOMIC128);
2495
2496 mem_idx = cpu_mmu_index(env, false);
2497 oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2498 v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2499 hi = int128_gethi(v);
2500 lo = int128_getlo(v);
2501
2502 env->retxl = lo;
2503 return hi;
2504 }
2505
2506 /* store pair to quadword */
HELPER(stpq)2507 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2508 uint64_t low, uint64_t high)
2509 {
2510 uintptr_t ra = GETPC();
2511
2512 check_alignment(env, addr, 16, ra);
2513 cpu_stq_data_ra(env, addr + 0, high, ra);
2514 cpu_stq_data_ra(env, addr + 8, low, ra);
2515 }
2516
HELPER(stpq_parallel)2517 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2518 uint64_t low, uint64_t high)
2519 {
2520 uintptr_t ra = GETPC();
2521 int mem_idx;
2522 MemOpIdx oi;
2523 Int128 v;
2524
2525 assert(HAVE_ATOMIC128);
2526
2527 mem_idx = cpu_mmu_index(env, false);
2528 oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2529 v = int128_make128(low, high);
2530 cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2531 }
2532
2533 /* Execute instruction. This instruction executes an insn modified with
2534 the contents of r1. It does not change the executed instruction in memory;
2535 it does not change the program counter.
2536
2537 Perform this by recording the modified instruction in env->ex_value.
2538 This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2539 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    /* First halfword of the target instruction; the opcode is its top byte. */
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63]. */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction. */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB. */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-a format storage-to-storage insns: dispatch directly to the
           matching do_helper_* routine instead of retranslating. */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-a fields from the assembled instruction. */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: raise the supervisor call directly; does not return. */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  (Insn bytes live in the high bits,
       ilen in the low bits; they cannot collide since insn was shifted
       left by 48 and ilen is at most 6.) */
    env->ex_value = insn | ilen;
}
2605
/*
 * MVCOS: Move with Optional Specifications.  Copy up to 4096 bytes from
 * src to dest, with per-operand access keys and address-space controls
 * taken from R0.  Returns cc 3 if the length was truncated, else cc 0.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* MVCOS requires DAT to be on. */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;   /* key-validity bit */
    dest_a = val & 0x1;          /* as-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fall back to the PSW key/AS wherever the validity bit is clear. */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Explicit home-space destination is privileged. */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        /* CPU-determined truncation at one page. */
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
                                         mmu_idx_from_as(src_as), ra);
        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
                                          mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2689
2690 /* Decode a Unicode character. A return value < 0 indicates success, storing
2691 the UTF-32 result into OCHAR and the input length into OLEN. A return
2692 value >= 0 indicates failure, and the CC value to be returned. */
2693 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2694 uint64_t ilen, bool enh_check, uintptr_t ra,
2695 uint32_t *ochar, uint32_t *olen);
2696
2697 /* Encode a Unicode character. A return value < 0 indicates success, storing
2698 the bytes into ADDR and the output length into OLEN. A return value >= 0
2699 indicates failure, and the CC value to be returned. */
2700 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2701 uint64_t ilen, uintptr_t ra, uint32_t c,
2702 uint32_t *olen);
2703
/*
 * Decode one UTF-8 character at ADDR (at most ILEN bytes available).
 * On success returns -1 and stores the code point in *OCHAR and the
 * consumed byte count in *OLEN.  Otherwise returns the CC: 0 for an
 * incomplete sequence, 2 for an invalid one (with ENH_CHECK enabled,
 * well-formedness per the Unicode standard is enforced).
 */
static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       bool enh_check, uintptr_t ra,
                       uint32_t *ochar, uint32_t *olen)
{
    uint8_t s0, s1, s2, s3;
    uint32_t c, l;

    if (ilen < 1) {
        return 0;
    }
    s0 = cpu_ldub_data_ra(env, addr, ra);
    if (s0 <= 0x7f) {
        /* one byte character */
        l = 1;
        c = s0;
    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
        /* invalid character: stray continuation byte, or (with the
           enhanced check) the always-overlong lead bytes 0xc0/0xc1 */
        return 2;
    } else if (s0 <= 0xdf) {
        /* two byte character */
        l = 2;
        if (ilen < 2) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        c = s0 & 0x1f;
        c = (c << 6) | (s1 & 0x3f);
        if (enh_check && (s1 & 0xc0) != 0x80) {
            return 2;
        }
    } else if (s0 <= 0xef) {
        /* three byte character */
        l = 3;
        if (ilen < 3) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        c = s0 & 0x0f;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        /* Fold the byte-by-byte range descriptions in the PoO into
           tests against the complete value.  It disallows encodings
           that could be smaller (three-byte sequences start at U+0800,
           hence c < 0x800 is overlong), and the UTF-16 surrogates. */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || c < 0x800
                || (c >= 0xd800 && c <= 0xdfff))) {
            return 2;
        }
    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
        /* four byte character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
        c = s0 & 0x07;
        c = (c << 6) | (s1 & 0x3f);
        c = (c << 6) | (s2 & 0x3f);
        c = (c << 6) | (s3 & 0x3f);
        /* See above: reject overlong (< U+10000) and out-of-range. */
        if (enh_check
            && ((s1 & 0xc0) != 0x80
                || (s2 & 0xc0) != 0x80
                || (s3 & 0xc0) != 0x80
                || c < 0x010000
                || c > 0x10ffff)) {
            return 2;
        }
    } else {
        /* invalid character */
        return 2;
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2786
/*
 * Decode one UTF-16 (big-endian) character at ADDR (at most ILEN bytes
 * available).  Returns -1 on success (code point in *OCHAR, consumed
 * bytes in *OLEN), 0 for an incomplete sequence, 2 for an invalid one.
 */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
        /* Reassemble the code point: the high surrogate carries bits
           16-20 of (c - 0x10000), biased by -1, in its bits 6-9 --
           hence the "+ 1" here -- plus bits 10-15 in its low six bits;
           the low surrogate supplies the bottom ten bits. */
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}
2822
/*
 * Decode one UTF-32 (big-endian) character at ADDR (at most ILEN bytes
 * available).  Returns -1 on success (code point in *OCHAR, 4 in
 * *OLEN), 0 for an incomplete word, 2 for an invalid value.
 */
static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint32_t c;

    if (ilen < 4) {
        return 0;
    }
    c = cpu_ldl_data_ra(env, addr, ra);
    /* Well-formed UTF-32 excludes the entire surrogate range
       U+D800..U+DFFF (the previous upper bound 0xdbff let lone low
       surrogates through) as well as values beyond U+10FFFF. */
    if ((c >= 0xd800 && c <= 0xdfff) || c > 0x10ffff) {
        /* invalid unicode character */
        return 2;
    }

    *ochar = c;
    *olen = 4;
    return -1;
}
2842
/*
 * Encode code point C as UTF-8 at ADDR (at most ILEN bytes of room).
 * Returns -1 on success with the byte count in *OLEN, or 1 (the CC)
 * when the destination is exhausted.
 */
static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                       uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint8_t buf[4];
    uint32_t len, i;

    /* Build the byte sequence lead-byte first. */
    if (c <= 0x7f) {
        /* one byte character */
        len = 1;
        buf[0] = c;
    } else if (c <= 0x7ff) {
        /* two byte character */
        len = 2;
        buf[0] = 0xc0 | extract32(c, 6, 5);
        buf[1] = 0x80 | extract32(c, 0, 6);
    } else if (c <= 0xffff) {
        /* three byte character */
        len = 3;
        buf[0] = 0xe0 | extract32(c, 12, 4);
        buf[1] = 0x80 | extract32(c, 6, 6);
        buf[2] = 0x80 | extract32(c, 0, 6);
    } else {
        /* four byte character */
        len = 4;
        buf[0] = 0xf0 | extract32(c, 18, 3);
        buf[1] = 0x80 | extract32(c, 12, 6);
        buf[2] = 0x80 | extract32(c, 6, 6);
        buf[3] = 0x80 | extract32(c, 0, 6);
    }

    if (ilen < len) {
        return 1;
    }
    for (i = 0; i < len; ++i) {
        cpu_stb_data_ra(env, addr + i, buf[i], ra);
    }

    *olen = len;
    return -1;
}
2883
/*
 * Encode code point C as UTF-16 (big-endian) at ADDR (at most ILEN
 * bytes of room).  Returns -1 on success with the byte count in *OLEN,
 * or 1 (the CC) when the destination is exhausted.
 */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character: split (c - 0x10000) into surrogates.
           The low surrogate takes bits 0-9; the high surrogate takes
           bits 10-15 plus, in its bits 6-9, bits 16-20 of c minus 1
           (equivalent to the 0x10000 subtraction). */
        if (ilen < 4) {
            return 1;
        }
        d1 = 0xdc00 | extract32(c, 0, 10);
        d0 = 0xd800 | extract32(c, 10, 6);
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
2911
/*
 * Encode code point C as UTF-32 (big-endian) at ADDR.  Returns -1 on
 * success (always 4 bytes, stored in *OLEN), or 1 (the CC) when fewer
 * than four destination bytes remain.
 */
static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    if (ilen >= 4) {
        cpu_stl_data_ra(env, addr, c, ra);
        *olen = 4;
        return -1;
    }
    return 1;
}
2922
/*
 * Common driver for the CUxx helpers: repeatedly decode one character
 * from the r2/r2+1 operand and re-encode it into the r1/r1+1 operand,
 * advancing both register pairs.  DECODE/ENCODE return -1 to continue
 * or the CC to stop with; cc 3 means CPU-determined early exit.
 */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256. */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Both sides succeeded: consume the input, advance the output. */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If the cap is hit, report cc 3 (CPU-determined completion). */
        cc = 3;
    }

    /* Write back the updated operand register pairs. */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2963
HELPER(cu12)2964 uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2965 {
2966 return convert_unicode(env, r1, r2, m3, GETPC(),
2967 decode_utf8, encode_utf16);
2968 }
2969
HELPER(cu14)2970 uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2971 {
2972 return convert_unicode(env, r1, r2, m3, GETPC(),
2973 decode_utf8, encode_utf32);
2974 }
2975
HELPER(cu21)2976 uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2977 {
2978 return convert_unicode(env, r1, r2, m3, GETPC(),
2979 decode_utf16, encode_utf8);
2980 }
2981
HELPER(cu24)2982 uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2983 {
2984 return convert_unicode(env, r1, r2, m3, GETPC(),
2985 decode_utf16, encode_utf32);
2986 }
2987
HELPER(cu41)2988 uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2989 {
2990 return convert_unicode(env, r1, r2, m3, GETPC(),
2991 decode_utf32, encode_utf8);
2992 }
2993
HELPER(cu42)2994 uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2995 {
2996 return convert_unicode(env, r1, r2, m3, GETPC(),
2997 decode_utf32, encode_utf16);
2998 }
2999
/*
 * Probe that LEN bytes at ADDR are writable, page by page, raising the
 * appropriate exception (with return address RA) if any page is not.
 */
void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
                        uintptr_t ra)
{
    /* test the actual access, not just any access to the page due to LAP */
    while (len) {
        /* Bytes remaining on the current target page. */
        uint64_t chunk = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);

        if (chunk > len) {
            chunk = len;
        }
        probe_write(env, addr, chunk, cpu_mmu_index(env, false), ra);
        addr = wrap_address(env, addr + chunk);
        len -= chunk;
    }
}
3013
HELPER(probe_write_access)3014 void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3015 {
3016 probe_write_access(env, addr, len, GETPC());
3017 }
3018