1 /* 2 * PowerPC integer and vector emulation helpers for QEMU. 3 * 4 * Copyright (c) 2003-2007 Jocelyn Mayer 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "internal.h" 23 #include "qemu/host-utils.h" 24 #include "qemu/log.h" 25 #include "exec/helper-proto.h" 26 #include "crypto/aes.h" 27 #include "crypto/aes-round.h" 28 #include "crypto/clmul.h" 29 #include "fpu/softfloat.h" 30 #include "qapi/error.h" 31 #include "qemu/guest-random.h" 32 #include "tcg/tcg-gvec-desc.h" 33 34 #include "helper_regs.h" 35 /*****************************************************************************/ 36 /* Fixed point operations helpers */ 37 38 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) 39 { 40 if (unlikely(ov)) { 41 env->so = env->ov = env->ov32 = 1; 42 } else { 43 env->ov = env->ov32 = 0; 44 } 45 } 46 47 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, 48 uint32_t oe) 49 { 50 uint64_t rt = 0; 51 int overflow = 0; 52 53 uint64_t dividend = (uint64_t)ra << 32; 54 uint64_t divisor = (uint32_t)rb; 55 56 if (unlikely(divisor == 0)) { 57 overflow = 1; 58 } else { 59 rt = dividend / divisor; 60 overflow = rt > UINT32_MAX; 61 } 62 63 if (unlikely(overflow)) { 64 rt = 0; /* Undefined */ 65 } 66 67 if (oe) { 68 helper_update_ov_legacy(env, overflow); 69 } 70 71 return (target_ulong)rt; 72 } 73 74 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, 75 uint32_t oe) 76 { 77 int64_t rt = 0; 78 int overflow = 0; 79 80 int64_t dividend = (int64_t)ra << 32; 81 int64_t divisor = (int64_t)((int32_t)rb); 82 83 if (unlikely((divisor == 0) || 84 ((divisor == -1ull) && (dividend == INT64_MIN)))) { 85 overflow = 1; 86 } else { 87 rt = dividend / divisor; 88 overflow = rt != (int32_t)rt; 89 } 90 91 if (unlikely(overflow)) { 92 rt = 0; /* Undefined */ 93 } 94 95 if (oe) { 96 helper_update_ov_legacy(env, overflow); 97 } 98 99 return (target_ulong)rt; 100 } 101 102 #if defined(TARGET_PPC64) 103 104 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) 105 { 106 uint64_t rt = 0; 107 int overflow = 0; 108 109 if (unlikely(rb == 0 || ra >= rb)) { 110 overflow = 1; 111 rt = 0; /* Undefined */ 112 } else { 113 divu128(&rt, &ra, rb); 114 } 115 116 if (oe) { 117 helper_update_ov_legacy(env, overflow); 118 } 119 120 return rt; 121 } 122 123 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) 124 { 125 uint64_t rt = 0; 126 int64_t ra = (int64_t)rau; 127 int64_t rb = (int64_t)rbu; 128 int overflow = 0; 129 130 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) { 131 overflow = 1; 132 rt = 0; /* Undefined */ 133 } else { 134 divs128(&rt, &ra, rb); 135 } 136 137 if (oe) { 138 helper_update_ov_legacy(env, overflow); 139 } 140 141 return rt; 142 } 143 144 #endif 145 146 147 #if defined(TARGET_PPC64) 148 /* if x = 
0xab, returns 0xababababababababa */ 149 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) 150 151 /* 152 * subtract 1 from each byte, and with inverse, check if MSB is set at each 153 * byte. 154 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 155 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) 156 */ 157 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) 158 159 /* When you XOR the pattern and there is a match, that byte will be zero */ 160 #define hasvalue(x, n) (haszero((x) ^ pattern(n))) 161 162 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) 163 { 164 return hasvalue(rb, ra) ? CRF_GT : 0; 165 } 166 167 #undef pattern 168 #undef haszero 169 #undef hasvalue 170 171 /* 172 * Return a random number. 173 */ 174 uint64_t helper_darn32(void) 175 { 176 Error *err = NULL; 177 uint32_t ret; 178 179 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 180 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 181 error_get_pretty(err)); 182 error_free(err); 183 return -1; 184 } 185 186 return ret; 187 } 188 189 uint64_t helper_darn64(void) 190 { 191 Error *err = NULL; 192 uint64_t ret; 193 194 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { 195 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s", 196 error_get_pretty(err)); 197 error_free(err); 198 return -1; 199 } 200 201 return ret; 202 } 203 204 uint64_t helper_bpermd(uint64_t rs, uint64_t rb) 205 { 206 int i; 207 uint64_t ra = 0; 208 209 for (i = 0; i < 8; i++) { 210 int index = (rs >> (i * 8)) & 0xFF; 211 if (index < 64) { 212 if (rb & PPC_BIT(index)) { 213 ra |= 1 << i; 214 } 215 } 216 } 217 return ra; 218 } 219 220 #endif 221 222 target_ulong helper_cmpb(target_ulong rs, target_ulong rb) 223 { 224 target_ulong mask = 0xff; 225 target_ulong ra = 0; 226 int i; 227 228 for (i = 0; i < sizeof(target_ulong); i++) { 229 if ((rs & mask) == (rb & mask)) { 230 ra |= mask; 231 } 232 mask <<= 8; 233 } 234 return ra; 235 } 236 237 /* shift right arithmetic helper */ 238 target_ulong helper_sraw(CPUPPCState *env, target_ulong value, 239 target_ulong shift) 240 { 241 int32_t ret; 242 243 if (likely(!(shift & 0x20))) { 244 if (likely((uint32_t)shift != 0)) { 245 shift &= 0x1f; 246 ret = (int32_t)value >> shift; 247 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) { 248 env->ca32 = env->ca = 0; 249 } else { 250 env->ca32 = env->ca = 1; 251 } 252 } else { 253 ret = (int32_t)value; 254 env->ca32 = env->ca = 0; 255 } 256 } else { 257 ret = (int32_t)value >> 31; 258 env->ca32 = env->ca = (ret != 0); 259 } 260 return (target_long)ret; 261 } 262 263 #if defined(TARGET_PPC64) 264 target_ulong helper_srad(CPUPPCState *env, target_ulong value, 265 target_ulong shift) 266 { 267 int64_t ret; 268 269 if (likely(!(shift & 0x40))) { 270 if (likely((uint64_t)shift != 0)) { 271 shift &= 0x3f; 272 ret = (int64_t)value >> shift; 273 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) { 274 env->ca32 = env->ca = 0; 275 } else { 276 env->ca32 = env->ca = 1; 277 } 278 } else { 279 ret = (int64_t)value; 280 env->ca32 = env->ca = 0; 281 } 282 } else { 283 ret = (int64_t)value >> 63; 284 env->ca32 = env->ca = (ret != 0); 285 } 286 return ret; 287 } 288 #endif 289 290 #if defined(TARGET_PPC64) 291 target_ulong helper_popcntb(target_ulong val) 292 { 293 /* Note that we don't fold past bytes */ 294 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 295 0x5555555555555555ULL); 296 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 297 0x3333333333333333ULL); 298 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 299 
0x0f0f0f0f0f0f0f0fULL); 300 return val; 301 } 302 303 target_ulong helper_popcntw(target_ulong val) 304 { 305 /* Note that we don't fold past words. */ 306 val = (val & 0x5555555555555555ULL) + ((val >> 1) & 307 0x5555555555555555ULL); 308 val = (val & 0x3333333333333333ULL) + ((val >> 2) & 309 0x3333333333333333ULL); 310 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 311 0x0f0f0f0f0f0f0f0fULL); 312 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 313 0x00ff00ff00ff00ffULL); 314 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 315 0x0000ffff0000ffffULL); 316 return val; 317 } 318 #else 319 target_ulong helper_popcntb(target_ulong val) 320 { 321 /* Note that we don't fold past bytes */ 322 val = (val & 0x55555555) + ((val >> 1) & 0x55555555); 323 val = (val & 0x33333333) + ((val >> 2) & 0x33333333); 324 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f); 325 return val; 326 } 327 #endif 328 329 uint64_t helper_CFUGED(uint64_t src, uint64_t mask) 330 { 331 /* 332 * Instead of processing the mask bit-by-bit from the most significant to 333 * the least significant bit, as described in PowerISA, we'll handle it in 334 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use 335 * ctz or cto, we negate the mask at the end of the loop. 336 */ 337 target_ulong m, left = 0, right = 0; 338 unsigned int n, i = 64; 339 bool bit = false; /* tracks if we are processing zeros or ones */ 340 341 if (mask == 0 || mask == -1) { 342 return src; 343 } 344 345 /* Processes the mask in blocks, from LSB to MSB */ 346 while (i) { 347 /* Find how many bits we should take */ 348 n = ctz64(mask); 349 if (n > i) { 350 n = i; 351 } 352 353 /* 354 * Extracts 'n' trailing bits of src and put them on the leading 'n' 355 * bits of 'right' or 'left', pushing down the previously extracted 356 * values. 357 */ 358 m = (1ll << n) - 1; 359 if (bit) { 360 right = ror64(right | (src & m), n); 361 } else { 362 left = ror64(left | (src & m), n); 363 } 364 365 /* 366 * Discards the processed bits from 'src' and 'mask'. Note that we are 367 * removing 'n' trailing zeros from 'mask', but the logical shift will 368 * add 'n' leading zeros back, so the population count of 'mask' is kept 369 * the same. 370 */ 371 src >>= n; 372 mask >>= n; 373 i -= n; 374 bit = !bit; 375 mask = ~mask; 376 } 377 378 /* 379 * At the end, right was ror'ed ctpop(mask) times. To put it back in place, 380 * we'll shift it more 64-ctpop(mask) times. 
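* (A note on the final step, based on tracing the loop above: the branch on 'bit' below only reflects whether the last iteration left 'mask' in its negated form; in both cases 'n' works out to 64 minus the population count of the original mask.)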
381 */ 382 if (bit) { 383 n = ctpop64(mask); 384 } else { 385 n = 64 - ctpop64(mask); 386 } 387 388 return left | (right >> n); 389 } 390 391 uint64_t helper_PDEPD(uint64_t src, uint64_t mask) 392 { 393 int i, o; 394 uint64_t result = 0; 395 396 if (mask == -1) { 397 return src; 398 } 399 400 for (i = 0; mask != 0; i++) { 401 o = ctz64(mask); 402 mask &= mask - 1; 403 result |= ((src >> i) & 1) << o; 404 } 405 406 return result; 407 } 408 409 uint64_t helper_PEXTD(uint64_t src, uint64_t mask) 410 { 411 int i, o; 412 uint64_t result = 0; 413 414 if (mask == -1) { 415 return src; 416 } 417 418 for (o = 0; mask != 0; o++) { 419 i = ctz64(mask); 420 mask &= mask - 1; 421 result |= ((src >> i) & 1) << o; 422 } 423 424 return result; 425 } 426 427 /*****************************************************************************/ 428 /* Altivec extension helpers */ 429 #if HOST_BIG_ENDIAN 430 #define VECTOR_FOR_INORDER_I(index, element) \ 431 for (index = 0; index < ARRAY_SIZE(r->element); index++) 432 #else 433 #define VECTOR_FOR_INORDER_I(index, element) \ 434 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--) 435 #endif 436 437 /* Saturating arithmetic helpers. */ 438 #define SATCVT(from, to, from_type, to_type, min, max) \ 439 static inline to_type cvt##from##to(from_type x, int *sat) \ 440 { \ 441 to_type r; \ 442 \ 443 if (x < (from_type)min) { \ 444 r = min; \ 445 *sat = 1; \ 446 } else if (x > (from_type)max) { \ 447 r = max; \ 448 *sat = 1; \ 449 } else { \ 450 r = x; \ 451 } \ 452 return r; \ 453 } 454 #define SATCVTU(from, to, from_type, to_type, min, max) \ 455 static inline to_type cvt##from##to(from_type x, int *sat) \ 456 { \ 457 to_type r; \ 458 \ 459 if (x > (from_type)max) { \ 460 r = max; \ 461 *sat = 1; \ 462 } else { \ 463 r = x; \ 464 } \ 465 return r; \ 466 } 467 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) 468 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX) 469 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) 470 471 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX) 472 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX) 473 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX) 474 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX) 475 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX) 476 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX) 477 #undef SATCVT 478 #undef SATCVTU 479 480 void helper_mtvscr(CPUPPCState *env, uint32_t vscr) 481 { 482 ppc_store_vscr(env, vscr); 483 } 484 485 uint32_t helper_mfvscr(CPUPPCState *env) 486 { 487 return ppc_get_vscr(env); 488 } 489 490 static inline void set_vscr_sat(CPUPPCState *env) 491 { 492 /* The choice of non-zero value is arbitrary. 
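VSCR[SAT] is sticky: saturating helpers only ever set it here, and it is cleared again only when the guest rewrites VSCR via mtvscr.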
*/ 493 env->vscr_sat.u32[0] = 1; 494 } 495 496 /* vprtybq */ 497 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v) 498 { 499 uint64_t res = b->u64[0] ^ b->u64[1]; 500 res ^= res >> 32; 501 res ^= res >> 16; 502 res ^= res >> 8; 503 r->VsrD(1) = res & 1; 504 r->VsrD(0) = 0; 505 } 506 507 #define VARITHFP(suffix, func) \ 508 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 509 ppc_avr_t *b) \ 510 { \ 511 int i; \ 512 \ 513 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 514 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \ 515 } \ 516 } 517 VARITHFP(addfp, float32_add) 518 VARITHFP(subfp, float32_sub) 519 VARITHFP(minfp, float32_min) 520 VARITHFP(maxfp, float32_max) 521 #undef VARITHFP 522 523 #define VARITHFPFMA(suffix, type) \ 524 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \ 525 ppc_avr_t *b, ppc_avr_t *c) \ 526 { \ 527 int i; \ 528 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 529 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \ 530 type, &env->vec_status); \ 531 } \ 532 } 533 VARITHFPFMA(maddfp, 0); 534 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c); 535 #undef VARITHFPFMA 536 537 #define VARITHSAT_CASE(type, op, cvt, element) \ 538 { \ 539 type result = (type)a->element[i] op (type)b->element[i]; \ 540 r->element[i] = cvt(result, &sat); \ 541 } 542 543 #define VARITHSAT_DO(name, op, optype, cvt, element) \ 544 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \ 545 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 546 { \ 547 int sat = 0; \ 548 int i; \ 549 \ 550 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 551 VARITHSAT_CASE(optype, op, cvt, element); \ 552 } \ 553 if (sat) { \ 554 vscr_sat->u32[0] = 1; \ 555 } \ 556 } 557 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ 558 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ 559 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) 560 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ 561 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ 562 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) 563 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) 564 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) 565 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) 566 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) 567 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) 568 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) 569 #undef VARITHSAT_CASE 570 #undef VARITHSAT_DO 571 #undef VARITHSAT_SIGNED 572 #undef VARITHSAT_UNSIGNED 573 574 #define VAVG(name, element, etype) \ 575 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 576 { \ 577 int i; \ 578 \ 579 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 580 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ 581 r->element[i] = x >> 1; \ 582 } \ 583 } 584 585 VAVG(VAVGSB, s8, int16_t) 586 VAVG(VAVGUB, u8, uint16_t) 587 VAVG(VAVGSH, s16, int32_t) 588 VAVG(VAVGUH, u16, uint32_t) 589 VAVG(VAVGSW, s32, int64_t) 590 VAVG(VAVGUW, u32, uint64_t) 591 #undef VAVG 592 593 #define VABSDU(name, element) \ 594 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\ 595 { \ 596 int i; \ 597 \ 598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 599 r->element[i] = (a->element[i] > b->element[i]) ? 
\ 600 (a->element[i] - b->element[i]) : \ 601 (b->element[i] - a->element[i]); \ 602 } \ 603 } 604 605 /* 606 * VABSDU - Vector absolute difference unsigned 607 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) 608 * element - element type to access from vector 609 */ 610 VABSDU(VABSDUB, u8) 611 VABSDU(VABSDUH, u16) 612 VABSDU(VABSDUW, u32) 613 #undef VABSDU 614 615 #define VCF(suffix, cvt, element) \ 616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ 617 ppc_avr_t *b, uint32_t uim) \ 618 { \ 619 int i; \ 620 \ 621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 622 float32 t = cvt(b->element[i], &env->vec_status); \ 623 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \ 624 } \ 625 } 626 VCF(ux, uint32_to_float32, u32) 627 VCF(sx, int32_to_float32, s32) 628 #undef VCF 629 630 #define VCMPNEZ(NAME, ELEM) \ 631 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ 632 { \ 633 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ 634 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ 635 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ 636 } \ 637 } 638 VCMPNEZ(VCMPNEZB, u8) 639 VCMPNEZ(VCMPNEZH, u16) 640 VCMPNEZ(VCMPNEZW, u32) 641 #undef VCMPNEZ 642 643 #define VCMPFP_DO(suffix, compare, order, record) \ 644 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ 645 ppc_avr_t *a, ppc_avr_t *b) \ 646 { \ 647 uint32_t ones = (uint32_t)-1; \ 648 uint32_t all = ones; \ 649 uint32_t none = 0; \ 650 int i; \ 651 \ 652 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 653 uint32_t result; \ 654 FloatRelation rel = \ 655 float32_compare_quiet(a->f32[i], b->f32[i], \ 656 &env->vec_status); \ 657 if (rel == float_relation_unordered) { \ 658 result = 0; \ 659 } else if (rel compare order) { \ 660 result = ones; \ 661 } else { \ 662 result = 0; \ 663 } \ 664 r->u32[i] = result; \ 665 all &= result; \ 666 none |= result; \ 667 } \ 668 if (record) { \ 669 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ 670 } \ 671 } 672 #define VCMPFP(suffix, compare, order) \ 673 VCMPFP_DO(suffix, compare, order, 0) \ 674 VCMPFP_DO(suffix##_dot, compare, order, 1) 675 VCMPFP(eqfp, ==, float_relation_equal) 676 VCMPFP(gefp, !=, float_relation_less) 677 VCMPFP(gtfp, ==, float_relation_greater) 678 #undef VCMPFP_DO 679 #undef VCMPFP 680 681 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r, 682 ppc_avr_t *a, ppc_avr_t *b, int record) 683 { 684 int i; 685 int all_in = 0; 686 687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 688 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i], 689 &env->vec_status); 690 if (le_rel == float_relation_unordered) { 691 r->u32[i] = 0xc0000000; 692 all_in = 1; 693 } else { 694 float32 bneg = float32_chs(b->f32[i]); 695 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg, 696 &env->vec_status); 697 int le = le_rel != float_relation_greater; 698 int ge = ge_rel != float_relation_less; 699 700 r->u32[i] = ((!le) << 31) | ((!ge) << 30); 701 all_in |= (!le | !ge); 702 } 703 } 704 if (record) { 705 env->crf[6] = (all_in == 0) << 1; 706 } 707 } 708 709 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 710 { 711 vcmpbfp_internal(env, r, a, b, 0); 712 } 713 714 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 715 ppc_avr_t *b) 716 { 717 vcmpbfp_internal(env, r, a, b, 1); 718 } 719 720 #define VCT(suffix, satcvt, element) \ 721 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \ 722 ppc_avr_t *b, uint32_t uim) \ 723 { \ 724 int i; \ 725 int sat = 0; \ 
726 float_status s = env->vec_status; \ 727 \ 728 set_float_rounding_mode(float_round_to_zero, &s); \ 729 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 730 if (float32_is_any_nan(b->f32[i])) { \ 731 r->element[i] = 0; \ 732 } else { \ 733 float64 t = float32_to_float64(b->f32[i], &s); \ 734 int64_t j; \ 735 \ 736 t = float64_scalbn(t, uim, &s); \ 737 j = float64_to_int64(t, &s); \ 738 r->element[i] = satcvt(j, &sat); \ 739 } \ 740 } \ 741 if (sat) { \ 742 set_vscr_sat(env); \ 743 } \ 744 } 745 VCT(uxs, cvtsduw, u32) 746 VCT(sxs, cvtsdsw, s32) 747 #undef VCT 748 749 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t); 750 751 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask) 752 { 753 int64_t psum = 0; 754 for (int i = 0; i < 8; i++, mask >>= 1) { 755 if (mask & 1) { 756 psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4); 757 } 758 } 759 return psum; 760 } 761 762 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask) 763 { 764 int64_t psum = 0; 765 for (int i = 0; i < 4; i++, mask >>= 1) { 766 if (mask & 1) { 767 psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8); 768 } 769 } 770 return psum; 771 } 772 773 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask) 774 { 775 int64_t psum = 0; 776 for (int i = 0; i < 2; i++, mask >>= 1) { 777 if (mask & 1) { 778 psum += (int64_t)sextract32(a, 16 * i, 16) * 779 sextract32(b, 16 * i, 16); 780 } 781 } 782 return psum; 783 } 784 785 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at, 786 uint32_t mask, bool sat, bool acc, do_ger ger) 787 { 788 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK), 789 xmsk = FIELD_EX32(mask, GER_MSK, XMSK), 790 ymsk = FIELD_EX32(mask, GER_MSK, YMSK); 791 uint8_t xmsk_bit, ymsk_bit; 792 int64_t psum; 793 int i, j; 794 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { 795 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { 796 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { 797 psum = ger(a->VsrW(i), b->VsrW(j), pmsk); 798 if (acc) { 799 psum += at[i].VsrSW(j); 800 } 801 if (sat && psum > INT32_MAX) { 802 set_vscr_sat(env); 803 at[i].VsrSW(j) = INT32_MAX; 804 } else if (sat && psum < INT32_MIN) { 805 set_vscr_sat(env); 806 at[i].VsrSW(j) = INT32_MIN; 807 } else { 808 at[i].VsrSW(j) = (int32_t) psum; 809 } 810 } else { 811 at[i].VsrSW(j) = 0; 812 } 813 } 814 } 815 } 816 817 QEMU_FLATTEN 818 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 819 ppc_acc_t *at, uint32_t mask) 820 { 821 xviger(env, a, b, at, mask, false, false, ger_rank8); 822 } 823 824 QEMU_FLATTEN 825 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 826 ppc_acc_t *at, uint32_t mask) 827 { 828 xviger(env, a, b, at, mask, false, true, ger_rank8); 829 } 830 831 QEMU_FLATTEN 832 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 833 ppc_acc_t *at, uint32_t mask) 834 { 835 xviger(env, a, b, at, mask, false, false, ger_rank4); 836 } 837 838 QEMU_FLATTEN 839 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 840 ppc_acc_t *at, uint32_t mask) 841 { 842 xviger(env, a, b, at, mask, false, true, ger_rank4); 843 } 844 845 QEMU_FLATTEN 846 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 847 ppc_acc_t *at, uint32_t mask) 848 { 849 xviger(env, a, b, at, mask, true, true, ger_rank4); 850 } 851 852 QEMU_FLATTEN 853 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 854 ppc_acc_t *at, uint32_t mask) 855 { 856 xviger(env, a, b, at, mask, false, false, ger_rank2); 
857 } 858 859 QEMU_FLATTEN 860 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 861 ppc_acc_t *at, uint32_t mask) 862 { 863 xviger(env, a, b, at, mask, true, false, ger_rank2); 864 } 865 866 QEMU_FLATTEN 867 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 868 ppc_acc_t *at, uint32_t mask) 869 { 870 xviger(env, a, b, at, mask, false, true, ger_rank2); 871 } 872 873 QEMU_FLATTEN 874 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, 875 ppc_acc_t *at, uint32_t mask) 876 { 877 xviger(env, a, b, at, mask, true, true, ger_rank2); 878 } 879 880 target_ulong helper_vclzlsbb(ppc_avr_t *r) 881 { 882 target_ulong count = 0; 883 int i; 884 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 885 if (r->VsrB(i) & 0x01) { 886 break; 887 } 888 count++; 889 } 890 return count; 891 } 892 893 target_ulong helper_vctzlsbb(ppc_avr_t *r) 894 { 895 target_ulong count = 0; 896 int i; 897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 898 if (r->VsrB(i) & 0x01) { 899 break; 900 } 901 count++; 902 } 903 return count; 904 } 905 906 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 907 ppc_avr_t *b, ppc_avr_t *c) 908 { 909 int sat = 0; 910 int i; 911 912 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 913 int32_t prod = a->s16[i] * b->s16[i]; 914 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 915 916 r->s16[i] = cvtswsh(t, &sat); 917 } 918 919 if (sat) { 920 set_vscr_sat(env); 921 } 922 } 923 924 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 925 ppc_avr_t *b, ppc_avr_t *c) 926 { 927 int sat = 0; 928 int i; 929 930 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 931 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; 932 int32_t t = (int32_t)c->s16[i] + (prod >> 15); 933 r->s16[i] = cvtswsh(t, &sat); 934 } 935 936 if (sat) { 937 set_vscr_sat(env); 938 } 939 } 940 941 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 942 uint32_t v) 943 { 944 int i; 945 946 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 947 int32_t prod = a->s16[i] * b->s16[i]; 948 r->s16[i] = (int16_t) (prod + c->s16[i]); 949 } 950 } 951 952 #define VMRG_DO(name, element, access, ofs) \ 953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 954 { \ 955 ppc_avr_t result; \ 956 int i, half = ARRAY_SIZE(r->element) / 2; \ 957 \ 958 for (i = 0; i < half; i++) { \ 959 result.access(i * 2 + 0) = a->access(i + ofs); \ 960 result.access(i * 2 + 1) = b->access(i + ofs); \ 961 } \ 962 *r = result; \ 963 } 964 965 #define VMRG(suffix, element, access) \ 966 VMRG_DO(mrgl##suffix, element, access, half) \ 967 VMRG_DO(mrgh##suffix, element, access, 0) 968 VMRG(b, u8, VsrB) 969 VMRG(h, u16, VsrH) 970 VMRG(w, u32, VsrW) 971 #undef VMRG_DO 972 #undef VMRG 973 974 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 975 { 976 int32_t prod[16]; 977 int i; 978 979 for (i = 0; i < ARRAY_SIZE(r->s8); i++) { 980 prod[i] = (int32_t)a->s8[i] * b->u8[i]; 981 } 982 983 VECTOR_FOR_INORDER_I(i, s32) { 984 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] + 985 prod[4 * i + 2] + prod[4 * i + 3]; 986 } 987 } 988 989 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 990 { 991 int32_t prod[8]; 992 int i; 993 994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 995 prod[i] = a->s16[i] * b->s16[i]; 996 } 997 998 VECTOR_FOR_INORDER_I(i, s32) { 999 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1000 } 1001 } 1002 1003 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1004 ppc_avr_t *b, ppc_avr_t 
*c) 1005 { 1006 int32_t prod[8]; 1007 int i; 1008 int sat = 0; 1009 1010 for (i = 0; i < ARRAY_SIZE(r->s16); i++) { 1011 prod[i] = (int32_t)a->s16[i] * b->s16[i]; 1012 } 1013 1014 VECTOR_FOR_INORDER_I(i, s32) { 1015 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1]; 1016 1017 r->u32[i] = cvtsdsw(t, &sat); 1018 } 1019 1020 if (sat) { 1021 set_vscr_sat(env); 1022 } 1023 } 1024 1025 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1026 { 1027 uint16_t prod[16]; 1028 int i; 1029 1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1031 prod[i] = a->u8[i] * b->u8[i]; 1032 } 1033 1034 VECTOR_FOR_INORDER_I(i, u32) { 1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] + 1036 prod[4 * i + 2] + prod[4 * i + 3]; 1037 } 1038 } 1039 1040 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1041 { 1042 uint32_t prod[8]; 1043 int i; 1044 1045 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1046 prod[i] = a->u16[i] * b->u16[i]; 1047 } 1048 1049 VECTOR_FOR_INORDER_I(i, u32) { 1050 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1051 } 1052 } 1053 1054 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, 1055 ppc_avr_t *b, ppc_avr_t *c) 1056 { 1057 uint32_t prod[8]; 1058 int i; 1059 int sat = 0; 1060 1061 for (i = 0; i < ARRAY_SIZE(r->u16); i++) { 1062 prod[i] = a->u16[i] * b->u16[i]; 1063 } 1064 1065 VECTOR_FOR_INORDER_I(i, s32) { 1066 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1]; 1067 1068 r->u32[i] = cvtuduw(t, &sat); 1069 } 1070 1071 if (sat) { 1072 set_vscr_sat(env); 1073 } 1074 } 1075 1076 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ 1077 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1078 { \ 1079 int i; \ 1080 \ 1081 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1082 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ 1083 (cast)b->mul_access(i); \ 1084 } \ 1085 } 1086 1087 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ 1088 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1089 { \ 1090 int i; \ 1091 \ 1092 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ 1093 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ 1094 (cast)b->mul_access(i + 1); \ 1095 } \ 1096 } 1097 1098 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ 1099 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ 1100 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) 1101 VMUL(SB, s8, VsrSB, VsrSH, int16_t) 1102 VMUL(SH, s16, VsrSH, VsrSW, int32_t) 1103 VMUL(SW, s32, VsrSW, VsrSD, int64_t) 1104 VMUL(UB, u8, VsrB, VsrH, uint16_t) 1105 VMUL(UH, u16, VsrH, VsrW, uint32_t) 1106 VMUL(UW, u32, VsrW, VsrD, uint64_t) 1107 #undef VMUL_DO_EVN 1108 #undef VMUL_DO_ODD 1109 #undef VMUL 1110 1111 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, 1112 target_ulong uim) 1113 { 1114 int i, idx; 1115 ppc_vsr_t tmp = { .u64 = {0, 0} }; 1116 1117 for (i = 0; i < ARRAY_SIZE(t->u8); i++) { 1118 if ((pcv->VsrB(i) >> 5) == uim) { 1119 idx = pcv->VsrB(i) & 0x1f; 1120 if (idx < ARRAY_SIZE(t->u8)) { 1121 tmp.VsrB(i) = s0->VsrB(idx); 1122 } else { 1123 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); 1124 } 1125 } 1126 } 1127 1128 *t = tmp; 1129 } 1130 1131 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1132 { 1133 Int128 neg1 = int128_makes64(-1); 1134 Int128 int128_min = int128_make128(0, INT64_MIN); 1135 if (likely(int128_nz(b->s128) && 1136 
(int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1137 t->s128 = int128_divs(a->s128, b->s128); 1138 } else { 1139 t->s128 = a->s128; /* Undefined behavior */ 1140 } 1141 } 1142 1143 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1144 { 1145 if (int128_nz(b->s128)) { 1146 t->s128 = int128_divu(a->s128, b->s128); 1147 } else { 1148 t->s128 = a->s128; /* Undefined behavior */ 1149 } 1150 } 1151 1152 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1153 { 1154 int i; 1155 int64_t high; 1156 uint64_t low; 1157 for (i = 0; i < 2; i++) { 1158 high = a->s64[i]; 1159 low = 0; 1160 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) { 1161 t->s64[i] = a->s64[i]; /* Undefined behavior */ 1162 } else { 1163 divs128(&low, &high, b->s64[i]); 1164 t->s64[i] = low; 1165 } 1166 } 1167 } 1168 1169 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1170 { 1171 int i; 1172 uint64_t high, low; 1173 for (i = 0; i < 2; i++) { 1174 high = a->u64[i]; 1175 low = 0; 1176 if (unlikely(!b->u64[i])) { 1177 t->u64[i] = a->u64[i]; /* Undefined behavior */ 1178 } else { 1179 divu128(&low, &high, b->u64[i]); 1180 t->u64[i] = low; 1181 } 1182 } 1183 } 1184 1185 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1186 { 1187 Int128 high, low; 1188 Int128 int128_min = int128_make128(0, INT64_MIN); 1189 Int128 neg1 = int128_makes64(-1); 1190 1191 high = a->s128; 1192 low = int128_zero(); 1193 if (unlikely(!int128_nz(b->s128) || 1194 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) { 1195 t->s128 = a->s128; /* Undefined behavior */ 1196 } else { 1197 divs256(&low, &high, b->s128); 1198 t->s128 = low; 1199 } 1200 } 1201 1202 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1203 { 1204 Int128 high, low; 1205 1206 high = a->s128; 1207 low = int128_zero(); 1208 if (unlikely(!int128_nz(b->s128))) { 1209 t->s128 = a->s128; /* Undefined behavior */ 1210 } else { 1211 divu256(&low, &high, b->s128); 1212 t->s128 = low; 1213 } 1214 } 1215 1216 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1217 { 1218 Int128 neg1 = int128_makes64(-1); 1219 Int128 int128_min = int128_make128(0, INT64_MIN); 1220 if (likely(int128_nz(b->s128) && 1221 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) { 1222 t->s128 = int128_rems(a->s128, b->s128); 1223 } else { 1224 t->s128 = int128_zero(); /* Undefined behavior */ 1225 } 1226 } 1227 1228 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b) 1229 { 1230 if (likely(int128_nz(b->s128))) { 1231 t->s128 = int128_remu(a->s128, b->s128); 1232 } else { 1233 t->s128 = int128_zero(); /* Undefined behavior */ 1234 } 1235 } 1236 1237 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1238 { 1239 ppc_avr_t result; 1240 int i; 1241 1242 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1243 int s = c->VsrB(i) & 0x1f; 1244 int index = s & 0xf; 1245 1246 if (s & 0x10) { 1247 result.VsrB(i) = b->VsrB(index); 1248 } else { 1249 result.VsrB(i) = a->VsrB(index); 1250 } 1251 } 1252 *r = result; 1253 } 1254 1255 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 1256 { 1257 ppc_avr_t result; 1258 int i; 1259 1260 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1261 int s = c->VsrB(i) & 0x1f; 1262 int index = 15 - (s & 0xf); 1263 1264 if (s & 0x10) { 1265 result.VsrB(i) = a->VsrB(index); 1266 } else { 1267 result.VsrB(i) = b->VsrB(index); 1268 } 1269 } 1270 *r = result; 1271 } 1272 1273 #define XXGENPCV_BE_EXP(NAME, SZ) \ 1274 void glue(helper_, glue(NAME, 
_be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1275 { \ 1276 ppc_vsr_t tmp; \ 1277 \ 1278 /* Initialize tmp with the result of an all-zeros mask */ \ 1279 tmp.VsrD(0) = 0x1011121314151617; \ 1280 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ 1281 \ 1282 /* Iterate over the most significant byte of each element */ \ 1283 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1284 if (b->VsrB(i) & 0x80) { \ 1285 /* Update each byte of the element */ \ 1286 for (int k = 0; k < SZ; k++) { \ 1287 tmp.VsrB(i + k) = j + k; \ 1288 } \ 1289 j += SZ; \ 1290 } \ 1291 } \ 1292 \ 1293 *t = tmp; \ 1294 } 1295 1296 #define XXGENPCV_BE_COMP(NAME, SZ) \ 1297 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1298 { \ 1299 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1300 \ 1301 /* Iterate over the most significant byte of each element */ \ 1302 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1303 if (b->VsrB(i) & 0x80) { \ 1304 /* Update each byte of the element */ \ 1305 for (int k = 0; k < SZ; k++) { \ 1306 tmp.VsrB(j + k) = i + k; \ 1307 } \ 1308 j += SZ; \ 1309 } \ 1310 } \ 1311 \ 1312 *t = tmp; \ 1313 } 1314 1315 #define XXGENPCV_LE_EXP(NAME, SZ) \ 1316 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ 1317 { \ 1318 ppc_vsr_t tmp; \ 1319 \ 1320 /* Initialize tmp with the result of an all-zeros mask */ \ 1321 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ 1322 tmp.VsrD(1) = 0x1716151413121110; \ 1323 \ 1324 /* Iterate over the most significant byte of each element */ \ 1325 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1326 /* Reverse indexing of "i" */ \ 1327 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ 1328 if (b->VsrB(idx) & 0x80) { \ 1329 /* Update each byte of the element */ \ 1330 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1331 tmp.VsrB(idx + rk) = j + k; \ 1332 } \ 1333 j += SZ; \ 1334 } \ 1335 } \ 1336 \ 1337 *t = tmp; \ 1338 } 1339 1340 #define XXGENPCV_LE_COMP(NAME, SZ) \ 1341 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ 1342 { \ 1343 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ 1344 \ 1345 /* Iterate over the most significant byte of each element */ \ 1346 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ 1347 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ 1348 /* Update each byte of the element */ \ 1349 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ 1350 /* Reverse indexing of "j" */ \ 1351 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ 1352 tmp.VsrB(idx + rk) = i + k; \ 1353 } \ 1354 j += SZ; \ 1355 } \ 1356 } \ 1357 \ 1358 *t = tmp; \ 1359 } 1360 1361 #define XXGENPCV(NAME, SZ) \ 1362 XXGENPCV_BE_EXP(NAME, SZ) \ 1363 XXGENPCV_BE_COMP(NAME, SZ) \ 1364 XXGENPCV_LE_EXP(NAME, SZ) \ 1365 XXGENPCV_LE_COMP(NAME, SZ) \ 1366 1367 XXGENPCV(XXGENPCVBM, 1) 1368 XXGENPCV(XXGENPCVHM, 2) 1369 XXGENPCV(XXGENPCVWM, 4) 1370 XXGENPCV(XXGENPCVDM, 8) 1371 1372 #undef XXGENPCV_BE_EXP 1373 #undef XXGENPCV_BE_COMP 1374 #undef XXGENPCV_LE_EXP 1375 #undef XXGENPCV_LE_COMP 1376 #undef XXGENPCV 1377 1378 #if HOST_BIG_ENDIAN 1379 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) 1380 #define VBPERMD_INDEX(i) (i) 1381 #define VBPERMQ_DW(index) (((index) & 0x40) != 0) 1382 #else 1383 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)]) 1384 #define VBPERMD_INDEX(i) (1 - i) 1385 #define VBPERMQ_DW(index) (((index) & 0x40) == 0) 1386 #endif 1387 #define EXTRACT_BIT(avr, i, index) \ 1388 (extract64((avr)->VsrD(i), 63 - index, 1)) 1389 1390 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1391 { 1392 int i, j; 1393 ppc_avr_t result = 
{ .u64 = { 0, 0 } }; 1394 VECTOR_FOR_INORDER_I(i, u64) { 1395 for (j = 0; j < 8; j++) { 1396 int index = VBPERMQ_INDEX(b, (i * 8) + j); 1397 if (index < 64 && EXTRACT_BIT(a, i, index)) { 1398 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); 1399 } 1400 } 1401 } 1402 *r = result; 1403 } 1404 1405 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1406 { 1407 int i; 1408 uint64_t perm = 0; 1409 1410 VECTOR_FOR_INORDER_I(i, u8) { 1411 int index = VBPERMQ_INDEX(b, i); 1412 1413 if (index < 128) { 1414 uint64_t mask = (1ull << (63 - (index & 0x3F))); 1415 if (a->u64[VBPERMQ_DW(index)] & mask) { 1416 perm |= (0x8000 >> i); 1417 } 1418 } 1419 } 1420 1421 r->VsrD(0) = perm; 1422 r->VsrD(1) = 0; 1423 } 1424 1425 #undef VBPERMQ_INDEX 1426 #undef VBPERMQ_DW 1427 1428 /* 1429 * There is no carry across the two doublewords, so their order does 1430 * not matter. Nor is there partial overlap between registers. 1431 */ 1432 void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1433 { 1434 for (int i = 0; i < 2; ++i) { 1435 uint64_t aa = a->u64[i], bb = b->u64[i]; 1436 r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb); 1437 } 1438 } 1439 1440 void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1441 { 1442 for (int i = 0; i < 2; ++i) { 1443 uint64_t aa = a->u64[i], bb = b->u64[i]; 1444 r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb); 1445 } 1446 } 1447 1448 #define PMSUM(name, srcfld, trgfld, trgtyp) \ 1449 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ 1450 { \ 1451 int i, j; \ 1452 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ 1453 \ 1454 VECTOR_FOR_INORDER_I(i, srcfld) { \ 1455 prod[i] = 0; \ 1456 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ 1457 if (a->srcfld[i] & (1ull << j)) { \ 1458 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ 1459 } \ 1460 } \ 1461 } \ 1462 \ 1463 VECTOR_FOR_INORDER_I(i, trgfld) { \ 1464 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ 1465 } \ 1466 } 1467 1468 PMSUM(vpmsumw, u32, u64, uint64_t) 1469 1470 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1471 { 1472 int i, j; 1473 Int128 tmp, prod[2] = {int128_zero(), int128_zero()}; 1474 1475 for (j = 0; j < 64; j++) { 1476 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1477 if (a->VsrD(i) & (1ull << j)) { 1478 tmp = int128_make64(b->VsrD(i)); 1479 tmp = int128_lshift(tmp, j); 1480 prod[i] = int128_xor(prod[i], tmp); 1481 } 1482 } 1483 } 1484 1485 r->s128 = int128_xor(prod[0], prod[1]); 1486 } 1487 1488 #if HOST_BIG_ENDIAN 1489 #define PKBIG 1 1490 #else 1491 #define PKBIG 0 1492 #endif 1493 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1494 { 1495 int i, j; 1496 ppc_avr_t result; 1497 #if HOST_BIG_ENDIAN 1498 const ppc_avr_t *x[2] = { a, b }; 1499 #else 1500 const ppc_avr_t *x[2] = { b, a }; 1501 #endif 1502 1503 VECTOR_FOR_INORDER_I(i, u64) { 1504 VECTOR_FOR_INORDER_I(j, u32) { 1505 uint32_t e = x[i]->u32[j]; 1506 1507 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) | 1508 ((e >> 6) & 0x3e0) | 1509 ((e >> 3) & 0x1f)); 1510 } 1511 } 1512 *r = result; 1513 } 1514 1515 #define VPK(suffix, from, to, cvt, dosat) \ 1516 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1517 ppc_avr_t *a, ppc_avr_t *b) \ 1518 { \ 1519 int i; \ 1520 int sat = 0; \ 1521 ppc_avr_t result; \ 1522 ppc_avr_t *a0 = PKBIG ? a : b; \ 1523 ppc_avr_t *a1 = PKBIG ? 
b : a; \ 1524 \ 1525 VECTOR_FOR_INORDER_I(i, from) { \ 1526 result.to[i] = cvt(a0->from[i], &sat); \ 1527 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\ 1528 } \ 1529 *r = result; \ 1530 if (dosat && sat) { \ 1531 set_vscr_sat(env); \ 1532 } \ 1533 } 1534 #define I(x, y) (x) 1535 VPK(shss, s16, s8, cvtshsb, 1) 1536 VPK(shus, s16, u8, cvtshub, 1) 1537 VPK(swss, s32, s16, cvtswsh, 1) 1538 VPK(swus, s32, u16, cvtswuh, 1) 1539 VPK(sdss, s64, s32, cvtsdsw, 1) 1540 VPK(sdus, s64, u32, cvtsduw, 1) 1541 VPK(uhus, u16, u8, cvtuhub, 1) 1542 VPK(uwus, u32, u16, cvtuwuh, 1) 1543 VPK(udus, u64, u32, cvtuduw, 1) 1544 VPK(uhum, u16, u8, I, 0) 1545 VPK(uwum, u32, u16, I, 0) 1546 VPK(udum, u64, u32, I, 0) 1547 #undef I 1548 #undef VPK 1549 #undef PKBIG 1550 1551 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1552 { 1553 int i; 1554 1555 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1556 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status); 1557 } 1558 } 1559 1560 #define VRFI(suffix, rounding) \ 1561 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \ 1562 ppc_avr_t *b) \ 1563 { \ 1564 int i; \ 1565 float_status s = env->vec_status; \ 1566 \ 1567 set_float_rounding_mode(rounding, &s); \ 1568 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \ 1569 r->f32[i] = float32_round_to_int (b->f32[i], &s); \ 1570 } \ 1571 } 1572 VRFI(n, float_round_nearest_even) 1573 VRFI(m, float_round_down) 1574 VRFI(p, float_round_up) 1575 VRFI(z, float_round_to_zero) 1576 #undef VRFI 1577 1578 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1579 { 1580 int i; 1581 1582 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1583 float32 t = float32_sqrt(b->f32[i], &env->vec_status); 1584 1585 r->f32[i] = float32_div(float32_one, t, &env->vec_status); 1586 } 1587 } 1588 1589 #define VRLMI(name, size, element, insert) \ 1590 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \ 1591 { \ 1592 int i; \ 1593 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 1594 uint##size##_t src1 = a->element[i]; \ 1595 uint##size##_t src2 = b->element[i]; \ 1596 uint##size##_t src3 = r->element[i]; \ 1597 uint##size##_t begin, end, shift, mask, rot_val; \ 1598 \ 1599 shift = extract##size(src2, 0, 6); \ 1600 end = extract##size(src2, 8, 6); \ 1601 begin = extract##size(src2, 16, 6); \ 1602 rot_val = rol##size(src1, shift); \ 1603 mask = mask_u##size(begin, end); \ 1604 if (insert) { \ 1605 r->element[i] = (rot_val & mask) | (src3 & ~mask); \ 1606 } else { \ 1607 r->element[i] = (rot_val & mask); \ 1608 } \ 1609 } \ 1610 } 1611 1612 VRLMI(VRLDMI, 64, u64, 1); 1613 VRLMI(VRLWMI, 32, u32, 1); 1614 VRLMI(VRLDNM, 64, u64, 0); 1615 VRLMI(VRLWNM, 32, u32, 0); 1616 1617 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1618 { 1619 int i; 1620 1621 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1622 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status); 1623 } 1624 } 1625 1626 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) 1627 { 1628 int i; 1629 1630 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { 1631 r->f32[i] = float32_log2(b->f32[i], &env->vec_status); 1632 } 1633 } 1634 1635 #define VEXTU_X_DO(name, size, left) \ 1636 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \ 1637 { \ 1638 int index = (a & 0xf) * 8; \ 1639 if (left) { \ 1640 index = 128 - index - size; \ 1641 } \ 1642 return int128_getlo(int128_rshift(b->s128, index)) & \ 1643 MAKE_64BIT_MASK(0, size); \ 1644 } 1645 VEXTU_X_DO(vextublx, 8, 1) 1646 VEXTU_X_DO(vextuhlx, 16, 1) 1647 
VEXTU_X_DO(vextuwlx, 32, 1) 1648 VEXTU_X_DO(vextubrx, 8, 0) 1649 VEXTU_X_DO(vextuhrx, 16, 0) 1650 VEXTU_X_DO(vextuwrx, 32, 0) 1651 #undef VEXTU_X_DO 1652 1653 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1654 { 1655 int i; 1656 unsigned int shift, bytes, size; 1657 1658 size = ARRAY_SIZE(r->u8); 1659 for (i = 0; i < size; i++) { 1660 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1661 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ 1662 (((i + 1) < size) ? a->VsrB(i + 1) : 0); 1663 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ 1664 } 1665 } 1666 1667 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1668 { 1669 int i; 1670 unsigned int shift, bytes; 1671 1672 /* 1673 * Use reverse order, as destination and source register can be 1674 * same. Its being modified in place saving temporary, reverse 1675 * order will guarantee that computed result is not fed back. 1676 */ 1677 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { 1678 shift = b->VsrB(i) & 0x7; /* extract shift value */ 1679 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); 1680 /* extract adjacent bytes */ 1681 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ 1682 } 1683 } 1684 1685 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) 1686 { 1687 int sh = shift & 0xf; 1688 int i; 1689 ppc_avr_t result; 1690 1691 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { 1692 int index = sh + i; 1693 if (index > 0xf) { 1694 result.VsrB(i) = b->VsrB(index - 0x10); 1695 } else { 1696 result.VsrB(i) = a->VsrB(index); 1697 } 1698 } 1699 *r = result; 1700 } 1701 1702 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1703 { 1704 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1705 1706 #if HOST_BIG_ENDIAN 1707 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1708 memset(&r->u8[16 - sh], 0, sh); 1709 #else 1710 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1711 memset(&r->u8[0], 0, sh); 1712 #endif 1713 } 1714 1715 #if HOST_BIG_ENDIAN 1716 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX]) 1717 #else 1718 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1) 1719 #endif 1720 1721 #define VINSX(SUFFIX, TYPE) \ 1722 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \ 1723 uint64_t val, target_ulong index) \ 1724 { \ 1725 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \ 1726 target_long idx = index; \ 1727 \ 1728 if (idx < 0 || idx > maxidx) { \ 1729 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \ 1730 qemu_log_mask(LOG_GUEST_ERROR, \ 1731 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \ 1732 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \ 1733 } else { \ 1734 TYPE src = val; \ 1735 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \ 1736 } \ 1737 } 1738 VINSX(B, uint8_t) 1739 VINSX(H, uint16_t) 1740 VINSX(W, uint32_t) 1741 VINSX(D, uint64_t) 1742 #undef ELEM_ADDR 1743 #undef VINSX 1744 #if HOST_BIG_ENDIAN 1745 #define VEXTDVLX(NAME, SIZE) \ 1746 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1747 target_ulong index) \ 1748 { \ 1749 const target_long idx = index; \ 1750 ppc_avr_t tmp[2] = { *a, *b }; \ 1751 memset(t, 0, sizeof(*t)); \ 1752 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1753 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \ 1754 } else { \ 1755 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1756 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1757 env->nip, idx < 0 ? 
SIZE - idx : idx, 32 - SIZE); \ 1758 } \ 1759 } 1760 #else 1761 #define VEXTDVLX(NAME, SIZE) \ 1762 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1763 target_ulong index) \ 1764 { \ 1765 const target_long idx = index; \ 1766 ppc_avr_t tmp[2] = { *b, *a }; \ 1767 memset(t, 0, sizeof(*t)); \ 1768 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \ 1769 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \ 1770 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \ 1771 } else { \ 1772 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \ 1773 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \ 1774 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \ 1775 } \ 1776 } 1777 #endif 1778 VEXTDVLX(VEXTDUBVLX, 1) 1779 VEXTDVLX(VEXTDUHVLX, 2) 1780 VEXTDVLX(VEXTDUWVLX, 4) 1781 VEXTDVLX(VEXTDDVLX, 8) 1782 #undef VEXTDVLX 1783 #if HOST_BIG_ENDIAN 1784 #define VEXTRACT(suffix, element) \ 1785 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1786 { \ 1787 uint32_t es = sizeof(r->element[0]); \ 1788 memmove(&r->u8[8 - es], &b->u8[index], es); \ 1789 memset(&r->u8[8], 0, 8); \ 1790 memset(&r->u8[0], 0, 8 - es); \ 1791 } 1792 #else 1793 #define VEXTRACT(suffix, element) \ 1794 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ 1795 { \ 1796 uint32_t es = sizeof(r->element[0]); \ 1797 uint32_t s = (16 - index) - es; \ 1798 memmove(&r->u8[8], &b->u8[s], es); \ 1799 memset(&r->u8[0], 0, 8); \ 1800 memset(&r->u8[8 + es], 0, 8 - es); \ 1801 } 1802 #endif 1803 VEXTRACT(ub, u8) 1804 VEXTRACT(uh, u16) 1805 VEXTRACT(uw, u32) 1806 VEXTRACT(d, u64) 1807 #undef VEXTRACT 1808 1809 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ 1810 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \ 1811 { \ 1812 int i, idx, crf = 0; \ 1813 \ 1814 for (i = 0; i < NUM_ELEMS; i++) { \ 1815 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1816 if (b->Vsr##ELEM(idx)) { \ 1817 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ 1818 } else { \ 1819 crf = 0b0010; \ 1820 break; \ 1821 } \ 1822 } \ 1823 \ 1824 for (; i < NUM_ELEMS; i++) { \ 1825 idx = LEFT ? i : NUM_ELEMS - i - 1; \ 1826 t->Vsr##ELEM(idx) = 0; \ 1827 } \ 1828 \ 1829 return crf; \ 1830 } 1831 VSTRI(VSTRIBL, B, 16, true) 1832 VSTRI(VSTRIBR, B, 16, false) 1833 VSTRI(VSTRIHL, H, 8, true) 1834 VSTRI(VSTRIHR, H, 8, false) 1835 #undef VSTRI 1836 1837 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1838 { 1839 ppc_vsr_t t = { }; 1840 size_t es = sizeof(uint32_t); 1841 uint32_t ext_index; 1842 int i; 1843 1844 ext_index = index; 1845 for (i = 0; i < es; i++, ext_index++) { 1846 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16); 1847 } 1848 1849 *xt = t; 1850 } 1851 1852 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) 1853 { 1854 ppc_vsr_t t = *xt; 1855 size_t es = sizeof(uint32_t); 1856 int ins_index, i = 0; 1857 1858 ins_index = index; 1859 for (i = 0; i < es && ins_index < 16; i++, ins_index++) { 1860 t.VsrB(ins_index) = xb->VsrB(8 - es + i); 1861 } 1862 1863 *xt = t; 1864 } 1865 1866 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, 1867 uint32_t desc) 1868 { 1869 /* 1870 * Instead of processing imm bit-by-bit, we'll skip the computation of 1871 * conjunctions whose corresponding bit is unset. 1872 */ 1873 int bit, imm = simd_data(desc); 1874 Int128 conj, disj = int128_zero(); 1875 1876 /* Iterate over set bits from the least to the most significant bit */ 1877 while (imm) { 1878 /* 1879 * Get the next bit to be processed with ctz64. 
Invert the result of 1880 * ctz64 to match the indexing used by PowerISA. 1881 */ 1882 bit = 7 - ctzl(imm); 1883 if (bit & 0x4) { 1884 conj = a->s128; 1885 } else { 1886 conj = int128_not(a->s128); 1887 } 1888 if (bit & 0x2) { 1889 conj = int128_and(conj, b->s128); 1890 } else { 1891 conj = int128_and(conj, int128_not(b->s128)); 1892 } 1893 if (bit & 0x1) { 1894 conj = int128_and(conj, c->s128); 1895 } else { 1896 conj = int128_and(conj, int128_not(c->s128)); 1897 } 1898 disj = int128_or(disj, conj); 1899 1900 /* Unset the least significant bit that is set */ 1901 imm &= imm - 1; 1902 } 1903 1904 t->s128 = disj; 1905 } 1906 1907 #define XXBLEND(name, sz) \ 1908 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ 1909 ppc_avr_t *c, uint32_t desc) \ 1910 { \ 1911 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \ 1912 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \ 1913 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \ 1914 } \ 1915 } 1916 XXBLEND(B, 8) 1917 XXBLEND(H, 16) 1918 XXBLEND(W, 32) 1919 XXBLEND(D, 64) 1920 #undef XXBLEND 1921 1922 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1923 { 1924 int sh = (b->VsrB(0xf) >> 3) & 0xf; 1925 1926 #if HOST_BIG_ENDIAN 1927 memmove(&r->u8[sh], &a->u8[0], 16 - sh); 1928 memset(&r->u8[0], 0, sh); 1929 #else 1930 memmove(&r->u8[0], &a->u8[sh], 16 - sh); 1931 memset(&r->u8[16 - sh], 0, sh); 1932 #endif 1933 } 1934 1935 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1936 { 1937 int64_t t; 1938 int i, upper; 1939 ppc_avr_t result; 1940 int sat = 0; 1941 1942 upper = ARRAY_SIZE(r->s32) - 1; 1943 t = (int64_t)b->VsrSW(upper); 1944 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1945 t += a->VsrSW(i); 1946 result.VsrSW(i) = 0; 1947 } 1948 result.VsrSW(upper) = cvtsdsw(t, &sat); 1949 *r = result; 1950 1951 if (sat) { 1952 set_vscr_sat(env); 1953 } 1954 } 1955 1956 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1957 { 1958 int i, j, upper; 1959 ppc_avr_t result; 1960 int sat = 0; 1961 1962 upper = 1; 1963 for (i = 0; i < ARRAY_SIZE(r->u64); i++) { 1964 int64_t t = (int64_t)b->VsrSW(upper + i * 2); 1965 1966 result.VsrD(i) = 0; 1967 for (j = 0; j < ARRAY_SIZE(r->u64); j++) { 1968 t += a->VsrSW(2 * i + j); 1969 } 1970 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat); 1971 } 1972 1973 *r = result; 1974 if (sat) { 1975 set_vscr_sat(env); 1976 } 1977 } 1978 1979 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1980 { 1981 int i, j; 1982 int sat = 0; 1983 1984 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 1985 int64_t t = (int64_t)b->s32[i]; 1986 1987 for (j = 0; j < ARRAY_SIZE(r->s32); j++) { 1988 t += a->s8[4 * i + j]; 1989 } 1990 r->s32[i] = cvtsdsw(t, &sat); 1991 } 1992 1993 if (sat) { 1994 set_vscr_sat(env); 1995 } 1996 } 1997 1998 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 1999 { 2000 int sat = 0; 2001 int i; 2002 2003 for (i = 0; i < ARRAY_SIZE(r->s32); i++) { 2004 int64_t t = (int64_t)b->s32[i]; 2005 2006 t += a->s16[2 * i] + a->s16[2 * i + 1]; 2007 r->s32[i] = cvtsdsw(t, &sat); 2008 } 2009 2010 if (sat) { 2011 set_vscr_sat(env); 2012 } 2013 } 2014 2015 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2016 { 2017 int i, j; 2018 int sat = 0; 2019 2020 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { 2021 uint64_t t = (uint64_t)b->u32[i]; 2022 2023 for (j = 0; j < ARRAY_SIZE(r->u32); j++) { 2024 t += a->u8[4 * i + j]; 2025 } 2026 r->u32[i] = cvtuduw(t, &sat); 2027 } 
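/* cvtuduw clamps each 64-bit accumulator to UINT32_MAX and raises 'sat', so a single sticky VSCR[SAT] update after the loop is sufficient. */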
2028 2029 if (sat) { 2030 set_vscr_sat(env); 2031 } 2032 } 2033 2034 #if HOST_BIG_ENDIAN 2035 #define UPKHI 1 2036 #define UPKLO 0 2037 #else 2038 #define UPKHI 0 2039 #define UPKLO 1 2040 #endif 2041 #define VUPKPX(suffix, hi) \ 2042 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2043 { \ 2044 int i; \ 2045 ppc_avr_t result; \ 2046 \ 2047 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \ 2048 uint16_t e = b->u16[hi ? i : i + 4]; \ 2049 uint8_t a = (e >> 15) ? 0xff : 0; \ 2050 uint8_t r = (e >> 10) & 0x1f; \ 2051 uint8_t g = (e >> 5) & 0x1f; \ 2052 uint8_t b = e & 0x1f; \ 2053 \ 2054 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \ 2055 } \ 2056 *r = result; \ 2057 } 2058 VUPKPX(lpx, UPKLO) 2059 VUPKPX(hpx, UPKHI) 2060 #undef VUPKPX 2061 2062 #define VUPK(suffix, unpacked, packee, hi) \ 2063 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \ 2064 { \ 2065 int i; \ 2066 ppc_avr_t result; \ 2067 \ 2068 if (hi) { \ 2069 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \ 2070 result.unpacked[i] = b->packee[i]; \ 2071 } \ 2072 } else { \ 2073 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \ 2074 i++) { \ 2075 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \ 2076 } \ 2077 } \ 2078 *r = result; \ 2079 } 2080 VUPK(hsb, s16, s8, UPKHI) 2081 VUPK(hsh, s32, s16, UPKHI) 2082 VUPK(hsw, s64, s32, UPKHI) 2083 VUPK(lsb, s16, s8, UPKLO) 2084 VUPK(lsh, s32, s16, UPKLO) 2085 VUPK(lsw, s64, s32, UPKLO) 2086 #undef VUPK 2087 #undef UPKHI 2088 #undef UPKLO 2089 2090 #define VGENERIC_DO(name, element) \ 2091 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \ 2092 { \ 2093 int i; \ 2094 \ 2095 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ 2096 r->element[i] = name(b->element[i]); \ 2097 } \ 2098 } 2099 2100 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8) 2101 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16) 2102 2103 VGENERIC_DO(clzb, u8) 2104 VGENERIC_DO(clzh, u16) 2105 2106 #undef clzb 2107 #undef clzh 2108 2109 #define ctzb(v) ((v) ? ctz32(v) : 8) 2110 #define ctzh(v) ((v) ? 
ctz32(v) : 16) 2111 #define ctzw(v) ctz32((v)) 2112 #define ctzd(v) ctz64((v)) 2113 2114 VGENERIC_DO(ctzb, u8) 2115 VGENERIC_DO(ctzh, u16) 2116 VGENERIC_DO(ctzw, u32) 2117 VGENERIC_DO(ctzd, u64) 2118 2119 #undef ctzb 2120 #undef ctzh 2121 #undef ctzw 2122 #undef ctzd 2123 2124 #define popcntb(v) ctpop8(v) 2125 #define popcnth(v) ctpop16(v) 2126 #define popcntw(v) ctpop32(v) 2127 #define popcntd(v) ctpop64(v) 2128 2129 VGENERIC_DO(popcntb, u8) 2130 VGENERIC_DO(popcnth, u16) 2131 VGENERIC_DO(popcntw, u32) 2132 VGENERIC_DO(popcntd, u64) 2133 2134 #undef popcntb 2135 #undef popcnth 2136 #undef popcntw 2137 #undef popcntd 2138 2139 #undef VGENERIC_DO 2140 2141 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2142 { 2143 r->s128 = int128_add(a->s128, b->s128); 2144 } 2145 2146 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2147 { 2148 r->s128 = int128_add(int128_add(a->s128, b->s128), 2149 int128_make64(int128_getlo(c->s128) & 1)); 2150 } 2151 2152 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2153 { 2154 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128); 2155 r->VsrD(0) = 0; 2156 } 2157 2158 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2159 { 2160 bool carry_out = int128_ult(int128_not(a->s128), b->s128), 2161 carry_in = int128_getlo(c->s128) & 1; 2162 2163 if (!carry_out && carry_in) { 2164 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) && 2165 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1)); 2166 } 2167 2168 r->VsrD(0) = 0; 2169 r->VsrD(1) = carry_out; 2170 } 2171 2172 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2173 { 2174 r->s128 = int128_sub(a->s128, b->s128); 2175 } 2176 2177 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2178 { 2179 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)), 2180 int128_make64(int128_getlo(c->s128) & 1)); 2181 } 2182 2183 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) 2184 { 2185 Int128 tmp = int128_not(b->s128); 2186 2187 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) || 2188 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1)); 2189 r->VsrD(0) = 0; 2190 } 2191 2192 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) 2193 { 2194 Int128 tmp = int128_not(b->s128); 2195 bool carry_out = int128_ult(int128_not(a->s128), tmp), 2196 carry_in = int128_getlo(c->s128) & 1; 2197 2198 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp), 2199 int128_makes64(-1))); 2200 r->VsrD(0) = 0; 2201 } 2202 2203 #define BCD_PLUS_PREF_1 0xC 2204 #define BCD_PLUS_PREF_2 0xF 2205 #define BCD_PLUS_ALT_1 0xA 2206 #define BCD_NEG_PREF 0xD 2207 #define BCD_NEG_ALT 0xB 2208 #define BCD_PLUS_ALT_2 0xE 2209 #define NATIONAL_PLUS 0x2B 2210 #define NATIONAL_NEG 0x2D 2211 2212 #define BCD_DIG_BYTE(n) (15 - ((n) / 2)) 2213 2214 static int bcd_get_sgn(ppc_avr_t *bcd) 2215 { 2216 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) { 2217 case BCD_PLUS_PREF_1: 2218 case BCD_PLUS_PREF_2: 2219 case BCD_PLUS_ALT_1: 2220 case BCD_PLUS_ALT_2: 2221 { 2222 return 1; 2223 } 2224 2225 case BCD_NEG_PREF: 2226 case BCD_NEG_ALT: 2227 { 2228 return -1; 2229 } 2230 2231 default: 2232 { 2233 return 0; 2234 } 2235 } 2236 } 2237 2238 static int bcd_preferred_sgn(int sgn, int ps) 2239 { 2240 if (sgn >= 0) { 2241 return (ps == 0) ? 
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}
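/*
 * bcdadd/bcdsub work in sign-and-magnitude form: if the operand signs
 * match, the magnitudes are added; otherwise the smaller magnitude is
 * subtracted from the larger one and the result takes the sign of the
 * larger operand.  The returned CR field encodes LT/GT for the sign of a
 * non-zero result, EQ for zero, and SO for invalid operands or overflow
 * out of the 31-digit field.
 */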
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
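/*
 * Zoned-decimal conversions: bcdcfz/bcdctz use one byte per digit, with
 * the digit in the low nibble and a zone in the high nibble (0x3, or 0xF
 * when PS = 1).  The sign is carried in the zone nibble of the rightmost
 * byte, which is why that byte's zone is not checked against zone_lead.
 */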
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 * > 0 if ahi|alo > bhi|blo,
 * 0 if ahi|alo == bhi|blo,
 * < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}
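/*
 * bcdcfsq converts a signed 128-bit binary value to packed BCD.  After
 * the range check against 10^31 - 1, one 128-by-64 division by 10^15
 * splits the magnitude: the remainder provides digits 1-15, and the
 * quotient (which is then guaranteed to fit in 64 bits) provides digits
 * 16-31.
 */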
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
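/*
 * Decimal shift helpers: bcds/bcdus/bcdsr take the shift count from
 * a->VsrSB(7) and shift the 128-bit value by four bits per digit.  For
 * the signed forms the sign nibble is cleared before shifting and
 * rewritten afterwards.  bcdsr additionally rounds a right shift by
 * adding one when the most significant discarded digit is 5 or greater.
 */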
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
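/*
 * VMX crypto helpers.  vsbox applies the AES S-box to every byte of the
 * operand; vcipher/vcipherlast perform one middle/final AES encryption
 * round, and vncipher/vncipherlast the corresponding decryption rounds,
 * built on the generic primitives from crypto/aes-round.h.
 */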
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesenc_SB_SR_MC_AK(ad, st, rk, true);
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
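/*
 * brinc implements the SPE bit-reversed increment used for FFT-style
 * addressing: the masked low bits of arg1 are bit-reversed with
 * word_reverse(), incremented, and reversed back.  OR-ing in ~b before
 * the increment pre-sets the bits outside the mask so that the carry
 * propagates straight through them, and the final "& b" discards them
 * again.
 */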
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}