/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "crypto/aes-round.h"
#include "crypto/clmul.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"
#include "tcg/tcg-gvec-desc.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = env->ov32 = 1;
    } else {
        env->ov = env->ov32 = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif

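/*
 * Editorial worked example for the extended divides above: divweu with
 * ra = 0x00000002 and rb = 0x00000008 computes 0x2_0000_0000 / 8 =
 * 0x4000_0000, which fits in 32 bits, so no overflow is flagged.  With
 * rb = 0x00000001 the quotient 0x2_0000_0000 would need 34 bits, so RT
 * is undefined and, when OE is in effect, OV/OV32 are raised.
 */
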
#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

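/*
 * The popcnt helpers below use the usual SWAR pairwise-sum reduction,
 * but deliberately stop folding at byte (popcntb) or word (popcntw)
 * granularity, so each byte/word of the result holds the population
 * count of the corresponding byte/word of the source.
 */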
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is
         * kept the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' was ror'ed ctpop(mask) times.  To put it back in
     * place, we'll shift it 64 - ctpop(mask) more times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

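/*
 * Editorial example for the bit deposit/extract helpers below (they
 * mirror the x86 PDEP/PEXT operations): PDEPD with src = 0b101 and
 * mask = 0b11010 scatters the low source bits into the set mask
 * positions, giving 0b10010; PEXTD with src = 0b10010 and the same
 * mask gathers those positions back into 0b101.
 */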
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

/*****************************************************************************/
/* Altivec extension helpers */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

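/*
 * Note: saturation is tracked out of line.  Helpers call set_vscr_sat()
 * to mark env->vscr_sat, and the architected VSCR.SAT bit is derived
 * from it when the guest reads VSCR (any non-zero value counts as
 * "saturated"), which is why the lane chosen above is irrelevant.
 */
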
/* vprtybq */
void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

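/*
 * Vector average: the widened sum is biased by 1 before the halving
 * shift, so results exactly halfway between two integers round upward
 * (toward positive infinity for the signed variants).
 */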
#define VAVG(name, element, etype)                                          \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
    {                                                                       \
        int i;                                                              \
                                                                            \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;      \
            r->element[i] = x >> 1;                                         \
        }                                                                   \
    }

VAVG(VAVGSB, s8, int16_t)
VAVG(VAVGUB, u8, uint16_t)
VAVG(VAVGSH, s16, int32_t)
VAVG(VAVGUH, u16, uint32_t)
VAVG(VAVGSW, s32, int64_t)
VAVG(VAVGUW, u32, uint64_t)
#undef VAVG

#define VABSDU(name, element)                                               \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
    {                                                                       \
        int i;                                                              \
                                                                            \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
            r->element[i] = (a->element[i] > b->element[i]) ?               \
                (a->element[i] - b->element[i]) :                           \
                (b->element[i] - a->element[i]);                            \
        }                                                                   \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
VABSDU(VABSDUB, u8)
VABSDU(VABSDUH, u16)
VABSDU(VABSDUW, u32)
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMPNEZ(NAME, ELEM) \
void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
{                                                                           \
    for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
        t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
                      (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
    }                                                                       \
}
VCMPNEZ(VCMPNEZB, u8)
VCMPNEZ(VCMPNEZH, u16)
VCMPNEZ(VCMPNEZW, u32)
#undef VCMPNEZ

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

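/*
 * vctuxs/vctsxs: saturating float-to-integer conversion.  A private copy
 * of vec_status is forced to round-to-zero, the f32 is widened to f64
 * and scaled by 2**uim, and the 64-bit intermediate is narrowed with
 * saturation; NaNs convert to zero.
 */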
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);

static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 8; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
        }
    }
    return psum;
}

static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 4; i++, mask >>= 1) {
        if (mask & 1) {
            psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
        }
    }
    return psum;
}

static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t psum = 0;
    for (int i = 0; i < 2; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (int64_t)sextract32(a, 16 * i, 16) *
                    sextract32(b, 16 * i, 16);
        }
    }
    return psum;
}

static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
                   uint32_t mask, bool sat, bool acc, do_ger ger)
{
    uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
            xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
            ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
    uint8_t xmsk_bit, ymsk_bit;
    int64_t psum;
    int i, j;
    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
                psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
                if (acc) {
                    psum += at[i].VsrSW(j);
                }
                if (sat && psum > INT32_MAX) {
                    set_vscr_sat(env);
                    at[i].VsrSW(j) = INT32_MAX;
                } else if (sat && psum < INT32_MIN) {
                    set_vscr_sat(env);
                    at[i].VsrSW(j) = INT32_MIN;
                } else {
                    at[i].VsrSW(j) = (int32_t) psum;
                }
            } else {
                at[i].VsrSW(j) = 0;
            }
        }
    }
}

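/*
 * The XVI*GER* wrappers below are the MMA integer rank-k update
 * instructions: for every accumulator entry (i, j) enabled by XMSK/YMSK,
 * the lane products of a->VsrW(i) and b->VsrW(j) selected by PMSK are
 * summed, optionally added to the existing entry ("PP" variants) and
 * optionally saturated to 32 bits ("S" variants).
 */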
QEMU_FLATTEN
void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                      ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                         ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank2);
}

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
                      uint32_t v)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                             \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i, half = ARRAY_SIZE(r->element) / 2;                       \
                                                                        \
        for (i = 0; i < half; i++) {                                    \
            result.access(i * 2 + 0) = a->access(i + ofs);              \
            result.access(i * 2 + 1) = b->access(i + ofs);              \
        }                                                               \
        *r = result;                                                    \
    }

#define VMRG(suffix, element, access)                   \
    VMRG_DO(mrgl##suffix, element, access, half)        \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

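/*
 * VMSUMSHS/VMSUMUHS below follow the same multiply-sum layout as the
 * modulo forms above, but widen the per-word accumulation to 64 bits
 * and narrow it back with signed (resp. unsigned) saturation rather
 * than plain truncation.
 */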
void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)          \
    VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
VMUL(SB, s8, VsrSB, VsrSH, int16_t)
VMUL(SH, s16, VsrSH, VsrSW, int32_t)
VMUL(SW, s32, VsrSW, VsrSD, int64_t)
VMUL(UB, u8, VsrB, VsrH, uint16_t)
VMUL(UH, u16, VsrH, VsrW, uint32_t)
VMUL(UW, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
                    target_ulong uim)
{
    int i, idx;
    ppc_vsr_t tmp = { .u64 = {0, 0} };

    for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
        if ((pcv->VsrB(i) >> 5) == uim) {
            idx = pcv->VsrB(i) & 0x1f;
            if (idx < ARRAY_SIZE(t->u8)) {
                tmp.VsrB(i) = s0->VsrB(idx);
            } else {
                tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
            }
        }
    }

    *t = tmp;
}

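/*
 * 128-bit divide/modulo helpers.  The cases the ISA leaves undefined
 * (division by zero, and INT128_MIN / -1 for the signed forms) are made
 * deterministic: the divide helpers return the dividend and the modulo
 * helpers return zero.
 */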
void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_divs(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (int128_nz(b->s128)) {
        t->s128 = int128_divu(a->s128, b->s128);
    } else {
        t->s128 = a->s128; /* Undefined behavior */
    }
}

void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int64_t high;
    uint64_t low;
    for (i = 0; i < 2; i++) {
        high = a->s64[i];
        low = 0;
        if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
            t->s64[i] = a->s64[i]; /* Undefined behavior */
        } else {
            divs128(&low, &high, b->s64[i]);
            t->s64[i] = low;
        }
    }
}

void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t high, low;
    for (i = 0; i < 2; i++) {
        high = a->u64[i];
        low = 0;
        if (unlikely(!b->u64[i])) {
            t->u64[i] = a->u64[i]; /* Undefined behavior */
        } else {
            divu128(&low, &high, b->u64[i]);
            t->u64[i] = low;
        }
    }
}

void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;
    Int128 int128_min = int128_make128(0, INT64_MIN);
    Int128 neg1 = int128_makes64(-1);

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128) ||
                 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divs256(&low, &high, b->s128);
        t->s128 = low;
    }
}

void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 high, low;

    high = a->s128;
    low = int128_zero();
    if (unlikely(!int128_nz(b->s128))) {
        t->s128 = a->s128; /* Undefined behavior */
    } else {
        divu256(&low, &high, b->s128);
        t->s128 = low;
    }
}

void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 neg1 = int128_makes64(-1);
    Int128 int128_min = int128_make128(0, INT64_MIN);
    if (likely(int128_nz(b->s128) &&
               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
        t->s128 = int128_rems(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
{
    if (likely(int128_nz(b->s128))) {
        t->s128 = int128_remu(a->s128, b->s128);
    } else {
        t->s128 = int128_zero(); /* Undefined behavior */
    }
}

void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

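/*
 * XXGENPCV*: generate a permute control vector from a mask, one variant
 * per endianness and mode.  Roughly: for elements whose most significant
 * bit is set, the "_exp" (expanded) forms store the index of the next
 * unused source element at that element's position, while the "_comp"
 * (compressed) forms pack those indices into the low-numbered positions;
 * unselected slots keep their initialization values.
 */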
#define XXGENPCV_BE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{                                                                   \
    ppc_vsr_t tmp;                                                  \
                                                                    \
    /* Initialize tmp with the result of an all-zeros mask */       \
    tmp.VsrD(0) = 0x1011121314151617;                               \
    tmp.VsrD(1) = 0x18191A1B1C1D1E1F;                               \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(i) & 0x80) {                                    \
            /* Update each byte of the element */                   \
            for (int k = 0; k < SZ; k++) {                          \
                tmp.VsrB(i + k) = j + k;                            \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_BE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{                                                                   \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(i) & 0x80) {                                    \
            /* Update each byte of the element */                   \
            for (int k = 0; k < SZ; k++) {                          \
                tmp.VsrB(j + k) = i + k;                            \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{                                                                   \
    ppc_vsr_t tmp;                                                  \
                                                                    \
    /* Initialize tmp with the result of an all-zeros mask */       \
    tmp.VsrD(0) = 0x1F1E1D1C1B1A1918;                               \
    tmp.VsrD(1) = 0x1716151413121110;                               \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        /* Reverse indexing of "i" */                               \
        const int idx = ARRAY_SIZE(b->u8) - i - SZ;                 \
        if (b->VsrB(idx) & 0x80) {                                  \
            /* Update each byte of the element */                   \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
                tmp.VsrB(idx + rk) = j + k;                         \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{                                                                   \
    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
                                                                    \
    /* Iterate over the most significant byte of each element */    \
    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
        if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) {           \
            /* Update each byte of the element */                   \
            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
                /* Reverse indexing of "j" */                       \
                const int idx = ARRAY_SIZE(b->u8) - j - SZ;         \
                tmp.VsrB(idx + rk) = i + k;                         \
            }                                                       \
            j += SZ;                                                \
        }                                                           \
    }                                                               \
                                                                    \
    *t = tmp;                                                       \
}

#define XXGENPCV(NAME, SZ)      \
    XXGENPCV_BE_EXP(NAME, SZ)   \
    XXGENPCV_BE_COMP(NAME, SZ)  \
    XXGENPCV_LE_EXP(NAME, SZ)   \
    XXGENPCV_LE_COMP(NAME, SZ)  \

XXGENPCV(XXGENPCVBM, 1)
XXGENPCV(XXGENPCVHM, 2)
XXGENPCV(XXGENPCVWM, 4)
XXGENPCV(XXGENPCVDM, 8)

#undef XXGENPCV_BE_EXP
#undef XXGENPCV_BE_COMP
#undef XXGENPCV_LE_EXP
#undef XXGENPCV_LE_COMP
#undef XXGENPCV

#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->VsrD(i), 63 - index, 1))

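/*
 * vbpermd/vbpermq: gather bits of 'a' addressed by the selector bytes of
 * 'b' (bit 0 being the most significant).  vbpermd collects 8 bits per
 * doubleword; vbpermq collects 16 bits into the high doubleword of the
 * result.  Out-of-range selectors contribute a zero bit.
 */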
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

/*
 * There is no carry across the two doublewords, so their order does
 * not matter.  Nor is there partial overlap between registers.
 */
void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    for (int i = 0; i < 2; ++i) {
        uint64_t aa = a->u64[i], bb = b->u64[i];
        r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb);
    }
}

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];              \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull << j)) {                           \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];                   \
    }                                                                   \
}

PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};

    for (j = 0; j < 64; j++) {
        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
            if (a->VsrD(i) & (1ull << j)) {
                tmp = int128_make64(b->VsrD(i));
                tmp = int128_lshift(tmp, j);
                prod[i] = int128_xor(prod[i], tmp);
            }
        }
    }

    r->s128 = int128_xor(prod[0], prod[1]);
}

#if HOST_BIG_ENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if HOST_BIG_ENDIAN
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

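/*
 * VPK: pack two source vectors into one by narrowing each element.  The
 * "s"/"u" infixes select signed or unsigned saturating conversions
 * (which also latch VSCR.SAT), while the "um" forms simply truncate via
 * the identity macro I().
 */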
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert)                                  \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
{                                                                           \
    int i;                                                                  \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
        uint##size##_t src1 = a->element[i];                                \
        uint##size##_t src2 = b->element[i];                                \
        uint##size##_t src3 = r->element[i];                                \
        uint##size##_t begin, end, shift, mask, rot_val;                    \
                                                                            \
        shift = extract##size(src2, 0, 6);                                  \
        end   = extract##size(src2, 8, 6);                                  \
        begin = extract##size(src2, 16, 6);                                 \
        rot_val = rol##size(src1, shift);                                   \
        mask = mask_u##size(begin, end);                                    \
        if (insert) {                                                       \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
        } else {                                                            \
            r->element[i] = (rot_val & mask);                               \
        }                                                                   \
    }                                                                       \
}

VRLMI(VRLDMI, 64, u64, 1);
VRLMI(VRLWMI, 32, u32, 1);
VRLMI(VRLDNM, 64, u64, 0);
VRLMI(VRLWNM, 32, u32, 0);

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

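/*
 * vextu[bhw][lr]x: extract an unsigned element at a byte offset taken
 * from GPR 'a' (0-15).  The "lx" forms index from the left (most
 * significant) end of the vector, the "rx" forms from the right; both
 * return the element zero-extended to 64 bits.
 */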
#define VEXTU_X_DO(name, size, left)                            \
target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
{                                                               \
    int index = (a & 0xf) * 8;                                  \
    if (left) {                                                 \
        index = 128 - index - size;                             \
    }                                                           \
    return int128_getlo(int128_rshift(b->s128, index)) &        \
        MAKE_64BIT_MASK(0, size);                               \
}
VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;             /* extract shift value */
        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as the destination and source registers can be
     * the same.  The result is computed in place without a temporary, so
     * iterating from the end guarantees that an already-computed byte is
     * never fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if HOST_BIG_ENDIAN
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
#else
#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
#endif

#define VINSX(SUFFIX, TYPE) \
void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,     \
                                         uint64_t val, target_ulong index)   \
{                                                                            \
    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                     \
    target_long idx = index;                                                 \
                                                                             \
    if (idx < 0 || idx > maxidx) {                                           \
        idx = idx < 0 ? sizeof(TYPE) - idx : idx;                            \
        qemu_log_mask(LOG_GUEST_ERROR,                                       \
            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);       \
    } else {                                                                 \
        TYPE src = val;                                                      \
        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));         \
    }                                                                        \
}
VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
#undef ELEM_ADDR
#undef VINSX
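/*
 * VEXTDU[BHW]VLX/VEXTDDVLX: the two source vectors are treated as one
 * 32-byte quantity; SIZE bytes starting at the GPR-supplied byte index
 * are extracted into the most significant doubleword of the (otherwise
 * zeroed) target.  Indices that would run past the sources leave the
 * target zeroed and log a guest error.
 */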
#if HOST_BIG_ENDIAN
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a,               \
                   ppc_avr_t *b, target_ulong index)                           \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *a, *b };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
    }                                                                          \
}
#else
#define VEXTDVLX(NAME, SIZE) \
void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a,               \
                   ppc_avr_t *b, target_ulong index)                           \
{                                                                              \
    const target_long idx = index;                                             \
    ppc_avr_t tmp[2] = { *b, *a };                                             \
    memset(t, 0, sizeof(*t));                                                  \
    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
    } else {                                                                   \
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
    }                                                                          \
}
#endif
VEXTDVLX(VEXTDUBVLX, 1)
VEXTDVLX(VEXTDUHVLX, 2)
VEXTDVLX(VEXTDUWVLX, 4)
VEXTDVLX(VEXTDDVLX, 8)
#undef VEXTDVLX
#if HOST_BIG_ENDIAN
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
{                                                  \
    int i, idx, crf = 0;                           \
                                                   \
    for (i = 0; i < NUM_ELEMS; i++) {              \
        idx = LEFT ? i : NUM_ELEMS - i - 1;        \
        if (b->Vsr##ELEM(idx)) {                   \
            t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
        } else {                                   \
            crf = 0b0010;                          \
            break;                                 \
        }                                          \
    }                                              \
                                                   \
    for (; i < NUM_ELEMS; i++) {                   \
        idx = LEFT ? i : NUM_ELEMS - i - 1;        \
        t->Vsr##ELEM(idx) = 0;                     \
    }                                              \
                                                   \
    return crf;                                    \
}
VSTRI(VSTRIBL, B, 16, true)
VSTRI(VSTRIBR, B, 16, false)
VSTRI(VSTRIHL, H, 8, true)
VSTRI(VSTRIHR, H, 8, false)
#undef VSTRI

void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

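/*
 * XXEVAL evaluates an arbitrary three-operand boolean function selected
 * by an 8-bit truth table 'imm' (one bit per minterm of a, b, c), much
 * like x86 VPTERNLOG.  For example, imm = 0x01 selects only the
 * a AND b AND c minterm, so it computes the bitwise AND of all three
 * operands.
 */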
void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
                   uint32_t desc)
{
    /*
     * Instead of processing imm bit-by-bit, we'll skip the computation of
     * conjunctions whose corresponding bit is unset.
     */
    int bit, imm = simd_data(desc);
    Int128 conj, disj = int128_zero();

    /* Iterate over set bits from the least to the most significant bit */
    while (imm) {
        /*
         * Get the next bit to be processed with ctzl.  Invert the result of
         * ctzl to match the indexing used by PowerISA.
         */
        bit = 7 - ctzl(imm);
        if (bit & 0x4) {
            conj = a->s128;
        } else {
            conj = int128_not(a->s128);
        }
        if (bit & 0x2) {
            conj = int128_and(conj, b->s128);
        } else {
            conj = int128_and(conj, int128_not(b->s128));
        }
        if (bit & 0x1) {
            conj = int128_and(conj, c->s128);
        } else {
            conj = int128_and(conj, int128_not(c->s128));
        }
        disj = int128_or(disj, conj);

        /* Unset the least significant bit that is set */
        imm &= imm - 1;
    }

    t->s128 = disj;
}

#define XXBLEND(name, sz) \
void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
                                 ppc_avr_t *c, uint32_t desc)               \
{                                                                           \
    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
            b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
    }                                                                       \
}
XXBLEND(B, 8)
XXBLEND(H, 16)
XXBLEND(W, 32)
XXBLEND(D, 64)
#undef XXBLEND

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if HOST_BIG_ENDIAN
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

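/*
 * vsum4sbs/vsum4shs/vsum4ubs: partial sums across quarters of the
 * vector.  Each word of the result is the corresponding word of 'b'
 * plus the sub-elements of 'a' it spans, accumulated in 64 bits and
 * then narrowed with saturation.
 */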
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#if HOST_BIG_ENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i + 4];                        \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

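/*
 * VGENERIC_DO instantiates an element-wise unary helper.  Since there
 * is no 8/16-bit clz primitive, clzb/clzh shift the value to the top of
 * a 32-bit word first so that clz32 counts only the element's own bits.
 */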
#define VGENERIC_DO(name, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)             \
    {                                                           \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = name(b->element[i]);                \
        }                                                       \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->s128 = int128_add(a->s128, b->s128);
}

void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->s128 = int128_add(int128_add(a->s128, b->s128),
                         int128_make64(int128_getlo(c->s128) & 1));
}

void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
    r->VsrD(0) = 0;
}

void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    bool carry_out = int128_ult(int128_not(a->s128), b->s128),
         carry_in = int128_getlo(c->s128) & 1;

    if (!carry_out && carry_in) {
        carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
                    int128_eq(int128_add(a->s128, b->s128),
                              int128_makes64(-1));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
}

void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->s128 = int128_sub(a->s128, b->s128);
}

void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
                         int128_make64(int128_getlo(c->s128) & 1));
}

void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 tmp = int128_not(b->s128);

    r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
                 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
    r->VsrD(0) = 0;
}

void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    Int128 tmp = int128_not(b->s128);
    bool carry_out = int128_ult(int128_not(a->s128), tmp),
         carry_in = int128_getlo(c->s128) & 1;

    r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
                                                     int128_makes64(-1)));
    r->VsrD(0) = 0;
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

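/*
 * Packed decimal values hold 31 BCD digits plus a sign nibble, which is
 * addressed as digit 0; digit n lives in the low or high nibble of byte
 * 15 - n/2.  The sign codes above are the preferred and alternate
 * encodings recognized by bcd_get_sgn, and NATIONAL_PLUS/NATIONAL_NEG
 * are the '+' and '-' character codes used by the national decimal
 * helpers (bcdcfn/bcdctn).
 */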
void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
    r->VsrD(0) = 0;
}

void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    bool carry_out = int128_ult(int128_not(a->s128), b->s128),
         carry_in = int128_getlo(c->s128) & 1;

    if (!carry_out && carry_in) {
        carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
                    int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
}

void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    r->s128 = int128_sub(a->s128, b->s128);
}

void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
                         int128_make64(int128_getlo(c->s128) & 1));
}

void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    Int128 tmp = int128_not(b->s128);

    r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
                 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
    r->VsrD(0) = 0;
}

void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    Int128 tmp = int128_not(b->s128);
    bool carry_out = int128_ult(int128_not(a->s128), tmp),
         carry_in = int128_getlo(c->s128) & 1;

    r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
                                                     int128_makes64(-1)));
    r->VsrD(0) = 0;
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
    return is_zero;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}
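
/*
 * Signed BCD operands hold 31 4-bit digits with a sign code in the
 * lowest nibble.  bcdadd works on sign and magnitude: like signs add
 * the magnitudes, unlike signs subtract the smaller magnitude from
 * the larger and take the sign of the larger.  bcdsub is bcdadd with
 * the sign of b inverted.
 */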
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    } else if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
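
/*
 * Zoned decimal stores one digit per byte: the low nibble holds the
 * digit and the high ("zone") nibble is 0x3 (PS=0, ASCII) or 0xF
 * (PS=1, EBCDIC), with the sign encoded in the zone nibble of the
 * least significant digit.
 */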
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

/**
 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
 *
 * Returns:
 *  > 0 if ahi|alo > bhi|blo,
 *    0 if ahi|alo == bhi|blo,
 *  < 0 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    return (ahi == bhi) ?
        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
        (ahi > bhi ? 1 : -1);
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
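        /*
         * Split src as src = q * 10^15 + rem: the low 15 digits come
         * from rem and the remaining 16 from the quotient, which is
         * guaranteed to fit in 64 bits by the range check above.
         */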
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
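
/*
 * bcdsr differs from the plain shifts above in that a right shift
 * rounds: if the most significant digit shifted out is 5 or greater,
 * one is added to the least significant digit of the result.
 */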
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesenc_SB_SR_MC_AK(ad, st, rk, true);
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    AESState *ad = (AESState *)r;
    AESState *st = (AESState *)a;
    AESState *rk = (AESState *)b;

    aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
}
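
/*
 * vshasigmaw/vshasigmad compute the SHA-256 and SHA-512 sigma
 * functions.  ST selects between the lower-case (message schedule)
 * and upper-case (round) sigmas; the per-element SIX bit selects
 * sigma0 or sigma1.
 */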
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
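
/*
 * brinc implements the SPE bit-reversed increment used for FFT-style
 * addressing: the bits of arg1 selected by the mask in arg2 are
 * bit-reversed, incremented, and reversed back; bits outside the
 * low MASKBITS pass through unchanged.
 */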
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/*
 * 440 specific: dlmzb determines the leftmost zero byte in the 8-byte
 * string high:low.  The result is the 1-based index of that byte (8 if
 * no byte is zero); it is returned, written to the low bits of XER
 * and, for the Rc form, CR0 encodes whether the zero byte was found
 * in high, in low, or not at all.
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}