/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
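/*
 * Illustrative note (not from the original source): on a 64-bit target,
 * ~(target_ulong)0 / 0xff == 0x0101010101010101, so e.g.
 * pattern(0x2a) == 0x2a2a2a2a2a2a2a2a.
 */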
/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}
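/*
 * Worked example (illustrative, not from the original source):
 * helper_popcntb(0x0703) computes a population count per byte, so
 * 0x07 -> 3 and 0x03 -> 2, giving 0x0302.
 */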
target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
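/*
 * Worked example (illustrative): divs with arg1 = 7, arg2 = 3 returns
 * quotient 2 and leaves the remainder 1 in SPR_MQ; C's truncating
 * division means -7 / 3 gives quotient -2 with MQ = -1.
 */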
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element) \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x < (from_type)min) { \
            r = min; \
            *sat = 1; \
        } else if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
#define SATCVTU(from, to, from_type, to_type, min, max) \
    static inline to_type cvt##from##to(from_type x, int *sat) \
    { \
        to_type r; \
 \
        if (x > (from_type)max) { \
            r = max; \
            *sat = 1; \
        } else { \
            r = x; \
        } \
        return r; \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
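/*
 * Worked example (illustrative): cvtshsb(300, &sat) saturates the
 * 16-bit value 300 to INT8_MAX (127) and sets *sat, while
 * cvtshub(-5, &sat) clamps to 0 and sets *sat.
 */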
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
        } \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type) \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status); \
        } \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element) \
    { \
        type result = (type)a->element[i] op (type)b->element[i]; \
        r->element[i] = cvt(result, &sat); \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
    { \
        int sat = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            VARITHSAT_CASE(optype, op, cvt, element); \
        } \
        if (sat) { \
            vscr_sat->u32[0] = 1; \
        } \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
            r->element[i] = x >> 1; \
        } \
    }
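/*
 * Worked example (illustrative): vavgub of 5 and 6 computes
 * (5 + 6 + 1) >> 1 = 6, i.e. the average rounded up; 4 and 5
 * average to 5 the same way.
 */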
#define VAVG(type, signed_element, signed_type, unsigned_element, \
             unsigned_type) \
    VAVG_DO(avgs##type, signed_element, signed_type) \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = (a->element[i] > b->element[i]) ? \
                            (a->element[i] - b->element[i]) : \
                            (b->element[i] - a->element[i]); \
        } \
    }

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element) \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element) \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            float32 t = cvt(b->element[i], &env->vec_status); \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
        } \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record) \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *a, ppc_avr_t *b) \
    { \
        uint64_t ones = (uint64_t)-1; \
        uint64_t all = ones; \
        uint64_t none = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            uint64_t result = (a->element[i] compare b->element[i] ? \
                               ones : 0x0); \
            switch (sizeof(a->element[0])) { \
            case 8: \
                r->u64[i] = result; \
                break; \
            case 4: \
                r->u32[i] = result; \
                break; \
            case 2: \
                r->u16[i] = result; \
                break; \
            case 1: \
                r->u8[i] = result; \
                break; \
            } \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }
#define VCMP(suffix, compare, element) \
    VCMP_DO(suffix, compare, element, 0) \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
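/*
 * Illustrative note: for the record (".") forms, CR field 6 is set to
 * 0b1000 when the comparison is true for every element, 0b0010 when it
 * is false for every element, and 0 for a mix.
 */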
#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
    void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
                               ppc_avr_t *a, ppc_avr_t *b) \
    { \
        etype ones = (etype)-1; \
        etype all = ones; \
        etype result, none = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            if (cmpzero) { \
                result = ((a->element[i] == 0) \
                          || (b->element[i] == 0) \
                          || (a->element[i] != b->element[i]) ? \
                          ones : 0x0); \
            } else { \
                result = (a->element[i] != b->element[i]) ? ones : 0x0; \
            } \
            r->element[i] = result; \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero) \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record) \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *a, ppc_avr_t *b) \
    { \
        uint32_t ones = (uint32_t)-1; \
        uint32_t all = ones; \
        uint32_t none = 0; \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            uint32_t result; \
            FloatRelation rel = \
                float32_compare_quiet(a->f32[i], b->f32[i], \
                                      &env->vec_status); \
            if (rel == float_relation_unordered) { \
                result = 0; \
            } else if (rel compare order) { \
                result = ones; \
            } else { \
                result = 0; \
            } \
            r->u32[i] = result; \
            all &= result; \
            none |= result; \
        } \
        if (record) { \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
        } \
    }
#define VCMPFP(suffix, compare, order) \
    VCMPFP_DO(suffix, compare, order, 0) \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
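/*
 * Worked example (illustrative): for vcmpbfp with a = 5.0 and b = 4.0,
 * a > b sets bit 31 of the lane result and a >= -b keeps bit 30 clear,
 * so the lane becomes 0x80000000 and the record form leaves CR6[1]
 * clear because not all elements were within bounds.
 */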
#define VCT(suffix, satcvt, element) \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *b, uint32_t uim) \
    { \
        int i; \
        int sat = 0; \
        float_status s = env->vec_status; \
 \
        set_float_rounding_mode(float_round_to_zero, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            if (float32_is_any_nan(b->f32[i])) { \
                r->element[i] = 0; \
            } else { \
                float64 t = float32_to_float64(b->f32[i], &s); \
                int64_t j; \
 \
                t = float64_scalbn(t, uim, &s); \
                j = float64_to_int64(t, &s); \
                r->element[i] = satcvt(j, &sat); \
            } \
        } \
        if (sat) { \
            set_vscr_sat(env); \
        } \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        ppc_avr_t result; \
        int i, half = ARRAY_SIZE(r->element) / 2; \
 \
        for (i = 0; i < half; i++) { \
            result.access(i * 2 + 0) = a->access(i + ofs); \
            result.access(i * 2 + 1) = b->access(i + ofs); \
        } \
        *r = result; \
    }

#define VMRG(suffix, element, access) \
    VMRG_DO(mrgl##suffix, element, access, half) \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}
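/*
 * Illustrative note: the vmsum* family multiplies sub-word elements and
 * accumulates the products into each 32-bit lane of c; e.g. for
 * helper_vmsumubm below, word 0 becomes c[0] + a[0]*b[0] + ... + a[3]*b[3].
 */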
void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
                                     (cast)b->mul_access(i); \
        } \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
                                     (cast)b->mul_access(i + 1); \
        } \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}
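/*
 * Worked example (illustrative): in vperm, a selector byte of 0x13 has
 * bit 0x10 set, so it picks byte 3 of b; a selector of 0x03 picks byte
 * 3 of a. vpermr uses the same split but reverses the index.
 */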
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
    (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp) \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i, j; \
        trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
 \
        VECTOR_FOR_INORDER_I(i, srcfld) { \
            prod[i] = 0; \
            for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
                if (a->srcfld[i] & (1ull << j)) { \
                    prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
                } \
            } \
        } \
 \
        VECTOR_FOR_INORDER_I(i, trgfld) { \
            r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
        } \
    }

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
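/*
 * Worked example (illustrative): vpkpx packs a 32-bit 8:8:8 pixel into
 * 1:5:5:5 by keeping the top bits of each channel, so e = 0x00FF00FF
 * (red 0xFF, green 0x00, blue 0xFF) packs to 0x7C1F.
 */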
#define VPK(suffix, from, to, cvt, dosat) \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
                            ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        int sat = 0; \
        ppc_avr_t result; \
        ppc_avr_t *a0 = PKBIG ? a : b; \
        ppc_avr_t *a1 = PKBIG ? b : a; \
 \
        VECTOR_FOR_INORDER_I(i, from) { \
            result.to[i] = cvt(a0->from[i], &sat); \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
        } \
        *r = result; \
        if (dosat && sat) { \
            set_vscr_sat(env); \
        } \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding) \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
                             ppc_avr_t *b) \
    { \
        int i; \
        float_status s = env->vec_status; \
 \
        set_float_rounding_mode(rounding, &s); \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
            r->f32[i] = float32_round_to_int(b->f32[i], &s); \
        } \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        float32 t = float32_sqrt(b->f32[i], &env->vec_status);

        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

#define VRLMI(name, size, element, insert) \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            uint##size##_t src1 = a->element[i]; \
            uint##size##_t src2 = b->element[i]; \
            uint##size##_t src3 = r->element[i]; \
            uint##size##_t begin, end, shift, mask, rot_val; \
 \
            shift = extract##size(src2, 0, 6); \
            end   = extract##size(src2, 8, 6); \
            begin = extract##size(src2, 16, 6); \
            rot_val = rol##size(src1, shift); \
            mask = mask_u##size(begin, end); \
            if (insert) { \
                r->element[i] = (rot_val & mask) | (src3 & ~mask); \
            } else { \
                r->element[i] = (rot_val & mask); \
            } \
        } \
    }

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
    }
}
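/*
 * Illustrative note: vexptefp computes a per-lane 2**x estimate (an
 * input lane of 3.0 yields 8.0), and vlogefp below computes the
 * matching log2 estimate.
 */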
void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left) \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
    { \
        int index; \
        if (left) { \
            index = (a & 0xf) * 8; \
        } else { \
            index = ((15 - (a & 0xf) + 1) * 8) - size; \
        } \
        return int128_getlo(int128_rshift(b->s128, index)) & \
            MAKE_64BIT_MASK(0, size); \
    }
#else
#define VEXTU_X_DO(name, size, left) \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
    { \
        int index; \
        if (left) { \
            index = ((15 - (a & 0xf) + 1) * 8) - size; \
        } else { \
            index = (a & 0xf) * 8; \
        } \
        return int128_getlo(int128_rshift(b->s128, index)) & \
            MAKE_64BIT_MASK(0, size); \
    }
#endif

VEXTU_X_DO(vextublx, 8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx, 8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = (a->VsrB(i) << 8) +             /* extract adjacent bytes */
                (((i + 1) < size) ? a->VsrB(i + 1) : 0);
        r->VsrB(i) = (bytes << shift) >> 8;     /* shift and store result */
    }
}
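/*
 * Worked example (illustrative): with a->VsrB(i) = 0x12, the next byte
 * 0x34 and a shift of 4, bytes = 0x1234, and (0x1234 << 4) >> 8
 * truncated to a byte leaves 0x23 in the result lane.
 */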
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /*
     * Use reverse order, as destination and source register can be the
     * same. The register is modified in place, saving a temporary, and
     * the reverse order guarantees that a computed result is never fed
     * back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->VsrB(i) & 0x7;               /* extract shift value */
        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
                                                /* extract adjacent bytes */
        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
    }
}

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.VsrB(i) = b->VsrB(index - 0x10);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element) \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
                sizeof(r->element[0])); \
    }
#else
#define VINSERT(suffix, element) \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        uint32_t d = (16 - index) - sizeof(r->element[0]); \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element) \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        uint32_t es = sizeof(r->element[0]); \
        memmove(&r->u8[8 - es], &b->u8[index], es); \
        memset(&r->u8[8], 0, 8); \
        memset(&r->u8[0], 0, 8 - es); \
    }
#else
#define VEXTRACT(suffix, element) \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    { \
        uint32_t es = sizeof(r->element[0]); \
        uint32_t s = (16 - index) - es; \
        memmove(&r->u8[8], &b->u8[s], es); \
        memset(&r->u8[0], 0, 8); \
        memset(&r->u8[8 + es], 0, 8 - es); \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT

void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
                        ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = { };
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
    }

    *xt = t;
}

void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
                      ppc_vsr_t *xb, uint32_t index)
{
    ppc_vsr_t t = *xt;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
    }

    *xt = t;
}

#define VEXT_SIGNED(name, element, cast) \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = (cast)b->element[i]; \
        } \
    }
VEXT_SIGNED(vextsb2w, s32, int8_t)
VEXT_SIGNED(vextsb2d, s64, int8_t)
VEXT_SIGNED(vextsh2w, s32, int16_t)
VEXT_SIGNED(vextsh2d, s64, int16_t)
VEXT_SIGNED(vextsw2d, s64, int32_t)
#undef VEXT_SIGNED
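/*
 * Worked example (illustrative): vextsb2w sign-extends the low byte of
 * each word lane, so a lane holding 0x00000080 becomes 0xFFFFFF80.
 */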
#define VNEG(name, element) \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = -b->element[i]; \
        } \
    }
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->VsrB(0xf) >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = ARRAY_SIZE(r->s32) - 1;
    t = (int64_t)b->VsrSW(upper);
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->VsrSW(i);
        result.VsrSW(i) = 0;
    }
    result.VsrSW(upper) = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

    upper = 1;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->VsrSW(upper + i * 2);

        result.VsrD(i) = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->VsrSW(2 * i + j);
        }
        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}
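/*
 * Worked example (illustrative): vsum4ubs with bytes {1, 2, 3, 4} in a
 * word lane and 10 in the matching lane of b produces 20; sums above
 * UINT32_MAX saturate and set VSCR[SAT].
 */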
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
            uint16_t e = b->u16[hi ? i : i + 4]; \
            uint8_t a = (e >> 15) ? 0xff : 0; \
            uint8_t r = (e >> 10) & 0x1f; \
            uint8_t g = (e >> 5) & 0x1f; \
            uint8_t b = e & 0x1f; \
 \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        } \
        *r = result; \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi) \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
        ppc_avr_t result; \
 \
        if (hi) { \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
                result.unpacked[i] = b->packee[i]; \
            } \
        } else { \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) { \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            } \
        } \
        *r = result; \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element) \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
    { \
        int i; \
 \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
            r->element[i] = name(b->element[i]); \
        } \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)

#undef clzb
#undef clzh

#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.VsrD(0) < b.VsrD(0)) {
        return -1;
    } else if (a.VsrD(0) > b.VsrD(0)) {
        return 1;
    } else if (a.VsrD(1) < b.VsrD(1)) {
        return -1;
    } else if (a.VsrD(1) > b.VsrD(1)) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
                 (~a.VsrD(1) < b.VsrD(1));
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif
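/*
 * Illustrative note: the (~a < b) idiom detects unsigned carry-out.
 * In 8-bit terms, a = 0xF0 and b = 0x20 give ~a = 0x0F < 0x20, and
 * indeed 0xF0 + 0x20 overflows the byte.
 */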
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->VsrD(1) & 1) {
        ppc_avr_t tmp;

        tmp.VsrD(0) = 0;
        tmp.VsrD(1) = c->VsrD(1) & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->VsrD(0) = 0;
    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->VsrD(1) & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.VsrD(0) = 0;
    tmp.VsrD(1) = c->VsrD(1) & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
    }
    r->VsrD(0) = 0;
    r->VsrD(1) = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->VsrD(1) & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
    }

    r->VsrD(0) = 0;
    r->VsrD(1) = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
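/*
 * Illustrative note: a signed BCD quadword holds 31 digits plus a sign
 * nibble. Digit 0 is the sign, the low nibble of VsrB(15); odd digits
 * sit in high nibbles. +123 is therefore encoded as ...0x12 0x3C.
 */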
static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
    } else {
        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
    } else {
        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
    }
}

static bool bcd_is_valid(ppc_avr_t *bcd)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(bcd) == 0) {
        return false;
    }

    for (i = 1; i < 32; i++) {
        bcd_get_digit(bcd, i, &invalid);
        if (unlikely(invalid)) {
            return false;
        }
    }
    return true;
}

static int bcd_cmp_zero(ppc_avr_t *bcd)
{
    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
        return CRF_EQ;
    } else {
        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
    }
}

static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
    return reg->VsrH(7 - n);
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
    reg->VsrH(7 - n) = val;
}

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}

static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                        int *overflow)
{
    int carry = 0;
    int i;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);
    }

    *overflow = carry;
}
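/*
 * Worked example (illustrative): bcdadd of (+5) and (-3) sees the signs
 * differ, compares magnitudes (|a| > |b|), subtracts them to get 2,
 * keeps a's sign, and reports CRF_GT for a positive result.
 */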
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
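/*
 * Illustrative note: national format stores one digit per halfword as
 * the characters '0'..'9' (0x30..0x39) with a '+' (0x2B) or '-' (0x2D)
 * sign halfword; zoned format below packs a zone nibble over each digit.
 */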
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    /*
     * Split the magnitude by 10^15: the remainder left in hi_value
     * supplies digits 1..15, the quotient in lo_value digits 16..31.
     */
    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
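/*
 * A minimal sketch, for exposition only, of the 128-bit negation idiom
 * used by helper_bcdcfsq() and helper_bcdctsq() above: two's-complement
 * negation of {hi:lo} negates the low half and adds the borrow into the
 * inverted high half exactly when the low half is zero.  The function
 * name is hypothetical and nothing in this file calls it.
 */
static G_GNUC_UNUSED void neg128_example(uint64_t *phigh, uint64_t *plow)
{
    *plow = -*plow;                 /* ~lo + 1 */
    *phigh = ~*phigh + !*plow;      /* carry out of the low half iff lo == 0 */
}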
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        /* nibble 0 now holds the most significant dropped digit: round up */
        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
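/*
 * Illustrative sketch, not used by the helpers above: the decimal shifts
 * in helper_bcds()/helper_bcdus()/helper_bcdsr() work because one decimal
 * digit occupies one nibble, so shifting by n digits is a 4*n-bit shift of
 * the 128-bit register with the sign nibble cleared first.  Hypothetical
 * demonstration of a one-digit left shift; a real caller would reinstall
 * a sign code with bcd_put_digit() afterwards.
 */
static G_GNUC_UNUSED void bcd_shift_left_one_example(ppc_avr_t *v)
{
    bool ox = false;

    v->VsrD(1) &= ~0xf;                          /* drop the sign nibble */
    ulshift(&v->VsrD(1), &v->VsrD(0), 4, &ox);   /* one decimal digit */
}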
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
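/*
 * Hedged worked example, exposition only, for the masks in the two
 * truncation helpers above: positions are one nibble each (nibble 0 being
 * the sign code), so keeping the low n nibbles of a doubleword is a right
 * shift of an all-ones mask.  For n == 3, (uint64_t)-1 >> (64 - 12) yields
 * 0xfff.  Only valid for 1 <= n <= 16; n == 0 would shift by 64, which is
 * undefined in C.  The helper name is hypothetical.
 */
static G_GNUC_UNUSED uint64_t low_nibble_mask_example(int n)
{
    return (uint64_t)-1 >> (64 - n * 4);
}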
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table of bit-reversed nibbles to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
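/*
 * Worked example, exposition only and never called: hbrev reverses the
 * bits of one nibble, byte_reverse() composes two nibble lookups, and
 * word_reverse() composes four byte reversals.  The function name is
 * hypothetical.
 */
static G_GNUC_UNUSED void bit_reverse_example(void)
{
    /* 0x2C is 0b00101100; reversed it is 0b00110100, i.e. 0x34 */
    g_assert(byte_reverse(0x2C) == 0x34);
    /* reversing all 32 bits moves bit 0 to bit 31 */
    g_assert(word_reverse(0x00000001) == 0x80000000);
}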
#define MASKBITS 16 /* Arbitrary value - the real width is implementation dependent */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    /* increment a in bit-reversed order, confined to the bits set in b */
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

/* Count leading bits that are equal to the sign bit */
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
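/*
 * Worked example for helper_dlmzb() above, exposition only: with
 * high = 0x41424300 (bytes 'A', 'B', 'C', NUL) the fourth byte is the
 * leftmost zero, so the count 4 is written to the low 7 bits of XER and
 * returned; when neither word contains a zero byte the count saturates
 * at 8.
 */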