xref: /qemu/target/ppc/int_helper.c (revision 20daa90a)
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

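/*
 * Divide Word Extended [Unsigned]: RA supplies the high 32 bits of a
 * 64-bit dividend whose low 32 bits are zero, and RB the 32-bit
 * divisor.  Division by zero, or a quotient that does not fit in 32
 * bits, is an overflow and leaves an undefined result.
 */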
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);
    if (unlikely((divisor == 0) ||
                 ((divisor == -1) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

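/*
 * Divide Doubleword Extended [Unsigned]: RA supplies the high 64 bits
 * of a 128-bit dividend whose low 64 bits are zero (rt is passed in as
 * the zero low half); divu128()/divs128() report overflow when the
 * quotient does not fit in 64 bits.
 */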
uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the value with the pattern and there is a match, that byte
 * will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
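/*
 * Worked example: hasvalue(0x0000001122334455, 0x22) XORs with
 * pattern(0x22) = 0x2222222222222222, giving 0x2222223300116677;
 * the byte that matched 0x22 became 0x00, so haszero() reports it
 * and cmpeqb returns 1 << CRF_GT.
 */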

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/* Return an invalid random number.
 *
 * FIXME: Add an RNG backend or other mechanism to get cryptographically
 * suitable random numbers.
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

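/*
 * bpermd: each byte of RS holds a bit index; when the index is below
 * 64, the selected bit of RB (bit 0 being the PowerPC MSB, hence the
 * 63 - index shift) becomes bit i of the result.
 */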
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1U << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes.  */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* The real implementation uses a ROM table, so do the same. */
/* Extremely decomposed:
 * return 256 * log10(10**(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15 - (i)]
#define AVRW(i) u32[3 - (i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
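/* Each generated cvt##from##to() clamps to the target type's range and
 * sets *sat on saturation; callers fold that into VSCR[SAT]. */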
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

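/*
 * lvsl/lvsr build the permute control vector used to realign unaligned
 * Altivec loads: lvsl yields bytes sh..sh+15 and lvsr bytes
 * (16 - sh)..(31 - sh), where sh is the low nibble of the address.
 */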
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

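/*
 * vaddcuw returns the carry out of each 32-bit addition: "~a < b" is
 * equivalent to "(uint32_t)(a + b) < a", i.e. the sum wrapped around.
 */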
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

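/*
 * The vprtyb* helpers XOR each element with progressively narrower
 * shifted copies of itself; bit 0 of the final fold is the parity of
 * all the bits in the element.
 */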
/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->u64[LO_IDX] = res & 1;
    r->u64[HI_IDX] = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

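/*
 * For the dot (record) forms below, CR field 6 is set to 0b1000 when
 * the predicate held for every element and to 0b0010 when it held for
 * none; "none" is an OR-accumulator of the per-element results.
 */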
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

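/*
 * vcmpbfp checks each element against the bound b: result bit 31 is
 * set when a > b and bit 30 when a < -b, so 0 means "within bounds".
 * An unordered compare (NaN) sets both bits.
 */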
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

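/*
 * vclzlsbb/vctzlsbb count the leading/trailing bytes of the vector
 * whose least significant bit is zero.
 */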
target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

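/*
 * vmhaddshs/vmhraddshs: prod >> 15 keeps the high half of the Q15
 * product (vmhraddshs adds 0x4000 first to round); c is then added
 * and the sum saturated to 16 bits.
 */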
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i * 2 + HI_IDX] = a->element[i];         \
                result.element[i * 2 + LO_IDX] = b->element[i];         \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

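/*
 * vperm treats a:b as a 32-byte array: bit 4 of each control byte in c
 * selects the source vector and the low four bits select the byte
 * within it (vpermr indexes from the other end).
 */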
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[(i)], (index), 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - (i)], 63 - (index), 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMD_INDEX
#undef VBPERMQ_DW
#undef EXTRACT_BIT

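/*
 * VGBBD_MASKS[x] spreads bit k of x to the most-significant bit of
 * byte k; vgbbd then shifts each spread byte right by its position
 * within the doubleword, which amounts to transposing an 8x8 bit
 * matrix.
 */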
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};
1517 
1518 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1519 {
1520     int i;
1521     uint64_t t[2] = { 0, 0 };
1522 
1523     VECTOR_FOR_INORDER_I(i, u8) {
1524 #if defined(HOST_WORDS_BIGENDIAN)
1525         t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1526 #else
1527         t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1528 #endif
1529     }
1530 
1531     r->u64[0] = t[0];
1532     r->u64[1] = t[1];
1533 }
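/*
 * Illustrative sketch (not part of the build): vgbbd performs an 8x8
 * bit-matrix transpose of each doubleword.  VGBBD_MASKS[v] spreads the
 * eight bits of v across the most significant bit of each of the eight
 * bytes, and the per-byte right shift in the loop above moves that
 * pattern into the bit column that the source byte owns in the result.
 * INT_HELPER_EXAMPLES is a hypothetical guard and is never defined.
 */
#ifdef INT_HELPER_EXAMPLES
static uint64_t vgbbd_contribution(uint8_t src, int k)
{
    /* Mirrors the big-endian branch: source byte k of a doubleword
     * supplies bit column 7 - k of every byte of the result. */
    return VGBBD_MASKS[src] >> (k & 7);
}
#endif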
1534 
1535 #define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1536 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1537 {                                                             \
1538     int i, j;                                                 \
1539     trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
1540                                                               \
1541     VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1542         prod[i] = 0;                                          \
1543         for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1544             if (a->srcfld[i] & (1ull<<j)) {                   \
1545                 prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1546             }                                                 \
1547         }                                                     \
1548     }                                                         \
1549                                                               \
1550     VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1551         r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
1552     }                                                         \
1553 }
1554 
1555 PMSUM(vpmsumb, u8, u16, uint16_t)
1556 PMSUM(vpmsumh, u16, u32, uint32_t)
1557 PMSUM(vpmsumw, u32, u64, uint64_t)
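/*
 * Illustrative sketch (not part of the build): the PMSUM helpers are
 * built on carry-less (polynomial, GF(2)) multiplication -- partial
 * products are combined with XOR rather than addition -- and each pair
 * of adjacent products is then XOR-ed into one wider result element.
 * INT_HELPER_EXAMPLES is a hypothetical guard and is never defined.
 */
#ifdef INT_HELPER_EXAMPLES
static uint16_t clmul_8x8(uint8_t a, uint8_t b)
{
    uint16_t prod = 0;
    int j;

    for (j = 0; j < 8; j++) {
        if (a & (1u << j)) {
            prod ^= (uint16_t)(b << j);
        }
    }
    /* e.g. clmul_8x8(0x3, 0x3) == 0x5, not 9: the cross terms cancel. */
    return prod;
}
#endif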
1558 
1559 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1560 {
1562 #ifdef CONFIG_INT128
1563     int i, j;
1564     __uint128_t prod[2];
1565 
1566     VECTOR_FOR_INORDER_I(i, u64) {
1567         prod[i] = 0;
1568         for (j = 0; j < 64; j++) {
1569             if (a->u64[i] & (1ull<<j)) {
1570                 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1571             }
1572         }
1573     }
1574 
1575     r->u128 = prod[0] ^ prod[1];
1576 
1577 #else
1578     int i, j;
1579     ppc_avr_t prod[2];
1580 
1581     VECTOR_FOR_INORDER_I(i, u64) {
1582         prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1583         for (j = 0; j < 64; j++) {
1584             if (a->u64[i] & (1ull<<j)) {
1585                 ppc_avr_t bshift;
1586                 if (j == 0) {
1587                     bshift.u64[HI_IDX] = 0;
1588                     bshift.u64[LO_IDX] = b->u64[i];
1589                 } else {
1590                     bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1591                     bshift.u64[LO_IDX] = b->u64[i] << j;
1592                 }
1593                 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1594                 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1595             }
1596         }
1597     }
1598 
1599     r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1600     r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1601 #endif
1602 }
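/*
 * Illustrative sketch (not part of the build): without CONFIG_INT128 the
 * 128-bit partial product (b << j) above is synthesized from two 64-bit
 * halves, the high half catching the bits shifted out of the low half;
 * the j == 0 special case avoids the undefined shift "b >> 64".  Assumes
 * j < 64, as in the loop above.  INT_HELPER_EXAMPLES is a hypothetical
 * guard and is never defined.
 */
#ifdef INT_HELPER_EXAMPLES
static void shl128_sketch(uint64_t b, unsigned int j, uint64_t *hi,
                          uint64_t *lo)
{
    if (j == 0) {
        *hi = 0;
        *lo = b;
    } else {
        *hi = b >> (64 - j);    /* bits carried into the high doubleword */
        *lo = b << j;
    }
}
#endif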
1603 
1604 
1605 #if defined(HOST_WORDS_BIGENDIAN)
1606 #define PKBIG 1
1607 #else
1608 #define PKBIG 0
1609 #endif
1610 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1611 {
1612     int i, j;
1613     ppc_avr_t result;
1614 #if defined(HOST_WORDS_BIGENDIAN)
1615     const ppc_avr_t *x[2] = { a, b };
1616 #else
1617     const ppc_avr_t *x[2] = { b, a };
1618 #endif
1619 
1620     VECTOR_FOR_INORDER_I(i, u64) {
1621         VECTOR_FOR_INORDER_I(j, u32) {
1622             uint32_t e = x[i]->u32[j];
1623 
1624             result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1625                                  ((e >> 6) & 0x3e0) |
1626                                  ((e >> 3) & 0x1f));
1627         }
1628     }
1629     *r = result;
1630 }
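/*
 * Worked example (illustrative): vpkpx packs an x:8:8:8 pixel into
 * 1:5:5:5 by keeping the low bit of the first byte and the top five bits
 * of each colour byte.  For e = 0x01ff8040:
 *   ((e >> 9) & 0xfc00) = 0xfc00   (pixel bit plus 5 bits of red)
 *   ((e >> 6) & 0x03e0) = 0x0200   (5 bits of green)
 *   ((e >> 3) & 0x001f) = 0x0008   (5 bits of blue)
 * giving the 16-bit result 0xfe08.
 */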
1631 
1632 #define VPK(suffix, from, to, cvt, dosat)                               \
1633     void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1634                             ppc_avr_t *a, ppc_avr_t *b)                 \
1635     {                                                                   \
1636         int i;                                                          \
1637         int sat = 0;                                                    \
1638         ppc_avr_t result;                                               \
1639         ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1640         ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1641                                                                         \
1642         VECTOR_FOR_INORDER_I(i, from) {                                 \
1643             result.to[i] = cvt(a0->from[i], &sat);                      \
1644             result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1645         }                                                               \
1646         *r = result;                                                    \
1647         if (dosat && sat) {                                             \
1648             env->vscr |= (1 << VSCR_SAT);                               \
1649         }                                                               \
1650     }
1651 #define I(x, y) (x)
1652 VPK(shss, s16, s8, cvtshsb, 1)
1653 VPK(shus, s16, u8, cvtshub, 1)
1654 VPK(swss, s32, s16, cvtswsh, 1)
1655 VPK(swus, s32, u16, cvtswuh, 1)
1656 VPK(sdss, s64, s32, cvtsdsw, 1)
1657 VPK(sdus, s64, u32, cvtsduw, 1)
1658 VPK(uhus, u16, u8, cvtuhub, 1)
1659 VPK(uwus, u32, u16, cvtuwuh, 1)
1660 VPK(udus, u64, u32, cvtuduw, 1)
1661 VPK(uhum, u16, u8, I, 0)
1662 VPK(uwum, u32, u16, I, 0)
1663 VPK(udum, u64, u32, I, 0)
1664 #undef I
1665 #undef VPK
1666 #undef PKBIG
1667 
1668 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1669 {
1670     int i;
1671 
1672     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1673         r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1674     }
1675 }
1676 
1677 #define VRFI(suffix, rounding)                                  \
1678     void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1679                              ppc_avr_t *b)                      \
1680     {                                                           \
1681         int i;                                                  \
1682         float_status s = env->vec_status;                       \
1683                                                                 \
1684         set_float_rounding_mode(rounding, &s);                  \
1685         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1686             r->f[i] = float32_round_to_int(b->f[i], &s);        \
1687         }                                                       \
1688     }
1689 VRFI(n, float_round_nearest_even)
1690 VRFI(m, float_round_down)
1691 VRFI(p, float_round_up)
1692 VRFI(z, float_round_to_zero)
1693 #undef VRFI
1694 
1695 #define VROTATE(suffix, element, mask)                                  \
1696     void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1697     {                                                                   \
1698         int i;                                                          \
1699                                                                         \
1700         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1701             unsigned int shift = b->element[i] & mask;                  \
                 /* mask the right-shift count as well: rotating by zero    \
                  * must not become an undefined full-width shift */        \
1702             r->element[i] = (a->element[i] << shift) |                  \
1703                 (a->element[i] >> (-shift & (sizeof(a->element[0]) * 8 - 1))); \
1704         }                                                               \
1705     }
1706 VROTATE(b, u8, 0x7)
1707 VROTATE(h, u16, 0xF)
1708 VROTATE(w, u32, 0x1F)
1709 VROTATE(d, u64, 0x3F)
1710 #undef VROTATE
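/*
 * Illustrative check (not part of the build): masking the right-shift
 * count with "-shift & (bits - 1)" makes a rotate by zero collapse to
 * the identity instead of shifting by the full element width.
 * INT_HELPER_EXAMPLES is a hypothetical guard and is never defined.
 */
#ifdef INT_HELPER_EXAMPLES
static uint32_t rotl32_sketch(uint32_t v, unsigned int n)
{
    n &= 31;
    /* rotl32_sketch(0x80000001, 1) == 0x00000003 */
    return (v << n) | (v >> (-n & 31));
}
#endif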
1711 
1712 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1713 {
1714     int i;
1715 
1716     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1717         float32 t = float32_sqrt(b->f[i], &env->vec_status);
1718 
1719         r->f[i] = float32_div(float32_one, t, &env->vec_status);
1720     }
1721 }
1722 
1723 #define VRLMI(name, size, element, insert)                            \
1724 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1725 {                                                                     \
1726     int i;                                                            \
1727     for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1728         uint##size##_t src1 = a->element[i];                          \
1729         uint##size##_t src2 = b->element[i];                          \
1730         uint##size##_t src3 = r->element[i];                          \
1731         uint##size##_t begin, end, shift, mask, rot_val;              \
1732                                                                       \
1733         shift = extract##size(src2, 0, 6);                            \
1734         end   = extract##size(src2, 8, 6);                            \
1735         begin = extract##size(src2, 16, 6);                           \
1736         rot_val = rol##size(src1, shift);                             \
1737         mask = mask_u##size(begin, end);                              \
1738         if (insert) {                                                 \
1739             r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1740         } else {                                                      \
1741             r->element[i] = (rot_val & mask);                         \
1742         }                                                             \
1743     }                                                                 \
1744 }
1745 
1746 VRLMI(vrldmi, 64, u64, 1);
1747 VRLMI(vrlwmi, 32, u32, 1);
1748 VRLMI(vrldnm, 64, u64, 0);
1749 VRLMI(vrlwnm, 32, u32, 0);
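/*
 * Worked example (illustrative): for the word forms, each element of VRB
 * packs three 6-bit fields counted from the least significant bit: the
 * rotate count in bits 0:5, the mask end in bits 8:13 and the mask begin
 * in bits 16:21 (big-endian bit numbering, as in rlwimi).  With
 * src2 = 0x00101708 the source is rotated left by 8 and merged under
 * mask_u32(16, 23) = 0x0000ff00, so inserting src1 = 0x000000aa into
 * src3 = 0x11111111 yields 0x1111aa11.
 */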
1750 
1751 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1752                  ppc_avr_t *c)
1753 {
1754     r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1755     r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1756 }
1757 
1758 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1759 {
1760     int i;
1761 
1762     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1763         r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1764     }
1765 }
1766 
1767 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1768 {
1769     int i;
1770 
1771     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1772         r->f[i] = float32_log2(b->f[i], &env->vec_status);
1773     }
1774 }
1775 
1776 /* The specification says that the results are undefined unless all of the
1777  * shift counts are identical.  We check that they are, to conform to what
1778  * real hardware appears to do.  */
1779 #define VSHIFT(suffix, leftp)                                           \
1780     void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1781     {                                                                   \
1782         int shift = b->u8[LO_IDX*15] & 0x7;                             \
1783         int doit = 1;                                                   \
1784         int i;                                                          \
1785                                                                         \
1786         for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1787             doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1788         }                                                               \
1789         if (doit) {                                                     \
1790             if (shift == 0) {                                           \
1791                 *r = *a;                                                \
1792             } else if (leftp) {                                         \
1793                 uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1794                                                                         \
1795                 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1796                 r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1797             } else {                                                    \
1798                 uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1799                                                                         \
1800                 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1801                 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1802             }                                                           \
1803         }                                                               \
1804     }
1805 VSHIFT(l, 1)
1806 VSHIFT(r, 0)
1807 #undef VSHIFT
1808 
1809 #define VSL(suffix, element, mask)                                      \
1810     void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1811     {                                                                   \
1812         int i;                                                          \
1813                                                                         \
1814         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1815             unsigned int shift = b->element[i] & mask;                  \
1816                                                                         \
1817             r->element[i] = a->element[i] << shift;                     \
1818         }                                                               \
1819     }
1820 VSL(b, u8, 0x7)
1821 VSL(h, u16, 0x0F)
1822 VSL(w, u32, 0x1F)
1823 VSL(d, u64, 0x3F)
1824 #undef VSL
1825 
1826 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1827 {
1828     int i;
1829     unsigned int shift, bytes, size;
1830 
1831     size = ARRAY_SIZE(r->u8);
1832     for (i = 0; i < size; i++) {
1833         shift = b->u8[i] & 0x7;             /* extract shift value */
1834         bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
1835             (((i + 1) < size) ? a->u8[i + 1] : 0);
1836         r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
1837     }
1838 }
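/*
 * Worked example (illustrative): vslv shifts each byte left by its own
 * count, pulling the vacated bits from the next byte.  Concatenating the
 * two bytes into a 16-bit lane and keeping the high byte after the shift
 * does both at once: with a->u8[i] = 0x12, a->u8[i + 1] = 0x34 and a
 * shift of 4, ((0x1234 << 4) >> 8) stored into a byte gives 0x23.
 */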
1839 
1840 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1841 {
1842     int i;
1843     unsigned int shift, bytes;
1844 
1845     /* Use reverse order, as destination and source register can be the
1846      * same.  The vector is modified in place (saving a temporary), so
1847      * iterating in reverse guarantees a computed byte is not fed back.
1848      */
1849     for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1850         shift = b->u8[i] & 0x7;                 /* extract shift value */
1851         bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1852                                                 /* extract adjacent bytes */
1853         r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
1854     }
1855 }
1856 
1857 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1858 {
1859     int sh = shift & 0xf;
1860     int i;
1861     ppc_avr_t result;
1862 
1863 #if defined(HOST_WORDS_BIGENDIAN)
1864     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1865         int index = sh + i;
1866         if (index > 0xf) {
1867             result.u8[i] = b->u8[index - 0x10];
1868         } else {
1869             result.u8[i] = a->u8[index];
1870         }
1871     }
1872 #else
1873     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1874         int index = (16 - sh) + i;
1875         if (index > 0xf) {
1876             result.u8[i] = a->u8[index - 0x10];
1877         } else {
1878             result.u8[i] = b->u8[index];
1879         }
1880     }
1881 #endif
1882     *r = result;
1883 }
1884 
1885 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1886 {
1887     int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1888 
1889 #if defined(HOST_WORDS_BIGENDIAN)
1890     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1891     memset(&r->u8[16-sh], 0, sh);
1892 #else
1893     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1894     memset(&r->u8[0], 0, sh);
1895 #endif
1896 }
1897 
1898 /* Experimental testing shows that hardware masks the immediate.  */
1899 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1900 #if defined(HOST_WORDS_BIGENDIAN)
1901 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1902 #else
1903 #define SPLAT_ELEMENT(element)                                  \
1904     (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1905 #endif
1906 #define VSPLT(suffix, element)                                          \
1907     void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1908     {                                                                   \
1909         uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1910         int i;                                                          \
1911                                                                         \
1912         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1913             r->element[i] = s;                                          \
1914         }                                                               \
1915     }
1916 VSPLT(b, u8)
1917 VSPLT(h, u16)
1918 VSPLT(w, u32)
1919 #undef VSPLT
1920 #undef SPLAT_ELEMENT
1921 #undef _SPLAT_MASKED
1922 #if defined(HOST_WORDS_BIGENDIAN)
1923 #define VINSERT(suffix, element)                                            \
1924     void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1925     {                                                                       \
1926         memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
1927                sizeof(r->element[0]));                                      \
1928     }
1929 #else
1930 #define VINSERT(suffix, element)                                            \
1931     void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1932     {                                                                       \
1933         uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
1934         memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
1935     }
1936 #endif
1937 VINSERT(b, u8)
1938 VINSERT(h, u16)
1939 VINSERT(w, u32)
1940 VINSERT(d, u64)
1941 #undef VINSERT
1942 #if defined(HOST_WORDS_BIGENDIAN)
1943 #define VEXTRACT(suffix, element)                                            \
1944     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1945     {                                                                        \
1946         uint32_t es = sizeof(r->element[0]);                                 \
1947         memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1948         memset(&r->u8[8], 0, 8);                                             \
1949         memset(&r->u8[0], 0, 8 - es);                                        \
1950     }
1951 #else
1952 #define VEXTRACT(suffix, element)                                            \
1953     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1954     {                                                                        \
1955         uint32_t es = sizeof(r->element[0]);                                 \
1956         uint32_t s = (16 - index) - es;                                      \
1957         memmove(&r->u8[8], &b->u8[s], es);                                   \
1958         memset(&r->u8[0], 0, 8);                                             \
1959         memset(&r->u8[8 + es], 0, 8 - es);                                   \
1960     }
1961 #endif
1962 VEXTRACT(ub, u8)
1963 VEXTRACT(uh, u16)
1964 VEXTRACT(uw, u32)
1965 VEXTRACT(d, u64)
1966 #undef VEXTRACT
1967 
1968 #define VEXT_SIGNED(name, element, mask, cast, recast)              \
1969 void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1970 {                                                                   \
1971     int i;                                                          \
1972     VECTOR_FOR_INORDER_I(i, element) {                              \
1973         r->element[i] = (recast)((cast)(b->element[i] & mask));     \
1974     }                                                               \
1975 }
1976 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
1977 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
1978 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
1979 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
1980 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
1981 #undef VEXT_SIGNED
1982 
1983 #define VNEG(name, element)                                         \
1984 void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1985 {                                                                   \
1986     int i;                                                          \
1987     VECTOR_FOR_INORDER_I(i, element) {                              \
1988         r->element[i] = -b->element[i];                             \
1989     }                                                               \
1990 }
1991 VNEG(vnegw, s32)
1992 VNEG(vnegd, s64)
1993 #undef VNEG
1994 
1995 #define VSPLTI(suffix, element, splat_type)                     \
1996     void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
1997     {                                                           \
1998         splat_type x = (int8_t)(splat << 3) >> 3;               \
1999         int i;                                                  \
2000                                                                 \
2001         for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
2002             r->element[i] = x;                                  \
2003         }                                                       \
2004     }
2005 VSPLTI(b, s8, int8_t)
2006 VSPLTI(h, s16, int16_t)
2007 VSPLTI(w, s32, int32_t)
2008 #undef VSPLTI
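/*
 * The "(int8_t)(splat << 3) >> 3" trick above sign-extends the 5-bit
 * SIMM field: e.g. splat = 0x1f becomes -1 and splat = 0x10 becomes -16,
 * while splat = 0x0f stays 15.
 */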
2009 
2010 #define VSR(suffix, element, mask)                                      \
2011     void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
2012     {                                                                   \
2013         int i;                                                          \
2014                                                                         \
2015         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2016             unsigned int shift = b->element[i] & mask;                  \
2017             r->element[i] = a->element[i] >> shift;                     \
2018         }                                                               \
2019     }
2020 VSR(ab, s8, 0x7)
2021 VSR(ah, s16, 0xF)
2022 VSR(aw, s32, 0x1F)
2023 VSR(ad, s64, 0x3F)
2024 VSR(b, u8, 0x7)
2025 VSR(h, u16, 0xF)
2026 VSR(w, u32, 0x1F)
2027 VSR(d, u64, 0x3F)
2028 #undef VSR
2029 
2030 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2031 {
2032     int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2033 
2034 #if defined(HOST_WORDS_BIGENDIAN)
2035     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2036     memset(&r->u8[0], 0, sh);
2037 #else
2038     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2039     memset(&r->u8[16 - sh], 0, sh);
2040 #endif
2041 }
2042 
2043 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2044 {
2045     int i;
2046 
2047     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2048         r->u32[i] = a->u32[i] >= b->u32[i];
2049     }
2050 }
2051 
2052 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2053 {
2054     int64_t t;
2055     int i, upper;
2056     ppc_avr_t result;
2057     int sat = 0;
2058 
2059 #if defined(HOST_WORDS_BIGENDIAN)
2060     upper = ARRAY_SIZE(r->s32)-1;
2061 #else
2062     upper = 0;
2063 #endif
2064     t = (int64_t)b->s32[upper];
2065     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2066         t += a->s32[i];
2067         result.s32[i] = 0;
2068     }
2069     result.s32[upper] = cvtsdsw(t, &sat);
2070     *r = result;
2071 
2072     if (sat) {
2073         env->vscr |= (1 << VSCR_SAT);
2074     }
2075 }
2076 
2077 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2078 {
2079     int i, j, upper;
2080     ppc_avr_t result;
2081     int sat = 0;
2082 
2083 #if defined(HOST_WORDS_BIGENDIAN)
2084     upper = 1;
2085 #else
2086     upper = 0;
2087 #endif
2088     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2089         int64_t t = (int64_t)b->s32[upper + i * 2];
2090 
2091         result.u64[i] = 0;
2092         for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2093             t += a->s32[2 * i + j];
2094         }
2095         result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2096     }
2097 
2098     *r = result;
2099     if (sat) {
2100         env->vscr |= (1 << VSCR_SAT);
2101     }
2102 }
2103 
2104 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2105 {
2106     int i, j;
2107     int sat = 0;
2108 
2109     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2110         int64_t t = (int64_t)b->s32[i];
2111 
2112         for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2113             t += a->s8[4 * i + j];
2114         }
2115         r->s32[i] = cvtsdsw(t, &sat);
2116     }
2117 
2118     if (sat) {
2119         env->vscr |= (1 << VSCR_SAT);
2120     }
2121 }
2122 
2123 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124 {
2125     int sat = 0;
2126     int i;
2127 
2128     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2129         int64_t t = (int64_t)b->s32[i];
2130 
2131         t += a->s16[2 * i] + a->s16[2 * i + 1];
2132         r->s32[i] = cvtsdsw(t, &sat);
2133     }
2134 
2135     if (sat) {
2136         env->vscr |= (1 << VSCR_SAT);
2137     }
2138 }
2139 
2140 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2141 {
2142     int i, j;
2143     int sat = 0;
2144 
2145     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2146         uint64_t t = (uint64_t)b->u32[i];
2147 
2148         for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2149             t += a->u8[4 * i + j];
2150         }
2151         r->u32[i] = cvtuduw(t, &sat);
2152     }
2153 
2154     if (sat) {
2155         env->vscr |= (1 << VSCR_SAT);
2156     }
2157 }
2158 
2159 #if defined(HOST_WORDS_BIGENDIAN)
2160 #define UPKHI 1
2161 #define UPKLO 0
2162 #else
2163 #define UPKHI 0
2164 #define UPKLO 1
2165 #endif
2166 #define VUPKPX(suffix, hi)                                              \
2167     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2168     {                                                                   \
2169         int i;                                                          \
2170         ppc_avr_t result;                                               \
2171                                                                         \
2172         for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2173             uint16_t e = b->u16[hi ? i : i+4];                          \
2174             uint8_t a = (e >> 15) ? 0xff : 0;                           \
2175             uint8_t r = (e >> 10) & 0x1f;                               \
2176             uint8_t g = (e >> 5) & 0x1f;                                \
2177             uint8_t b = e & 0x1f;                                       \
2178                                                                         \
2179             result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
2180         }                                                               \
2181         *r = result;                                                    \
2182     }
2183 VUPKPX(lpx, UPKLO)
2184 VUPKPX(hpx, UPKHI)
2185 #undef VUPKPX
2186 
2187 #define VUPK(suffix, unpacked, packee, hi)                              \
2188     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2189     {                                                                   \
2190         int i;                                                          \
2191         ppc_avr_t result;                                               \
2192                                                                         \
2193         if (hi) {                                                       \
2194             for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2195                 result.unpacked[i] = b->packee[i];                      \
2196             }                                                           \
2197         } else {                                                        \
2198             for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2199                  i++) {                                                 \
2200                 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2201             }                                                           \
2202         }                                                               \
2203         *r = result;                                                    \
2204     }
2205 VUPK(hsb, s16, s8, UPKHI)
2206 VUPK(hsh, s32, s16, UPKHI)
2207 VUPK(hsw, s64, s32, UPKHI)
2208 VUPK(lsb, s16, s8, UPKLO)
2209 VUPK(lsh, s32, s16, UPKLO)
2210 VUPK(lsw, s64, s32, UPKLO)
2211 #undef VUPK
2212 #undef UPKHI
2213 #undef UPKLO
2214 
2215 #define VGENERIC_DO(name, element)                                      \
2216     void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2217     {                                                                   \
2218         int i;                                                          \
2219                                                                         \
2220         VECTOR_FOR_INORDER_I(i, element) {                              \
2221             r->element[i] = name(b->element[i]);                        \
2222         }                                                               \
2223     }
2224 
2225 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2226 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2227 #define clzw(v) clz32((v))
2228 #define clzd(v) clz64((v))
2229 
2230 VGENERIC_DO(clzb, u8)
2231 VGENERIC_DO(clzh, u16)
2232 VGENERIC_DO(clzw, u32)
2233 VGENERIC_DO(clzd, u64)
2234 
2235 #undef clzb
2236 #undef clzh
2237 #undef clzw
2238 #undef clzd
2239 
2240 #define ctzb(v) ((v) ? ctz32(v) : 8)
2241 #define ctzh(v) ((v) ? ctz32(v) : 16)
2242 #define ctzw(v) ctz32((v))
2243 #define ctzd(v) ctz64((v))
2244 
2245 VGENERIC_DO(ctzb, u8)
2246 VGENERIC_DO(ctzh, u16)
2247 VGENERIC_DO(ctzw, u32)
2248 VGENERIC_DO(ctzd, u64)
2249 
2250 #undef ctzb
2251 #undef ctzh
2252 #undef ctzw
2253 #undef ctzd
2254 
2255 #define popcntb(v) ctpop8(v)
2256 #define popcnth(v) ctpop16(v)
2257 #define popcntw(v) ctpop32(v)
2258 #define popcntd(v) ctpop64(v)
2259 
2260 VGENERIC_DO(popcntb, u8)
2261 VGENERIC_DO(popcnth, u16)
2262 VGENERIC_DO(popcntw, u32)
2263 VGENERIC_DO(popcntd, u64)
2264 
2265 #undef popcntb
2266 #undef popcnth
2267 #undef popcntw
2268 #undef popcntd
2269 
2270 #undef VGENERIC_DO
2271 
2272 #if defined(HOST_WORDS_BIGENDIAN)
2273 #define QW_ONE { .u64 = { 0, 1 } }
2274 #else
2275 #define QW_ONE { .u64 = { 1, 0 } }
2276 #endif
2277 
2278 #ifndef CONFIG_INT128
2279 
2280 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2281 {
2282     t->u64[0] = ~a.u64[0];
2283     t->u64[1] = ~a.u64[1];
2284 }
2285 
2286 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2287 {
2288     if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2289         return -1;
2290     } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2291         return 1;
2292     } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2293         return -1;
2294     } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2295         return 1;
2296     } else {
2297         return 0;
2298     }
2299 }
2300 
2301 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2302 {
2303     t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2304     t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2305                      (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2306 }
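/*
 * The carry-out test "~a.u64[LO_IDX] < b.u64[LO_IDX]" uses the identity
 * that a + b overflows 64 bits exactly when b > UINT64_MAX - a, i.e.
 * b > ~a.  E.g. a = UINT64_MAX, b = 1: ~a = 0 < 1, so a carry of 1
 * propagates into the high doubleword.
 */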
2307 
2308 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2309 {
2310     ppc_avr_t not_a;
2311     t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2312     t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2313                      (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2314     avr_qw_not(&not_a, a);
2315     return avr_qw_cmpu(not_a, b) < 0;
2316 }
2317 
2318 #endif
2319 
2320 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2321 {
2322 #ifdef CONFIG_INT128
2323     r->u128 = a->u128 + b->u128;
2324 #else
2325     avr_qw_add(r, *a, *b);
2326 #endif
2327 }
2328 
2329 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2330 {
2331 #ifdef CONFIG_INT128
2332     r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2333 #else
2334 
2335     if (c->u64[LO_IDX] & 1) {
2336         ppc_avr_t tmp;
2337 
2338         tmp.u64[HI_IDX] = 0;
2339         tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2340         avr_qw_add(&tmp, *a, tmp);
2341         avr_qw_add(r, tmp, *b);
2342     } else {
2343         avr_qw_add(r, *a, *b);
2344     }
2345 #endif
2346 }
2347 
2348 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2349 {
2350 #ifdef CONFIG_INT128
2351     r->u128 = (~a->u128 < b->u128);
2352 #else
2353     ppc_avr_t not_a;
2354 
2355     avr_qw_not(&not_a, *a);
2356 
2357     r->u64[HI_IDX] = 0;
2358     r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2359 #endif
2360 }
2361 
2362 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2363 {
2364 #ifdef CONFIG_INT128
2365     int carry_out = (~a->u128 < b->u128);
2366     if (!carry_out && (c->u128 & 1)) {
2367         carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2368                     ((a->u128 != 0) || (b->u128 != 0));
2369     }
2370     r->u128 = carry_out;
2371 #else
2372 
2373     int carry_in = c->u64[LO_IDX] & 1;
2374     int carry_out = 0;
2375     ppc_avr_t tmp;
2376 
2377     carry_out = avr_qw_addc(&tmp, *a, *b);
2378 
2379     if (!carry_out && carry_in) {
2380         ppc_avr_t one = QW_ONE;
2381         carry_out = avr_qw_addc(&tmp, tmp, one);
2382     }
2383     r->u64[HI_IDX] = 0;
2384     r->u64[LO_IDX] = carry_out;
2385 #endif
2386 }
2387 
2388 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2389 {
2390 #ifdef CONFIG_INT128
2391     r->u128 = a->u128 - b->u128;
2392 #else
2393     ppc_avr_t tmp;
2394     ppc_avr_t one = QW_ONE;
2395 
2396     avr_qw_not(&tmp, *b);
2397     avr_qw_add(&tmp, *a, tmp);
2398     avr_qw_add(r, tmp, one);
2399 #endif
2400 }
2401 
2402 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2403 {
2404 #ifdef CONFIG_INT128
2405     r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2406 #else
2407     ppc_avr_t tmp, sum;
2408 
2409     avr_qw_not(&tmp, *b);
2410     avr_qw_add(&sum, *a, tmp);
2411 
2412     tmp.u64[HI_IDX] = 0;
2413     tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2414     avr_qw_add(r, sum, tmp);
2415 #endif
2416 }
2417 
2418 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2419 {
2420 #ifdef CONFIG_INT128
2421     r->u128 = (~a->u128 < ~b->u128) ||
2422                  (a->u128 + ~b->u128 == (__uint128_t)-1);
2423 #else
2424     int carry = (avr_qw_cmpu(*a, *b) > 0);
2425     if (!carry) {
2426         ppc_avr_t tmp;
2427         avr_qw_not(&tmp, *b);
2428         avr_qw_add(&tmp, *a, tmp);
2429         carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2430     }
2431     r->u64[HI_IDX] = 0;
2432     r->u64[LO_IDX] = carry;
2433 #endif
2434 }
2435 
2436 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2437 {
2438 #ifdef CONFIG_INT128
2439     r->u128 =
2440         (~a->u128 < ~b->u128) ||
2441         ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2442 #else
2443     int carry_in = c->u64[LO_IDX] & 1;
2444     int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2445     if (!carry_out && carry_in) {
2446         ppc_avr_t tmp;
2447         avr_qw_not(&tmp, *b);
2448         avr_qw_add(&tmp, *a, tmp);
2449         carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2450     }
2451 
2452     r->u64[HI_IDX] = 0;
2453     r->u64[LO_IDX] = carry_out;
2454 #endif
2455 }
2456 
2457 #define BCD_PLUS_PREF_1 0xC
2458 #define BCD_PLUS_PREF_2 0xF
2459 #define BCD_PLUS_ALT_1  0xA
2460 #define BCD_NEG_PREF    0xD
2461 #define BCD_NEG_ALT     0xB
2462 #define BCD_PLUS_ALT_2  0xE
2463 #define NATIONAL_PLUS   0x2B
2464 #define NATIONAL_NEG    0x2D
2465 
2466 #if defined(HOST_WORDS_BIGENDIAN)
2467 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2468 #else
2469 #define BCD_DIG_BYTE(n) ((n) / 2)
2470 #endif
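/*
 * Layout note (illustrative): signed BCD keeps the sign code in the low
 * nibble at BCD_DIG_BYTE(0) and digit n (n = 1 being the units digit) in
 * the nibble at BCD_DIG_BYTE(n) -- the low nibble for even n, the high
 * nibble for odd n.  So +123 with the preferred sign ends in the nibble
 * sequence ...1 2 3 C: units digit 3 and sign 0xC share the byte
 * BCD_DIG_BYTE(0) == BCD_DIG_BYTE(1).
 */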
2471 
2472 static int bcd_get_sgn(ppc_avr_t *bcd)
2473 {
2474     switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2475     case BCD_PLUS_PREF_1:
2476     case BCD_PLUS_PREF_2:
2477     case BCD_PLUS_ALT_1:
2478     case BCD_PLUS_ALT_2:
2479     {
2480         return 1;
2481     }
2482 
2483     case BCD_NEG_PREF:
2484     case BCD_NEG_ALT:
2485     {
2486         return -1;
2487     }
2488 
2489     default:
2490     {
2491         return 0;
2492     }
2493     }
2494 }
2495 
2496 static int bcd_preferred_sgn(int sgn, int ps)
2497 {
2498     if (sgn >= 0) {
2499         return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2500     } else {
2501         return BCD_NEG_PREF;
2502     }
2503 }
2504 
2505 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2506 {
2507     uint8_t result;
2508     if (n & 1) {
2509         result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2510     } else {
2511        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2512     }
2513 
2514     if (unlikely(result > 9)) {
2515         *invalid = true;
2516     }
2517     return result;
2518 }
2519 
2520 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2521 {
2522     if (n & 1) {
2523         bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2524         bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2525     } else {
2526         bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2527         bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2528     }
2529 }
2530 
2531 static int bcd_cmp_zero(ppc_avr_t *bcd)
2532 {
2533     if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2534         return 1 << CRF_EQ;
2535     } else {
2536         return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
2537     }
2538 }
2539 
2540 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2541 {
2542 #if defined(HOST_WORDS_BIGENDIAN)
2543     return reg->u16[7 - n];
2544 #else
2545     return reg->u16[n];
2546 #endif
2547 }
2548 
2549 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2550 {
2551 #if defined(HOST_WORDS_BIGENDIAN)
2552     reg->u16[7 - n] = val;
2553 #else
2554     reg->u16[n] = val;
2555 #endif
2556 }
2557 
2558 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2559 {
2560     int i;
2561     int invalid = 0;
2562     for (i = 31; i > 0; i--) {
2563         uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2564         uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2565         if (unlikely(invalid)) {
2566             return 0; /* doesn't matter */
2567         } else if (dig_a > dig_b) {
2568             return 1;
2569         } else if (dig_a < dig_b) {
2570             return -1;
2571         }
2572     }
2573 
2574     return 0;
2575 }
2576 
2577 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2578                        int *overflow)
2579 {
2580     int carry = 0;
2581     int i;
2582     int is_zero = 1;
2583     for (i = 1; i <= 31; i++) {
2584         uint8_t digit = bcd_get_digit(a, i, invalid) +
2585                         bcd_get_digit(b, i, invalid) + carry;
2586         is_zero &= (digit == 0);
2587         if (digit > 9) {
2588             carry = 1;
2589             digit -= 10;
2590         } else {
2591             carry = 0;
2592         }
2593 
2594         bcd_put_digit(t, digit, i);
2595 
2596         if (unlikely(*invalid)) {
2597             return -1;
2598         }
2599     }
2600 
2601     *overflow = carry;
2602     return is_zero;
2603 }
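/*
 * Worked example (illustrative): adding magnitudes 58 and 67 digit by
 * digit: 8 + 7 = 15 -> digit 5, carry 1; 5 + 6 + 1 = 12 -> digit 2,
 * carry 1; 0 + 0 + 1 -> digit 1, giving 125.  Only a carry still
 * pending after the most significant digit (i == 31) is reported
 * through *overflow.
 */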
2604 
2605 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2606                        int *overflow)
2607 {
2608     int carry = 0;
2609     int i;
2610     int is_zero = 1;
2611     for (i = 1; i <= 31; i++) {
2612         uint8_t digit = bcd_get_digit(a, i, invalid) -
2613                         bcd_get_digit(b, i, invalid) + carry;
2614         is_zero &= (digit == 0);
2615         if (digit & 0x80) {
2616             carry = -1;
2617             digit += 10;
2618         } else {
2619             carry = 0;
2620         }
2621 
2622         bcd_put_digit(t, digit, i);
2623 
2624         if (unlikely(*invalid)) {
2625             return -1;
2626         }
2627     }
2628 
2629     *overflow = carry;
2630     return is_zero;
2631 }
2632 
2633 uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2634 {
2636     int sgna = bcd_get_sgn(a);
2637     int sgnb = bcd_get_sgn(b);
2638     int invalid = (sgna == 0) || (sgnb == 0);
2639     int overflow = 0;
2640     int zero = 0;
2641     uint32_t cr = 0;
2642     ppc_avr_t result = { .u64 = { 0, 0 } };
2643 
2644     if (!invalid) {
2645         if (sgna == sgnb) {
2646             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2647             zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2648             cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2649         } else if (bcd_cmp_mag(a, b) > 0) {
2650             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2651             zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2652             cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2653         } else {
2654             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2655             zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2656             cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2657         }
2658     }
2659 
2660     if (unlikely(invalid)) {
2661         result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2662         cr = 1 << CRF_SO;
2663     } else if (overflow) {
2664         cr |= 1 << CRF_SO;
2665     } else if (zero) {
2666         cr = 1 << CRF_EQ;
2667     }
2668 
2669     *r = result;
2670 
2671     return cr;
2672 }
2673 
2674 uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2675 {
2676     ppc_avr_t bcopy = *b;
2677     int sgnb = bcd_get_sgn(b);
2678     if (sgnb < 0) {
2679         bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2680     } else if (sgnb > 0) {
2681         bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2682     }
2683     /* else invalid ... defer to bcdadd code for proper handling */
2684 
2685     return helper_bcdadd(r, a, &bcopy, ps);
2686 }
2687 
2688 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2689 {
2690     int i;
2691     int cr = 0;
2692     uint16_t national = 0;
2693     uint16_t sgnb = get_national_digit(b, 0);
2694     ppc_avr_t ret = { .u64 = { 0, 0 } };
2695     int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2696 
2697     for (i = 1; i < 8; i++) {
2698         national = get_national_digit(b, i);
2699         if (unlikely(national < 0x30 || national > 0x39)) {
2700             invalid = 1;
2701             break;
2702         }
2703 
2704         bcd_put_digit(&ret, national & 0xf, i);
2705     }
2706 
2707     if (sgnb == NATIONAL_PLUS) {
2708         bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2709     } else {
2710         bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2711     }
2712 
2713     cr = bcd_cmp_zero(&ret);
2714 
2715     if (unlikely(invalid)) {
2716         cr = 1 << CRF_SO;
2717     }
2718 
2719     *r = ret;
2720 
2721     return cr;
2722 }
2723 
2724 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2725 {
2726     int i;
2727     int cr = 0;
2728     int sgnb = bcd_get_sgn(b);
2729     int invalid = (sgnb == 0);
2730     ppc_avr_t ret = { .u64 = { 0, 0 } };
2731 
2732     int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2733 
2734     for (i = 1; i < 8; i++) {
2735         set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2736 
2737         if (unlikely(invalid)) {
2738             break;
2739         }
2740     }
2741     set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2742 
2743     cr = bcd_cmp_zero(b);
2744 
2745     if (ox_flag) {
2746         cr |= 1 << CRF_SO;
2747     }
2748 
2749     if (unlikely(invalid)) {
2750         cr = 1 << CRF_SO;
2751     }
2752 
2753     *r = ret;
2754 
2755     return cr;
2756 }
2757 
2758 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2759 {
2760     int i;
2761     int cr = 0;
2762     int invalid = 0;
2763     int zone_digit = 0;
2764     int zone_lead = ps ? 0xF : 0x3;
2765     int digit = 0;
2766     ppc_avr_t ret = { .u64 = { 0, 0 } };
2767     int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2768 
2769     if (unlikely((sgnb < 0xA) && ps)) {
2770         invalid = 1;
2771     }
2772 
2773     for (i = 0; i < 16; i++) {
2774         zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2775         digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2776         if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2777             invalid = 1;
2778             break;
2779         }
2780 
2781         bcd_put_digit(&ret, digit, i + 1);
2782     }
2783 
2784     if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2785             (!ps && (sgnb & 0x4))) {
2786         bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2787     } else {
2788         bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2789     }
2790 
2791     cr = bcd_cmp_zero(&ret);
2792 
2793     if (unlikely(invalid)) {
2794         cr = 1 << CRF_SO;
2795     }
2796 
2797     *r = ret;
2798 
2799     return cr;
2800 }
2801 
2802 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2803 {
2804     int i;
2805     int cr = 0;
2806     uint8_t digit = 0;
2807     int sgnb = bcd_get_sgn(b);
2808     int zone_lead = (ps) ? 0xF0 : 0x30;
2809     int invalid = (sgnb == 0);
2810     ppc_avr_t ret = { .u64 = { 0, 0 } };
2811 
2812     int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2813 
2814     for (i = 0; i < 16; i++) {
2815         digit = bcd_get_digit(b, i + 1, &invalid);
2816 
2817         if (unlikely(invalid)) {
2818             break;
2819         }
2820 
2821         ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2822     }
2823 
2824     if (ps) {
2825         bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2826     } else {
2827         bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2828     }
2829 
2830     cr = bcd_cmp_zero(b);
2831 
2832     if (ox_flag) {
2833         cr |= 1 << CRF_SO;
2834     }
2835 
2836     if (unlikely(invalid)) {
2837         cr = 1 << CRF_SO;
2838     }
2839 
2840     *r = ret;
2841 
2842     return cr;
2843 }
2844 
2845 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2846 {
2847     int i;
2848     VECTOR_FOR_INORDER_I(i, u8) {
2849         r->u8[i] = AES_sbox[a->u8[i]];
2850     }
2851 }
2852 
2853 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2854 {
2855     ppc_avr_t result;
2856     int i;
2857 
2858     VECTOR_FOR_INORDER_I(i, u32) {
2859         result.AVRW(i) = b->AVRW(i) ^
2860             (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2861              AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2862              AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2863              AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2864     }
2865     *r = result;
2866 }
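/*
 * The AES_Te0..AES_Te3 tables from QEMU's crypto/aes code fold SubBytes,
 * ShiftRows and MixColumns together, so a full AES round above reduces
 * to four table lookups and four XORs per 32-bit column, plus the
 * AddRoundKey XOR with b.
 */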
2867 
2868 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2869 {
2870     ppc_avr_t result;
2871     int i;
2872 
2873     VECTOR_FOR_INORDER_I(i, u8) {
2874         result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2875     }
2876     *r = result;
2877 }
2878 
2879 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2880 {
2881     /* This differs from what is written in ISA V2.07.  The RTL is */
2882     /* incorrect and will be fixed in V2.07B.                      */
2883     int i;
2884     ppc_avr_t tmp;
2885 
2886     VECTOR_FOR_INORDER_I(i, u8) {
2887         tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2888     }
2889 
2890     VECTOR_FOR_INORDER_I(i, u32) {
2891         r->AVRW(i) =
2892             AES_imc[tmp.AVRB(4*i + 0)][0] ^
2893             AES_imc[tmp.AVRB(4*i + 1)][1] ^
2894             AES_imc[tmp.AVRB(4*i + 2)][2] ^
2895             AES_imc[tmp.AVRB(4*i + 3)][3];
2896     }
2897 }
2898 
2899 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2900 {
2901     ppc_avr_t result;
2902     int i;
2903 
2904     VECTOR_FOR_INORDER_I(i, u8) {
2905         result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2906     }
2907     *r = result;
2908 }
2909 
2910 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2911 #if defined(HOST_WORDS_BIGENDIAN)
2912 #define EL_IDX(i) (i)
2913 #else
2914 #define EL_IDX(i) (3 - (i))
2915 #endif
2916 
2917 void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2918 {
2919     int st = (st_six & 0x10) != 0;
2920     int six = st_six & 0xF;
2921     int i;
2922 
2923     VECTOR_FOR_INORDER_I(i, u32) {
2924         if (st == 0) {
2925             if ((six & (0x8 >> i)) == 0) {
2926                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2927                                     ROTRu32(a->u32[EL_IDX(i)], 18) ^
2928                                     (a->u32[EL_IDX(i)] >> 3);
2929             } else { /* six.bit[i] == 1 */
2930                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2931                                     ROTRu32(a->u32[EL_IDX(i)], 19) ^
2932                                     (a->u32[EL_IDX(i)] >> 10);
2933             }
2934         } else { /* st == 1 */
2935             if ((six & (0x8 >> i)) == 0) {
2936                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2937                                     ROTRu32(a->u32[EL_IDX(i)], 13) ^
2938                                     ROTRu32(a->u32[EL_IDX(i)], 22);
2939             } else { /* six.bit[i] == 1 */
2940                 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2941                                     ROTRu32(a->u32[EL_IDX(i)], 11) ^
2942                                     ROTRu32(a->u32[EL_IDX(i)], 25);
2943             }
2944         }
2945     }
2946 }
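/*
 * The rotate/shift combinations above are the four SHA-256 sigma
 * functions of FIPS 180-4: st selects between the message-schedule
 * sigmas (st == 0) and the compression-function Sigmas (st == 1), and
 * each bit of six picks the 0- or 1-suffixed variant for the
 * corresponding word.  helper_vshasigmad below does the same for the
 * SHA-512 sigmas.
 */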
2947 
2948 #undef ROTRu32
2949 #undef EL_IDX
2950 
2951 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2952 #if defined(HOST_WORDS_BIGENDIAN)
2953 #define EL_IDX(i) (i)
2954 #else
2955 #define EL_IDX(i) (1 - (i))
2956 #endif
2957 
2958 void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2959 {
2960     int st = (st_six & 0x10) != 0;
2961     int six = st_six & 0xF;
2962     int i;
2963 
2964     VECTOR_FOR_INORDER_I(i, u64) {
2965         if (st == 0) {
2966             if ((six & (0x8 >> (2*i))) == 0) {
2967                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2968                                     ROTRu64(a->u64[EL_IDX(i)], 8) ^
2969                                     (a->u64[EL_IDX(i)] >> 7);
2970             } else { /* six.bit[2*i] == 1 */
2971                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2972                                     ROTRu64(a->u64[EL_IDX(i)], 61) ^
2973                                     (a->u64[EL_IDX(i)] >> 6);
2974             }
2975         } else { /* st == 1 */
2976             if ((six & (0x8 >> (2*i))) == 0) {
2977                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2978                                     ROTRu64(a->u64[EL_IDX(i)], 34) ^
2979                                     ROTRu64(a->u64[EL_IDX(i)], 39);
2980             } else { /* six.bit[2*i] == 1 */
2981                 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2982                                     ROTRu64(a->u64[EL_IDX(i)], 18) ^
2983                                     ROTRu64(a->u64[EL_IDX(i)], 41);
2984             }
2985         }
2986     }
2987 }
2988 
2989 #undef ROTRu64
2990 #undef EL_IDX
2991 
2992 void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2993 {
2994     ppc_avr_t result;
2995     int i;
2996 
2997     VECTOR_FOR_INORDER_I(i, u8) {
2998         int indexA = c->u8[i] >> 4;
2999         int indexB = c->u8[i] & 0xF;
3000 #if defined(HOST_WORDS_BIGENDIAN)
3001         result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3002 #else
3003         result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3004 #endif
3005     }
3006     *r = result;
3007 }
3008 
3009 #undef VECTOR_FOR_INORDER_I
3010 #undef HI_IDX
3011 #undef LO_IDX
3012 
3013 /*****************************************************************************/
3014 /* SPE extension helpers */
3015 /* Use a table to make this quicker */
3016 static const uint8_t hbrev[16] = {
3017     0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3018     0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3019 };
3020 
3021 static inline uint8_t byte_reverse(uint8_t val)
3022 {
3023     return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3024 }
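/*
 * e.g. byte_reverse(0x12): hbrev[0x1] = 0x8 and hbrev[0x2] = 0x4, so the
 * result is 0x8 | (0x4 << 4) = 0x48, the bit reversal of 00010010b.
 */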
3025 
3026 static inline uint32_t word_reverse(uint32_t val)
3027 {
3028     return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3029         (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3030 }
3031 
3032 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3033 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3034 {
3035     uint32_t a, b, d, mask;
3036 
3037     mask = UINT32_MAX >> (32 - MASKBITS);
3038     a = arg1 & mask;
3039     b = arg2 & mask;
3040     d = word_reverse(1 + word_reverse(a | ~b));
3041     return (arg1 & ~mask) | (d & b);
3042 }
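/*
 * brinc produces the next buffer index in bit-reversed order, the access
 * pattern of FFT butterflies.  Forcing the bits outside the mask to one
 * with "a | ~b" lets the +1 carry ripple straight through them in the
 * reversed domain, so effectively only the reversed in-buffer bits are
 * incremented; the final "& b" discards the helper bits again.
 */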
3043 
3044 uint32_t helper_cntlsw32(uint32_t val)
3045 {
3046     if (val & 0x80000000) {
3047         return clz32(~val);
3048     } else {
3049         return clz32(val);
3050     }
3051 }
3052 
3053 uint32_t helper_cntlzw32(uint32_t val)
3054 {
3055     return clz32(val);
3056 }
3057 
3058 /* 440 specific */
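/*
 * Returns the 1-based position of the leftmost zero byte in the 8-byte
 * string high:low, or 8 if there is none.  The same count is written to
 * XER[57:63]; when update_Rc is set, CR0 encodes whether the zero byte
 * was found in high (0x4), in low (0x8) or not at all (0x2), OR-ed with
 * XER[SO].
 */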
3059 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3060                           target_ulong low, uint32_t update_Rc)
3061 {
3062     target_ulong mask;
3063     int i;
3064 
3065     i = 1;
3066     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3067         if ((high & mask) == 0) {
3068             if (update_Rc) {
3069                 env->crf[0] = 0x4;
3070             }
3071             goto done;
3072         }
3073         i++;
3074     }
3075     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3076         if ((low & mask) == 0) {
3077             if (update_Rc) {
3078                 env->crf[0] = 0x8;
3079             }
3080             goto done;
3081         }
3082         i++;
3083     }
3084     i = 8;
3085     if (update_Rc) {
3086         env->crf[0] = 0x2;
3087     }
3088  done:
3089     env->xer = (env->xer & ~0x7F) | i;
3090     if (update_Rc) {
3091         env->crf[0] |= xer_so;
3092     }
3093     return i;
3094 }
3095