xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision 9a65a570)
1 /*
2  * QEMU TCG support -- s390x vector integer instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/clmul.h"
18 
19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21     return !v->doubleword[0] && !v->doubleword[1];
22 }
23 
24 static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25                          const S390Vector *b)
26 {
27     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29 }
30 
31 static void s390_vec_and(S390Vector *res, const S390Vector *a,
32                          const S390Vector *b)
33 {
34     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36 }
37 
38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39 {
40     return a->doubleword[0] == b->doubleword[0] &&
41            a->doubleword[1] == b->doubleword[1];
42 }
43 
44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45 {
46     uint64_t tmp;
47 
48     g_assert(count < 128);
49     if (count == 0) {
50         d->doubleword[0] = a->doubleword[0];
51         d->doubleword[1] = a->doubleword[1];
52     } else if (count == 64) {
53         d->doubleword[0] = a->doubleword[1];
54         d->doubleword[1] = 0;
55     } else if (count < 64) {
56         tmp = extract64(a->doubleword[1], 64 - count, count);
57         d->doubleword[1] = a->doubleword[1] << count;
58         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59     } else {
60         d->doubleword[0] = a->doubleword[1] << (count - 64);
61         d->doubleword[1] = 0;
62     }
63 }
64 
65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
66 {
67     uint64_t tmp;
68 
69     if (count == 0) {
70         d->doubleword[0] = a->doubleword[0];
71         d->doubleword[1] = a->doubleword[1];
72     } else if (count == 64) {
73         tmp = (int64_t)a->doubleword[0] >> 63;
74         d->doubleword[1] = a->doubleword[0];
75         d->doubleword[0] = tmp;
76     } else if (count < 64) {
77         tmp = a->doubleword[1] >> count;
78         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
79         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
80     } else {
81         tmp = (int64_t)a->doubleword[0] >> 63;
82         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
83         d->doubleword[0] = tmp;
84     }
85 }
86 
87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88 {
89     uint64_t tmp;
90 
91     g_assert(count < 128);
92     if (count == 0) {
93         d->doubleword[0] = a->doubleword[0];
94         d->doubleword[1] = a->doubleword[1];
95     } else if (count == 64) {
96         d->doubleword[1] = a->doubleword[0];
97         d->doubleword[0] = 0;
98     } else if (count < 64) {
99         tmp = a->doubleword[1] >> count;
100         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101         d->doubleword[0] = a->doubleword[0] >> count;
102     } else {
103         d->doubleword[1] = a->doubleword[0] >> (count - 64);
104         d->doubleword[0] = 0;
105     }
106 }
107 #define DEF_VAVG(BITS)                                                         \
108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
109                              uint32_t desc)                                    \
110 {                                                                              \
111     int i;                                                                     \
112                                                                                \
113     for (i = 0; i < (128 / BITS); i++) {                                       \
114         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
115         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
116                                                                                \
117         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
118     }                                                                          \
119 }
120 DEF_VAVG(8)
121 DEF_VAVG(16)
122 
123 #define DEF_VAVGL(BITS)                                                        \
124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
125                               uint32_t desc)                                   \
126 {                                                                              \
127     int i;                                                                     \
128                                                                                \
129     for (i = 0; i < (128 / BITS); i++) {                                       \
130         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
131         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
132                                                                                \
133         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
134     }                                                                          \
135 }
136 DEF_VAVGL(8)
137 DEF_VAVGL(16)
138 
139 #define DEF_VCLZ(BITS)                                                         \
140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
141 {                                                                              \
142     int i;                                                                     \
143                                                                                \
144     for (i = 0; i < (128 / BITS); i++) {                                       \
145         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
146                                                                                \
147         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
148     }                                                                          \
149 }
150 DEF_VCLZ(8)
151 DEF_VCLZ(16)
152 
153 #define DEF_VCTZ(BITS)                                                         \
154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
155 {                                                                              \
156     int i;                                                                     \
157                                                                                \
158     for (i = 0; i < (128 / BITS); i++) {                                       \
159         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
160                                                                                \
161         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
162     }                                                                          \
163 }
164 DEF_VCTZ(8)
165 DEF_VCTZ(16)
166 
/*
 * Carry-less multiplication: like a binary long multiplication, but the
 * partial products are combined with XOR instead of addition.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
                                             uint##TBITS##_t b)                \
{                                                                              \
    uint##TBITS##_t product = 0;                                               \
                                                                               \
    /* Walk b from its lowest bit; a holds the matching shifted term. */       \
    for (; b != 0; a <<= 1, b >>= 1) {                                         \
        if (b & 0x1) {                                                         \
            product ^= a;                                                      \
        }                                                                      \
    }                                                                          \
    return product;                                                            \
}
DEF_GALOIS_MULTIPLY(32, 64)
184 
185 static S390Vector galois_multiply64(uint64_t a, uint64_t b)
186 {
187     S390Vector res = {};
188     S390Vector va = {
189         .doubleword[1] = a,
190     };
191     S390Vector vb = {
192         .doubleword[1] = b,
193     };
194 
195     while (!s390_vec_is_zero(&vb)) {
196         if (vb.doubleword[1] & 0x1) {
197             s390_vec_xor(&res, &res, &va);
198         }
199         s390_vec_shl(&va, &va, 1);
200         s390_vec_shr(&vb, &vb, 1);
201     }
202     return res;
203 }
204 
/*
 * XOR the clmul_8x4_even() and clmul_8x4_odd() products of @n and @m with the
 * accumulator @a, for one doubleword.
 *
 * There is no carry across the two doublewords, so their order does
 * not matter.  Nor is there partial overlap between registers.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
213 
214 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
215 {
216     uint64_t *q1 = v1;
217     const uint64_t *q2 = v2, *q3 = v3;
218 
219     q1[0] = do_gfma8(q2[0], q3[0], 0);
220     q1[1] = do_gfma8(q2[1], q3[1], 0);
221 }
222 
223 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
224                          const void *v4, uint32_t desc)
225 {
226     uint64_t *q1 = v1;
227     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
228 
229     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
230     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
231 }
232 
/*
 * XOR the clmul_16x2_even() and clmul_16x2_odd() products of @n and @m with
 * the accumulator @a, for one doubleword.
 */
static inline uint64_t do_gfma16(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_16x2_even(n, m);
    const uint64_t odd = clmul_16x2_odd(n, m);

    return even ^ odd ^ a;
}
237 
238 void HELPER(gvec_vgfm16)(void *v1, const void *v2, const void *v3, uint32_t d)
239 {
240     uint64_t *q1 = v1;
241     const uint64_t *q2 = v2, *q3 = v3;
242 
243     q1[0] = do_gfma16(q2[0], q3[0], 0);
244     q1[1] = do_gfma16(q2[1], q3[1], 0);
245 }
246 
247 void HELPER(gvec_vgfma16)(void *v1, const void *v2, const void *v3,
248                          const void *v4, uint32_t d)
249 {
250     uint64_t *q1 = v1;
251     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
252 
253     q1[0] = do_gfma16(q2[0], q3[0], q4[0]);
254     q1[1] = do_gfma16(q2[1], q3[1], q4[1]);
255 }
256 
257 #define DEF_VGFM(BITS, TBITS)                                                  \
258 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
259                              uint32_t desc)                                    \
260 {                                                                              \
261     int i;                                                                     \
262                                                                                \
263     for (i = 0; i < (128 / TBITS); i++) {                                      \
264         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
265         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
266         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
267                                                                                \
268         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
269         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
270         d = d ^ galois_multiply32(a, b);                                       \
271         s390_vec_write_element##TBITS(v1, i, d);                               \
272     }                                                                          \
273 }
274 DEF_VGFM(32, 64)
275 
276 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
277                          uint32_t desc)
278 {
279     S390Vector tmp1, tmp2;
280     uint64_t a, b;
281 
282     a = s390_vec_read_element64(v2, 0);
283     b = s390_vec_read_element64(v3, 0);
284     tmp1 = galois_multiply64(a, b);
285     a = s390_vec_read_element64(v2, 1);
286     b = s390_vec_read_element64(v3, 1);
287     tmp2 = galois_multiply64(a, b);
288     s390_vec_xor(v1, &tmp1, &tmp2);
289 }
290 
291 #define DEF_VGFMA(BITS, TBITS)                                                 \
292 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
293                               const void *v4, uint32_t desc)                   \
294 {                                                                              \
295     int i;                                                                     \
296                                                                                \
297     for (i = 0; i < (128 / TBITS); i++) {                                      \
298         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
299         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
300         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
301                                                                                \
302         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
303         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
304         d = d ^ galois_multiply32(a, b);                                       \
305         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
306         s390_vec_write_element##TBITS(v1, i, d);                               \
307     }                                                                          \
308 }
309 DEF_VGFMA(32, 64)
310 
311 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
312                           const void *v4, uint32_t desc)
313 {
314     S390Vector tmp1, tmp2;
315     uint64_t a, b;
316 
317     a = s390_vec_read_element64(v2, 0);
318     b = s390_vec_read_element64(v3, 0);
319     tmp1 = galois_multiply64(a, b);
320     a = s390_vec_read_element64(v2, 1);
321     b = s390_vec_read_element64(v3, 1);
322     tmp2 = galois_multiply64(a, b);
323     s390_vec_xor(&tmp1, &tmp1, &tmp2);
324     s390_vec_xor(v1, &tmp1, v4);
325 }
326 
327 #define DEF_VMAL(BITS)                                                         \
328 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
329                              const void *v4, uint32_t desc)                    \
330 {                                                                              \
331     int i;                                                                     \
332                                                                                \
333     for (i = 0; i < (128 / BITS); i++) {                                       \
334         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
335         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
336         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
337                                                                                \
338         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
339     }                                                                          \
340 }
341 DEF_VMAL(8)
342 DEF_VMAL(16)
343 
344 #define DEF_VMAH(BITS)                                                         \
345 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
346                              const void *v4, uint32_t desc)                    \
347 {                                                                              \
348     int i;                                                                     \
349                                                                                \
350     for (i = 0; i < (128 / BITS); i++) {                                       \
351         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
352         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
353         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
354                                                                                \
355         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
356     }                                                                          \
357 }
358 DEF_VMAH(8)
359 DEF_VMAH(16)
360 
361 #define DEF_VMALH(BITS)                                                        \
362 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
363                               const void *v4, uint32_t desc)                   \
364 {                                                                              \
365     int i;                                                                     \
366                                                                                \
367     for (i = 0; i < (128 / BITS); i++) {                                       \
368         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
369         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
370         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
371                                                                                \
372         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
373     }                                                                          \
374 }
375 DEF_VMALH(8)
376 DEF_VMALH(16)
377 
378 #define DEF_VMAE(BITS, TBITS)                                                  \
379 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
380                              const void *v4, uint32_t desc)                    \
381 {                                                                              \
382     int i, j;                                                                  \
383                                                                                \
384     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
385         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
386         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
387         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
388                                                                                \
389         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
390     }                                                                          \
391 }
392 DEF_VMAE(8, 16)
393 DEF_VMAE(16, 32)
394 DEF_VMAE(32, 64)
395 
396 #define DEF_VMALE(BITS, TBITS)                                                 \
397 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
398                               const void *v4, uint32_t desc)                   \
399 {                                                                              \
400     int i, j;                                                                  \
401                                                                                \
402     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
403         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
404         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
405         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
406                                                                                \
407         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
408     }                                                                          \
409 }
410 DEF_VMALE(8, 16)
411 DEF_VMALE(16, 32)
412 DEF_VMALE(32, 64)
413 
414 #define DEF_VMAO(BITS, TBITS)                                                  \
415 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
416                              const void *v4, uint32_t desc)                    \
417 {                                                                              \
418     int i, j;                                                                  \
419                                                                                \
420     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
421         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
422         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
423         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
424                                                                                \
425         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
426     }                                                                          \
427 }
428 DEF_VMAO(8, 16)
429 DEF_VMAO(16, 32)
430 DEF_VMAO(32, 64)
431 
432 #define DEF_VMALO(BITS, TBITS)                                                 \
433 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
434                               const void *v4, uint32_t desc)                   \
435 {                                                                              \
436     int i, j;                                                                  \
437                                                                                \
438     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
439         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
440         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
441         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
442                                                                                \
443         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
444     }                                                                          \
445 }
446 DEF_VMALO(8, 16)
447 DEF_VMALO(16, 32)
448 DEF_VMALO(32, 64)
449 
450 #define DEF_VMH(BITS)                                                          \
451 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
452                             uint32_t desc)                                     \
453 {                                                                              \
454     int i;                                                                     \
455                                                                                \
456     for (i = 0; i < (128 / BITS); i++) {                                       \
457         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
458         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
459                                                                                \
460         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
461     }                                                                          \
462 }
463 DEF_VMH(8)
464 DEF_VMH(16)
465 
466 #define DEF_VMLH(BITS)                                                         \
467 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
468                              uint32_t desc)                                    \
469 {                                                                              \
470     int i;                                                                     \
471                                                                                \
472     for (i = 0; i < (128 / BITS); i++) {                                       \
473         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
474         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
475                                                                                \
476         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
477     }                                                                          \
478 }
479 DEF_VMLH(8)
480 DEF_VMLH(16)
481 
482 #define DEF_VME(BITS, TBITS)                                                   \
483 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
484                             uint32_t desc)                                     \
485 {                                                                              \
486     int i, j;                                                                  \
487                                                                                \
488     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
489         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
490         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
491                                                                                \
492         s390_vec_write_element##TBITS(v1, i, a * b);                           \
493     }                                                                          \
494 }
495 DEF_VME(8, 16)
496 DEF_VME(16, 32)
497 DEF_VME(32, 64)
498 
499 #define DEF_VMLE(BITS, TBITS)                                                  \
500 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
501                              uint32_t desc)                                    \
502 {                                                                              \
503     int i, j;                                                                  \
504                                                                                \
505     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
506         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
507         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
508                                                                                \
509         s390_vec_write_element##TBITS(v1, i, a * b);                           \
510     }                                                                          \
511 }
512 DEF_VMLE(8, 16)
513 DEF_VMLE(16, 32)
514 DEF_VMLE(32, 64)
515 
516 #define DEF_VMO(BITS, TBITS)                                                   \
517 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
518                             uint32_t desc)                                     \
519 {                                                                              \
520     int i, j;                                                                  \
521                                                                                \
522     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
523         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
524         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
525                                                                                \
526         s390_vec_write_element##TBITS(v1, i, a * b);                           \
527     }                                                                          \
528 }
529 DEF_VMO(8, 16)
530 DEF_VMO(16, 32)
531 DEF_VMO(32, 64)
532 
533 #define DEF_VMLO(BITS, TBITS)                                                  \
534 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
535                              uint32_t desc)                                    \
536 {                                                                              \
537     int i, j;                                                                  \
538                                                                                \
539     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
540         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
541         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
542                                                                                \
543         s390_vec_write_element##TBITS(v1, i, a * b);                           \
544     }                                                                          \
545 }
546 DEF_VMLO(8, 16)
547 DEF_VMLO(16, 32)
548 DEF_VMLO(32, 64)
549 
550 #define DEF_VPOPCT(BITS)                                                       \
551 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
552 {                                                                              \
553     int i;                                                                     \
554                                                                                \
555     for (i = 0; i < (128 / BITS); i++) {                                       \
556         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
557                                                                                \
558         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
559     }                                                                          \
560 }
561 DEF_VPOPCT(8)
562 DEF_VPOPCT(16)
563 
564 #define DEF_VERIM(BITS)                                                        \
565 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
566                               uint32_t desc)                                   \
567 {                                                                              \
568     const uint8_t count = simd_data(desc);                                     \
569     int i;                                                                     \
570                                                                                \
571     for (i = 0; i < (128 / BITS); i++) {                                       \
572         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
573         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
574         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
575         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
576                                                                                \
577         s390_vec_write_element##BITS(v1, i, d);                                \
578     }                                                                          \
579 }
580 DEF_VERIM(8)
581 DEF_VERIM(16)
582 
583 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
584                       uint32_t desc)
585 {
586     s390_vec_shl(v1, v2, count);
587 }
588 
589 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
590                           uint32_t desc)
591 {
592     S390Vector tmp;
593     uint32_t sh, e0, e1 = 0;
594     int i;
595 
596     for (i = 15; i >= 0; --i, e1 = e0) {
597         e0 = s390_vec_read_element8(v2, i);
598         sh = s390_vec_read_element8(v3, i) & 7;
599 
600         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
601     }
602 
603     *(S390Vector *)v1 = tmp;
604 }
605 
606 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
607                        uint32_t desc)
608 {
609     s390_vec_sar(v1, v2, count);
610 }
611 
612 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
613                            uint32_t desc)
614 {
615     S390Vector tmp;
616     uint32_t sh, e0, e1 = 0;
617     int i = 0;
618 
619     /* Byte 0 is special only. */
620     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
621     sh = s390_vec_read_element8(v3, i) & 7;
622     s390_vec_write_element8(&tmp, i, e0 >> sh);
623 
624     e1 = e0;
625     for (i = 1; i < 16; ++i, e1 = e0) {
626         e0 = s390_vec_read_element8(v2, i);
627         sh = s390_vec_read_element8(v3, i) & 7;
628         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
629     }
630 
631     *(S390Vector *)v1 = tmp;
632 }
633 
634 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
635                        uint32_t desc)
636 {
637     s390_vec_shr(v1, v2, count);
638 }
639 
640 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
641                            uint32_t desc)
642 {
643     S390Vector tmp;
644     uint32_t sh, e0, e1 = 0;
645 
646     for (int i = 0; i < 16; ++i, e1 = e0) {
647         e0 = s390_vec_read_element8(v2, i);
648         sh = s390_vec_read_element8(v3, i) & 7;
649 
650         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
651     }
652 
653     *(S390Vector *)v1 = tmp;
654 }
655 
656 #define DEF_VSCBI(BITS)                                                        \
657 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
658                               uint32_t desc)                                   \
659 {                                                                              \
660     int i;                                                                     \
661                                                                                \
662     for (i = 0; i < (128 / BITS); i++) {                                       \
663         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
664         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
665                                                                                \
666         s390_vec_write_element##BITS(v1, i, a >= b);                           \
667     }                                                                          \
668 }
669 DEF_VSCBI(8)
670 DEF_VSCBI(16)
671 
672 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
673                       uint32_t desc)
674 {
675     S390Vector tmp;
676 
677     s390_vec_and(&tmp, v1, v2);
678     if (s390_vec_is_zero(&tmp)) {
679         /* Selected bits all zeros; or all mask bits zero */
680         env->cc_op = 0;
681     } else if (s390_vec_equal(&tmp, v2)) {
682         /* Selected bits all ones */
683         env->cc_op = 3;
684     } else {
685         /* Selected bits a mix of zeros and ones */
686         env->cc_op = 1;
687     }
688 }
689