xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision cf1b2cab)
1 /*
2  * QEMU TCG support -- s390x vector integer instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 #include "crypto/clmul.h"
18 
19 static bool s390_vec_is_zero(const S390Vector *v)
20 {
21     return !v->doubleword[0] && !v->doubleword[1];
22 }
23 
24 static void s390_vec_xor(S390Vector *res, const S390Vector *a,
25                          const S390Vector *b)
26 {
27     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
28     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
29 }
30 
31 static void s390_vec_and(S390Vector *res, const S390Vector *a,
32                          const S390Vector *b)
33 {
34     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
35     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
36 }
37 
38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
39 {
40     return a->doubleword[0] == b->doubleword[0] &&
41            a->doubleword[1] == b->doubleword[1];
42 }
43 
44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
45 {
46     uint64_t tmp;
47 
48     g_assert(count < 128);
49     if (count == 0) {
50         d->doubleword[0] = a->doubleword[0];
51         d->doubleword[1] = a->doubleword[1];
52     } else if (count == 64) {
53         d->doubleword[0] = a->doubleword[1];
54         d->doubleword[1] = 0;
55     } else if (count < 64) {
56         tmp = extract64(a->doubleword[1], 64 - count, count);
57         d->doubleword[1] = a->doubleword[1] << count;
58         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
59     } else {
60         d->doubleword[0] = a->doubleword[1] << (count - 64);
61         d->doubleword[1] = 0;
62     }
63 }
64 
65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
66 {
67     uint64_t tmp;
68 
69     if (count == 0) {
70         d->doubleword[0] = a->doubleword[0];
71         d->doubleword[1] = a->doubleword[1];
72     } else if (count == 64) {
73         tmp = (int64_t)a->doubleword[0] >> 63;
74         d->doubleword[1] = a->doubleword[0];
75         d->doubleword[0] = tmp;
76     } else if (count < 64) {
77         tmp = a->doubleword[1] >> count;
78         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
79         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
80     } else {
81         tmp = (int64_t)a->doubleword[0] >> 63;
82         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
83         d->doubleword[0] = tmp;
84     }
85 }
86 
87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
88 {
89     uint64_t tmp;
90 
91     g_assert(count < 128);
92     if (count == 0) {
93         d->doubleword[0] = a->doubleword[0];
94         d->doubleword[1] = a->doubleword[1];
95     } else if (count == 64) {
96         d->doubleword[1] = a->doubleword[0];
97         d->doubleword[0] = 0;
98     } else if (count < 64) {
99         tmp = a->doubleword[1] >> count;
100         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
101         d->doubleword[0] = a->doubleword[0] >> count;
102     } else {
103         d->doubleword[1] = a->doubleword[0] >> (count - 64);
104         d->doubleword[0] = 0;
105     }
106 }
107 #define DEF_VAVG(BITS)                                                         \
108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
109                              uint32_t desc)                                    \
110 {                                                                              \
111     int i;                                                                     \
112                                                                                \
113     for (i = 0; i < (128 / BITS); i++) {                                       \
114         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
115         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
116                                                                                \
117         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
118     }                                                                          \
119 }
120 DEF_VAVG(8)
121 DEF_VAVG(16)
122 
123 #define DEF_VAVGL(BITS)                                                        \
124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
125                               uint32_t desc)                                   \
126 {                                                                              \
127     int i;                                                                     \
128                                                                                \
129     for (i = 0; i < (128 / BITS); i++) {                                       \
130         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
131         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
132                                                                                \
133         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
134     }                                                                          \
135 }
136 DEF_VAVGL(8)
137 DEF_VAVGL(16)
138 
139 #define DEF_VCLZ(BITS)                                                         \
140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
141 {                                                                              \
142     int i;                                                                     \
143                                                                                \
144     for (i = 0; i < (128 / BITS); i++) {                                       \
145         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
146                                                                                \
147         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
148     }                                                                          \
149 }
150 DEF_VCLZ(8)
151 DEF_VCLZ(16)
152 
153 #define DEF_VCTZ(BITS)                                                         \
154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
155 {                                                                              \
156     int i;                                                                     \
157                                                                                \
158     for (i = 0; i < (128 / BITS); i++) {                                       \
159         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
160                                                                                \
161         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
162     }                                                                          \
163 }
164 DEF_VCTZ(8)
165 DEF_VCTZ(16)
166 
/*
 * Carry-less multiplication: like binary long multiplication, but the
 * shifted partial products are combined with XOR instead of addition.
 * Both operands and the result are TBITS wide; bits shifted out of @a
 * are lost.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
                                             uint##TBITS##_t b)                \
{                                                                              \
    uint##TBITS##_t acc = 0;                                                   \
                                                                               \
    for (; b != 0; a <<= 1, b >>= 1) {                                         \
        if (b & 1) {                                                           \
            acc ^= a;                                                          \
        }                                                                      \
    }                                                                          \
    return acc;                                                                \
}
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
185 
186 static S390Vector galois_multiply64(uint64_t a, uint64_t b)
187 {
188     S390Vector res = {};
189     S390Vector va = {
190         .doubleword[1] = a,
191     };
192     S390Vector vb = {
193         .doubleword[1] = b,
194     };
195 
196     while (!s390_vec_is_zero(&vb)) {
197         if (vb.doubleword[1] & 0x1) {
198             s390_vec_xor(&res, &res, &va);
199         }
200         s390_vec_shl(&va, &va, 1);
201         s390_vec_shr(&vb, &vb, 1);
202     }
203     return res;
204 }
205 
/*
 * One doubleword of the 8-bit Galois field multiply-sum-and-accumulate:
 * XOR together clmul_8x4_even(n, m), clmul_8x4_odd(n, m) and @a.
 *
 * No carry crosses between the two doublewords of a vector, so callers may
 * process them in either order, and registers never overlap partially.
 */
static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a)
{
    const uint64_t even = clmul_8x4_even(n, m);
    const uint64_t odd = clmul_8x4_odd(n, m);

    return even ^ odd ^ a;
}
214 
215 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d)
216 {
217     uint64_t *q1 = v1;
218     const uint64_t *q2 = v2, *q3 = v3;
219 
220     q1[0] = do_gfma8(q2[0], q3[0], 0);
221     q1[1] = do_gfma8(q2[1], q3[1], 0);
222 }
223 
224 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3,
225                          const void *v4, uint32_t desc)
226 {
227     uint64_t *q1 = v1;
228     const uint64_t *q2 = v2, *q3 = v3, *q4 = v4;
229 
230     q1[0] = do_gfma8(q2[0], q3[0], q4[0]);
231     q1[1] = do_gfma8(q2[1], q3[1], q4[1]);
232 }
233 
234 #define DEF_VGFM(BITS, TBITS)                                                  \
235 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
236                              uint32_t desc)                                    \
237 {                                                                              \
238     int i;                                                                     \
239                                                                                \
240     for (i = 0; i < (128 / TBITS); i++) {                                      \
241         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
242         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
243         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
244                                                                                \
245         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
246         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
247         d = d ^ galois_multiply32(a, b);                                       \
248         s390_vec_write_element##TBITS(v1, i, d);                               \
249     }                                                                          \
250 }
251 DEF_VGFM(16, 32)
252 DEF_VGFM(32, 64)
253 
254 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
255                          uint32_t desc)
256 {
257     S390Vector tmp1, tmp2;
258     uint64_t a, b;
259 
260     a = s390_vec_read_element64(v2, 0);
261     b = s390_vec_read_element64(v3, 0);
262     tmp1 = galois_multiply64(a, b);
263     a = s390_vec_read_element64(v2, 1);
264     b = s390_vec_read_element64(v3, 1);
265     tmp2 = galois_multiply64(a, b);
266     s390_vec_xor(v1, &tmp1, &tmp2);
267 }
268 
269 #define DEF_VGFMA(BITS, TBITS)                                                 \
270 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
271                               const void *v4, uint32_t desc)                   \
272 {                                                                              \
273     int i;                                                                     \
274                                                                                \
275     for (i = 0; i < (128 / TBITS); i++) {                                      \
276         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
277         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
278         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
279                                                                                \
280         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
281         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
282         d = d ^ galois_multiply32(a, b);                                       \
283         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
284         s390_vec_write_element##TBITS(v1, i, d);                               \
285     }                                                                          \
286 }
287 DEF_VGFMA(16, 32)
288 DEF_VGFMA(32, 64)
289 
290 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
291                           const void *v4, uint32_t desc)
292 {
293     S390Vector tmp1, tmp2;
294     uint64_t a, b;
295 
296     a = s390_vec_read_element64(v2, 0);
297     b = s390_vec_read_element64(v3, 0);
298     tmp1 = galois_multiply64(a, b);
299     a = s390_vec_read_element64(v2, 1);
300     b = s390_vec_read_element64(v3, 1);
301     tmp2 = galois_multiply64(a, b);
302     s390_vec_xor(&tmp1, &tmp1, &tmp2);
303     s390_vec_xor(v1, &tmp1, v4);
304 }
305 
306 #define DEF_VMAL(BITS)                                                         \
307 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
308                              const void *v4, uint32_t desc)                    \
309 {                                                                              \
310     int i;                                                                     \
311                                                                                \
312     for (i = 0; i < (128 / BITS); i++) {                                       \
313         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
314         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
315         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
316                                                                                \
317         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
318     }                                                                          \
319 }
320 DEF_VMAL(8)
321 DEF_VMAL(16)
322 
323 #define DEF_VMAH(BITS)                                                         \
324 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
325                              const void *v4, uint32_t desc)                    \
326 {                                                                              \
327     int i;                                                                     \
328                                                                                \
329     for (i = 0; i < (128 / BITS); i++) {                                       \
330         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
331         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
332         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
333                                                                                \
334         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
335     }                                                                          \
336 }
337 DEF_VMAH(8)
338 DEF_VMAH(16)
339 
340 #define DEF_VMALH(BITS)                                                        \
341 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
342                               const void *v4, uint32_t desc)                   \
343 {                                                                              \
344     int i;                                                                     \
345                                                                                \
346     for (i = 0; i < (128 / BITS); i++) {                                       \
347         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
348         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
349         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
350                                                                                \
351         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
352     }                                                                          \
353 }
354 DEF_VMALH(8)
355 DEF_VMALH(16)
356 
357 #define DEF_VMAE(BITS, TBITS)                                                  \
358 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
359                              const void *v4, uint32_t desc)                    \
360 {                                                                              \
361     int i, j;                                                                  \
362                                                                                \
363     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
364         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
365         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
366         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
367                                                                                \
368         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
369     }                                                                          \
370 }
371 DEF_VMAE(8, 16)
372 DEF_VMAE(16, 32)
373 DEF_VMAE(32, 64)
374 
375 #define DEF_VMALE(BITS, TBITS)                                                 \
376 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
377                               const void *v4, uint32_t desc)                   \
378 {                                                                              \
379     int i, j;                                                                  \
380                                                                                \
381     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
382         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
383         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
384         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
385                                                                                \
386         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
387     }                                                                          \
388 }
389 DEF_VMALE(8, 16)
390 DEF_VMALE(16, 32)
391 DEF_VMALE(32, 64)
392 
393 #define DEF_VMAO(BITS, TBITS)                                                  \
394 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
395                              const void *v4, uint32_t desc)                    \
396 {                                                                              \
397     int i, j;                                                                  \
398                                                                                \
399     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
400         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
401         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
402         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
403                                                                                \
404         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
405     }                                                                          \
406 }
407 DEF_VMAO(8, 16)
408 DEF_VMAO(16, 32)
409 DEF_VMAO(32, 64)
410 
411 #define DEF_VMALO(BITS, TBITS)                                                 \
412 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
413                               const void *v4, uint32_t desc)                   \
414 {                                                                              \
415     int i, j;                                                                  \
416                                                                                \
417     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
418         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
419         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
420         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
421                                                                                \
422         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
423     }                                                                          \
424 }
425 DEF_VMALO(8, 16)
426 DEF_VMALO(16, 32)
427 DEF_VMALO(32, 64)
428 
429 #define DEF_VMH(BITS)                                                          \
430 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
431                             uint32_t desc)                                     \
432 {                                                                              \
433     int i;                                                                     \
434                                                                                \
435     for (i = 0; i < (128 / BITS); i++) {                                       \
436         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
437         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
438                                                                                \
439         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
440     }                                                                          \
441 }
442 DEF_VMH(8)
443 DEF_VMH(16)
444 
445 #define DEF_VMLH(BITS)                                                         \
446 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
447                              uint32_t desc)                                    \
448 {                                                                              \
449     int i;                                                                     \
450                                                                                \
451     for (i = 0; i < (128 / BITS); i++) {                                       \
452         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
453         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
454                                                                                \
455         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
456     }                                                                          \
457 }
458 DEF_VMLH(8)
459 DEF_VMLH(16)
460 
461 #define DEF_VME(BITS, TBITS)                                                   \
462 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
463                             uint32_t desc)                                     \
464 {                                                                              \
465     int i, j;                                                                  \
466                                                                                \
467     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
468         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
469         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
470                                                                                \
471         s390_vec_write_element##TBITS(v1, i, a * b);                           \
472     }                                                                          \
473 }
474 DEF_VME(8, 16)
475 DEF_VME(16, 32)
476 DEF_VME(32, 64)
477 
478 #define DEF_VMLE(BITS, TBITS)                                                  \
479 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
480                              uint32_t desc)                                    \
481 {                                                                              \
482     int i, j;                                                                  \
483                                                                                \
484     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
485         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
486         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
487                                                                                \
488         s390_vec_write_element##TBITS(v1, i, a * b);                           \
489     }                                                                          \
490 }
491 DEF_VMLE(8, 16)
492 DEF_VMLE(16, 32)
493 DEF_VMLE(32, 64)
494 
495 #define DEF_VMO(BITS, TBITS)                                                   \
496 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
497                             uint32_t desc)                                     \
498 {                                                                              \
499     int i, j;                                                                  \
500                                                                                \
501     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
502         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
503         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
504                                                                                \
505         s390_vec_write_element##TBITS(v1, i, a * b);                           \
506     }                                                                          \
507 }
508 DEF_VMO(8, 16)
509 DEF_VMO(16, 32)
510 DEF_VMO(32, 64)
511 
512 #define DEF_VMLO(BITS, TBITS)                                                  \
513 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
514                              uint32_t desc)                                    \
515 {                                                                              \
516     int i, j;                                                                  \
517                                                                                \
518     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
519         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
520         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
521                                                                                \
522         s390_vec_write_element##TBITS(v1, i, a * b);                           \
523     }                                                                          \
524 }
525 DEF_VMLO(8, 16)
526 DEF_VMLO(16, 32)
527 DEF_VMLO(32, 64)
528 
529 #define DEF_VPOPCT(BITS)                                                       \
530 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
531 {                                                                              \
532     int i;                                                                     \
533                                                                                \
534     for (i = 0; i < (128 / BITS); i++) {                                       \
535         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
536                                                                                \
537         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
538     }                                                                          \
539 }
540 DEF_VPOPCT(8)
541 DEF_VPOPCT(16)
542 
543 #define DEF_VERIM(BITS)                                                        \
544 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
545                               uint32_t desc)                                   \
546 {                                                                              \
547     const uint8_t count = simd_data(desc);                                     \
548     int i;                                                                     \
549                                                                                \
550     for (i = 0; i < (128 / BITS); i++) {                                       \
551         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
552         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
553         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
554         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
555                                                                                \
556         s390_vec_write_element##BITS(v1, i, d);                                \
557     }                                                                          \
558 }
559 DEF_VERIM(8)
560 DEF_VERIM(16)
561 
562 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
563                       uint32_t desc)
564 {
565     s390_vec_shl(v1, v2, count);
566 }
567 
568 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
569                           uint32_t desc)
570 {
571     S390Vector tmp;
572     uint32_t sh, e0, e1 = 0;
573     int i;
574 
575     for (i = 15; i >= 0; --i, e1 = e0) {
576         e0 = s390_vec_read_element8(v2, i);
577         sh = s390_vec_read_element8(v3, i) & 7;
578 
579         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
580     }
581 
582     *(S390Vector *)v1 = tmp;
583 }
584 
585 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
586                        uint32_t desc)
587 {
588     s390_vec_sar(v1, v2, count);
589 }
590 
591 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
592                            uint32_t desc)
593 {
594     S390Vector tmp;
595     uint32_t sh, e0, e1 = 0;
596     int i = 0;
597 
598     /* Byte 0 is special only. */
599     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
600     sh = s390_vec_read_element8(v3, i) & 7;
601     s390_vec_write_element8(&tmp, i, e0 >> sh);
602 
603     e1 = e0;
604     for (i = 1; i < 16; ++i, e1 = e0) {
605         e0 = s390_vec_read_element8(v2, i);
606         sh = s390_vec_read_element8(v3, i) & 7;
607         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
608     }
609 
610     *(S390Vector *)v1 = tmp;
611 }
612 
613 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
614                        uint32_t desc)
615 {
616     s390_vec_shr(v1, v2, count);
617 }
618 
619 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
620                            uint32_t desc)
621 {
622     S390Vector tmp;
623     uint32_t sh, e0, e1 = 0;
624 
625     for (int i = 0; i < 16; ++i, e1 = e0) {
626         e0 = s390_vec_read_element8(v2, i);
627         sh = s390_vec_read_element8(v3, i) & 7;
628 
629         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
630     }
631 
632     *(S390Vector *)v1 = tmp;
633 }
634 
635 #define DEF_VSCBI(BITS)                                                        \
636 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
637                               uint32_t desc)                                   \
638 {                                                                              \
639     int i;                                                                     \
640                                                                                \
641     for (i = 0; i < (128 / BITS); i++) {                                       \
642         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
643         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
644                                                                                \
645         s390_vec_write_element##BITS(v1, i, a >= b);                           \
646     }                                                                          \
647 }
648 DEF_VSCBI(8)
649 DEF_VSCBI(16)
650 
651 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
652                       uint32_t desc)
653 {
654     S390Vector tmp;
655 
656     s390_vec_and(&tmp, v1, v2);
657     if (s390_vec_is_zero(&tmp)) {
658         /* Selected bits all zeros; or all mask bits zero */
659         env->cc_op = 0;
660     } else if (s390_vec_equal(&tmp, v2)) {
661         /* Selected bits all ones */
662         env->cc_op = 3;
663     } else {
664         /* Selected bits a mix of zeros and ones */
665         env->cc_op = 1;
666     }
667 }
668