xref: /qemu/target/s390x/tcg/vec_int_helper.c (revision ca61e750)
1 /*
2  * QEMU TCG support -- s390x vector integer instruction support
3  *
4  * Copyright (C) 2019 Red Hat Inc
5  *
6  * Authors:
7  *   David Hildenbrand <david@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "vec.h"
15 #include "exec/helper-proto.h"
16 #include "tcg/tcg-gvec-desc.h"
17 
18 static bool s390_vec_is_zero(const S390Vector *v)
19 {
20     return !v->doubleword[0] && !v->doubleword[1];
21 }
22 
23 static void s390_vec_xor(S390Vector *res, const S390Vector *a,
24                          const S390Vector *b)
25 {
26     res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
27     res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
28 }
29 
30 static void s390_vec_and(S390Vector *res, const S390Vector *a,
31                          const S390Vector *b)
32 {
33     res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
34     res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
35 }
36 
37 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
38 {
39     return a->doubleword[0] == b->doubleword[0] &&
40            a->doubleword[1] == b->doubleword[1];
41 }
42 
43 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
44 {
45     uint64_t tmp;
46 
47     g_assert(count < 128);
48     if (count == 0) {
49         d->doubleword[0] = a->doubleword[0];
50         d->doubleword[1] = a->doubleword[1];
51     } else if (count == 64) {
52         d->doubleword[0] = a->doubleword[1];
53         d->doubleword[1] = 0;
54     } else if (count < 64) {
55         tmp = extract64(a->doubleword[1], 64 - count, count);
56         d->doubleword[1] = a->doubleword[1] << count;
57         d->doubleword[0] = (a->doubleword[0] << count) | tmp;
58     } else {
59         d->doubleword[0] = a->doubleword[1] << (count - 64);
60         d->doubleword[1] = 0;
61     }
62 }
63 
64 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
65 {
66     uint64_t tmp;
67 
68     if (count == 0) {
69         d->doubleword[0] = a->doubleword[0];
70         d->doubleword[1] = a->doubleword[1];
71     } else if (count == 64) {
72         tmp = (int64_t)a->doubleword[0] >> 63;
73         d->doubleword[1] = a->doubleword[0];
74         d->doubleword[0] = tmp;
75     } else if (count < 64) {
76         tmp = a->doubleword[1] >> count;
77         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
78         d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
79     } else {
80         tmp = (int64_t)a->doubleword[0] >> 63;
81         d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
82         d->doubleword[0] = tmp;
83     }
84 }
85 
86 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
87 {
88     uint64_t tmp;
89 
90     g_assert(count < 128);
91     if (count == 0) {
92         d->doubleword[0] = a->doubleword[0];
93         d->doubleword[1] = a->doubleword[1];
94     } else if (count == 64) {
95         d->doubleword[1] = a->doubleword[0];
96         d->doubleword[0] = 0;
97     } else if (count < 64) {
98         tmp = a->doubleword[1] >> count;
99         d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
100         d->doubleword[0] = a->doubleword[0] >> count;
101     } else {
102         d->doubleword[1] = a->doubleword[0] >> (count - 64);
103         d->doubleword[0] = 0;
104     }
105 }
106 #define DEF_VAVG(BITS)                                                         \
107 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
108                              uint32_t desc)                                    \
109 {                                                                              \
110     int i;                                                                     \
111                                                                                \
112     for (i = 0; i < (128 / BITS); i++) {                                       \
113         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
114         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
115                                                                                \
116         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
117     }                                                                          \
118 }
119 DEF_VAVG(8)
120 DEF_VAVG(16)
121 
122 #define DEF_VAVGL(BITS)                                                        \
123 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
124                               uint32_t desc)                                   \
125 {                                                                              \
126     int i;                                                                     \
127                                                                                \
128     for (i = 0; i < (128 / BITS); i++) {                                       \
129         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
130         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
131                                                                                \
132         s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
133     }                                                                          \
134 }
135 DEF_VAVGL(8)
136 DEF_VAVGL(16)
137 
138 #define DEF_VCLZ(BITS)                                                         \
139 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
140 {                                                                              \
141     int i;                                                                     \
142                                                                                \
143     for (i = 0; i < (128 / BITS); i++) {                                       \
144         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
145                                                                                \
146         s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
147     }                                                                          \
148 }
149 DEF_VCLZ(8)
150 DEF_VCLZ(16)
151 
152 #define DEF_VCTZ(BITS)                                                         \
153 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
154 {                                                                              \
155     int i;                                                                     \
156                                                                                \
157     for (i = 0; i < (128 / BITS); i++) {                                       \
158         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
159                                                                                \
160         s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
161     }                                                                          \
162 }
163 DEF_VCTZ(8)
164 DEF_VCTZ(16)
165 
/*
 * Define galois_multiply<BITS>: shift-and-add multiplication where the
 * partial products are combined with XOR instead of addition. Operands and
 * result use the TBITS-wide type so the product of two BITS-wide values fits.
 */
#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
                                             uint##TBITS##_t b) \
{ \
    uint##TBITS##_t product = 0; \
\
    /* One iteration per remaining bit of b, lowest bit first. */ \
    for (; b != 0; b >>= 1, a <<= 1) { \
        if (b & 0x1) { \
            product ^= a; \
        } \
    } \
    return product; \
}
DEF_GALOIS_MULTIPLY(8, 16)
DEF_GALOIS_MULTIPLY(16, 32)
DEF_GALOIS_MULTIPLY(32, 64)
185 
186 static S390Vector galois_multiply64(uint64_t a, uint64_t b)
187 {
188     S390Vector res = {};
189     S390Vector va = {
190         .doubleword[1] = a,
191     };
192     S390Vector vb = {
193         .doubleword[1] = b,
194     };
195 
196     while (!s390_vec_is_zero(&vb)) {
197         if (vb.doubleword[1] & 0x1) {
198             s390_vec_xor(&res, &res, &va);
199         }
200         s390_vec_shl(&va, &va, 1);
201         s390_vec_shr(&vb, &vb, 1);
202     }
203     return res;
204 }
205 
206 #define DEF_VGFM(BITS, TBITS)                                                  \
207 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
208                              uint32_t desc)                                    \
209 {                                                                              \
210     int i;                                                                     \
211                                                                                \
212     for (i = 0; i < (128 / TBITS); i++) {                                      \
213         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
214         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
215         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
216                                                                                \
217         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
218         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
219         d = d ^ galois_multiply32(a, b);                                       \
220         s390_vec_write_element##TBITS(v1, i, d);                               \
221     }                                                                          \
222 }
223 DEF_VGFM(8, 16)
224 DEF_VGFM(16, 32)
225 DEF_VGFM(32, 64)
226 
227 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
228                          uint32_t desc)
229 {
230     S390Vector tmp1, tmp2;
231     uint64_t a, b;
232 
233     a = s390_vec_read_element64(v2, 0);
234     b = s390_vec_read_element64(v3, 0);
235     tmp1 = galois_multiply64(a, b);
236     a = s390_vec_read_element64(v2, 1);
237     b = s390_vec_read_element64(v3, 1);
238     tmp2 = galois_multiply64(a, b);
239     s390_vec_xor(v1, &tmp1, &tmp2);
240 }
241 
242 #define DEF_VGFMA(BITS, TBITS)                                                 \
243 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
244                               const void *v4, uint32_t desc)                   \
245 {                                                                              \
246     int i;                                                                     \
247                                                                                \
248     for (i = 0; i < (128 / TBITS); i++) {                                      \
249         uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
250         uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
251         uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
252                                                                                \
253         a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
254         b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
255         d = d ^ galois_multiply32(a, b);                                       \
256         d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
257         s390_vec_write_element##TBITS(v1, i, d);                               \
258     }                                                                          \
259 }
260 DEF_VGFMA(8, 16)
261 DEF_VGFMA(16, 32)
262 DEF_VGFMA(32, 64)
263 
264 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
265                           const void *v4, uint32_t desc)
266 {
267     S390Vector tmp1, tmp2;
268     uint64_t a, b;
269 
270     a = s390_vec_read_element64(v2, 0);
271     b = s390_vec_read_element64(v3, 0);
272     tmp1 = galois_multiply64(a, b);
273     a = s390_vec_read_element64(v2, 1);
274     b = s390_vec_read_element64(v3, 1);
275     tmp2 = galois_multiply64(a, b);
276     s390_vec_xor(&tmp1, &tmp1, &tmp2);
277     s390_vec_xor(v1, &tmp1, v4);
278 }
279 
280 #define DEF_VMAL(BITS)                                                         \
281 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
282                              const void *v4, uint32_t desc)                    \
283 {                                                                              \
284     int i;                                                                     \
285                                                                                \
286     for (i = 0; i < (128 / BITS); i++) {                                       \
287         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
288         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
289         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
290                                                                                \
291         s390_vec_write_element##BITS(v1, i, a * b + c);                        \
292     }                                                                          \
293 }
294 DEF_VMAL(8)
295 DEF_VMAL(16)
296 
297 #define DEF_VMAH(BITS)                                                         \
298 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
299                              const void *v4, uint32_t desc)                    \
300 {                                                                              \
301     int i;                                                                     \
302                                                                                \
303     for (i = 0; i < (128 / BITS); i++) {                                       \
304         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
305         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
306         const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
307                                                                                \
308         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
309     }                                                                          \
310 }
311 DEF_VMAH(8)
312 DEF_VMAH(16)
313 
314 #define DEF_VMALH(BITS)                                                        \
315 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
316                               const void *v4, uint32_t desc)                   \
317 {                                                                              \
318     int i;                                                                     \
319                                                                                \
320     for (i = 0; i < (128 / BITS); i++) {                                       \
321         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
322         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
323         const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
324                                                                                \
325         s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
326     }                                                                          \
327 }
328 DEF_VMALH(8)
329 DEF_VMALH(16)
330 
331 #define DEF_VMAE(BITS, TBITS)                                                  \
332 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
333                              const void *v4, uint32_t desc)                    \
334 {                                                                              \
335     int i, j;                                                                  \
336                                                                                \
337     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
338         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
339         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
340         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
341                                                                                \
342         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
343     }                                                                          \
344 }
345 DEF_VMAE(8, 16)
346 DEF_VMAE(16, 32)
347 DEF_VMAE(32, 64)
348 
349 #define DEF_VMALE(BITS, TBITS)                                                 \
350 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
351                               const void *v4, uint32_t desc)                   \
352 {                                                                              \
353     int i, j;                                                                  \
354                                                                                \
355     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
356         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
357         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
358         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
359                                                                                \
360         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
361     }                                                                          \
362 }
363 DEF_VMALE(8, 16)
364 DEF_VMALE(16, 32)
365 DEF_VMALE(32, 64)
366 
367 #define DEF_VMAO(BITS, TBITS)                                                  \
368 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
369                              const void *v4, uint32_t desc)                    \
370 {                                                                              \
371     int i, j;                                                                  \
372                                                                                \
373     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
374         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
375         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
376         int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
377                                                                                \
378         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
379     }                                                                          \
380 }
381 DEF_VMAO(8, 16)
382 DEF_VMAO(16, 32)
383 DEF_VMAO(32, 64)
384 
385 #define DEF_VMALO(BITS, TBITS)                                                 \
386 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
387                               const void *v4, uint32_t desc)                   \
388 {                                                                              \
389     int i, j;                                                                  \
390                                                                                \
391     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
392         uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
393         uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
394         uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
395                                                                                \
396         s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
397     }                                                                          \
398 }
399 DEF_VMALO(8, 16)
400 DEF_VMALO(16, 32)
401 DEF_VMALO(32, 64)
402 
403 #define DEF_VMH(BITS)                                                          \
404 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
405                             uint32_t desc)                                     \
406 {                                                                              \
407     int i;                                                                     \
408                                                                                \
409     for (i = 0; i < (128 / BITS); i++) {                                       \
410         const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
411         const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
412                                                                                \
413         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
414     }                                                                          \
415 }
416 DEF_VMH(8)
417 DEF_VMH(16)
418 
419 #define DEF_VMLH(BITS)                                                         \
420 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
421                              uint32_t desc)                                    \
422 {                                                                              \
423     int i;                                                                     \
424                                                                                \
425     for (i = 0; i < (128 / BITS); i++) {                                       \
426         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
427         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
428                                                                                \
429         s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
430     }                                                                          \
431 }
432 DEF_VMLH(8)
433 DEF_VMLH(16)
434 
435 #define DEF_VME(BITS, TBITS)                                                   \
436 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
437                             uint32_t desc)                                     \
438 {                                                                              \
439     int i, j;                                                                  \
440                                                                                \
441     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
442         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
443         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
444                                                                                \
445         s390_vec_write_element##TBITS(v1, i, a * b);                           \
446     }                                                                          \
447 }
448 DEF_VME(8, 16)
449 DEF_VME(16, 32)
450 DEF_VME(32, 64)
451 
452 #define DEF_VMLE(BITS, TBITS)                                                  \
453 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
454                              uint32_t desc)                                    \
455 {                                                                              \
456     int i, j;                                                                  \
457                                                                                \
458     for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
459         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
460         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
461                                                                                \
462         s390_vec_write_element##TBITS(v1, i, a * b);                           \
463     }                                                                          \
464 }
465 DEF_VMLE(8, 16)
466 DEF_VMLE(16, 32)
467 DEF_VMLE(32, 64)
468 
469 #define DEF_VMO(BITS, TBITS)                                                   \
470 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
471                             uint32_t desc)                                     \
472 {                                                                              \
473     int i, j;                                                                  \
474                                                                                \
475     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
476         int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
477         int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
478                                                                                \
479         s390_vec_write_element##TBITS(v1, i, a * b);                           \
480     }                                                                          \
481 }
482 DEF_VMO(8, 16)
483 DEF_VMO(16, 32)
484 DEF_VMO(32, 64)
485 
486 #define DEF_VMLO(BITS, TBITS)                                                  \
487 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
488                              uint32_t desc)                                    \
489 {                                                                              \
490     int i, j;                                                                  \
491                                                                                \
492     for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
493         const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
494         const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
495                                                                                \
496         s390_vec_write_element##TBITS(v1, i, a * b);                           \
497     }                                                                          \
498 }
499 DEF_VMLO(8, 16)
500 DEF_VMLO(16, 32)
501 DEF_VMLO(32, 64)
502 
503 #define DEF_VPOPCT(BITS)                                                       \
504 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
505 {                                                                              \
506     int i;                                                                     \
507                                                                                \
508     for (i = 0; i < (128 / BITS); i++) {                                       \
509         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
510                                                                                \
511         s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
512     }                                                                          \
513 }
514 DEF_VPOPCT(8)
515 DEF_VPOPCT(16)
516 
517 #define DEF_VERIM(BITS)                                                        \
518 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
519                               uint32_t desc)                                   \
520 {                                                                              \
521     const uint8_t count = simd_data(desc);                                     \
522     int i;                                                                     \
523                                                                                \
524     for (i = 0; i < (128 / BITS); i++) {                                       \
525         const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
526         const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
527         const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
528         const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
529                                                                                \
530         s390_vec_write_element##BITS(v1, i, d);                                \
531     }                                                                          \
532 }
533 DEF_VERIM(8)
534 DEF_VERIM(16)
535 
536 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
537                       uint32_t desc)
538 {
539     s390_vec_shl(v1, v2, count);
540 }
541 
542 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
543                           uint32_t desc)
544 {
545     S390Vector tmp;
546     uint32_t sh, e0, e1 = 0;
547     int i;
548 
549     for (i = 15; i >= 0; --i, e1 = e0) {
550         e0 = s390_vec_read_element8(v2, i);
551         sh = s390_vec_read_element8(v3, i) & 7;
552 
553         s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
554     }
555 
556     *(S390Vector *)v1 = tmp;
557 }
558 
559 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
560                        uint32_t desc)
561 {
562     s390_vec_sar(v1, v2, count);
563 }
564 
565 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
566                            uint32_t desc)
567 {
568     S390Vector tmp;
569     uint32_t sh, e0, e1 = 0;
570     int i = 0;
571 
572     /* Byte 0 is special only. */
573     e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
574     sh = s390_vec_read_element8(v3, i) & 7;
575     s390_vec_write_element8(&tmp, i, e0 >> sh);
576 
577     e1 = e0;
578     for (i = 1; i < 16; ++i, e1 = e0) {
579         e0 = s390_vec_read_element8(v2, i);
580         sh = s390_vec_read_element8(v3, i) & 7;
581         s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
582     }
583 
584     *(S390Vector *)v1 = tmp;
585 }
586 
587 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
588                        uint32_t desc)
589 {
590     s390_vec_shr(v1, v2, count);
591 }
592 
593 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
594                            uint32_t desc)
595 {
596     S390Vector tmp;
597     uint32_t sh, e0, e1 = 0;
598 
599     for (int i = 0; i < 16; ++i, e1 = e0) {
600         e0 = s390_vec_read_element8(v2, i);
601         sh = s390_vec_read_element8(v3, i) & 7;
602 
603         s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
604     }
605 
606     *(S390Vector *)v1 = tmp;
607 }
608 
609 #define DEF_VSCBI(BITS)                                                        \
610 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
611                               uint32_t desc)                                   \
612 {                                                                              \
613     int i;                                                                     \
614                                                                                \
615     for (i = 0; i < (128 / BITS); i++) {                                       \
616         const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
617         const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
618                                                                                \
619         s390_vec_write_element##BITS(v1, i, a >= b);                           \
620     }                                                                          \
621 }
622 DEF_VSCBI(8)
623 DEF_VSCBI(16)
624 
625 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
626                       uint32_t desc)
627 {
628     S390Vector tmp;
629 
630     s390_vec_and(&tmp, v1, v2);
631     if (s390_vec_is_zero(&tmp)) {
632         /* Selected bits all zeros; or all mask bits zero */
633         env->cc_op = 0;
634     } else if (s390_vec_equal(&tmp, v2)) {
635         /* Selected bits all ones */
636         env->cc_op = 3;
637     } else {
638         /* Selected bits a mix of zeros and ones */
639         env->cc_op = 1;
640     }
641 }
642