1 /* 2 * QEMU TCG support -- s390x vector integer instruction support 3 * 4 * Copyright (C) 2019 Red Hat Inc 5 * 6 * Authors: 7 * David Hildenbrand <david@redhat.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include "qemu/osdep.h" 13 #include "cpu.h" 14 #include "vec.h" 15 #include "exec/helper-proto.h" 16 #include "tcg/tcg-gvec-desc.h" 17 #include "crypto/clmul.h" 18 19 static bool s390_vec_is_zero(const S390Vector *v) 20 { 21 return !v->doubleword[0] && !v->doubleword[1]; 22 } 23 24 static void s390_vec_xor(S390Vector *res, const S390Vector *a, 25 const S390Vector *b) 26 { 27 res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; 28 res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; 29 } 30 31 static void s390_vec_and(S390Vector *res, const S390Vector *a, 32 const S390Vector *b) 33 { 34 res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; 35 res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; 36 } 37 38 static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) 39 { 40 return a->doubleword[0] == b->doubleword[0] && 41 a->doubleword[1] == b->doubleword[1]; 42 } 43 44 static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) 45 { 46 uint64_t tmp; 47 48 g_assert(count < 128); 49 if (count == 0) { 50 d->doubleword[0] = a->doubleword[0]; 51 d->doubleword[1] = a->doubleword[1]; 52 } else if (count == 64) { 53 d->doubleword[0] = a->doubleword[1]; 54 d->doubleword[1] = 0; 55 } else if (count < 64) { 56 tmp = extract64(a->doubleword[1], 64 - count, count); 57 d->doubleword[1] = a->doubleword[1] << count; 58 d->doubleword[0] = (a->doubleword[0] << count) | tmp; 59 } else { 60 d->doubleword[0] = a->doubleword[1] << (count - 64); 61 d->doubleword[1] = 0; 62 } 63 } 64 65 static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) 66 { 67 uint64_t tmp; 68 69 if (count == 0) { 70 d->doubleword[0] = a->doubleword[0]; 71 d->doubleword[1] = a->doubleword[1]; 72 } else if (count == 64) { 73 tmp = (int64_t)a->doubleword[0] >> 63; 74 d->doubleword[1] = a->doubleword[0]; 75 d->doubleword[0] = tmp; 76 } else if (count < 64) { 77 tmp = a->doubleword[1] >> count; 78 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 79 d->doubleword[0] = (int64_t)a->doubleword[0] >> count; 80 } else { 81 tmp = (int64_t)a->doubleword[0] >> 63; 82 d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); 83 d->doubleword[0] = tmp; 84 } 85 } 86 87 static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) 88 { 89 uint64_t tmp; 90 91 g_assert(count < 128); 92 if (count == 0) { 93 d->doubleword[0] = a->doubleword[0]; 94 d->doubleword[1] = a->doubleword[1]; 95 } else if (count == 64) { 96 d->doubleword[1] = a->doubleword[0]; 97 d->doubleword[0] = 0; 98 } else if (count < 64) { 99 tmp = a->doubleword[1] >> count; 100 d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); 101 d->doubleword[0] = a->doubleword[0] >> count; 102 } else { 103 d->doubleword[1] = a->doubleword[0] >> (count - 64); 104 d->doubleword[0] = 0; 105 } 106 } 107 #define DEF_VAVG(BITS) \ 108 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ 109 uint32_t desc) \ 110 { \ 111 int i; \ 112 \ 113 for (i = 0; i < (128 / BITS); i++) { \ 114 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 115 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 116 \ 117 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 118 } \ 119 } 120 DEF_VAVG(8) 121 DEF_VAVG(16) 122 123 #define DEF_VAVGL(BITS) \ 124 void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ 125 uint32_t desc) \ 126 { \ 127 int i; \ 128 \ 129 for (i = 0; i < (128 / BITS); i++) { \ 130 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 131 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 132 \ 133 s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ 134 } \ 135 } 136 DEF_VAVGL(8) 137 DEF_VAVGL(16) 138 139 #define DEF_VCLZ(BITS) \ 140 void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \ 141 { \ 142 int i; \ 143 \ 144 for (i = 0; i < (128 / BITS); i++) { \ 145 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 146 \ 147 s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \ 148 } \ 149 } 150 DEF_VCLZ(8) 151 DEF_VCLZ(16) 152 153 #define DEF_VCTZ(BITS) \ 154 void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \ 155 { \ 156 int i; \ 157 \ 158 for (i = 0; i < (128 / BITS); i++) { \ 159 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 160 \ 161 s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \ 162 } \ 163 } 164 DEF_VCTZ(8) 165 DEF_VCTZ(16) 166 167 /* like binary multiplication, but XOR instead of addition */ 168 #define DEF_GALOIS_MULTIPLY(BITS, TBITS) \ 169 static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \ 170 uint##TBITS##_t b) \ 171 { \ 172 uint##TBITS##_t res = 0; \ 173 \ 174 while (b) { \ 175 if (b & 0x1) { \ 176 res = res ^ a; \ 177 } \ 178 a = a << 1; \ 179 b = b >> 1; \ 180 } \ 181 return res; \ 182 } 183 DEF_GALOIS_MULTIPLY(16, 32) 184 DEF_GALOIS_MULTIPLY(32, 64) 185 186 static S390Vector galois_multiply64(uint64_t a, uint64_t b) 187 { 188 S390Vector res = {}; 189 S390Vector va = { 190 .doubleword[1] = a, 191 }; 192 S390Vector vb = { 193 .doubleword[1] = b, 194 }; 195 196 while (!s390_vec_is_zero(&vb)) { 197 if (vb.doubleword[1] & 0x1) { 198 s390_vec_xor(&res, &res, &va); 199 } 200 s390_vec_shl(&va, &va, 1); 201 s390_vec_shr(&vb, &vb, 1); 202 } 203 return res; 204 } 205 206 /* 207 * There is no carry across the two doublewords, so their order does 208 * not matter. Nor is there partial overlap between registers. 209 */ 210 static inline uint64_t do_gfma8(uint64_t n, uint64_t m, uint64_t a) 211 { 212 return clmul_8x4_even(n, m) ^ clmul_8x4_odd(n, m) ^ a; 213 } 214 215 void HELPER(gvec_vgfm8)(void *v1, const void *v2, const void *v3, uint32_t d) 216 { 217 uint64_t *q1 = v1; 218 const uint64_t *q2 = v2, *q3 = v3; 219 220 q1[0] = do_gfma8(q2[0], q3[0], 0); 221 q1[1] = do_gfma8(q2[1], q3[1], 0); 222 } 223 224 void HELPER(gvec_vgfma8)(void *v1, const void *v2, const void *v3, 225 const void *v4, uint32_t desc) 226 { 227 uint64_t *q1 = v1; 228 const uint64_t *q2 = v2, *q3 = v3, *q4 = v4; 229 230 q1[0] = do_gfma8(q2[0], q3[0], q4[0]); 231 q1[1] = do_gfma8(q2[1], q3[1], q4[1]); 232 } 233 234 #define DEF_VGFM(BITS, TBITS) \ 235 void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \ 236 uint32_t desc) \ 237 { \ 238 int i; \ 239 \ 240 for (i = 0; i < (128 / TBITS); i++) { \ 241 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 242 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 243 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 244 \ 245 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 246 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 247 d = d ^ galois_multiply32(a, b); \ 248 s390_vec_write_element##TBITS(v1, i, d); \ 249 } \ 250 } 251 DEF_VGFM(16, 32) 252 DEF_VGFM(32, 64) 253 254 void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3, 255 uint32_t desc) 256 { 257 S390Vector tmp1, tmp2; 258 uint64_t a, b; 259 260 a = s390_vec_read_element64(v2, 0); 261 b = s390_vec_read_element64(v3, 0); 262 tmp1 = galois_multiply64(a, b); 263 a = s390_vec_read_element64(v2, 1); 264 b = s390_vec_read_element64(v3, 1); 265 tmp2 = galois_multiply64(a, b); 266 s390_vec_xor(v1, &tmp1, &tmp2); 267 } 268 269 #define DEF_VGFMA(BITS, TBITS) \ 270 void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \ 271 const void *v4, uint32_t desc) \ 272 { \ 273 int i; \ 274 \ 275 for (i = 0; i < (128 / TBITS); i++) { \ 276 uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \ 277 uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \ 278 uint##TBITS##_t d = galois_multiply##BITS(a, b); \ 279 \ 280 a = s390_vec_read_element##BITS(v2, i * 2 + 1); \ 281 b = s390_vec_read_element##BITS(v3, i * 2 + 1); \ 282 d = d ^ galois_multiply32(a, b); \ 283 d = d ^ s390_vec_read_element##TBITS(v4, i); \ 284 s390_vec_write_element##TBITS(v1, i, d); \ 285 } \ 286 } 287 DEF_VGFMA(16, 32) 288 DEF_VGFMA(32, 64) 289 290 void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3, 291 const void *v4, uint32_t desc) 292 { 293 S390Vector tmp1, tmp2; 294 uint64_t a, b; 295 296 a = s390_vec_read_element64(v2, 0); 297 b = s390_vec_read_element64(v3, 0); 298 tmp1 = galois_multiply64(a, b); 299 a = s390_vec_read_element64(v2, 1); 300 b = s390_vec_read_element64(v3, 1); 301 tmp2 = galois_multiply64(a, b); 302 s390_vec_xor(&tmp1, &tmp1, &tmp2); 303 s390_vec_xor(v1, &tmp1, v4); 304 } 305 306 #define DEF_VMAL(BITS) \ 307 void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ 308 const void *v4, uint32_t desc) \ 309 { \ 310 int i; \ 311 \ 312 for (i = 0; i < (128 / BITS); i++) { \ 313 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 314 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 315 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 316 \ 317 s390_vec_write_element##BITS(v1, i, a * b + c); \ 318 } \ 319 } 320 DEF_VMAL(8) 321 DEF_VMAL(16) 322 323 #define DEF_VMAH(BITS) \ 324 void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ 325 const void *v4, uint32_t desc) \ 326 { \ 327 int i; \ 328 \ 329 for (i = 0; i < (128 / BITS); i++) { \ 330 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 331 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 332 const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ 333 \ 334 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 335 } \ 336 } 337 DEF_VMAH(8) 338 DEF_VMAH(16) 339 340 #define DEF_VMALH(BITS) \ 341 void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ 342 const void *v4, uint32_t desc) \ 343 { \ 344 int i; \ 345 \ 346 for (i = 0; i < (128 / BITS); i++) { \ 347 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 348 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 349 const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ 350 \ 351 s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ 352 } \ 353 } 354 DEF_VMALH(8) 355 DEF_VMALH(16) 356 357 #define DEF_VMAE(BITS, TBITS) \ 358 void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ 359 const void *v4, uint32_t desc) \ 360 { \ 361 int i, j; \ 362 \ 363 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 364 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 365 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 366 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 367 \ 368 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 369 } \ 370 } 371 DEF_VMAE(8, 16) 372 DEF_VMAE(16, 32) 373 DEF_VMAE(32, 64) 374 375 #define DEF_VMALE(BITS, TBITS) \ 376 void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ 377 const void *v4, uint32_t desc) \ 378 { \ 379 int i, j; \ 380 \ 381 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 382 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 383 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 384 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 385 \ 386 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 387 } \ 388 } 389 DEF_VMALE(8, 16) 390 DEF_VMALE(16, 32) 391 DEF_VMALE(32, 64) 392 393 #define DEF_VMAO(BITS, TBITS) \ 394 void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ 395 const void *v4, uint32_t desc) \ 396 { \ 397 int i, j; \ 398 \ 399 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 400 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 401 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 402 int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 403 \ 404 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 405 } \ 406 } 407 DEF_VMAO(8, 16) 408 DEF_VMAO(16, 32) 409 DEF_VMAO(32, 64) 410 411 #define DEF_VMALO(BITS, TBITS) \ 412 void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ 413 const void *v4, uint32_t desc) \ 414 { \ 415 int i, j; \ 416 \ 417 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 418 uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 419 uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 420 uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ 421 \ 422 s390_vec_write_element##TBITS(v1, i, a * b + c); \ 423 } \ 424 } 425 DEF_VMALO(8, 16) 426 DEF_VMALO(16, 32) 427 DEF_VMALO(32, 64) 428 429 #define DEF_VMH(BITS) \ 430 void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ 431 uint32_t desc) \ 432 { \ 433 int i; \ 434 \ 435 for (i = 0; i < (128 / BITS); i++) { \ 436 const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ 437 const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ 438 \ 439 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 440 } \ 441 } 442 DEF_VMH(8) 443 DEF_VMH(16) 444 445 #define DEF_VMLH(BITS) \ 446 void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ 447 uint32_t desc) \ 448 { \ 449 int i; \ 450 \ 451 for (i = 0; i < (128 / BITS); i++) { \ 452 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 453 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 454 \ 455 s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ 456 } \ 457 } 458 DEF_VMLH(8) 459 DEF_VMLH(16) 460 461 #define DEF_VME(BITS, TBITS) \ 462 void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ 463 uint32_t desc) \ 464 { \ 465 int i, j; \ 466 \ 467 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 468 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 469 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 470 \ 471 s390_vec_write_element##TBITS(v1, i, a * b); \ 472 } \ 473 } 474 DEF_VME(8, 16) 475 DEF_VME(16, 32) 476 DEF_VME(32, 64) 477 478 #define DEF_VMLE(BITS, TBITS) \ 479 void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ 480 uint32_t desc) \ 481 { \ 482 int i, j; \ 483 \ 484 for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ 485 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 486 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 487 \ 488 s390_vec_write_element##TBITS(v1, i, a * b); \ 489 } \ 490 } 491 DEF_VMLE(8, 16) 492 DEF_VMLE(16, 32) 493 DEF_VMLE(32, 64) 494 495 #define DEF_VMO(BITS, TBITS) \ 496 void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ 497 uint32_t desc) \ 498 { \ 499 int i, j; \ 500 \ 501 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 502 int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ 503 int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ 504 \ 505 s390_vec_write_element##TBITS(v1, i, a * b); \ 506 } \ 507 } 508 DEF_VMO(8, 16) 509 DEF_VMO(16, 32) 510 DEF_VMO(32, 64) 511 512 #define DEF_VMLO(BITS, TBITS) \ 513 void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ 514 uint32_t desc) \ 515 { \ 516 int i, j; \ 517 \ 518 for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ 519 const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ 520 const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ 521 \ 522 s390_vec_write_element##TBITS(v1, i, a * b); \ 523 } \ 524 } 525 DEF_VMLO(8, 16) 526 DEF_VMLO(16, 32) 527 DEF_VMLO(32, 64) 528 529 #define DEF_VPOPCT(BITS) \ 530 void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ 531 { \ 532 int i; \ 533 \ 534 for (i = 0; i < (128 / BITS); i++) { \ 535 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 536 \ 537 s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ 538 } \ 539 } 540 DEF_VPOPCT(8) 541 DEF_VPOPCT(16) 542 543 #define DEF_VERIM(BITS) \ 544 void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ 545 uint32_t desc) \ 546 { \ 547 const uint8_t count = simd_data(desc); \ 548 int i; \ 549 \ 550 for (i = 0; i < (128 / BITS); i++) { \ 551 const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ 552 const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i); \ 553 const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ 554 const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ 555 \ 556 s390_vec_write_element##BITS(v1, i, d); \ 557 } \ 558 } 559 DEF_VERIM(8) 560 DEF_VERIM(16) 561 562 void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, 563 uint32_t desc) 564 { 565 s390_vec_shl(v1, v2, count); 566 } 567 568 void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, 569 uint32_t desc) 570 { 571 S390Vector tmp; 572 uint32_t sh, e0, e1 = 0; 573 int i; 574 575 for (i = 15; i >= 0; --i, e1 = e0) { 576 e0 = s390_vec_read_element8(v2, i); 577 sh = s390_vec_read_element8(v3, i) & 7; 578 579 s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); 580 } 581 582 *(S390Vector *)v1 = tmp; 583 } 584 585 void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, 586 uint32_t desc) 587 { 588 s390_vec_sar(v1, v2, count); 589 } 590 591 void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, 592 uint32_t desc) 593 { 594 S390Vector tmp; 595 uint32_t sh, e0, e1 = 0; 596 int i = 0; 597 598 /* Byte 0 is special only. */ 599 e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); 600 sh = s390_vec_read_element8(v3, i) & 7; 601 s390_vec_write_element8(&tmp, i, e0 >> sh); 602 603 e1 = e0; 604 for (i = 1; i < 16; ++i, e1 = e0) { 605 e0 = s390_vec_read_element8(v2, i); 606 sh = s390_vec_read_element8(v3, i) & 7; 607 s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); 608 } 609 610 *(S390Vector *)v1 = tmp; 611 } 612 613 void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, 614 uint32_t desc) 615 { 616 s390_vec_shr(v1, v2, count); 617 } 618 619 void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, 620 uint32_t desc) 621 { 622 S390Vector tmp; 623 uint32_t sh, e0, e1 = 0; 624 625 for (int i = 0; i < 16; ++i, e1 = e0) { 626 e0 = s390_vec_read_element8(v2, i); 627 sh = s390_vec_read_element8(v3, i) & 7; 628 629 s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); 630 } 631 632 *(S390Vector *)v1 = tmp; 633 } 634 635 #define DEF_VSCBI(BITS) \ 636 void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ 637 uint32_t desc) \ 638 { \ 639 int i; \ 640 \ 641 for (i = 0; i < (128 / BITS); i++) { \ 642 const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ 643 const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ 644 \ 645 s390_vec_write_element##BITS(v1, i, a >= b); \ 646 } \ 647 } 648 DEF_VSCBI(8) 649 DEF_VSCBI(16) 650 651 void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, 652 uint32_t desc) 653 { 654 S390Vector tmp; 655 656 s390_vec_and(&tmp, v1, v2); 657 if (s390_vec_is_zero(&tmp)) { 658 /* Selected bits all zeros; or all mask bits zero */ 659 env->cc_op = 0; 660 } else if (s390_vec_equal(&tmp, v2)) { 661 /* Selected bits all ones */ 662 env->cc_op = 3; 663 } else { 664 /* Selected bits a mix of zeros and ones */ 665 env->cc_op = 1; 666 } 667 } 668