1 /*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "exec/helper-proto-common.h"
24 #include "tcg/tcg-gvec-desc.h"
25
26
/*
 * Zero the part of the destination that lies beyond the operation size.
 *
 * Bytes in [oprsz, simd_maxsz(desc)) of d are cleared with 64-bit stores;
 * nothing is written when the maximum size does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t max_bytes = simd_maxsz(desc);
    intptr_t off = oprsz;

    if (unlikely(max_bytes > oprsz)) {
        while (off < max_bytes) {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        }
    }
}
38
HELPER(gvec_add8)39 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40 {
41 intptr_t oprsz = simd_oprsz(desc);
42 intptr_t i;
43
44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46 }
47 clear_high(d, oprsz, desc);
48 }
49
HELPER(gvec_add16)50 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51 {
52 intptr_t oprsz = simd_oprsz(desc);
53 intptr_t i;
54
55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57 }
58 clear_high(d, oprsz, desc);
59 }
60
HELPER(gvec_add32)61 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62 {
63 intptr_t oprsz = simd_oprsz(desc);
64 intptr_t i;
65
66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68 }
69 clear_high(d, oprsz, desc);
70 }
71
HELPER(gvec_add64)72 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73 {
74 intptr_t oprsz = simd_oprsz(desc);
75 intptr_t i;
76
77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79 }
80 clear_high(d, oprsz, desc);
81 }
82
HELPER(gvec_adds8)83 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
84 {
85 intptr_t oprsz = simd_oprsz(desc);
86 intptr_t i;
87
88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
90 }
91 clear_high(d, oprsz, desc);
92 }
93
HELPER(gvec_adds16)94 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
95 {
96 intptr_t oprsz = simd_oprsz(desc);
97 intptr_t i;
98
99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
101 }
102 clear_high(d, oprsz, desc);
103 }
104
HELPER(gvec_adds32)105 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
106 {
107 intptr_t oprsz = simd_oprsz(desc);
108 intptr_t i;
109
110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
112 }
113 clear_high(d, oprsz, desc);
114 }
115
HELPER(gvec_adds64)116 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
117 {
118 intptr_t oprsz = simd_oprsz(desc);
119 intptr_t i;
120
121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
123 }
124 clear_high(d, oprsz, desc);
125 }
126
HELPER(gvec_sub8)127 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128 {
129 intptr_t oprsz = simd_oprsz(desc);
130 intptr_t i;
131
132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134 }
135 clear_high(d, oprsz, desc);
136 }
137
HELPER(gvec_sub16)138 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139 {
140 intptr_t oprsz = simd_oprsz(desc);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145 }
146 clear_high(d, oprsz, desc);
147 }
148
HELPER(gvec_sub32)149 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150 {
151 intptr_t oprsz = simd_oprsz(desc);
152 intptr_t i;
153
154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156 }
157 clear_high(d, oprsz, desc);
158 }
159
HELPER(gvec_sub64)160 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161 {
162 intptr_t oprsz = simd_oprsz(desc);
163 intptr_t i;
164
165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167 }
168 clear_high(d, oprsz, desc);
169 }
170
HELPER(gvec_subs8)171 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
172 {
173 intptr_t oprsz = simd_oprsz(desc);
174 intptr_t i;
175
176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
178 }
179 clear_high(d, oprsz, desc);
180 }
181
HELPER(gvec_subs16)182 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
183 {
184 intptr_t oprsz = simd_oprsz(desc);
185 intptr_t i;
186
187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
189 }
190 clear_high(d, oprsz, desc);
191 }
192
HELPER(gvec_subs32)193 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
194 {
195 intptr_t oprsz = simd_oprsz(desc);
196 intptr_t i;
197
198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
200 }
201 clear_high(d, oprsz, desc);
202 }
203
HELPER(gvec_subs64)204 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
205 {
206 intptr_t oprsz = simd_oprsz(desc);
207 intptr_t i;
208
209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
211 }
212 clear_high(d, oprsz, desc);
213 }
214
HELPER(gvec_mul8)215 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
216 {
217 intptr_t oprsz = simd_oprsz(desc);
218 intptr_t i;
219
220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
222 }
223 clear_high(d, oprsz, desc);
224 }
225
HELPER(gvec_mul16)226 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
227 {
228 intptr_t oprsz = simd_oprsz(desc);
229 intptr_t i;
230
231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
233 }
234 clear_high(d, oprsz, desc);
235 }
236
HELPER(gvec_mul32)237 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
238 {
239 intptr_t oprsz = simd_oprsz(desc);
240 intptr_t i;
241
242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
244 }
245 clear_high(d, oprsz, desc);
246 }
247
HELPER(gvec_mul64)248 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
249 {
250 intptr_t oprsz = simd_oprsz(desc);
251 intptr_t i;
252
253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
255 }
256 clear_high(d, oprsz, desc);
257 }
258
HELPER(gvec_muls8)259 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
260 {
261 intptr_t oprsz = simd_oprsz(desc);
262 intptr_t i;
263
264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
266 }
267 clear_high(d, oprsz, desc);
268 }
269
HELPER(gvec_muls16)270 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
271 {
272 intptr_t oprsz = simd_oprsz(desc);
273 intptr_t i;
274
275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
277 }
278 clear_high(d, oprsz, desc);
279 }
280
HELPER(gvec_muls32)281 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
282 {
283 intptr_t oprsz = simd_oprsz(desc);
284 intptr_t i;
285
286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
288 }
289 clear_high(d, oprsz, desc);
290 }
291
HELPER(gvec_muls64)292 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
293 {
294 intptr_t oprsz = simd_oprsz(desc);
295 intptr_t i;
296
297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
299 }
300 clear_high(d, oprsz, desc);
301 }
302
HELPER(gvec_neg8)303 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304 {
305 intptr_t oprsz = simd_oprsz(desc);
306 intptr_t i;
307
308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310 }
311 clear_high(d, oprsz, desc);
312 }
313
HELPER(gvec_neg16)314 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315 {
316 intptr_t oprsz = simd_oprsz(desc);
317 intptr_t i;
318
319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321 }
322 clear_high(d, oprsz, desc);
323 }
324
HELPER(gvec_neg32)325 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326 {
327 intptr_t oprsz = simd_oprsz(desc);
328 intptr_t i;
329
330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332 }
333 clear_high(d, oprsz, desc);
334 }
335
HELPER(gvec_neg64)336 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337 {
338 intptr_t oprsz = simd_oprsz(desc);
339 intptr_t i;
340
341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343 }
344 clear_high(d, oprsz, desc);
345 }
346
HELPER(gvec_abs8)347 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348 {
349 intptr_t oprsz = simd_oprsz(desc);
350 intptr_t i;
351
352 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353 int8_t aa = *(int8_t *)(a + i);
354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355 }
356 clear_high(d, oprsz, desc);
357 }
358
HELPER(gvec_abs16)359 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360 {
361 intptr_t oprsz = simd_oprsz(desc);
362 intptr_t i;
363
364 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365 int16_t aa = *(int16_t *)(a + i);
366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367 }
368 clear_high(d, oprsz, desc);
369 }
370
HELPER(gvec_abs32)371 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372 {
373 intptr_t oprsz = simd_oprsz(desc);
374 intptr_t i;
375
376 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377 int32_t aa = *(int32_t *)(a + i);
378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379 }
380 clear_high(d, oprsz, desc);
381 }
382
HELPER(gvec_abs64)383 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384 {
385 intptr_t oprsz = simd_oprsz(desc);
386 intptr_t i;
387
388 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389 int64_t aa = *(int64_t *)(a + i);
390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391 }
392 clear_high(d, oprsz, desc);
393 }
394
HELPER(gvec_mov)395 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396 {
397 intptr_t oprsz = simd_oprsz(desc);
398
399 memcpy(d, a, oprsz);
400 clear_high(d, oprsz, desc);
401 }
402
HELPER(gvec_dup64)403 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404 {
405 intptr_t oprsz = simd_oprsz(desc);
406 intptr_t i;
407
408 if (c == 0) {
409 oprsz = 0;
410 } else {
411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412 *(uint64_t *)(d + i) = c;
413 }
414 }
415 clear_high(d, oprsz, desc);
416 }
417
HELPER(gvec_dup32)418 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419 {
420 intptr_t oprsz = simd_oprsz(desc);
421 intptr_t i;
422
423 if (c == 0) {
424 oprsz = 0;
425 } else {
426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427 *(uint32_t *)(d + i) = c;
428 }
429 }
430 clear_high(d, oprsz, desc);
431 }
432
HELPER(gvec_dup16)433 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434 {
435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436 }
437
HELPER(gvec_dup8)438 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439 {
440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441 }
442
HELPER(gvec_not)443 void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444 {
445 intptr_t oprsz = simd_oprsz(desc);
446 intptr_t i;
447
448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450 }
451 clear_high(d, oprsz, desc);
452 }
453
HELPER(gvec_and)454 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455 {
456 intptr_t oprsz = simd_oprsz(desc);
457 intptr_t i;
458
459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461 }
462 clear_high(d, oprsz, desc);
463 }
464
HELPER(gvec_or)465 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466 {
467 intptr_t oprsz = simd_oprsz(desc);
468 intptr_t i;
469
470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472 }
473 clear_high(d, oprsz, desc);
474 }
475
HELPER(gvec_xor)476 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477 {
478 intptr_t oprsz = simd_oprsz(desc);
479 intptr_t i;
480
481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483 }
484 clear_high(d, oprsz, desc);
485 }
486
HELPER(gvec_andc)487 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488 {
489 intptr_t oprsz = simd_oprsz(desc);
490 intptr_t i;
491
492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494 }
495 clear_high(d, oprsz, desc);
496 }
497
HELPER(gvec_orc)498 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499 {
500 intptr_t oprsz = simd_oprsz(desc);
501 intptr_t i;
502
503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505 }
506 clear_high(d, oprsz, desc);
507 }
508
HELPER(gvec_nand)509 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510 {
511 intptr_t oprsz = simd_oprsz(desc);
512 intptr_t i;
513
514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516 }
517 clear_high(d, oprsz, desc);
518 }
519
HELPER(gvec_nor)520 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521 {
522 intptr_t oprsz = simd_oprsz(desc);
523 intptr_t i;
524
525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527 }
528 clear_high(d, oprsz, desc);
529 }
530
HELPER(gvec_eqv)531 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532 {
533 intptr_t oprsz = simd_oprsz(desc);
534 intptr_t i;
535
536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538 }
539 clear_high(d, oprsz, desc);
540 }
541
HELPER(gvec_ands)542 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
543 {
544 intptr_t oprsz = simd_oprsz(desc);
545 intptr_t i;
546
547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
549 }
550 clear_high(d, oprsz, desc);
551 }
552
HELPER(gvec_andcs)553 void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
554 {
555 intptr_t oprsz = simd_oprsz(desc);
556 intptr_t i;
557
558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
560 }
561 clear_high(d, oprsz, desc);
562 }
563
HELPER(gvec_xors)564 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
565 {
566 intptr_t oprsz = simd_oprsz(desc);
567 intptr_t i;
568
569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
571 }
572 clear_high(d, oprsz, desc);
573 }
574
HELPER(gvec_ors)575 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
576 {
577 intptr_t oprsz = simd_oprsz(desc);
578 intptr_t i;
579
580 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
581 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
582 }
583 clear_high(d, oprsz, desc);
584 }
585
HELPER(gvec_shl8i)586 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
587 {
588 intptr_t oprsz = simd_oprsz(desc);
589 int shift = simd_data(desc);
590 intptr_t i;
591
592 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
593 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
594 }
595 clear_high(d, oprsz, desc);
596 }
597
HELPER(gvec_shl16i)598 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
599 {
600 intptr_t oprsz = simd_oprsz(desc);
601 int shift = simd_data(desc);
602 intptr_t i;
603
604 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
605 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
606 }
607 clear_high(d, oprsz, desc);
608 }
609
HELPER(gvec_shl32i)610 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
611 {
612 intptr_t oprsz = simd_oprsz(desc);
613 int shift = simd_data(desc);
614 intptr_t i;
615
616 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
617 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
618 }
619 clear_high(d, oprsz, desc);
620 }
621
HELPER(gvec_shl64i)622 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
623 {
624 intptr_t oprsz = simd_oprsz(desc);
625 int shift = simd_data(desc);
626 intptr_t i;
627
628 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
629 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
630 }
631 clear_high(d, oprsz, desc);
632 }
633
HELPER(gvec_shr8i)634 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
635 {
636 intptr_t oprsz = simd_oprsz(desc);
637 int shift = simd_data(desc);
638 intptr_t i;
639
640 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
641 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
642 }
643 clear_high(d, oprsz, desc);
644 }
645
HELPER(gvec_shr16i)646 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
647 {
648 intptr_t oprsz = simd_oprsz(desc);
649 int shift = simd_data(desc);
650 intptr_t i;
651
652 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
653 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
654 }
655 clear_high(d, oprsz, desc);
656 }
657
HELPER(gvec_shr32i)658 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
659 {
660 intptr_t oprsz = simd_oprsz(desc);
661 int shift = simd_data(desc);
662 intptr_t i;
663
664 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
665 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
666 }
667 clear_high(d, oprsz, desc);
668 }
669
HELPER(gvec_shr64i)670 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
671 {
672 intptr_t oprsz = simd_oprsz(desc);
673 int shift = simd_data(desc);
674 intptr_t i;
675
676 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
677 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
678 }
679 clear_high(d, oprsz, desc);
680 }
681
HELPER(gvec_sar8i)682 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
683 {
684 intptr_t oprsz = simd_oprsz(desc);
685 int shift = simd_data(desc);
686 intptr_t i;
687
688 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
689 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
690 }
691 clear_high(d, oprsz, desc);
692 }
693
HELPER(gvec_sar16i)694 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
695 {
696 intptr_t oprsz = simd_oprsz(desc);
697 int shift = simd_data(desc);
698 intptr_t i;
699
700 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
701 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
702 }
703 clear_high(d, oprsz, desc);
704 }
705
HELPER(gvec_sar32i)706 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
707 {
708 intptr_t oprsz = simd_oprsz(desc);
709 int shift = simd_data(desc);
710 intptr_t i;
711
712 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
713 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
714 }
715 clear_high(d, oprsz, desc);
716 }
717
HELPER(gvec_sar64i)718 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
719 {
720 intptr_t oprsz = simd_oprsz(desc);
721 int shift = simd_data(desc);
722 intptr_t i;
723
724 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
725 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
726 }
727 clear_high(d, oprsz, desc);
728 }
729
HELPER(gvec_rotl8i)730 void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
731 {
732 intptr_t oprsz = simd_oprsz(desc);
733 int shift = simd_data(desc);
734 intptr_t i;
735
736 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
737 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
738 }
739 clear_high(d, oprsz, desc);
740 }
741
HELPER(gvec_rotl16i)742 void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
743 {
744 intptr_t oprsz = simd_oprsz(desc);
745 int shift = simd_data(desc);
746 intptr_t i;
747
748 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
749 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
750 }
751 clear_high(d, oprsz, desc);
752 }
753
HELPER(gvec_rotl32i)754 void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
755 {
756 intptr_t oprsz = simd_oprsz(desc);
757 int shift = simd_data(desc);
758 intptr_t i;
759
760 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
761 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
762 }
763 clear_high(d, oprsz, desc);
764 }
765
HELPER(gvec_rotl64i)766 void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
767 {
768 intptr_t oprsz = simd_oprsz(desc);
769 int shift = simd_data(desc);
770 intptr_t i;
771
772 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
773 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
774 }
775 clear_high(d, oprsz, desc);
776 }
777
HELPER(gvec_shl8v)778 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
779 {
780 intptr_t oprsz = simd_oprsz(desc);
781 intptr_t i;
782
783 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
784 uint8_t sh = *(uint8_t *)(b + i) & 7;
785 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
786 }
787 clear_high(d, oprsz, desc);
788 }
789
HELPER(gvec_shl16v)790 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
791 {
792 intptr_t oprsz = simd_oprsz(desc);
793 intptr_t i;
794
795 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
796 uint8_t sh = *(uint16_t *)(b + i) & 15;
797 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
798 }
799 clear_high(d, oprsz, desc);
800 }
801
HELPER(gvec_shl32v)802 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
803 {
804 intptr_t oprsz = simd_oprsz(desc);
805 intptr_t i;
806
807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
808 uint8_t sh = *(uint32_t *)(b + i) & 31;
809 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
810 }
811 clear_high(d, oprsz, desc);
812 }
813
HELPER(gvec_shl64v)814 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
815 {
816 intptr_t oprsz = simd_oprsz(desc);
817 intptr_t i;
818
819 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
820 uint8_t sh = *(uint64_t *)(b + i) & 63;
821 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
822 }
823 clear_high(d, oprsz, desc);
824 }
825
HELPER(gvec_shr8v)826 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
827 {
828 intptr_t oprsz = simd_oprsz(desc);
829 intptr_t i;
830
831 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
832 uint8_t sh = *(uint8_t *)(b + i) & 7;
833 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
834 }
835 clear_high(d, oprsz, desc);
836 }
837
HELPER(gvec_shr16v)838 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
839 {
840 intptr_t oprsz = simd_oprsz(desc);
841 intptr_t i;
842
843 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
844 uint8_t sh = *(uint16_t *)(b + i) & 15;
845 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
846 }
847 clear_high(d, oprsz, desc);
848 }
849
HELPER(gvec_shr32v)850 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
851 {
852 intptr_t oprsz = simd_oprsz(desc);
853 intptr_t i;
854
855 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
856 uint8_t sh = *(uint32_t *)(b + i) & 31;
857 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
858 }
859 clear_high(d, oprsz, desc);
860 }
861
HELPER(gvec_shr64v)862 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
863 {
864 intptr_t oprsz = simd_oprsz(desc);
865 intptr_t i;
866
867 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
868 uint8_t sh = *(uint64_t *)(b + i) & 63;
869 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
870 }
871 clear_high(d, oprsz, desc);
872 }
873
HELPER(gvec_sar8v)874 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
875 {
876 intptr_t oprsz = simd_oprsz(desc);
877 intptr_t i;
878
879 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
880 uint8_t sh = *(uint8_t *)(b + i) & 7;
881 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
882 }
883 clear_high(d, oprsz, desc);
884 }
885
HELPER(gvec_sar16v)886 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
887 {
888 intptr_t oprsz = simd_oprsz(desc);
889 intptr_t i;
890
891 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
892 uint8_t sh = *(uint16_t *)(b + i) & 15;
893 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
894 }
895 clear_high(d, oprsz, desc);
896 }
897
HELPER(gvec_sar32v)898 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
899 {
900 intptr_t oprsz = simd_oprsz(desc);
901 intptr_t i;
902
903 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
904 uint8_t sh = *(uint32_t *)(b + i) & 31;
905 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
906 }
907 clear_high(d, oprsz, desc);
908 }
909
HELPER(gvec_sar64v)910 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
911 {
912 intptr_t oprsz = simd_oprsz(desc);
913 intptr_t i;
914
915 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
916 uint8_t sh = *(uint64_t *)(b + i) & 63;
917 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
918 }
919 clear_high(d, oprsz, desc);
920 }
921
HELPER(gvec_rotl8v)922 void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
923 {
924 intptr_t oprsz = simd_oprsz(desc);
925 intptr_t i;
926
927 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
928 uint8_t sh = *(uint8_t *)(b + i) & 7;
929 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
930 }
931 clear_high(d, oprsz, desc);
932 }
933
HELPER(gvec_rotl16v)934 void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
935 {
936 intptr_t oprsz = simd_oprsz(desc);
937 intptr_t i;
938
939 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
940 uint8_t sh = *(uint16_t *)(b + i) & 15;
941 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
942 }
943 clear_high(d, oprsz, desc);
944 }
945
HELPER(gvec_rotl32v)946 void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
947 {
948 intptr_t oprsz = simd_oprsz(desc);
949 intptr_t i;
950
951 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
952 uint8_t sh = *(uint32_t *)(b + i) & 31;
953 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
954 }
955 clear_high(d, oprsz, desc);
956 }
957
HELPER(gvec_rotl64v)958 void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
959 {
960 intptr_t oprsz = simd_oprsz(desc);
961 intptr_t i;
962
963 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
964 uint8_t sh = *(uint64_t *)(b + i) & 63;
965 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
966 }
967 clear_high(d, oprsz, desc);
968 }
969
HELPER(gvec_rotr8v)970 void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
971 {
972 intptr_t oprsz = simd_oprsz(desc);
973 intptr_t i;
974
975 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
976 uint8_t sh = *(uint8_t *)(b + i) & 7;
977 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
978 }
979 clear_high(d, oprsz, desc);
980 }
981
HELPER(gvec_rotr16v)982 void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
983 {
984 intptr_t oprsz = simd_oprsz(desc);
985 intptr_t i;
986
987 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
988 uint8_t sh = *(uint16_t *)(b + i) & 15;
989 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
990 }
991 clear_high(d, oprsz, desc);
992 }
993
HELPER(gvec_rotr32v)994 void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
995 {
996 intptr_t oprsz = simd_oprsz(desc);
997 intptr_t i;
998
999 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1000 uint8_t sh = *(uint32_t *)(b + i) & 31;
1001 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
1002 }
1003 clear_high(d, oprsz, desc);
1004 }
1005
HELPER(gvec_rotr64v)1006 void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
1007 {
1008 intptr_t oprsz = simd_oprsz(desc);
1009 intptr_t i;
1010
1011 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1012 uint8_t sh = *(uint64_t *)(b + i) & 63;
1013 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1014 }
1015 clear_high(d, oprsz, desc);
1016 }
1017
1018 #define DO_CMP1(NAME, TYPE, OP) \
1019 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
1020 { \
1021 intptr_t oprsz = simd_oprsz(desc); \
1022 intptr_t i; \
1023 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
1024 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
1025 } \
1026 clear_high(d, oprsz, desc); \
1027 }
1028
1029 #define DO_CMP2(SZ) \
1030 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
1031 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
1032 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
1033 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
1034 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
1035 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1036
1037 DO_CMP2(8)
1038 DO_CMP2(16)
1039 DO_CMP2(32)
1040 DO_CMP2(64)
1041
1042 #undef DO_CMP1
1043 #undef DO_CMP2
1044
1045 #define DO_CMP1(NAME, TYPE, OP) \
1046 void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
1047 { \
1048 intptr_t oprsz = simd_oprsz(desc); \
1049 TYPE inv = simd_data(desc), b = b64; \
1050 for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
1051 *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
1052 } \
1053 clear_high(d, oprsz, desc); \
1054 }
1055
1056 #define DO_CMP2(SZ) \
1057 DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
1058 DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
1059 DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
1060 DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
1061 DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
1062
1063 DO_CMP2(8)
1064 DO_CMP2(16)
1065 DO_CMP2(32)
1066 DO_CMP2(64)
1067
1068 #undef DO_CMP1
1069 #undef DO_CMP2
1070
HELPER(gvec_ssadd8)1071 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1072 {
1073 intptr_t oprsz = simd_oprsz(desc);
1074 intptr_t i;
1075
1076 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1077 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1078 if (r > INT8_MAX) {
1079 r = INT8_MAX;
1080 } else if (r < INT8_MIN) {
1081 r = INT8_MIN;
1082 }
1083 *(int8_t *)(d + i) = r;
1084 }
1085 clear_high(d, oprsz, desc);
1086 }
1087
HELPER(gvec_ssadd16)1088 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1089 {
1090 intptr_t oprsz = simd_oprsz(desc);
1091 intptr_t i;
1092
1093 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1094 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1095 if (r > INT16_MAX) {
1096 r = INT16_MAX;
1097 } else if (r < INT16_MIN) {
1098 r = INT16_MIN;
1099 }
1100 *(int16_t *)(d + i) = r;
1101 }
1102 clear_high(d, oprsz, desc);
1103 }
1104
HELPER(gvec_ssadd32)1105 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1106 {
1107 intptr_t oprsz = simd_oprsz(desc);
1108 intptr_t i;
1109
1110 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1111 int32_t ai = *(int32_t *)(a + i);
1112 int32_t bi = *(int32_t *)(b + i);
1113 int32_t di;
1114 if (sadd32_overflow(ai, bi, &di)) {
1115 di = (di < 0 ? INT32_MAX : INT32_MIN);
1116 }
1117 *(int32_t *)(d + i) = di;
1118 }
1119 clear_high(d, oprsz, desc);
1120 }
1121
HELPER(gvec_ssadd64)1122 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1123 {
1124 intptr_t oprsz = simd_oprsz(desc);
1125 intptr_t i;
1126
1127 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1128 int64_t ai = *(int64_t *)(a + i);
1129 int64_t bi = *(int64_t *)(b + i);
1130 int64_t di;
1131 if (sadd64_overflow(ai, bi, &di)) {
1132 di = (di < 0 ? INT64_MAX : INT64_MIN);
1133 }
1134 *(int64_t *)(d + i) = di;
1135 }
1136 clear_high(d, oprsz, desc);
1137 }
1138
HELPER(gvec_sssub8)1139 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1140 {
1141 intptr_t oprsz = simd_oprsz(desc);
1142 intptr_t i;
1143
1144 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1145 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1146 if (r > INT8_MAX) {
1147 r = INT8_MAX;
1148 } else if (r < INT8_MIN) {
1149 r = INT8_MIN;
1150 }
1151 *(uint8_t *)(d + i) = r;
1152 }
1153 clear_high(d, oprsz, desc);
1154 }
1155
HELPER(gvec_sssub16)1156 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1157 {
1158 intptr_t oprsz = simd_oprsz(desc);
1159 intptr_t i;
1160
1161 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1162 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1163 if (r > INT16_MAX) {
1164 r = INT16_MAX;
1165 } else if (r < INT16_MIN) {
1166 r = INT16_MIN;
1167 }
1168 *(int16_t *)(d + i) = r;
1169 }
1170 clear_high(d, oprsz, desc);
1171 }
1172
HELPER(gvec_sssub32)1173 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1174 {
1175 intptr_t oprsz = simd_oprsz(desc);
1176 intptr_t i;
1177
1178 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1179 int32_t ai = *(int32_t *)(a + i);
1180 int32_t bi = *(int32_t *)(b + i);
1181 int32_t di;
1182 if (ssub32_overflow(ai, bi, &di)) {
1183 di = (di < 0 ? INT32_MAX : INT32_MIN);
1184 }
1185 *(int32_t *)(d + i) = di;
1186 }
1187 clear_high(d, oprsz, desc);
1188 }
1189
HELPER(gvec_sssub64)1190 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1191 {
1192 intptr_t oprsz = simd_oprsz(desc);
1193 intptr_t i;
1194
1195 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1196 int64_t ai = *(int64_t *)(a + i);
1197 int64_t bi = *(int64_t *)(b + i);
1198 int64_t di;
1199 if (ssub64_overflow(ai, bi, &di)) {
1200 di = (di < 0 ? INT64_MAX : INT64_MIN);
1201 }
1202 *(int64_t *)(d + i) = di;
1203 }
1204 clear_high(d, oprsz, desc);
1205 }
1206
HELPER(gvec_usadd8)1207 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1208 {
1209 intptr_t oprsz = simd_oprsz(desc);
1210 intptr_t i;
1211
1212 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1213 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1214 if (r > UINT8_MAX) {
1215 r = UINT8_MAX;
1216 }
1217 *(uint8_t *)(d + i) = r;
1218 }
1219 clear_high(d, oprsz, desc);
1220 }
1221
HELPER(gvec_usadd16)1222 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1223 {
1224 intptr_t oprsz = simd_oprsz(desc);
1225 intptr_t i;
1226
1227 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1228 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1229 if (r > UINT16_MAX) {
1230 r = UINT16_MAX;
1231 }
1232 *(uint16_t *)(d + i) = r;
1233 }
1234 clear_high(d, oprsz, desc);
1235 }
1236
HELPER(gvec_usadd32)1237 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1238 {
1239 intptr_t oprsz = simd_oprsz(desc);
1240 intptr_t i;
1241
1242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1243 uint32_t ai = *(uint32_t *)(a + i);
1244 uint32_t bi = *(uint32_t *)(b + i);
1245 uint32_t di;
1246 if (uadd32_overflow(ai, bi, &di)) {
1247 di = UINT32_MAX;
1248 }
1249 *(uint32_t *)(d + i) = di;
1250 }
1251 clear_high(d, oprsz, desc);
1252 }
1253
HELPER(gvec_usadd64)1254 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1255 {
1256 intptr_t oprsz = simd_oprsz(desc);
1257 intptr_t i;
1258
1259 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1260 uint64_t ai = *(uint64_t *)(a + i);
1261 uint64_t bi = *(uint64_t *)(b + i);
1262 uint64_t di;
1263 if (uadd64_overflow(ai, bi, &di)) {
1264 di = UINT64_MAX;
1265 }
1266 *(uint64_t *)(d + i) = di;
1267 }
1268 clear_high(d, oprsz, desc);
1269 }
1270
HELPER(gvec_ussub8)1271 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1272 {
1273 intptr_t oprsz = simd_oprsz(desc);
1274 intptr_t i;
1275
1276 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1277 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1278 if (r < 0) {
1279 r = 0;
1280 }
1281 *(uint8_t *)(d + i) = r;
1282 }
1283 clear_high(d, oprsz, desc);
1284 }
1285
HELPER(gvec_ussub16)1286 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1287 {
1288 intptr_t oprsz = simd_oprsz(desc);
1289 intptr_t i;
1290
1291 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1292 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1293 if (r < 0) {
1294 r = 0;
1295 }
1296 *(uint16_t *)(d + i) = r;
1297 }
1298 clear_high(d, oprsz, desc);
1299 }
1300
HELPER(gvec_ussub32)1301 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1302 {
1303 intptr_t oprsz = simd_oprsz(desc);
1304 intptr_t i;
1305
1306 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1307 uint32_t ai = *(uint32_t *)(a + i);
1308 uint32_t bi = *(uint32_t *)(b + i);
1309 uint32_t di;
1310 if (usub32_overflow(ai, bi, &di)) {
1311 di = 0;
1312 }
1313 *(uint32_t *)(d + i) = di;
1314 }
1315 clear_high(d, oprsz, desc);
1316 }
1317
HELPER(gvec_ussub64)1318 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1319 {
1320 intptr_t oprsz = simd_oprsz(desc);
1321 intptr_t i;
1322
1323 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1324 uint64_t ai = *(uint64_t *)(a + i);
1325 uint64_t bi = *(uint64_t *)(b + i);
1326 uint64_t di;
1327 if (usub64_overflow(ai, bi, &di)) {
1328 di = 0;
1329 }
1330 *(uint64_t *)(d + i) = di;
1331 }
1332 clear_high(d, oprsz, desc);
1333 }
1334
HELPER(gvec_smin8)1335 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1336 {
1337 intptr_t oprsz = simd_oprsz(desc);
1338 intptr_t i;
1339
1340 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1341 int8_t aa = *(int8_t *)(a + i);
1342 int8_t bb = *(int8_t *)(b + i);
1343 int8_t dd = aa < bb ? aa : bb;
1344 *(int8_t *)(d + i) = dd;
1345 }
1346 clear_high(d, oprsz, desc);
1347 }
1348
HELPER(gvec_smin16)1349 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1350 {
1351 intptr_t oprsz = simd_oprsz(desc);
1352 intptr_t i;
1353
1354 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1355 int16_t aa = *(int16_t *)(a + i);
1356 int16_t bb = *(int16_t *)(b + i);
1357 int16_t dd = aa < bb ? aa : bb;
1358 *(int16_t *)(d + i) = dd;
1359 }
1360 clear_high(d, oprsz, desc);
1361 }
1362
HELPER(gvec_smin32)1363 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1364 {
1365 intptr_t oprsz = simd_oprsz(desc);
1366 intptr_t i;
1367
1368 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1369 int32_t aa = *(int32_t *)(a + i);
1370 int32_t bb = *(int32_t *)(b + i);
1371 int32_t dd = aa < bb ? aa : bb;
1372 *(int32_t *)(d + i) = dd;
1373 }
1374 clear_high(d, oprsz, desc);
1375 }
1376
HELPER(gvec_smin64)1377 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1378 {
1379 intptr_t oprsz = simd_oprsz(desc);
1380 intptr_t i;
1381
1382 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1383 int64_t aa = *(int64_t *)(a + i);
1384 int64_t bb = *(int64_t *)(b + i);
1385 int64_t dd = aa < bb ? aa : bb;
1386 *(int64_t *)(d + i) = dd;
1387 }
1388 clear_high(d, oprsz, desc);
1389 }
1390
HELPER(gvec_smax8)1391 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1392 {
1393 intptr_t oprsz = simd_oprsz(desc);
1394 intptr_t i;
1395
1396 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1397 int8_t aa = *(int8_t *)(a + i);
1398 int8_t bb = *(int8_t *)(b + i);
1399 int8_t dd = aa > bb ? aa : bb;
1400 *(int8_t *)(d + i) = dd;
1401 }
1402 clear_high(d, oprsz, desc);
1403 }
1404
HELPER(gvec_smax16)1405 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1406 {
1407 intptr_t oprsz = simd_oprsz(desc);
1408 intptr_t i;
1409
1410 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1411 int16_t aa = *(int16_t *)(a + i);
1412 int16_t bb = *(int16_t *)(b + i);
1413 int16_t dd = aa > bb ? aa : bb;
1414 *(int16_t *)(d + i) = dd;
1415 }
1416 clear_high(d, oprsz, desc);
1417 }
1418
HELPER(gvec_smax32)1419 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1420 {
1421 intptr_t oprsz = simd_oprsz(desc);
1422 intptr_t i;
1423
1424 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1425 int32_t aa = *(int32_t *)(a + i);
1426 int32_t bb = *(int32_t *)(b + i);
1427 int32_t dd = aa > bb ? aa : bb;
1428 *(int32_t *)(d + i) = dd;
1429 }
1430 clear_high(d, oprsz, desc);
1431 }
1432
HELPER(gvec_smax64)1433 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1434 {
1435 intptr_t oprsz = simd_oprsz(desc);
1436 intptr_t i;
1437
1438 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1439 int64_t aa = *(int64_t *)(a + i);
1440 int64_t bb = *(int64_t *)(b + i);
1441 int64_t dd = aa > bb ? aa : bb;
1442 *(int64_t *)(d + i) = dd;
1443 }
1444 clear_high(d, oprsz, desc);
1445 }
1446
HELPER(gvec_umin8)1447 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1448 {
1449 intptr_t oprsz = simd_oprsz(desc);
1450 intptr_t i;
1451
1452 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1453 uint8_t aa = *(uint8_t *)(a + i);
1454 uint8_t bb = *(uint8_t *)(b + i);
1455 uint8_t dd = aa < bb ? aa : bb;
1456 *(uint8_t *)(d + i) = dd;
1457 }
1458 clear_high(d, oprsz, desc);
1459 }
1460
HELPER(gvec_umin16)1461 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1462 {
1463 intptr_t oprsz = simd_oprsz(desc);
1464 intptr_t i;
1465
1466 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1467 uint16_t aa = *(uint16_t *)(a + i);
1468 uint16_t bb = *(uint16_t *)(b + i);
1469 uint16_t dd = aa < bb ? aa : bb;
1470 *(uint16_t *)(d + i) = dd;
1471 }
1472 clear_high(d, oprsz, desc);
1473 }
1474
HELPER(gvec_umin32)1475 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1476 {
1477 intptr_t oprsz = simd_oprsz(desc);
1478 intptr_t i;
1479
1480 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1481 uint32_t aa = *(uint32_t *)(a + i);
1482 uint32_t bb = *(uint32_t *)(b + i);
1483 uint32_t dd = aa < bb ? aa : bb;
1484 *(uint32_t *)(d + i) = dd;
1485 }
1486 clear_high(d, oprsz, desc);
1487 }
1488
HELPER(gvec_umin64)1489 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1490 {
1491 intptr_t oprsz = simd_oprsz(desc);
1492 intptr_t i;
1493
1494 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1495 uint64_t aa = *(uint64_t *)(a + i);
1496 uint64_t bb = *(uint64_t *)(b + i);
1497 uint64_t dd = aa < bb ? aa : bb;
1498 *(uint64_t *)(d + i) = dd;
1499 }
1500 clear_high(d, oprsz, desc);
1501 }
1502
HELPER(gvec_umax8)1503 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1504 {
1505 intptr_t oprsz = simd_oprsz(desc);
1506 intptr_t i;
1507
1508 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1509 uint8_t aa = *(uint8_t *)(a + i);
1510 uint8_t bb = *(uint8_t *)(b + i);
1511 uint8_t dd = aa > bb ? aa : bb;
1512 *(uint8_t *)(d + i) = dd;
1513 }
1514 clear_high(d, oprsz, desc);
1515 }
1516
HELPER(gvec_umax16)1517 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1518 {
1519 intptr_t oprsz = simd_oprsz(desc);
1520 intptr_t i;
1521
1522 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1523 uint16_t aa = *(uint16_t *)(a + i);
1524 uint16_t bb = *(uint16_t *)(b + i);
1525 uint16_t dd = aa > bb ? aa : bb;
1526 *(uint16_t *)(d + i) = dd;
1527 }
1528 clear_high(d, oprsz, desc);
1529 }
1530
HELPER(gvec_umax32)1531 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1532 {
1533 intptr_t oprsz = simd_oprsz(desc);
1534 intptr_t i;
1535
1536 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1537 uint32_t aa = *(uint32_t *)(a + i);
1538 uint32_t bb = *(uint32_t *)(b + i);
1539 uint32_t dd = aa > bb ? aa : bb;
1540 *(uint32_t *)(d + i) = dd;
1541 }
1542 clear_high(d, oprsz, desc);
1543 }
1544
HELPER(gvec_umax64)1545 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1546 {
1547 intptr_t oprsz = simd_oprsz(desc);
1548 intptr_t i;
1549
1550 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1551 uint64_t aa = *(uint64_t *)(a + i);
1552 uint64_t bb = *(uint64_t *)(b + i);
1553 uint64_t dd = aa > bb ? aa : bb;
1554 *(uint64_t *)(d + i) = dd;
1555 }
1556 clear_high(d, oprsz, desc);
1557 }
1558
HELPER(gvec_bitsel)1559 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1560 {
1561 intptr_t oprsz = simd_oprsz(desc);
1562 intptr_t i;
1563
1564 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1565 uint64_t aa = *(uint64_t *)(a + i);
1566 uint64_t bb = *(uint64_t *)(b + i);
1567 uint64_t cc = *(uint64_t *)(c + i);
1568 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1569 }
1570 clear_high(d, oprsz, desc);
1571 }
1572