1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef JS_SIMULATOR_ARM64
28
29 #include <cmath>
30
31 #include "jit/arm64/vixl/Simulator-vixl.h"
32
33 namespace vixl {
34
FPDefaultNaN()35 template<> double Simulator::FPDefaultNaN<double>() {
36 return kFP64DefaultNaN;
37 }
38
39
FPDefaultNaN()40 template<> float Simulator::FPDefaultNaN<float>() {
41 return kFP32DefaultNaN;
42 }
43
44 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)45 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
46 uint64_t mantissa, FPRounding round_mode) {
47 int64_t bits =
48 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
49 exponent,
50 mantissa,
51 round_mode);
52 return rawbits_to_double(bits);
53 }
54
55
56 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)57 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
58 uint64_t mantissa, FPRounding round_mode) {
59 int32_t bits =
60 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
61 exponent,
62 mantissa,
63 round_mode);
64 return rawbits_to_float(bits);
65 }
66
67
68 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)69 static inline float16 FPRoundToFloat16(int64_t sign,
70 int64_t exponent,
71 uint64_t mantissa,
72 FPRounding round_mode) {
73 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
74 sign, exponent, mantissa, round_mode);
75 }
76
77
FixedToDouble(int64_t src,int fbits,FPRounding round)78 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
79 if (src >= 0) {
80 return UFixedToDouble(src, fbits, round);
81 } else {
82 // This works for all negative values, including INT64_MIN.
83 return -UFixedToDouble(-src, fbits, round);
84 }
85 }
86
87
UFixedToDouble(uint64_t src,int fbits,FPRounding round)88 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
89 // An input of 0 is a special case because the result is effectively
90 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
91 if (src == 0) {
92 return 0.0;
93 }
94
95 // Calculate the exponent. The highest significant bit will have the value
96 // 2^exponent.
97 const int highest_significant_bit = 63 - CountLeadingZeros(src);
98 const int64_t exponent = highest_significant_bit - fbits;
99
100 return FPRoundToDouble(0, exponent, src, round);
101 }
102
103
FixedToFloat(int64_t src,int fbits,FPRounding round)104 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
105 if (src >= 0) {
106 return UFixedToFloat(src, fbits, round);
107 } else {
108 // This works for all negative values, including INT64_MIN.
109 return -UFixedToFloat(-src, fbits, round);
110 }
111 }
112
113
UFixedToFloat(uint64_t src,int fbits,FPRounding round)114 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
115 // An input of 0 is a special case because the result is effectively
116 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
117 if (src == 0) {
118 return 0.0f;
119 }
120
121 // Calculate the exponent. The highest significant bit will have the value
122 // 2^exponent.
123 const int highest_significant_bit = 63 - CountLeadingZeros(src);
124 const int32_t exponent = highest_significant_bit - fbits;
125
126 return FPRoundToFloat(0, exponent, src, round);
127 }
128
129
FPToDouble(float value)130 double Simulator::FPToDouble(float value) {
131 switch (std::fpclassify(value)) {
132 case FP_NAN: {
133 if (IsSignallingNaN(value)) {
134 FPProcessException();
135 }
136 if (DN()) return kFP64DefaultNaN;
137
138 // Convert NaNs as the processor would:
139 // - The sign is propagated.
140 // - The payload (mantissa) is transferred entirely, except that the top
141 // bit is forced to '1', making the result a quiet NaN. The unused
142 // (low-order) payload bits are set to 0.
143 uint32_t raw = float_to_rawbits(value);
144
145 uint64_t sign = raw >> 31;
146 uint64_t exponent = (1 << 11) - 1;
147 uint64_t payload = unsigned_bitextract_64(21, 0, raw);
148 payload <<= (52 - 23); // The unused low-order bits should be 0.
149 payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
150
151 return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
152 }
153
154 case FP_ZERO:
155 case FP_NORMAL:
156 case FP_SUBNORMAL:
157 case FP_INFINITE: {
158 // All other inputs are preserved in a standard cast, because every value
159 // representable using an IEEE-754 float is also representable using an
160 // IEEE-754 double.
161 return static_cast<double>(value);
162 }
163 }
164
165 VIXL_UNREACHABLE();
166 return static_cast<double>(value);
167 }
168
169
FPToFloat(float16 value)170 float Simulator::FPToFloat(float16 value) {
171 uint32_t sign = value >> 15;
172 uint32_t exponent = unsigned_bitextract_32(
173 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
174 value);
175 uint32_t mantissa = unsigned_bitextract_32(
176 kFloat16MantissaBits - 1, 0, value);
177
178 switch (float16classify(value)) {
179 case FP_ZERO:
180 return (sign == 0) ? 0.0f : -0.0f;
181
182 case FP_INFINITE:
183 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
184
185 case FP_SUBNORMAL: {
186 // Calculate shift required to put mantissa into the most-significant bits
187 // of the destination mantissa.
188 int shift = CountLeadingZeros(mantissa << (32 - 10));
189
190 // Shift mantissa and discard implicit '1'.
191 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
192 mantissa &= (1 << kFloatMantissaBits) - 1;
193
194 // Adjust the exponent for the shift applied, and rebias.
195 exponent = exponent - shift + (-15 + 127);
196 break;
197 }
198
199 case FP_NAN:
200 if (IsSignallingNaN(value)) {
201 FPProcessException();
202 }
203 if (DN()) return kFP32DefaultNaN;
204
205 // Convert NaNs as the processor would:
206 // - The sign is propagated.
207 // - The payload (mantissa) is transferred entirely, except that the top
208 // bit is forced to '1', making the result a quiet NaN. The unused
209 // (low-order) payload bits are set to 0.
210 exponent = (1 << kFloatExponentBits) - 1;
211
212 // Increase bits in mantissa, making low-order bits 0.
213 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
214 mantissa |= 1 << 22; // Force a quiet NaN.
215 break;
216
217 case FP_NORMAL:
218 // Increase bits in mantissa, making low-order bits 0.
219 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
220
221 // Change exponent bias.
222 exponent += (-15 + 127);
223 break;
224
225 default: VIXL_UNREACHABLE();
226 }
227 return rawbits_to_float((sign << 31) |
228 (exponent << kFloatMantissaBits) |
229 mantissa);
230 }
231
232
FPToFloat16(float value,FPRounding round_mode)233 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
234 // Only the FPTieEven rounding mode is implemented.
235 VIXL_ASSERT(round_mode == FPTieEven);
236 USE(round_mode);
237
238 uint32_t raw = float_to_rawbits(value);
239 int32_t sign = raw >> 31;
240 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
241 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
242
243 switch (std::fpclassify(value)) {
244 case FP_NAN: {
245 if (IsSignallingNaN(value)) {
246 FPProcessException();
247 }
248 if (DN()) return kFP16DefaultNaN;
249
250 // Convert NaNs as the processor would:
251 // - The sign is propagated.
252 // - The payload (mantissa) is transferred as much as possible, except
253 // that the top bit is forced to '1', making the result a quiet NaN.
254 float16 result = (sign == 0) ? kFP16PositiveInfinity
255 : kFP16NegativeInfinity;
256 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
257 result |= (1 << 9); // Force a quiet NaN;
258 return result;
259 }
260
261 case FP_ZERO:
262 return (sign == 0) ? 0 : 0x8000;
263
264 case FP_INFINITE:
265 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
266
267 case FP_NORMAL:
268 case FP_SUBNORMAL: {
269 // Convert float-to-half as the processor would, assuming that FPCR.FZ
270 // (flush-to-zero) is not set.
271
272 // Add the implicit '1' bit to the mantissa.
273 mantissa += (1 << 23);
274 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
275 }
276 }
277
278 VIXL_UNREACHABLE();
279 return 0;
280 }
281
282
FPToFloat16(double value,FPRounding round_mode)283 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
284 // Only the FPTieEven rounding mode is implemented.
285 VIXL_ASSERT(round_mode == FPTieEven);
286 USE(round_mode);
287
288 uint64_t raw = double_to_rawbits(value);
289 int32_t sign = raw >> 63;
290 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
291 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
292
293 switch (std::fpclassify(value)) {
294 case FP_NAN: {
295 if (IsSignallingNaN(value)) {
296 FPProcessException();
297 }
298 if (DN()) return kFP16DefaultNaN;
299
300 // Convert NaNs as the processor would:
301 // - The sign is propagated.
302 // - The payload (mantissa) is transferred as much as possible, except
303 // that the top bit is forced to '1', making the result a quiet NaN.
304 float16 result = (sign == 0) ? kFP16PositiveInfinity
305 : kFP16NegativeInfinity;
306 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
307 result |= (1 << 9); // Force a quiet NaN;
308 return result;
309 }
310
311 case FP_ZERO:
312 return (sign == 0) ? 0 : 0x8000;
313
314 case FP_INFINITE:
315 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
316
317 case FP_NORMAL:
318 case FP_SUBNORMAL: {
319 // Convert double-to-half as the processor would, assuming that FPCR.FZ
320 // (flush-to-zero) is not set.
321
322 // Add the implicit '1' bit to the mantissa.
323 mantissa += (UINT64_C(1) << 52);
324 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
325 }
326 }
327
328 VIXL_UNREACHABLE();
329 return 0;
330 }
331
332
FPToFloat(double value,FPRounding round_mode)333 float Simulator::FPToFloat(double value, FPRounding round_mode) {
334 // Only the FPTieEven rounding mode is implemented.
335 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
336 USE(round_mode);
337
338 switch (std::fpclassify(value)) {
339 case FP_NAN: {
340 if (IsSignallingNaN(value)) {
341 FPProcessException();
342 }
343 if (DN()) return kFP32DefaultNaN;
344
345 // Convert NaNs as the processor would:
346 // - The sign is propagated.
347 // - The payload (mantissa) is transferred as much as possible, except
348 // that the top bit is forced to '1', making the result a quiet NaN.
349 uint64_t raw = double_to_rawbits(value);
350
351 uint32_t sign = raw >> 63;
352 uint32_t exponent = (1 << 8) - 1;
353 uint32_t payload =
354 static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw));
355 payload |= (1 << 22); // Force a quiet NaN.
356
357 return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
358 }
359
360 case FP_ZERO:
361 case FP_INFINITE: {
362 // In a C++ cast, any value representable in the target type will be
363 // unchanged. This is always the case for +/-0.0 and infinities.
364 return static_cast<float>(value);
365 }
366
367 case FP_NORMAL:
368 case FP_SUBNORMAL: {
369 // Convert double-to-float as the processor would, assuming that FPCR.FZ
370 // (flush-to-zero) is not set.
371 uint64_t raw = double_to_rawbits(value);
372 // Extract the IEEE-754 double components.
373 uint32_t sign = raw >> 63;
374 // Extract the exponent and remove the IEEE-754 encoding bias.
375 int32_t exponent =
376 static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023;
377 // Extract the mantissa and add the implicit '1' bit.
378 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
379 if (std::fpclassify(value) == FP_NORMAL) {
380 mantissa |= (UINT64_C(1) << 52);
381 }
382 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
383 }
384 }
385
386 VIXL_UNREACHABLE();
387 return value;
388 }
389
390
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)391 void Simulator::ld1(VectorFormat vform,
392 LogicVRegister dst,
393 uint64_t addr) {
394 dst.ClearForWrite(vform);
395 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
396 dst.ReadUintFromMem(vform, i, addr);
397 addr += LaneSizeInBytesFromFormat(vform);
398 }
399 }
400
401
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)402 void Simulator::ld1(VectorFormat vform,
403 LogicVRegister dst,
404 int index,
405 uint64_t addr) {
406 dst.ReadUintFromMem(vform, index, addr);
407 }
408
409
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)410 void Simulator::ld1r(VectorFormat vform,
411 LogicVRegister dst,
412 uint64_t addr) {
413 dst.ClearForWrite(vform);
414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
415 dst.ReadUintFromMem(vform, i, addr);
416 }
417 }
418
419
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)420 void Simulator::ld2(VectorFormat vform,
421 LogicVRegister dst1,
422 LogicVRegister dst2,
423 uint64_t addr1) {
424 dst1.ClearForWrite(vform);
425 dst2.ClearForWrite(vform);
426 int esize = LaneSizeInBytesFromFormat(vform);
427 uint64_t addr2 = addr1 + esize;
428 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
429 dst1.ReadUintFromMem(vform, i, addr1);
430 dst2.ReadUintFromMem(vform, i, addr2);
431 addr1 += 2 * esize;
432 addr2 += 2 * esize;
433 }
434 }
435
436
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)437 void Simulator::ld2(VectorFormat vform,
438 LogicVRegister dst1,
439 LogicVRegister dst2,
440 int index,
441 uint64_t addr1) {
442 dst1.ClearForWrite(vform);
443 dst2.ClearForWrite(vform);
444 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
445 dst1.ReadUintFromMem(vform, index, addr1);
446 dst2.ReadUintFromMem(vform, index, addr2);
447 }
448
449
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)450 void Simulator::ld2r(VectorFormat vform,
451 LogicVRegister dst1,
452 LogicVRegister dst2,
453 uint64_t addr) {
454 dst1.ClearForWrite(vform);
455 dst2.ClearForWrite(vform);
456 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
457 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
458 dst1.ReadUintFromMem(vform, i, addr);
459 dst2.ReadUintFromMem(vform, i, addr2);
460 }
461 }
462
463
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)464 void Simulator::ld3(VectorFormat vform,
465 LogicVRegister dst1,
466 LogicVRegister dst2,
467 LogicVRegister dst3,
468 uint64_t addr1) {
469 dst1.ClearForWrite(vform);
470 dst2.ClearForWrite(vform);
471 dst3.ClearForWrite(vform);
472 int esize = LaneSizeInBytesFromFormat(vform);
473 uint64_t addr2 = addr1 + esize;
474 uint64_t addr3 = addr2 + esize;
475 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
476 dst1.ReadUintFromMem(vform, i, addr1);
477 dst2.ReadUintFromMem(vform, i, addr2);
478 dst3.ReadUintFromMem(vform, i, addr3);
479 addr1 += 3 * esize;
480 addr2 += 3 * esize;
481 addr3 += 3 * esize;
482 }
483 }
484
485
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)486 void Simulator::ld3(VectorFormat vform,
487 LogicVRegister dst1,
488 LogicVRegister dst2,
489 LogicVRegister dst3,
490 int index,
491 uint64_t addr1) {
492 dst1.ClearForWrite(vform);
493 dst2.ClearForWrite(vform);
494 dst3.ClearForWrite(vform);
495 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
496 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
497 dst1.ReadUintFromMem(vform, index, addr1);
498 dst2.ReadUintFromMem(vform, index, addr2);
499 dst3.ReadUintFromMem(vform, index, addr3);
500 }
501
502
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)503 void Simulator::ld3r(VectorFormat vform,
504 LogicVRegister dst1,
505 LogicVRegister dst2,
506 LogicVRegister dst3,
507 uint64_t addr) {
508 dst1.ClearForWrite(vform);
509 dst2.ClearForWrite(vform);
510 dst3.ClearForWrite(vform);
511 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
512 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
513 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
514 dst1.ReadUintFromMem(vform, i, addr);
515 dst2.ReadUintFromMem(vform, i, addr2);
516 dst3.ReadUintFromMem(vform, i, addr3);
517 }
518 }
519
520
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)521 void Simulator::ld4(VectorFormat vform,
522 LogicVRegister dst1,
523 LogicVRegister dst2,
524 LogicVRegister dst3,
525 LogicVRegister dst4,
526 uint64_t addr1) {
527 dst1.ClearForWrite(vform);
528 dst2.ClearForWrite(vform);
529 dst3.ClearForWrite(vform);
530 dst4.ClearForWrite(vform);
531 int esize = LaneSizeInBytesFromFormat(vform);
532 uint64_t addr2 = addr1 + esize;
533 uint64_t addr3 = addr2 + esize;
534 uint64_t addr4 = addr3 + esize;
535 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
536 dst1.ReadUintFromMem(vform, i, addr1);
537 dst2.ReadUintFromMem(vform, i, addr2);
538 dst3.ReadUintFromMem(vform, i, addr3);
539 dst4.ReadUintFromMem(vform, i, addr4);
540 addr1 += 4 * esize;
541 addr2 += 4 * esize;
542 addr3 += 4 * esize;
543 addr4 += 4 * esize;
544 }
545 }
546
547
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)548 void Simulator::ld4(VectorFormat vform,
549 LogicVRegister dst1,
550 LogicVRegister dst2,
551 LogicVRegister dst3,
552 LogicVRegister dst4,
553 int index,
554 uint64_t addr1) {
555 dst1.ClearForWrite(vform);
556 dst2.ClearForWrite(vform);
557 dst3.ClearForWrite(vform);
558 dst4.ClearForWrite(vform);
559 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
560 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
561 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
562 dst1.ReadUintFromMem(vform, index, addr1);
563 dst2.ReadUintFromMem(vform, index, addr2);
564 dst3.ReadUintFromMem(vform, index, addr3);
565 dst4.ReadUintFromMem(vform, index, addr4);
566 }
567
568
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)569 void Simulator::ld4r(VectorFormat vform,
570 LogicVRegister dst1,
571 LogicVRegister dst2,
572 LogicVRegister dst3,
573 LogicVRegister dst4,
574 uint64_t addr) {
575 dst1.ClearForWrite(vform);
576 dst2.ClearForWrite(vform);
577 dst3.ClearForWrite(vform);
578 dst4.ClearForWrite(vform);
579 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
580 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
581 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
582 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
583 dst1.ReadUintFromMem(vform, i, addr);
584 dst2.ReadUintFromMem(vform, i, addr2);
585 dst3.ReadUintFromMem(vform, i, addr3);
586 dst4.ReadUintFromMem(vform, i, addr4);
587 }
588 }
589
590
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)591 void Simulator::st1(VectorFormat vform,
592 LogicVRegister src,
593 uint64_t addr) {
594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
595 src.WriteUintToMem(vform, i, addr);
596 addr += LaneSizeInBytesFromFormat(vform);
597 }
598 }
599
600
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)601 void Simulator::st1(VectorFormat vform,
602 LogicVRegister src,
603 int index,
604 uint64_t addr) {
605 src.WriteUintToMem(vform, index, addr);
606 }
607
608
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)609 void Simulator::st2(VectorFormat vform,
610 LogicVRegister dst,
611 LogicVRegister dst2,
612 uint64_t addr) {
613 int esize = LaneSizeInBytesFromFormat(vform);
614 uint64_t addr2 = addr + esize;
615 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
616 dst.WriteUintToMem(vform, i, addr);
617 dst2.WriteUintToMem(vform, i, addr2);
618 addr += 2 * esize;
619 addr2 += 2 * esize;
620 }
621 }
622
623
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)624 void Simulator::st2(VectorFormat vform,
625 LogicVRegister dst,
626 LogicVRegister dst2,
627 int index,
628 uint64_t addr) {
629 int esize = LaneSizeInBytesFromFormat(vform);
630 dst.WriteUintToMem(vform, index, addr);
631 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
632 }
633
634
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)635 void Simulator::st3(VectorFormat vform,
636 LogicVRegister dst,
637 LogicVRegister dst2,
638 LogicVRegister dst3,
639 uint64_t addr) {
640 int esize = LaneSizeInBytesFromFormat(vform);
641 uint64_t addr2 = addr + esize;
642 uint64_t addr3 = addr2 + esize;
643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
644 dst.WriteUintToMem(vform, i, addr);
645 dst2.WriteUintToMem(vform, i, addr2);
646 dst3.WriteUintToMem(vform, i, addr3);
647 addr += 3 * esize;
648 addr2 += 3 * esize;
649 addr3 += 3 * esize;
650 }
651 }
652
653
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)654 void Simulator::st3(VectorFormat vform,
655 LogicVRegister dst,
656 LogicVRegister dst2,
657 LogicVRegister dst3,
658 int index,
659 uint64_t addr) {
660 int esize = LaneSizeInBytesFromFormat(vform);
661 dst.WriteUintToMem(vform, index, addr);
662 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
663 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
664 }
665
666
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)667 void Simulator::st4(VectorFormat vform,
668 LogicVRegister dst,
669 LogicVRegister dst2,
670 LogicVRegister dst3,
671 LogicVRegister dst4,
672 uint64_t addr) {
673 int esize = LaneSizeInBytesFromFormat(vform);
674 uint64_t addr2 = addr + esize;
675 uint64_t addr3 = addr2 + esize;
676 uint64_t addr4 = addr3 + esize;
677 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
678 dst.WriteUintToMem(vform, i, addr);
679 dst2.WriteUintToMem(vform, i, addr2);
680 dst3.WriteUintToMem(vform, i, addr3);
681 dst4.WriteUintToMem(vform, i, addr4);
682 addr += 4 * esize;
683 addr2 += 4 * esize;
684 addr3 += 4 * esize;
685 addr4 += 4 * esize;
686 }
687 }
688
689
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)690 void Simulator::st4(VectorFormat vform,
691 LogicVRegister dst,
692 LogicVRegister dst2,
693 LogicVRegister dst3,
694 LogicVRegister dst4,
695 int index,
696 uint64_t addr) {
697 int esize = LaneSizeInBytesFromFormat(vform);
698 dst.WriteUintToMem(vform, index, addr);
699 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
700 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
701 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
702 }
703
704
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)705 LogicVRegister Simulator::cmp(VectorFormat vform,
706 LogicVRegister dst,
707 const LogicVRegister& src1,
708 const LogicVRegister& src2,
709 Condition cond) {
710 dst.ClearForWrite(vform);
711 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
712 int64_t sa = src1.Int(vform, i);
713 int64_t sb = src2.Int(vform, i);
714 uint64_t ua = src1.Uint(vform, i);
715 uint64_t ub = src2.Uint(vform, i);
716 bool result = false;
717 switch (cond) {
718 case eq: result = (ua == ub); break;
719 case ge: result = (sa >= sb); break;
720 case gt: result = (sa > sb) ; break;
721 case hi: result = (ua > ub) ; break;
722 case hs: result = (ua >= ub); break;
723 case lt: result = (sa < sb) ; break;
724 case le: result = (sa <= sb); break;
725 default: VIXL_UNREACHABLE(); break;
726 }
727 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
728 }
729 return dst;
730 }
731
732
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)733 LogicVRegister Simulator::cmp(VectorFormat vform,
734 LogicVRegister dst,
735 const LogicVRegister& src1,
736 int imm,
737 Condition cond) {
738 SimVRegister temp;
739 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
740 return cmp(vform, dst, src1, imm_reg, cond);
741 }
742
743
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)744 LogicVRegister Simulator::cmptst(VectorFormat vform,
745 LogicVRegister dst,
746 const LogicVRegister& src1,
747 const LogicVRegister& src2) {
748 dst.ClearForWrite(vform);
749 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
750 uint64_t ua = src1.Uint(vform, i);
751 uint64_t ub = src2.Uint(vform, i);
752 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
753 }
754 return dst;
755 }
756
757
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)758 LogicVRegister Simulator::add(VectorFormat vform,
759 LogicVRegister dst,
760 const LogicVRegister& src1,
761 const LogicVRegister& src2) {
762 dst.ClearForWrite(vform);
763 // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
764 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
765 // Test for unsigned saturation.
766 uint64_t ua = src1.UintLeftJustified(vform, i);
767 uint64_t ub = src2.UintLeftJustified(vform, i);
768 uint64_t ur = ua + ub;
769 if (ur < ua) {
770 dst.SetUnsignedSat(i, true);
771 }
772
773 // Test for signed saturation.
774 int64_t sa = src1.IntLeftJustified(vform, i);
775 int64_t sb = src2.IntLeftJustified(vform, i);
776 int64_t sr = sa + sb;
777 // If the signs of the operands are the same, but different from the result,
778 // there was an overflow.
779 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
780 dst.SetSignedSat(i, sa >= 0);
781 }
782
783 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
784 }
785 return dst;
786 }
787
788
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)789 LogicVRegister Simulator::addp(VectorFormat vform,
790 LogicVRegister dst,
791 const LogicVRegister& src1,
792 const LogicVRegister& src2) {
793 SimVRegister temp1, temp2;
794 uzp1(vform, temp1, src1, src2);
795 uzp2(vform, temp2, src1, src2);
796 add(vform, dst, temp1, temp2);
797 return dst;
798 }
799
800
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)801 LogicVRegister Simulator::mla(VectorFormat vform,
802 LogicVRegister dst,
803 const LogicVRegister& src1,
804 const LogicVRegister& src2) {
805 SimVRegister temp;
806 mul(vform, temp, src1, src2);
807 add(vform, dst, dst, temp);
808 return dst;
809 }
810
811
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)812 LogicVRegister Simulator::mls(VectorFormat vform,
813 LogicVRegister dst,
814 const LogicVRegister& src1,
815 const LogicVRegister& src2) {
816 SimVRegister temp;
817 mul(vform, temp, src1, src2);
818 sub(vform, dst, dst, temp);
819 return dst;
820 }
821
822
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)823 LogicVRegister Simulator::mul(VectorFormat vform,
824 LogicVRegister dst,
825 const LogicVRegister& src1,
826 const LogicVRegister& src2) {
827 dst.ClearForWrite(vform);
828 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
829 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
830 }
831 return dst;
832 }
833
834
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)835 LogicVRegister Simulator::mul(VectorFormat vform,
836 LogicVRegister dst,
837 const LogicVRegister& src1,
838 const LogicVRegister& src2,
839 int index) {
840 SimVRegister temp;
841 VectorFormat indexform = VectorFormatFillQ(vform);
842 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
843 }
844
845
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)846 LogicVRegister Simulator::mla(VectorFormat vform,
847 LogicVRegister dst,
848 const LogicVRegister& src1,
849 const LogicVRegister& src2,
850 int index) {
851 SimVRegister temp;
852 VectorFormat indexform = VectorFormatFillQ(vform);
853 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
854 }
855
856
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)857 LogicVRegister Simulator::mls(VectorFormat vform,
858 LogicVRegister dst,
859 const LogicVRegister& src1,
860 const LogicVRegister& src2,
861 int index) {
862 SimVRegister temp;
863 VectorFormat indexform = VectorFormatFillQ(vform);
864 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
865 }
866
867
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)868 LogicVRegister Simulator::smull(VectorFormat vform,
869 LogicVRegister dst,
870 const LogicVRegister& src1,
871 const LogicVRegister& src2,
872 int index) {
873 SimVRegister temp;
874 VectorFormat indexform =
875 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
876 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
877 }
878
879
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)880 LogicVRegister Simulator::smull2(VectorFormat vform,
881 LogicVRegister dst,
882 const LogicVRegister& src1,
883 const LogicVRegister& src2,
884 int index) {
885 SimVRegister temp;
886 VectorFormat indexform =
887 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
888 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
889 }
890
891
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)892 LogicVRegister Simulator::umull(VectorFormat vform,
893 LogicVRegister dst,
894 const LogicVRegister& src1,
895 const LogicVRegister& src2,
896 int index) {
897 SimVRegister temp;
898 VectorFormat indexform =
899 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
900 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
901 }
902
903
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)904 LogicVRegister Simulator::umull2(VectorFormat vform,
905 LogicVRegister dst,
906 const LogicVRegister& src1,
907 const LogicVRegister& src2,
908 int index) {
909 SimVRegister temp;
910 VectorFormat indexform =
911 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
912 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
913 }
914
915
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)916 LogicVRegister Simulator::smlal(VectorFormat vform,
917 LogicVRegister dst,
918 const LogicVRegister& src1,
919 const LogicVRegister& src2,
920 int index) {
921 SimVRegister temp;
922 VectorFormat indexform =
923 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
924 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
925 }
926
927
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)928 LogicVRegister Simulator::smlal2(VectorFormat vform,
929 LogicVRegister dst,
930 const LogicVRegister& src1,
931 const LogicVRegister& src2,
932 int index) {
933 SimVRegister temp;
934 VectorFormat indexform =
935 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
936 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
937 }
938
939
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)940 LogicVRegister Simulator::umlal(VectorFormat vform,
941 LogicVRegister dst,
942 const LogicVRegister& src1,
943 const LogicVRegister& src2,
944 int index) {
945 SimVRegister temp;
946 VectorFormat indexform =
947 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
948 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
949 }
950
951
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)952 LogicVRegister Simulator::umlal2(VectorFormat vform,
953 LogicVRegister dst,
954 const LogicVRegister& src1,
955 const LogicVRegister& src2,
956 int index) {
957 SimVRegister temp;
958 VectorFormat indexform =
959 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
960 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
961 }
962
963
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)964 LogicVRegister Simulator::smlsl(VectorFormat vform,
965 LogicVRegister dst,
966 const LogicVRegister& src1,
967 const LogicVRegister& src2,
968 int index) {
969 SimVRegister temp;
970 VectorFormat indexform =
971 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
972 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
973 }
974
975
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)976 LogicVRegister Simulator::smlsl2(VectorFormat vform,
977 LogicVRegister dst,
978 const LogicVRegister& src1,
979 const LogicVRegister& src2,
980 int index) {
981 SimVRegister temp;
982 VectorFormat indexform =
983 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
984 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
985 }
986
987
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)988 LogicVRegister Simulator::umlsl(VectorFormat vform,
989 LogicVRegister dst,
990 const LogicVRegister& src1,
991 const LogicVRegister& src2,
992 int index) {
993 SimVRegister temp;
994 VectorFormat indexform =
995 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
996 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
997 }
998
999
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1000 LogicVRegister Simulator::umlsl2(VectorFormat vform,
1001 LogicVRegister dst,
1002 const LogicVRegister& src1,
1003 const LogicVRegister& src2,
1004 int index) {
1005 SimVRegister temp;
1006 VectorFormat indexform =
1007 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1008 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1009 }
1010
1011
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1012 LogicVRegister Simulator::sqdmull(VectorFormat vform,
1013 LogicVRegister dst,
1014 const LogicVRegister& src1,
1015 const LogicVRegister& src2,
1016 int index) {
1017 SimVRegister temp;
1018 VectorFormat indexform =
1019 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1020 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1021 }
1022
1023
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1024 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1025 LogicVRegister dst,
1026 const LogicVRegister& src1,
1027 const LogicVRegister& src2,
1028 int index) {
1029 SimVRegister temp;
1030 VectorFormat indexform =
1031 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1032 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1033 }
1034
1035
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1036 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1037 LogicVRegister dst,
1038 const LogicVRegister& src1,
1039 const LogicVRegister& src2,
1040 int index) {
1041 SimVRegister temp;
1042 VectorFormat indexform =
1043 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1044 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1045 }
1046
1047
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1048 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1049 LogicVRegister dst,
1050 const LogicVRegister& src1,
1051 const LogicVRegister& src2,
1052 int index) {
1053 SimVRegister temp;
1054 VectorFormat indexform =
1055 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1056 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1057 }
1058
1059
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1060 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1061 LogicVRegister dst,
1062 const LogicVRegister& src1,
1063 const LogicVRegister& src2,
1064 int index) {
1065 SimVRegister temp;
1066 VectorFormat indexform =
1067 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1068 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1069 }
1070
1071
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1072 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1073 LogicVRegister dst,
1074 const LogicVRegister& src1,
1075 const LogicVRegister& src2,
1076 int index) {
1077 SimVRegister temp;
1078 VectorFormat indexform =
1079 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1080 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1081 }
1082
1083
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1084 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1085 LogicVRegister dst,
1086 const LogicVRegister& src1,
1087 const LogicVRegister& src2,
1088 int index) {
1089 SimVRegister temp;
1090 VectorFormat indexform = VectorFormatFillQ(vform);
1091 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1092 }
1093
1094
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1095 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1096 LogicVRegister dst,
1097 const LogicVRegister& src1,
1098 const LogicVRegister& src2,
1099 int index) {
1100 SimVRegister temp;
1101 VectorFormat indexform = VectorFormatFillQ(vform);
1102 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1103 }
1104
1105
PolynomialMult(uint8_t op1,uint8_t op2)1106 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
1107 uint16_t result = 0;
1108 uint16_t extended_op2 = op2;
1109 for (int i = 0; i < 8; ++i) {
1110 if ((op1 >> i) & 1) {
1111 result = result ^ (extended_op2 << i);
1112 }
1113 }
1114 return result;
1115 }
1116
1117
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1118 LogicVRegister Simulator::pmul(VectorFormat vform,
1119 LogicVRegister dst,
1120 const LogicVRegister& src1,
1121 const LogicVRegister& src2) {
1122 dst.ClearForWrite(vform);
1123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1124 dst.SetUint(vform, i,
1125 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1126 }
1127 return dst;
1128 }
1129
1130
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1131 LogicVRegister Simulator::pmull(VectorFormat vform,
1132 LogicVRegister dst,
1133 const LogicVRegister& src1,
1134 const LogicVRegister& src2) {
1135 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1136 dst.ClearForWrite(vform);
1137 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1138 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
1139 src2.Uint(vform_src, i)));
1140 }
1141 return dst;
1142 }
1143
1144
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1145 LogicVRegister Simulator::pmull2(VectorFormat vform,
1146 LogicVRegister dst,
1147 const LogicVRegister& src1,
1148 const LogicVRegister& src2) {
1149 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1150 dst.ClearForWrite(vform);
1151 int lane_count = LaneCountFromFormat(vform);
1152 for (int i = 0; i < lane_count; i++) {
1153 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
1154 src2.Uint(vform_src, lane_count + i)));
1155 }
1156 return dst;
1157 }
1158
1159
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1160 LogicVRegister Simulator::sub(VectorFormat vform,
1161 LogicVRegister dst,
1162 const LogicVRegister& src1,
1163 const LogicVRegister& src2) {
1164 dst.ClearForWrite(vform);
1165 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1166 // Test for unsigned saturation.
1167 if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1168 dst.SetUnsignedSat(i, false);
1169 }
1170
1171 // Test for signed saturation.
1172 int64_t sa = src1.IntLeftJustified(vform, i);
1173 int64_t sb = src2.IntLeftJustified(vform, i);
1174 int64_t sr = sa - sb;
1175 // If the signs of the operands are different, and the sign of the first
1176 // operand doesn't match the result, there was an overflow.
1177 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1178 dst.SetSignedSat(i, sr < 0);
1179 }
1180
1181 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1182 }
1183 return dst;
1184 }
1185
1186
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1187 LogicVRegister Simulator::and_(VectorFormat vform,
1188 LogicVRegister dst,
1189 const LogicVRegister& src1,
1190 const LogicVRegister& src2) {
1191 dst.ClearForWrite(vform);
1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1194 }
1195 return dst;
1196 }
1197
1198
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1199 LogicVRegister Simulator::orr(VectorFormat vform,
1200 LogicVRegister dst,
1201 const LogicVRegister& src1,
1202 const LogicVRegister& src2) {
1203 dst.ClearForWrite(vform);
1204 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1205 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1206 }
1207 return dst;
1208 }
1209
1210
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1211 LogicVRegister Simulator::orn(VectorFormat vform,
1212 LogicVRegister dst,
1213 const LogicVRegister& src1,
1214 const LogicVRegister& src2) {
1215 dst.ClearForWrite(vform);
1216 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1217 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1218 }
1219 return dst;
1220 }
1221
1222
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1223 LogicVRegister Simulator::eor(VectorFormat vform,
1224 LogicVRegister dst,
1225 const LogicVRegister& src1,
1226 const LogicVRegister& src2) {
1227 dst.ClearForWrite(vform);
1228 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1229 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1230 }
1231 return dst;
1232 }
1233
1234
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1235 LogicVRegister Simulator::bic(VectorFormat vform,
1236 LogicVRegister dst,
1237 const LogicVRegister& src1,
1238 const LogicVRegister& src2) {
1239 dst.ClearForWrite(vform);
1240 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1241 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1242 }
1243 return dst;
1244 }
1245
1246
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1247 LogicVRegister Simulator::bic(VectorFormat vform,
1248 LogicVRegister dst,
1249 const LogicVRegister& src,
1250 uint64_t imm) {
1251 uint64_t result[16];
1252 int laneCount = LaneCountFromFormat(vform);
1253 for (int i = 0; i < laneCount; ++i) {
1254 result[i] = src.Uint(vform, i) & ~imm;
1255 }
1256 dst.ClearForWrite(vform);
1257 for (int i = 0; i < laneCount; ++i) {
1258 dst.SetUint(vform, i, result[i]);
1259 }
1260 return dst;
1261 }
1262
1263
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1264 LogicVRegister Simulator::bif(VectorFormat vform,
1265 LogicVRegister dst,
1266 const LogicVRegister& src1,
1267 const LogicVRegister& src2) {
1268 dst.ClearForWrite(vform);
1269 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1270 uint64_t operand1 = dst.Uint(vform, i);
1271 uint64_t operand2 = ~src2.Uint(vform, i);
1272 uint64_t operand3 = src1.Uint(vform, i);
1273 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1274 dst.SetUint(vform, i, result);
1275 }
1276 return dst;
1277 }
1278
1279
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1280 LogicVRegister Simulator::bit(VectorFormat vform,
1281 LogicVRegister dst,
1282 const LogicVRegister& src1,
1283 const LogicVRegister& src2) {
1284 dst.ClearForWrite(vform);
1285 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1286 uint64_t operand1 = dst.Uint(vform, i);
1287 uint64_t operand2 = src2.Uint(vform, i);
1288 uint64_t operand3 = src1.Uint(vform, i);
1289 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1290 dst.SetUint(vform, i, result);
1291 }
1292 return dst;
1293 }
1294
1295
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1296 LogicVRegister Simulator::bsl(VectorFormat vform,
1297 LogicVRegister dst,
1298 const LogicVRegister& src1,
1299 const LogicVRegister& src2) {
1300 dst.ClearForWrite(vform);
1301 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1302 uint64_t operand1 = src2.Uint(vform, i);
1303 uint64_t operand2 = dst.Uint(vform, i);
1304 uint64_t operand3 = src1.Uint(vform, i);
1305 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1306 dst.SetUint(vform, i, result);
1307 }
1308 return dst;
1309 }
1310
1311
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1312 LogicVRegister Simulator::sminmax(VectorFormat vform,
1313 LogicVRegister dst,
1314 const LogicVRegister& src1,
1315 const LogicVRegister& src2,
1316 bool max) {
1317 dst.ClearForWrite(vform);
1318 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1319 int64_t src1_val = src1.Int(vform, i);
1320 int64_t src2_val = src2.Int(vform, i);
1321 int64_t dst_val;
1322 if (max == true) {
1323 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1324 } else {
1325 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1326 }
1327 dst.SetInt(vform, i, dst_val);
1328 }
1329 return dst;
1330 }
1331
1332
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1333 LogicVRegister Simulator::smax(VectorFormat vform,
1334 LogicVRegister dst,
1335 const LogicVRegister& src1,
1336 const LogicVRegister& src2) {
1337 return sminmax(vform, dst, src1, src2, true);
1338 }
1339
1340
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1341 LogicVRegister Simulator::smin(VectorFormat vform,
1342 LogicVRegister dst,
1343 const LogicVRegister& src1,
1344 const LogicVRegister& src2) {
1345 return sminmax(vform, dst, src1, src2, false);
1346 }
1347
1348
sminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1349 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1350 LogicVRegister dst,
1351 int dst_index,
1352 const LogicVRegister& src,
1353 bool max) {
1354 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1355 int64_t src1_val = src.Int(vform, i);
1356 int64_t src2_val = src.Int(vform, i + 1);
1357 int64_t dst_val;
1358 if (max == true) {
1359 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1360 } else {
1361 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1362 }
1363 dst.SetInt(vform, dst_index + (i >> 1), dst_val);
1364 }
1365 return dst;
1366 }
1367
1368
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1369 LogicVRegister Simulator::smaxp(VectorFormat vform,
1370 LogicVRegister dst,
1371 const LogicVRegister& src1,
1372 const LogicVRegister& src2) {
1373 dst.ClearForWrite(vform);
1374 sminmaxp(vform, dst, 0, src1, true);
1375 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1376 return dst;
1377 }
1378
1379
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1380 LogicVRegister Simulator::sminp(VectorFormat vform,
1381 LogicVRegister dst,
1382 const LogicVRegister& src1,
1383 const LogicVRegister& src2) {
1384 dst.ClearForWrite(vform);
1385 sminmaxp(vform, dst, 0, src1, false);
1386 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1387 return dst;
1388 }
1389
1390
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1391 LogicVRegister Simulator::addp(VectorFormat vform,
1392 LogicVRegister dst,
1393 const LogicVRegister& src) {
1394 VIXL_ASSERT(vform == kFormatD);
1395
1396 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1397 dst.ClearForWrite(vform);
1398 dst.SetInt(vform, 0, dst_val);
1399 return dst;
1400 }
1401
1402
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1403 LogicVRegister Simulator::addv(VectorFormat vform,
1404 LogicVRegister dst,
1405 const LogicVRegister& src) {
1406 VectorFormat vform_dst
1407 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1408
1409
1410 int64_t dst_val = 0;
1411 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1412 dst_val += src.Int(vform, i);
1413 }
1414
1415 dst.ClearForWrite(vform_dst);
1416 dst.SetInt(vform_dst, 0, dst_val);
1417 return dst;
1418 }
1419
1420
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1421 LogicVRegister Simulator::saddlv(VectorFormat vform,
1422 LogicVRegister dst,
1423 const LogicVRegister& src) {
1424 VectorFormat vform_dst
1425 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1426
1427 int64_t dst_val = 0;
1428 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1429 dst_val += src.Int(vform, i);
1430 }
1431
1432 dst.ClearForWrite(vform_dst);
1433 dst.SetInt(vform_dst, 0, dst_val);
1434 return dst;
1435 }
1436
1437
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1438 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1439 LogicVRegister dst,
1440 const LogicVRegister& src) {
1441 VectorFormat vform_dst
1442 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1443
1444 uint64_t dst_val = 0;
1445 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1446 dst_val += src.Uint(vform, i);
1447 }
1448
1449 dst.ClearForWrite(vform_dst);
1450 dst.SetUint(vform_dst, 0, dst_val);
1451 return dst;
1452 }
1453
1454
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1455 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1456 LogicVRegister dst,
1457 const LogicVRegister& src,
1458 bool max) {
1459 dst.ClearForWrite(vform);
1460 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1461 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1462 dst.SetInt(vform, i, 0);
1463 int64_t src_val = src.Int(vform, i);
1464 if (max == true) {
1465 dst_val = (src_val > dst_val) ? src_val : dst_val;
1466 } else {
1467 dst_val = (src_val < dst_val) ? src_val : dst_val;
1468 }
1469 }
1470 dst.SetInt(vform, 0, dst_val);
1471 return dst;
1472 }
1473
1474
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1475 LogicVRegister Simulator::smaxv(VectorFormat vform,
1476 LogicVRegister dst,
1477 const LogicVRegister& src) {
1478 sminmaxv(vform, dst, src, true);
1479 return dst;
1480 }
1481
1482
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1483 LogicVRegister Simulator::sminv(VectorFormat vform,
1484 LogicVRegister dst,
1485 const LogicVRegister& src) {
1486 sminmaxv(vform, dst, src, false);
1487 return dst;
1488 }
1489
1490
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1491 LogicVRegister Simulator::uminmax(VectorFormat vform,
1492 LogicVRegister dst,
1493 const LogicVRegister& src1,
1494 const LogicVRegister& src2,
1495 bool max) {
1496 dst.ClearForWrite(vform);
1497 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1498 uint64_t src1_val = src1.Uint(vform, i);
1499 uint64_t src2_val = src2.Uint(vform, i);
1500 uint64_t dst_val;
1501 if (max == true) {
1502 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1503 } else {
1504 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1505 }
1506 dst.SetUint(vform, i, dst_val);
1507 }
1508 return dst;
1509 }
1510
1511
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1512 LogicVRegister Simulator::umax(VectorFormat vform,
1513 LogicVRegister dst,
1514 const LogicVRegister& src1,
1515 const LogicVRegister& src2) {
1516 return uminmax(vform, dst, src1, src2, true);
1517 }
1518
1519
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1520 LogicVRegister Simulator::umin(VectorFormat vform,
1521 LogicVRegister dst,
1522 const LogicVRegister& src1,
1523 const LogicVRegister& src2) {
1524 return uminmax(vform, dst, src1, src2, false);
1525 }
1526
1527
uminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1528 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1529 LogicVRegister dst,
1530 int dst_index,
1531 const LogicVRegister& src,
1532 bool max) {
1533 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1534 uint64_t src1_val = src.Uint(vform, i);
1535 uint64_t src2_val = src.Uint(vform, i + 1);
1536 uint64_t dst_val;
1537 if (max == true) {
1538 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1539 } else {
1540 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1541 }
1542 dst.SetUint(vform, dst_index + (i >> 1), dst_val);
1543 }
1544 return dst;
1545 }
1546
1547
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1548 LogicVRegister Simulator::umaxp(VectorFormat vform,
1549 LogicVRegister dst,
1550 const LogicVRegister& src1,
1551 const LogicVRegister& src2) {
1552 dst.ClearForWrite(vform);
1553 uminmaxp(vform, dst, 0, src1, true);
1554 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1555 return dst;
1556 }
1557
1558
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1559 LogicVRegister Simulator::uminp(VectorFormat vform,
1560 LogicVRegister dst,
1561 const LogicVRegister& src1,
1562 const LogicVRegister& src2) {
1563 dst.ClearForWrite(vform);
1564 uminmaxp(vform, dst, 0, src1, false);
1565 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1566 return dst;
1567 }
1568
1569
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1570 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1571 LogicVRegister dst,
1572 const LogicVRegister& src,
1573 bool max) {
1574 dst.ClearForWrite(vform);
1575 uint64_t dst_val = max ? 0 : UINT64_MAX;
1576 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1577 dst.SetUint(vform, i, 0);
1578 uint64_t src_val = src.Uint(vform, i);
1579 if (max == true) {
1580 dst_val = (src_val > dst_val) ? src_val : dst_val;
1581 } else {
1582 dst_val = (src_val < dst_val) ? src_val : dst_val;
1583 }
1584 }
1585 dst.SetUint(vform, 0, dst_val);
1586 return dst;
1587 }
1588
1589
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1590 LogicVRegister Simulator::umaxv(VectorFormat vform,
1591 LogicVRegister dst,
1592 const LogicVRegister& src) {
1593 uminmaxv(vform, dst, src, true);
1594 return dst;
1595 }
1596
1597
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1598 LogicVRegister Simulator::uminv(VectorFormat vform,
1599 LogicVRegister dst,
1600 const LogicVRegister& src) {
1601 uminmaxv(vform, dst, src, false);
1602 return dst;
1603 }
1604
1605
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1606 LogicVRegister Simulator::shl(VectorFormat vform,
1607 LogicVRegister dst,
1608 const LogicVRegister& src,
1609 int shift) {
1610 VIXL_ASSERT(shift >= 0);
1611 SimVRegister temp;
1612 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1613 return ushl(vform, dst, src, shiftreg);
1614 }
1615
1616
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1617 LogicVRegister Simulator::sshll(VectorFormat vform,
1618 LogicVRegister dst,
1619 const LogicVRegister& src,
1620 int shift) {
1621 VIXL_ASSERT(shift >= 0);
1622 SimVRegister temp1, temp2;
1623 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1624 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1625 return sshl(vform, dst, extendedreg, shiftreg);
1626 }
1627
1628
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1629 LogicVRegister Simulator::sshll2(VectorFormat vform,
1630 LogicVRegister dst,
1631 const LogicVRegister& src,
1632 int shift) {
1633 VIXL_ASSERT(shift >= 0);
1634 SimVRegister temp1, temp2;
1635 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1636 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1637 return sshl(vform, dst, extendedreg, shiftreg);
1638 }
1639
1640
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1641 LogicVRegister Simulator::shll(VectorFormat vform,
1642 LogicVRegister dst,
1643 const LogicVRegister& src) {
1644 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1645 return sshll(vform, dst, src, shift);
1646 }
1647
1648
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1649 LogicVRegister Simulator::shll2(VectorFormat vform,
1650 LogicVRegister dst,
1651 const LogicVRegister& src) {
1652 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1653 return sshll2(vform, dst, src, shift);
1654 }
1655
1656
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1657 LogicVRegister Simulator::ushll(VectorFormat vform,
1658 LogicVRegister dst,
1659 const LogicVRegister& src,
1660 int shift) {
1661 VIXL_ASSERT(shift >= 0);
1662 SimVRegister temp1, temp2;
1663 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1664 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1665 return ushl(vform, dst, extendedreg, shiftreg);
1666 }
1667
1668
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1669 LogicVRegister Simulator::ushll2(VectorFormat vform,
1670 LogicVRegister dst,
1671 const LogicVRegister& src,
1672 int shift) {
1673 VIXL_ASSERT(shift >= 0);
1674 SimVRegister temp1, temp2;
1675 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1676 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1677 return ushl(vform, dst, extendedreg, shiftreg);
1678 }
1679
1680
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1681 LogicVRegister Simulator::sli(VectorFormat vform,
1682 LogicVRegister dst,
1683 const LogicVRegister& src,
1684 int shift) {
1685 dst.ClearForWrite(vform);
1686 int laneCount = LaneCountFromFormat(vform);
1687 for (int i = 0; i < laneCount; i++) {
1688 uint64_t src_lane = src.Uint(vform, i);
1689 uint64_t dst_lane = dst.Uint(vform, i);
1690 uint64_t shifted = src_lane << shift;
1691 uint64_t mask = MaxUintFromFormat(vform) << shift;
1692 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1693 }
1694 return dst;
1695 }
1696
1697
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1698 LogicVRegister Simulator::sqshl(VectorFormat vform,
1699 LogicVRegister dst,
1700 const LogicVRegister& src,
1701 int shift) {
1702 VIXL_ASSERT(shift >= 0);
1703 SimVRegister temp;
1704 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1705 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1706 }
1707
1708
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1709 LogicVRegister Simulator::uqshl(VectorFormat vform,
1710 LogicVRegister dst,
1711 const LogicVRegister& src,
1712 int shift) {
1713 VIXL_ASSERT(shift >= 0);
1714 SimVRegister temp;
1715 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1716 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1717 }
1718
1719
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1720 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1721 LogicVRegister dst,
1722 const LogicVRegister& src,
1723 int shift) {
1724 VIXL_ASSERT(shift >= 0);
1725 SimVRegister temp;
1726 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1727 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1728 }
1729
1730
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1731 LogicVRegister Simulator::sri(VectorFormat vform,
1732 LogicVRegister dst,
1733 const LogicVRegister& src,
1734 int shift) {
1735 dst.ClearForWrite(vform);
1736 int laneCount = LaneCountFromFormat(vform);
1737 VIXL_ASSERT((shift > 0) &&
1738 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1739 for (int i = 0; i < laneCount; i++) {
1740 uint64_t src_lane = src.Uint(vform, i);
1741 uint64_t dst_lane = dst.Uint(vform, i);
1742 uint64_t shifted;
1743 uint64_t mask;
1744 if (shift == 64) {
1745 shifted = 0;
1746 mask = 0;
1747 } else {
1748 shifted = src_lane >> shift;
1749 mask = MaxUintFromFormat(vform) >> shift;
1750 }
1751 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1752 }
1753 return dst;
1754 }
1755
1756
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1757 LogicVRegister Simulator::ushr(VectorFormat vform,
1758 LogicVRegister dst,
1759 const LogicVRegister& src,
1760 int shift) {
1761 VIXL_ASSERT(shift >= 0);
1762 SimVRegister temp;
1763 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1764 return ushl(vform, dst, src, shiftreg);
1765 }
1766
1767
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768 LogicVRegister Simulator::sshr(VectorFormat vform,
1769 LogicVRegister dst,
1770 const LogicVRegister& src,
1771 int shift) {
1772 VIXL_ASSERT(shift >= 0);
1773 SimVRegister temp;
1774 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1775 return sshl(vform, dst, src, shiftreg);
1776 }
1777
1778
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1779 LogicVRegister Simulator::ssra(VectorFormat vform,
1780 LogicVRegister dst,
1781 const LogicVRegister& src,
1782 int shift) {
1783 SimVRegister temp;
1784 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1785 return add(vform, dst, dst, shifted_reg);
1786 }
1787
1788
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1789 LogicVRegister Simulator::usra(VectorFormat vform,
1790 LogicVRegister dst,
1791 const LogicVRegister& src,
1792 int shift) {
1793 SimVRegister temp;
1794 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1795 return add(vform, dst, dst, shifted_reg);
1796 }
1797
1798
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1799 LogicVRegister Simulator::srsra(VectorFormat vform,
1800 LogicVRegister dst,
1801 const LogicVRegister& src,
1802 int shift) {
1803 SimVRegister temp;
1804 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1805 return add(vform, dst, dst, shifted_reg);
1806 }
1807
1808
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1809 LogicVRegister Simulator::ursra(VectorFormat vform,
1810 LogicVRegister dst,
1811 const LogicVRegister& src,
1812 int shift) {
1813 SimVRegister temp;
1814 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1815 return add(vform, dst, dst, shifted_reg);
1816 }
1817
1818
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1819 LogicVRegister Simulator::cls(VectorFormat vform,
1820 LogicVRegister dst,
1821 const LogicVRegister& src) {
1822 uint64_t result[16];
1823 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1824 int laneCount = LaneCountFromFormat(vform);
1825 for (int i = 0; i < laneCount; i++) {
1826 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1827 }
1828
1829 dst.ClearForWrite(vform);
1830 for (int i = 0; i < laneCount; ++i) {
1831 dst.SetUint(vform, i, result[i]);
1832 }
1833 return dst;
1834 }
1835
1836
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1837 LogicVRegister Simulator::clz(VectorFormat vform,
1838 LogicVRegister dst,
1839 const LogicVRegister& src) {
1840 uint64_t result[16];
1841 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1842 int laneCount = LaneCountFromFormat(vform);
1843 for (int i = 0; i < laneCount; i++) {
1844 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1845 }
1846
1847 dst.ClearForWrite(vform);
1848 for (int i = 0; i < laneCount; ++i) {
1849 dst.SetUint(vform, i, result[i]);
1850 }
1851 return dst;
1852 }
1853
1854
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1855 LogicVRegister Simulator::cnt(VectorFormat vform,
1856 LogicVRegister dst,
1857 const LogicVRegister& src) {
1858 uint64_t result[16];
1859 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1860 int laneCount = LaneCountFromFormat(vform);
1861 for (int i = 0; i < laneCount; i++) {
1862 uint64_t value = src.Uint(vform, i);
1863 result[i] = 0;
1864 for (int j = 0; j < laneSizeInBits; j++) {
1865 result[i] += (value & 1);
1866 value >>= 1;
1867 }
1868 }
1869
1870 dst.ClearForWrite(vform);
1871 for (int i = 0; i < laneCount; ++i) {
1872 dst.SetUint(vform, i, result[i]);
1873 }
1874 return dst;
1875 }
1876
1877
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1878 LogicVRegister Simulator::sshl(VectorFormat vform,
1879 LogicVRegister dst,
1880 const LogicVRegister& src1,
1881 const LogicVRegister& src2) {
1882 dst.ClearForWrite(vform);
1883 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1884 int8_t shift_val = src2.Int(vform, i);
1885 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1886
1887 // Set signed saturation state.
1888 if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
1889 (lj_src_val != 0)) {
1890 dst.SetSignedSat(i, lj_src_val >= 0);
1891 }
1892
1893 // Set unsigned saturation state.
1894 if (lj_src_val < 0) {
1895 dst.SetUnsignedSat(i, false);
1896 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1897 (lj_src_val != 0)) {
1898 dst.SetUnsignedSat(i, true);
1899 }
1900
1901 int64_t src_val = src1.Int(vform, i);
1902 if (shift_val > 63) {
1903 dst.SetInt(vform, i, 0);
1904 } else if (shift_val < -63) {
1905 dst.SetRounding(i, src_val < 0);
1906 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1907 } else {
1908 if (shift_val < 0) {
1909 // Set rounding state. Rounding only needed on right shifts.
1910 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1911 dst.SetRounding(i, true);
1912 }
1913 src_val >>= -shift_val;
1914 } else {
1915 src_val <<= shift_val;
1916 }
1917 dst.SetInt(vform, i, src_val);
1918 }
1919 }
1920 return dst;
1921 }
1922
1923
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1924 LogicVRegister Simulator::ushl(VectorFormat vform,
1925 LogicVRegister dst,
1926 const LogicVRegister& src1,
1927 const LogicVRegister& src2) {
1928 dst.ClearForWrite(vform);
1929 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1930 int8_t shift_val = src2.Int(vform, i);
1931 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1932
1933 // Set saturation state.
1934 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1935 dst.SetUnsignedSat(i, true);
1936 }
1937
1938 uint64_t src_val = src1.Uint(vform, i);
1939 if ((shift_val > 63) || (shift_val < -64)) {
1940 dst.SetUint(vform, i, 0);
1941 } else {
1942 if (shift_val < 0) {
1943 // Set rounding state. Rounding only needed on right shifts.
1944 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1945 dst.SetRounding(i, true);
1946 }
1947
1948 if (shift_val == -64) {
1949 src_val = 0;
1950 } else {
1951 src_val >>= -shift_val;
1952 }
1953 } else {
1954 src_val <<= shift_val;
1955 }
1956 dst.SetUint(vform, i, src_val);
1957 }
1958 }
1959 return dst;
1960 }
1961
1962
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1963 LogicVRegister Simulator::neg(VectorFormat vform,
1964 LogicVRegister dst,
1965 const LogicVRegister& src) {
1966 dst.ClearForWrite(vform);
1967 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1968 // Test for signed saturation.
1969 int64_t sa = src.Int(vform, i);
1970 if (sa == MinIntFromFormat(vform)) {
1971 dst.SetSignedSat(i, true);
1972 }
1973 dst.SetInt(vform, i, -sa);
1974 }
1975 return dst;
1976 }
1977
1978
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1979 LogicVRegister Simulator::suqadd(VectorFormat vform,
1980 LogicVRegister dst,
1981 const LogicVRegister& src) {
1982 dst.ClearForWrite(vform);
1983 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1984 int64_t sa = dst.IntLeftJustified(vform, i);
1985 uint64_t ub = src.UintLeftJustified(vform, i);
1986 int64_t sr = sa + ub;
1987
1988 if (sr < sa) { // Test for signed positive saturation.
1989 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1990 } else {
1991 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
1992 }
1993 }
1994 return dst;
1995 }
1996
1997
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1998 LogicVRegister Simulator::usqadd(VectorFormat vform,
1999 LogicVRegister dst,
2000 const LogicVRegister& src) {
2001 dst.ClearForWrite(vform);
2002 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2003 uint64_t ua = dst.UintLeftJustified(vform, i);
2004 int64_t sb = src.IntLeftJustified(vform, i);
2005 uint64_t ur = ua + sb;
2006
2007 if ((sb > 0) && (ur <= ua)) {
2008 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2009 } else if ((sb < 0) && (ur >= ua)) {
2010 dst.SetUint(vform, i, 0); // Negative saturation.
2011 } else {
2012 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2013 }
2014 }
2015 return dst;
2016 }
2017
2018
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2019 LogicVRegister Simulator::abs(VectorFormat vform,
2020 LogicVRegister dst,
2021 const LogicVRegister& src) {
2022 dst.ClearForWrite(vform);
2023 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2024 // Test for signed saturation.
2025 int64_t sa = src.Int(vform, i);
2026 if (sa == MinIntFromFormat(vform)) {
2027 dst.SetSignedSat(i, true);
2028 }
2029 if (sa < 0) {
2030 dst.SetInt(vform, i, -sa);
2031 } else {
2032 dst.SetInt(vform, i, sa);
2033 }
2034 }
2035 return dst;
2036 }
2037
2038
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2039 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2040 LogicVRegister dst,
2041 bool dstIsSigned,
2042 const LogicVRegister& src,
2043 bool srcIsSigned) {
2044 bool upperhalf = false;
2045 VectorFormat srcform = kFormatUndefined;
2046 int64_t ssrc[8];
2047 uint64_t usrc[8];
2048
2049 switch (dstform) {
2050 case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
2051 case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
2052 case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
2053 case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
2054 case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
2055 case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
2056 case kFormatB : upperhalf = false; srcform = kFormatH; break;
2057 case kFormatH : upperhalf = false; srcform = kFormatS; break;
2058 case kFormatS : upperhalf = false; srcform = kFormatD; break;
2059 default:VIXL_UNIMPLEMENTED();
2060 }
2061
2062 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2063 ssrc[i] = src.Int(srcform, i);
2064 usrc[i] = src.Uint(srcform, i);
2065 }
2066
2067 int offset;
2068 if (upperhalf) {
2069 offset = LaneCountFromFormat(dstform) / 2;
2070 } else {
2071 offset = 0;
2072 dst.ClearForWrite(dstform);
2073 }
2074
2075 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2076 // Test for signed saturation
2077 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2078 dst.SetSignedSat(offset + i, true);
2079 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2080 dst.SetSignedSat(offset + i, false);
2081 }
2082
2083 // Test for unsigned saturation
2084 if (srcIsSigned) {
2085 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2086 dst.SetUnsignedSat(offset + i, true);
2087 } else if (ssrc[i] < 0) {
2088 dst.SetUnsignedSat(offset + i, false);
2089 }
2090 } else {
2091 if (usrc[i] > MaxUintFromFormat(dstform)) {
2092 dst.SetUnsignedSat(offset + i, true);
2093 }
2094 }
2095
2096 int64_t result;
2097 if (srcIsSigned) {
2098 result = ssrc[i] & MaxUintFromFormat(dstform);
2099 } else {
2100 result = usrc[i] & MaxUintFromFormat(dstform);
2101 }
2102
2103 if (dstIsSigned) {
2104 dst.SetInt(dstform, offset + i, result);
2105 } else {
2106 dst.SetUint(dstform, offset + i, result);
2107 }
2108 }
2109 return dst;
2110 }
2111
2112
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2113 LogicVRegister Simulator::xtn(VectorFormat vform,
2114 LogicVRegister dst,
2115 const LogicVRegister& src) {
2116 return extractnarrow(vform, dst, true, src, true);
2117 }
2118
2119
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2120 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2121 LogicVRegister dst,
2122 const LogicVRegister& src) {
2123 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2124 }
2125
2126
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2127 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2128 LogicVRegister dst,
2129 const LogicVRegister& src) {
2130 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2131 }
2132
2133
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2134 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2135 LogicVRegister dst,
2136 const LogicVRegister& src) {
2137 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2138 }
2139
2140
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2141 LogicVRegister Simulator::absdiff(VectorFormat vform,
2142 LogicVRegister dst,
2143 const LogicVRegister& src1,
2144 const LogicVRegister& src2,
2145 bool issigned) {
2146 dst.ClearForWrite(vform);
2147 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2148 if (issigned) {
2149 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2150 sr = sr > 0 ? sr : -sr;
2151 dst.SetInt(vform, i, sr);
2152 } else {
2153 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2154 sr = sr > 0 ? sr : -sr;
2155 dst.SetUint(vform, i, sr);
2156 }
2157 }
2158 return dst;
2159 }
2160
2161
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2162 LogicVRegister Simulator::saba(VectorFormat vform,
2163 LogicVRegister dst,
2164 const LogicVRegister& src1,
2165 const LogicVRegister& src2) {
2166 SimVRegister temp;
2167 dst.ClearForWrite(vform);
2168 absdiff(vform, temp, src1, src2, true);
2169 add(vform, dst, dst, temp);
2170 return dst;
2171 }
2172
2173
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2174 LogicVRegister Simulator::uaba(VectorFormat vform,
2175 LogicVRegister dst,
2176 const LogicVRegister& src1,
2177 const LogicVRegister& src2) {
2178 SimVRegister temp;
2179 dst.ClearForWrite(vform);
2180 absdiff(vform, temp, src1, src2, false);
2181 add(vform, dst, dst, temp);
2182 return dst;
2183 }
2184
2185
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2186 LogicVRegister Simulator::not_(VectorFormat vform,
2187 LogicVRegister dst,
2188 const LogicVRegister& src) {
2189 dst.ClearForWrite(vform);
2190 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2191 dst.SetUint(vform, i, ~src.Uint(vform, i));
2192 }
2193 return dst;
2194 }
2195
2196
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2197 LogicVRegister Simulator::rbit(VectorFormat vform,
2198 LogicVRegister dst,
2199 const LogicVRegister& src) {
2200 uint64_t result[16];
2201 int laneCount = LaneCountFromFormat(vform);
2202 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2203 uint64_t reversed_value;
2204 uint64_t value;
2205 for (int i = 0; i < laneCount; i++) {
2206 value = src.Uint(vform, i);
2207 reversed_value = 0;
2208 for (int j = 0; j < laneSizeInBits; j++) {
2209 reversed_value = (reversed_value << 1) | (value & 1);
2210 value >>= 1;
2211 }
2212 result[i] = reversed_value;
2213 }
2214
2215 dst.ClearForWrite(vform);
2216 for (int i = 0; i < laneCount; ++i) {
2217 dst.SetUint(vform, i, result[i]);
2218 }
2219 return dst;
2220 }
2221
2222
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2223 LogicVRegister Simulator::rev(VectorFormat vform,
2224 LogicVRegister dst,
2225 const LogicVRegister& src,
2226 int revSize) {
2227 uint64_t result[16];
2228 int laneCount = LaneCountFromFormat(vform);
2229 int laneSize = LaneSizeInBytesFromFormat(vform);
2230 int lanesPerLoop = revSize / laneSize;
2231 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2232 for (int j = 0; j < lanesPerLoop; j++) {
2233 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2234 }
2235 }
2236 dst.ClearForWrite(vform);
2237 for (int i = 0; i < laneCount; ++i) {
2238 dst.SetUint(vform, i, result[i]);
2239 }
2240 return dst;
2241 }
2242
2243
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2244 LogicVRegister Simulator::rev16(VectorFormat vform,
2245 LogicVRegister dst,
2246 const LogicVRegister& src) {
2247 return rev(vform, dst, src, 2);
2248 }
2249
2250
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2251 LogicVRegister Simulator::rev32(VectorFormat vform,
2252 LogicVRegister dst,
2253 const LogicVRegister& src) {
2254 return rev(vform, dst, src, 4);
2255 }
2256
2257
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2258 LogicVRegister Simulator::rev64(VectorFormat vform,
2259 LogicVRegister dst,
2260 const LogicVRegister& src) {
2261 return rev(vform, dst, src, 8);
2262 }
2263
2264
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2265 LogicVRegister Simulator::addlp(VectorFormat vform,
2266 LogicVRegister dst,
2267 const LogicVRegister& src,
2268 bool is_signed,
2269 bool do_accumulate) {
2270 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2271
2272 int64_t sr[16];
2273 uint64_t ur[16];
2274
2275 int laneCount = LaneCountFromFormat(vform);
2276 for (int i = 0; i < laneCount; ++i) {
2277 if (is_signed) {
2278 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2279 } else {
2280 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2281 }
2282 }
2283
2284 dst.ClearForWrite(vform);
2285 for (int i = 0; i < laneCount; ++i) {
2286 if (do_accumulate) {
2287 if (is_signed) {
2288 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2289 } else {
2290 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2291 }
2292 } else {
2293 if (is_signed) {
2294 dst.SetInt(vform, i, sr[i]);
2295 } else {
2296 dst.SetUint(vform, i, ur[i]);
2297 }
2298 }
2299 }
2300
2301 return dst;
2302 }
2303
2304
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2305 LogicVRegister Simulator::saddlp(VectorFormat vform,
2306 LogicVRegister dst,
2307 const LogicVRegister& src) {
2308 return addlp(vform, dst, src, true, false);
2309 }
2310
2311
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2312 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2313 LogicVRegister dst,
2314 const LogicVRegister& src) {
2315 return addlp(vform, dst, src, false, false);
2316 }
2317
2318
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2319 LogicVRegister Simulator::sadalp(VectorFormat vform,
2320 LogicVRegister dst,
2321 const LogicVRegister& src) {
2322 return addlp(vform, dst, src, true, true);
2323 }
2324
2325
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2326 LogicVRegister Simulator::uadalp(VectorFormat vform,
2327 LogicVRegister dst,
2328 const LogicVRegister& src) {
2329 return addlp(vform, dst, src, false, true);
2330 }
2331
2332
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2333 LogicVRegister Simulator::ext(VectorFormat vform,
2334 LogicVRegister dst,
2335 const LogicVRegister& src1,
2336 const LogicVRegister& src2,
2337 int index) {
2338 uint8_t result[16];
2339 int laneCount = LaneCountFromFormat(vform);
2340 for (int i = 0; i < laneCount - index; ++i) {
2341 result[i] = src1.Uint(vform, i + index);
2342 }
2343 for (int i = 0; i < index; ++i) {
2344 result[laneCount - index + i] = src2.Uint(vform, i);
2345 }
2346 dst.ClearForWrite(vform);
2347 for (int i = 0; i < laneCount; ++i) {
2348 dst.SetUint(vform, i, result[i]);
2349 }
2350 return dst;
2351 }
2352
2353
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2354 LogicVRegister Simulator::dup_element(VectorFormat vform,
2355 LogicVRegister dst,
2356 const LogicVRegister& src,
2357 int src_index) {
2358 int laneCount = LaneCountFromFormat(vform);
2359 uint64_t value = src.Uint(vform, src_index);
2360 dst.ClearForWrite(vform);
2361 for (int i = 0; i < laneCount; ++i) {
2362 dst.SetUint(vform, i, value);
2363 }
2364 return dst;
2365 }
2366
2367
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2368 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2369 LogicVRegister dst,
2370 uint64_t imm) {
2371 int laneCount = LaneCountFromFormat(vform);
2372 uint64_t value = imm & MaxUintFromFormat(vform);
2373 dst.ClearForWrite(vform);
2374 for (int i = 0; i < laneCount; ++i) {
2375 dst.SetUint(vform, i, value);
2376 }
2377 return dst;
2378 }
2379
2380
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2381 LogicVRegister Simulator::ins_element(VectorFormat vform,
2382 LogicVRegister dst,
2383 int dst_index,
2384 const LogicVRegister& src,
2385 int src_index) {
2386 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2387 return dst;
2388 }
2389
2390
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2391 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2392 LogicVRegister dst,
2393 int dst_index,
2394 uint64_t imm) {
2395 uint64_t value = imm & MaxUintFromFormat(vform);
2396 dst.SetUint(vform, dst_index, value);
2397 return dst;
2398 }
2399
2400
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2401 LogicVRegister Simulator::movi(VectorFormat vform,
2402 LogicVRegister dst,
2403 uint64_t imm) {
2404 int laneCount = LaneCountFromFormat(vform);
2405 dst.ClearForWrite(vform);
2406 for (int i = 0; i < laneCount; ++i) {
2407 dst.SetUint(vform, i, imm);
2408 }
2409 return dst;
2410 }
2411
2412
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2413 LogicVRegister Simulator::mvni(VectorFormat vform,
2414 LogicVRegister dst,
2415 uint64_t imm) {
2416 int laneCount = LaneCountFromFormat(vform);
2417 dst.ClearForWrite(vform);
2418 for (int i = 0; i < laneCount; ++i) {
2419 dst.SetUint(vform, i, ~imm);
2420 }
2421 return dst;
2422 }
2423
2424
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2425 LogicVRegister Simulator::orr(VectorFormat vform,
2426 LogicVRegister dst,
2427 const LogicVRegister& src,
2428 uint64_t imm) {
2429 uint64_t result[16];
2430 int laneCount = LaneCountFromFormat(vform);
2431 for (int i = 0; i < laneCount; ++i) {
2432 result[i] = src.Uint(vform, i) | imm;
2433 }
2434 dst.ClearForWrite(vform);
2435 for (int i = 0; i < laneCount; ++i) {
2436 dst.SetUint(vform, i, result[i]);
2437 }
2438 return dst;
2439 }
2440
2441
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2442 LogicVRegister Simulator::uxtl(VectorFormat vform,
2443 LogicVRegister dst,
2444 const LogicVRegister& src) {
2445 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2446
2447 dst.ClearForWrite(vform);
2448 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2449 dst.SetUint(vform, i, src.Uint(vform_half, i));
2450 }
2451 return dst;
2452 }
2453
2454
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2455 LogicVRegister Simulator::sxtl(VectorFormat vform,
2456 LogicVRegister dst,
2457 const LogicVRegister& src) {
2458 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2459
2460 dst.ClearForWrite(vform);
2461 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2462 dst.SetInt(vform, i, src.Int(vform_half, i));
2463 }
2464 return dst;
2465 }
2466
2467
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2468 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2469 LogicVRegister dst,
2470 const LogicVRegister& src) {
2471 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2472 int lane_count = LaneCountFromFormat(vform);
2473
2474 dst.ClearForWrite(vform);
2475 for (int i = 0; i < lane_count; i++) {
2476 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2477 }
2478 return dst;
2479 }
2480
2481
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2482 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2483 LogicVRegister dst,
2484 const LogicVRegister& src) {
2485 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2486 int lane_count = LaneCountFromFormat(vform);
2487
2488 dst.ClearForWrite(vform);
2489 for (int i = 0; i < lane_count; i++) {
2490 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2491 }
2492 return dst;
2493 }
2494
2495
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2496 LogicVRegister Simulator::shrn(VectorFormat vform,
2497 LogicVRegister dst,
2498 const LogicVRegister& src,
2499 int shift) {
2500 SimVRegister temp;
2501 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2502 VectorFormat vform_dst = vform;
2503 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2504 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2505 }
2506
2507
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2508 LogicVRegister Simulator::shrn2(VectorFormat vform,
2509 LogicVRegister dst,
2510 const LogicVRegister& src,
2511 int shift) {
2512 SimVRegister temp;
2513 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2514 VectorFormat vformdst = vform;
2515 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2516 return extractnarrow(vformdst, dst, false, shifted_src, false);
2517 }
2518
2519
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2520 LogicVRegister Simulator::rshrn(VectorFormat vform,
2521 LogicVRegister dst,
2522 const LogicVRegister& src,
2523 int shift) {
2524 SimVRegister temp;
2525 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2526 VectorFormat vformdst = vform;
2527 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2528 return extractnarrow(vformdst, dst, false, shifted_src, false);
2529 }
2530
2531
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2532 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2533 LogicVRegister dst,
2534 const LogicVRegister& src,
2535 int shift) {
2536 SimVRegister temp;
2537 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2538 VectorFormat vformdst = vform;
2539 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2540 return extractnarrow(vformdst, dst, false, shifted_src, false);
2541 }
2542
2543
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2544 LogicVRegister Simulator::tbl(VectorFormat vform,
2545 LogicVRegister dst,
2546 const LogicVRegister& tab,
2547 const LogicVRegister& ind) {
2548 movi(vform, dst, 0);
2549 return tbx(vform, dst, tab, ind);
2550 }
2551
2552
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2553 LogicVRegister Simulator::tbl(VectorFormat vform,
2554 LogicVRegister dst,
2555 const LogicVRegister& tab,
2556 const LogicVRegister& tab2,
2557 const LogicVRegister& ind) {
2558 movi(vform, dst, 0);
2559 return tbx(vform, dst, tab, tab2, ind);
2560 }
2561
2562
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2563 LogicVRegister Simulator::tbl(VectorFormat vform,
2564 LogicVRegister dst,
2565 const LogicVRegister& tab,
2566 const LogicVRegister& tab2,
2567 const LogicVRegister& tab3,
2568 const LogicVRegister& ind) {
2569 movi(vform, dst, 0);
2570 return tbx(vform, dst, tab, tab2, tab3, ind);
2571 }
2572
2573
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2574 LogicVRegister Simulator::tbl(VectorFormat vform,
2575 LogicVRegister dst,
2576 const LogicVRegister& tab,
2577 const LogicVRegister& tab2,
2578 const LogicVRegister& tab3,
2579 const LogicVRegister& tab4,
2580 const LogicVRegister& ind) {
2581 movi(vform, dst, 0);
2582 return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
2583 }
2584
2585
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2586 LogicVRegister Simulator::tbx(VectorFormat vform,
2587 LogicVRegister dst,
2588 const LogicVRegister& tab,
2589 const LogicVRegister& ind) {
2590 dst.ClearForWrite(vform);
2591 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2592 uint64_t j = ind.Uint(vform, i);
2593 switch (j >> 4) {
2594 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2595 }
2596 }
2597 return dst;
2598 }
2599
2600
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2601 LogicVRegister Simulator::tbx(VectorFormat vform,
2602 LogicVRegister dst,
2603 const LogicVRegister& tab,
2604 const LogicVRegister& tab2,
2605 const LogicVRegister& ind) {
2606 dst.ClearForWrite(vform);
2607 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2608 uint64_t j = ind.Uint(vform, i);
2609 switch (j >> 4) {
2610 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2611 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2612 }
2613 }
2614 return dst;
2615 }
2616
2617
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2618 LogicVRegister Simulator::tbx(VectorFormat vform,
2619 LogicVRegister dst,
2620 const LogicVRegister& tab,
2621 const LogicVRegister& tab2,
2622 const LogicVRegister& tab3,
2623 const LogicVRegister& ind) {
2624 dst.ClearForWrite(vform);
2625 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2626 uint64_t j = ind.Uint(vform, i);
2627 switch (j >> 4) {
2628 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2629 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2630 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2631 }
2632 }
2633 return dst;
2634 }
2635
2636
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2637 LogicVRegister Simulator::tbx(VectorFormat vform,
2638 LogicVRegister dst,
2639 const LogicVRegister& tab,
2640 const LogicVRegister& tab2,
2641 const LogicVRegister& tab3,
2642 const LogicVRegister& tab4,
2643 const LogicVRegister& ind) {
2644 dst.ClearForWrite(vform);
2645 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2646 uint64_t j = ind.Uint(vform, i);
2647 switch (j >> 4) {
2648 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2649 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2650 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2651 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
2652 }
2653 }
2654 return dst;
2655 }
2656
2657
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2658 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2659 LogicVRegister dst,
2660 const LogicVRegister& src,
2661 int shift) {
2662 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2663 }
2664
2665
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2666 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2667 LogicVRegister dst,
2668 const LogicVRegister& src,
2669 int shift) {
2670 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2671 }
2672
2673
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2674 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2675 LogicVRegister dst,
2676 const LogicVRegister& src,
2677 int shift) {
2678 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2679 }
2680
2681
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2682 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2683 LogicVRegister dst,
2684 const LogicVRegister& src,
2685 int shift) {
2686 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2687 }
2688
2689
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2690 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2691 LogicVRegister dst,
2692 const LogicVRegister& src,
2693 int shift) {
2694 SimVRegister temp;
2695 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2696 VectorFormat vformdst = vform;
2697 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2698 return sqxtn(vformdst, dst, shifted_src);
2699 }
2700
2701
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2702 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2703 LogicVRegister dst,
2704 const LogicVRegister& src,
2705 int shift) {
2706 SimVRegister temp;
2707 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2708 VectorFormat vformdst = vform;
2709 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2710 return sqxtn(vformdst, dst, shifted_src);
2711 }
2712
2713
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2714 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2715 LogicVRegister dst,
2716 const LogicVRegister& src,
2717 int shift) {
2718 SimVRegister temp;
2719 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2720 VectorFormat vformdst = vform;
2721 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2722 return sqxtn(vformdst, dst, shifted_src);
2723 }
2724
2725
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2726 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2727 LogicVRegister dst,
2728 const LogicVRegister& src,
2729 int shift) {
2730 SimVRegister temp;
2731 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2732 VectorFormat vformdst = vform;
2733 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2734 return sqxtn(vformdst, dst, shifted_src);
2735 }
2736
2737
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2738 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2739 LogicVRegister dst,
2740 const LogicVRegister& src,
2741 int shift) {
2742 SimVRegister temp;
2743 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2744 VectorFormat vformdst = vform;
2745 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2746 return sqxtun(vformdst, dst, shifted_src);
2747 }
2748
2749
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2750 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2751 LogicVRegister dst,
2752 const LogicVRegister& src,
2753 int shift) {
2754 SimVRegister temp;
2755 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2756 VectorFormat vformdst = vform;
2757 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2758 return sqxtun(vformdst, dst, shifted_src);
2759 }
2760
2761
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2762 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2763 LogicVRegister dst,
2764 const LogicVRegister& src,
2765 int shift) {
2766 SimVRegister temp;
2767 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2768 VectorFormat vformdst = vform;
2769 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2770 return sqxtun(vformdst, dst, shifted_src);
2771 }
2772
2773
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2774 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2775 LogicVRegister dst,
2776 const LogicVRegister& src,
2777 int shift) {
2778 SimVRegister temp;
2779 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2780 VectorFormat vformdst = vform;
2781 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2782 return sqxtun(vformdst, dst, shifted_src);
2783 }
2784
2785
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2786 LogicVRegister Simulator::uaddl(VectorFormat vform,
2787 LogicVRegister dst,
2788 const LogicVRegister& src1,
2789 const LogicVRegister& src2) {
2790 SimVRegister temp1, temp2;
2791 uxtl(vform, temp1, src1);
2792 uxtl(vform, temp2, src2);
2793 add(vform, dst, temp1, temp2);
2794 return dst;
2795 }
2796
2797
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2798 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2799 LogicVRegister dst,
2800 const LogicVRegister& src1,
2801 const LogicVRegister& src2) {
2802 SimVRegister temp1, temp2;
2803 uxtl2(vform, temp1, src1);
2804 uxtl2(vform, temp2, src2);
2805 add(vform, dst, temp1, temp2);
2806 return dst;
2807 }
2808
2809
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2810 LogicVRegister Simulator::uaddw(VectorFormat vform,
2811 LogicVRegister dst,
2812 const LogicVRegister& src1,
2813 const LogicVRegister& src2) {
2814 SimVRegister temp;
2815 uxtl(vform, temp, src2);
2816 add(vform, dst, src1, temp);
2817 return dst;
2818 }
2819
2820
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2821 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2822 LogicVRegister dst,
2823 const LogicVRegister& src1,
2824 const LogicVRegister& src2) {
2825 SimVRegister temp;
2826 uxtl2(vform, temp, src2);
2827 add(vform, dst, src1, temp);
2828 return dst;
2829 }
2830
2831
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2832 LogicVRegister Simulator::saddl(VectorFormat vform,
2833 LogicVRegister dst,
2834 const LogicVRegister& src1,
2835 const LogicVRegister& src2) {
2836 SimVRegister temp1, temp2;
2837 sxtl(vform, temp1, src1);
2838 sxtl(vform, temp2, src2);
2839 add(vform, dst, temp1, temp2);
2840 return dst;
2841 }
2842
2843
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2844 LogicVRegister Simulator::saddl2(VectorFormat vform,
2845 LogicVRegister dst,
2846 const LogicVRegister& src1,
2847 const LogicVRegister& src2) {
2848 SimVRegister temp1, temp2;
2849 sxtl2(vform, temp1, src1);
2850 sxtl2(vform, temp2, src2);
2851 add(vform, dst, temp1, temp2);
2852 return dst;
2853 }
2854
2855
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2856 LogicVRegister Simulator::saddw(VectorFormat vform,
2857 LogicVRegister dst,
2858 const LogicVRegister& src1,
2859 const LogicVRegister& src2) {
2860 SimVRegister temp;
2861 sxtl(vform, temp, src2);
2862 add(vform, dst, src1, temp);
2863 return dst;
2864 }
2865
2866
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2867 LogicVRegister Simulator::saddw2(VectorFormat vform,
2868 LogicVRegister dst,
2869 const LogicVRegister& src1,
2870 const LogicVRegister& src2) {
2871 SimVRegister temp;
2872 sxtl2(vform, temp, src2);
2873 add(vform, dst, src1, temp);
2874 return dst;
2875 }
2876
2877
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2878 LogicVRegister Simulator::usubl(VectorFormat vform,
2879 LogicVRegister dst,
2880 const LogicVRegister& src1,
2881 const LogicVRegister& src2) {
2882 SimVRegister temp1, temp2;
2883 uxtl(vform, temp1, src1);
2884 uxtl(vform, temp2, src2);
2885 sub(vform, dst, temp1, temp2);
2886 return dst;
2887 }
2888
2889
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2890 LogicVRegister Simulator::usubl2(VectorFormat vform,
2891 LogicVRegister dst,
2892 const LogicVRegister& src1,
2893 const LogicVRegister& src2) {
2894 SimVRegister temp1, temp2;
2895 uxtl2(vform, temp1, src1);
2896 uxtl2(vform, temp2, src2);
2897 sub(vform, dst, temp1, temp2);
2898 return dst;
2899 }
2900
2901
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2902 LogicVRegister Simulator::usubw(VectorFormat vform,
2903 LogicVRegister dst,
2904 const LogicVRegister& src1,
2905 const LogicVRegister& src2) {
2906 SimVRegister temp;
2907 uxtl(vform, temp, src2);
2908 sub(vform, dst, src1, temp);
2909 return dst;
2910 }
2911
2912
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2913 LogicVRegister Simulator::usubw2(VectorFormat vform,
2914 LogicVRegister dst,
2915 const LogicVRegister& src1,
2916 const LogicVRegister& src2) {
2917 SimVRegister temp;
2918 uxtl2(vform, temp, src2);
2919 sub(vform, dst, src1, temp);
2920 return dst;
2921 }
2922
2923
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2924 LogicVRegister Simulator::ssubl(VectorFormat vform,
2925 LogicVRegister dst,
2926 const LogicVRegister& src1,
2927 const LogicVRegister& src2) {
2928 SimVRegister temp1, temp2;
2929 sxtl(vform, temp1, src1);
2930 sxtl(vform, temp2, src2);
2931 sub(vform, dst, temp1, temp2);
2932 return dst;
2933 }
2934
2935
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2936 LogicVRegister Simulator::ssubl2(VectorFormat vform,
2937 LogicVRegister dst,
2938 const LogicVRegister& src1,
2939 const LogicVRegister& src2) {
2940 SimVRegister temp1, temp2;
2941 sxtl2(vform, temp1, src1);
2942 sxtl2(vform, temp2, src2);
2943 sub(vform, dst, temp1, temp2);
2944 return dst;
2945 }
2946
2947
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2948 LogicVRegister Simulator::ssubw(VectorFormat vform,
2949 LogicVRegister dst,
2950 const LogicVRegister& src1,
2951 const LogicVRegister& src2) {
2952 SimVRegister temp;
2953 sxtl(vform, temp, src2);
2954 sub(vform, dst, src1, temp);
2955 return dst;
2956 }
2957
2958
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2959 LogicVRegister Simulator::ssubw2(VectorFormat vform,
2960 LogicVRegister dst,
2961 const LogicVRegister& src1,
2962 const LogicVRegister& src2) {
2963 SimVRegister temp;
2964 sxtl2(vform, temp, src2);
2965 sub(vform, dst, src1, temp);
2966 return dst;
2967 }
2968
2969
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2970 LogicVRegister Simulator::uabal(VectorFormat vform,
2971 LogicVRegister dst,
2972 const LogicVRegister& src1,
2973 const LogicVRegister& src2) {
2974 SimVRegister temp1, temp2;
2975 uxtl(vform, temp1, src1);
2976 uxtl(vform, temp2, src2);
2977 uaba(vform, dst, temp1, temp2);
2978 return dst;
2979 }
2980
2981
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2982 LogicVRegister Simulator::uabal2(VectorFormat vform,
2983 LogicVRegister dst,
2984 const LogicVRegister& src1,
2985 const LogicVRegister& src2) {
2986 SimVRegister temp1, temp2;
2987 uxtl2(vform, temp1, src1);
2988 uxtl2(vform, temp2, src2);
2989 uaba(vform, dst, temp1, temp2);
2990 return dst;
2991 }
2992
2993
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2994 LogicVRegister Simulator::sabal(VectorFormat vform,
2995 LogicVRegister dst,
2996 const LogicVRegister& src1,
2997 const LogicVRegister& src2) {
2998 SimVRegister temp1, temp2;
2999 sxtl(vform, temp1, src1);
3000 sxtl(vform, temp2, src2);
3001 saba(vform, dst, temp1, temp2);
3002 return dst;
3003 }
3004
3005
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3006 LogicVRegister Simulator::sabal2(VectorFormat vform,
3007 LogicVRegister dst,
3008 const LogicVRegister& src1,
3009 const LogicVRegister& src2) {
3010 SimVRegister temp1, temp2;
3011 sxtl2(vform, temp1, src1);
3012 sxtl2(vform, temp2, src2);
3013 saba(vform, dst, temp1, temp2);
3014 return dst;
3015 }
3016
3017
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3018 LogicVRegister Simulator::uabdl(VectorFormat vform,
3019 LogicVRegister dst,
3020 const LogicVRegister& src1,
3021 const LogicVRegister& src2) {
3022 SimVRegister temp1, temp2;
3023 uxtl(vform, temp1, src1);
3024 uxtl(vform, temp2, src2);
3025 absdiff(vform, dst, temp1, temp2, false);
3026 return dst;
3027 }
3028
3029
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3030 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3031 LogicVRegister dst,
3032 const LogicVRegister& src1,
3033 const LogicVRegister& src2) {
3034 SimVRegister temp1, temp2;
3035 uxtl2(vform, temp1, src1);
3036 uxtl2(vform, temp2, src2);
3037 absdiff(vform, dst, temp1, temp2, false);
3038 return dst;
3039 }
3040
3041
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3042 LogicVRegister Simulator::sabdl(VectorFormat vform,
3043 LogicVRegister dst,
3044 const LogicVRegister& src1,
3045 const LogicVRegister& src2) {
3046 SimVRegister temp1, temp2;
3047 sxtl(vform, temp1, src1);
3048 sxtl(vform, temp2, src2);
3049 absdiff(vform, dst, temp1, temp2, true);
3050 return dst;
3051 }
3052
3053
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3054 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3055 LogicVRegister dst,
3056 const LogicVRegister& src1,
3057 const LogicVRegister& src2) {
3058 SimVRegister temp1, temp2;
3059 sxtl2(vform, temp1, src1);
3060 sxtl2(vform, temp2, src2);
3061 absdiff(vform, dst, temp1, temp2, true);
3062 return dst;
3063 }
3064
3065
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3066 LogicVRegister Simulator::umull(VectorFormat vform,
3067 LogicVRegister dst,
3068 const LogicVRegister& src1,
3069 const LogicVRegister& src2) {
3070 SimVRegister temp1, temp2;
3071 uxtl(vform, temp1, src1);
3072 uxtl(vform, temp2, src2);
3073 mul(vform, dst, temp1, temp2);
3074 return dst;
3075 }
3076
3077
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3078 LogicVRegister Simulator::umull2(VectorFormat vform,
3079 LogicVRegister dst,
3080 const LogicVRegister& src1,
3081 const LogicVRegister& src2) {
3082 SimVRegister temp1, temp2;
3083 uxtl2(vform, temp1, src1);
3084 uxtl2(vform, temp2, src2);
3085 mul(vform, dst, temp1, temp2);
3086 return dst;
3087 }
3088
3089
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3090 LogicVRegister Simulator::smull(VectorFormat vform,
3091 LogicVRegister dst,
3092 const LogicVRegister& src1,
3093 const LogicVRegister& src2) {
3094 SimVRegister temp1, temp2;
3095 sxtl(vform, temp1, src1);
3096 sxtl(vform, temp2, src2);
3097 mul(vform, dst, temp1, temp2);
3098 return dst;
3099 }
3100
3101
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3102 LogicVRegister Simulator::smull2(VectorFormat vform,
3103 LogicVRegister dst,
3104 const LogicVRegister& src1,
3105 const LogicVRegister& src2) {
3106 SimVRegister temp1, temp2;
3107 sxtl2(vform, temp1, src1);
3108 sxtl2(vform, temp2, src2);
3109 mul(vform, dst, temp1, temp2);
3110 return dst;
3111 }
3112
3113
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3114 LogicVRegister Simulator::umlsl(VectorFormat vform,
3115 LogicVRegister dst,
3116 const LogicVRegister& src1,
3117 const LogicVRegister& src2) {
3118 SimVRegister temp1, temp2;
3119 uxtl(vform, temp1, src1);
3120 uxtl(vform, temp2, src2);
3121 mls(vform, dst, temp1, temp2);
3122 return dst;
3123 }
3124
3125
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3126 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3127 LogicVRegister dst,
3128 const LogicVRegister& src1,
3129 const LogicVRegister& src2) {
3130 SimVRegister temp1, temp2;
3131 uxtl2(vform, temp1, src1);
3132 uxtl2(vform, temp2, src2);
3133 mls(vform, dst, temp1, temp2);
3134 return dst;
3135 }
3136
3137
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3138 LogicVRegister Simulator::smlsl(VectorFormat vform,
3139 LogicVRegister dst,
3140 const LogicVRegister& src1,
3141 const LogicVRegister& src2) {
3142 SimVRegister temp1, temp2;
3143 sxtl(vform, temp1, src1);
3144 sxtl(vform, temp2, src2);
3145 mls(vform, dst, temp1, temp2);
3146 return dst;
3147 }
3148
3149
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3150 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3151 LogicVRegister dst,
3152 const LogicVRegister& src1,
3153 const LogicVRegister& src2) {
3154 SimVRegister temp1, temp2;
3155 sxtl2(vform, temp1, src1);
3156 sxtl2(vform, temp2, src2);
3157 mls(vform, dst, temp1, temp2);
3158 return dst;
3159 }
3160
3161
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3162 LogicVRegister Simulator::umlal(VectorFormat vform,
3163 LogicVRegister dst,
3164 const LogicVRegister& src1,
3165 const LogicVRegister& src2) {
3166 SimVRegister temp1, temp2;
3167 uxtl(vform, temp1, src1);
3168 uxtl(vform, temp2, src2);
3169 mla(vform, dst, temp1, temp2);
3170 return dst;
3171 }
3172
3173
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3174 LogicVRegister Simulator::umlal2(VectorFormat vform,
3175 LogicVRegister dst,
3176 const LogicVRegister& src1,
3177 const LogicVRegister& src2) {
3178 SimVRegister temp1, temp2;
3179 uxtl2(vform, temp1, src1);
3180 uxtl2(vform, temp2, src2);
3181 mla(vform, dst, temp1, temp2);
3182 return dst;
3183 }
3184
3185
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3186 LogicVRegister Simulator::smlal(VectorFormat vform,
3187 LogicVRegister dst,
3188 const LogicVRegister& src1,
3189 const LogicVRegister& src2) {
3190 SimVRegister temp1, temp2;
3191 sxtl(vform, temp1, src1);
3192 sxtl(vform, temp2, src2);
3193 mla(vform, dst, temp1, temp2);
3194 return dst;
3195 }
3196
3197
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3198 LogicVRegister Simulator::smlal2(VectorFormat vform,
3199 LogicVRegister dst,
3200 const LogicVRegister& src1,
3201 const LogicVRegister& src2) {
3202 SimVRegister temp1, temp2;
3203 sxtl2(vform, temp1, src1);
3204 sxtl2(vform, temp2, src2);
3205 mla(vform, dst, temp1, temp2);
3206 return dst;
3207 }
3208
3209
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3210 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3211 LogicVRegister dst,
3212 const LogicVRegister& src1,
3213 const LogicVRegister& src2) {
3214 SimVRegister temp;
3215 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3216 return add(vform, dst, dst, product).SignedSaturate(vform);
3217 }
3218
3219
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3220 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3221 LogicVRegister dst,
3222 const LogicVRegister& src1,
3223 const LogicVRegister& src2) {
3224 SimVRegister temp;
3225 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3226 return add(vform, dst, dst, product).SignedSaturate(vform);
3227 }
3228
3229
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3230 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3231 LogicVRegister dst,
3232 const LogicVRegister& src1,
3233 const LogicVRegister& src2) {
3234 SimVRegister temp;
3235 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3236 return sub(vform, dst, dst, product).SignedSaturate(vform);
3237 }
3238
3239
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3240 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3241 LogicVRegister dst,
3242 const LogicVRegister& src1,
3243 const LogicVRegister& src2) {
3244 SimVRegister temp;
3245 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3246 return sub(vform, dst, dst, product).SignedSaturate(vform);
3247 }
3248
3249
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3250 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3251 LogicVRegister dst,
3252 const LogicVRegister& src1,
3253 const LogicVRegister& src2) {
3254 SimVRegister temp;
3255 LogicVRegister product = smull(vform, temp, src1, src2);
3256 return add(vform, dst, product, product).SignedSaturate(vform);
3257 }
3258
3259
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3260 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3261 LogicVRegister dst,
3262 const LogicVRegister& src1,
3263 const LogicVRegister& src2) {
3264 SimVRegister temp;
3265 LogicVRegister product = smull2(vform, temp, src1, src2);
3266 return add(vform, dst, product, product).SignedSaturate(vform);
3267 }
3268
3269
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3270 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3271 LogicVRegister dst,
3272 const LogicVRegister& src1,
3273 const LogicVRegister& src2,
3274 bool round) {
3275 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3276 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3277 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3278
3279 int esize = LaneSizeInBitsFromFormat(vform);
3280 int round_const = round ? (1 << (esize - 2)) : 0;
3281 int64_t product;
3282
3283 dst.ClearForWrite(vform);
3284 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3285 product = src1.Int(vform, i) * src2.Int(vform, i);
3286 product += round_const;
3287 product = product >> (esize - 1);
3288
3289 if (product > MaxIntFromFormat(vform)) {
3290 product = MaxIntFromFormat(vform);
3291 } else if (product < MinIntFromFormat(vform)) {
3292 product = MinIntFromFormat(vform);
3293 }
3294 dst.SetInt(vform, i, product);
3295 }
3296 return dst;
3297 }
3298
3299
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3300 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3301 LogicVRegister dst,
3302 const LogicVRegister& src1,
3303 const LogicVRegister& src2) {
3304 return sqrdmulh(vform, dst, src1, src2, false);
3305 }
3306
3307
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3308 LogicVRegister Simulator::addhn(VectorFormat vform,
3309 LogicVRegister dst,
3310 const LogicVRegister& src1,
3311 const LogicVRegister& src2) {
3312 SimVRegister temp;
3313 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3314 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3315 return dst;
3316 }
3317
3318
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3319 LogicVRegister Simulator::addhn2(VectorFormat vform,
3320 LogicVRegister dst,
3321 const LogicVRegister& src1,
3322 const LogicVRegister& src2) {
3323 SimVRegister temp;
3324 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3325 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3326 return dst;
3327 }
3328
3329
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3330 LogicVRegister Simulator::raddhn(VectorFormat vform,
3331 LogicVRegister dst,
3332 const LogicVRegister& src1,
3333 const LogicVRegister& src2) {
3334 SimVRegister temp;
3335 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3336 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3337 return dst;
3338 }
3339
3340
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3341 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3342 LogicVRegister dst,
3343 const LogicVRegister& src1,
3344 const LogicVRegister& src2) {
3345 SimVRegister temp;
3346 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3347 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3348 return dst;
3349 }
3350
3351
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3352 LogicVRegister Simulator::subhn(VectorFormat vform,
3353 LogicVRegister dst,
3354 const LogicVRegister& src1,
3355 const LogicVRegister& src2) {
3356 SimVRegister temp;
3357 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3358 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3359 return dst;
3360 }
3361
3362
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3363 LogicVRegister Simulator::subhn2(VectorFormat vform,
3364 LogicVRegister dst,
3365 const LogicVRegister& src1,
3366 const LogicVRegister& src2) {
3367 SimVRegister temp;
3368 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3369 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3370 return dst;
3371 }
3372
3373
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3374 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3375 LogicVRegister dst,
3376 const LogicVRegister& src1,
3377 const LogicVRegister& src2) {
3378 SimVRegister temp;
3379 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3380 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3381 return dst;
3382 }
3383
3384
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3385 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3386 LogicVRegister dst,
3387 const LogicVRegister& src1,
3388 const LogicVRegister& src2) {
3389 SimVRegister temp;
3390 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3391 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3392 return dst;
3393 }
3394
3395
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3396 LogicVRegister Simulator::trn1(VectorFormat vform,
3397 LogicVRegister dst,
3398 const LogicVRegister& src1,
3399 const LogicVRegister& src2) {
3400 uint64_t result[16];
3401 int laneCount = LaneCountFromFormat(vform);
3402 int pairs = laneCount / 2;
3403 for (int i = 0; i < pairs; ++i) {
3404 result[2 * i] = src1.Uint(vform, 2 * i);
3405 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3406 }
3407
3408 dst.ClearForWrite(vform);
3409 for (int i = 0; i < laneCount; ++i) {
3410 dst.SetUint(vform, i, result[i]);
3411 }
3412 return dst;
3413 }
3414
3415
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3416 LogicVRegister Simulator::trn2(VectorFormat vform,
3417 LogicVRegister dst,
3418 const LogicVRegister& src1,
3419 const LogicVRegister& src2) {
3420 uint64_t result[16];
3421 int laneCount = LaneCountFromFormat(vform);
3422 int pairs = laneCount / 2;
3423 for (int i = 0; i < pairs; ++i) {
3424 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3425 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3426 }
3427
3428 dst.ClearForWrite(vform);
3429 for (int i = 0; i < laneCount; ++i) {
3430 dst.SetUint(vform, i, result[i]);
3431 }
3432 return dst;
3433 }
3434
3435
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3436 LogicVRegister Simulator::zip1(VectorFormat vform,
3437 LogicVRegister dst,
3438 const LogicVRegister& src1,
3439 const LogicVRegister& src2) {
3440 uint64_t result[16];
3441 int laneCount = LaneCountFromFormat(vform);
3442 int pairs = laneCount / 2;
3443 for (int i = 0; i < pairs; ++i) {
3444 result[2 * i] = src1.Uint(vform, i);
3445 result[(2 * i) + 1] = src2.Uint(vform, i);
3446 }
3447
3448 dst.ClearForWrite(vform);
3449 for (int i = 0; i < laneCount; ++i) {
3450 dst.SetUint(vform, i, result[i]);
3451 }
3452 return dst;
3453 }
3454
3455
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3456 LogicVRegister Simulator::zip2(VectorFormat vform,
3457 LogicVRegister dst,
3458 const LogicVRegister& src1,
3459 const LogicVRegister& src2) {
3460 uint64_t result[16];
3461 int laneCount = LaneCountFromFormat(vform);
3462 int pairs = laneCount / 2;
3463 for (int i = 0; i < pairs; ++i) {
3464 result[2 * i] = src1.Uint(vform, pairs + i);
3465 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3466 }
3467
3468 dst.ClearForWrite(vform);
3469 for (int i = 0; i < laneCount; ++i) {
3470 dst.SetUint(vform, i, result[i]);
3471 }
3472 return dst;
3473 }
3474
3475
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3476 LogicVRegister Simulator::uzp1(VectorFormat vform,
3477 LogicVRegister dst,
3478 const LogicVRegister& src1,
3479 const LogicVRegister& src2) {
3480 uint64_t result[32];
3481 int laneCount = LaneCountFromFormat(vform);
3482 for (int i = 0; i < laneCount; ++i) {
3483 result[i] = src1.Uint(vform, i);
3484 result[laneCount + i] = src2.Uint(vform, i);
3485 }
3486
3487 dst.ClearForWrite(vform);
3488 for (int i = 0; i < laneCount; ++i) {
3489 dst.SetUint(vform, i, result[2 * i]);
3490 }
3491 return dst;
3492 }
3493
3494
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3495 LogicVRegister Simulator::uzp2(VectorFormat vform,
3496 LogicVRegister dst,
3497 const LogicVRegister& src1,
3498 const LogicVRegister& src2) {
3499 uint64_t result[32];
3500 int laneCount = LaneCountFromFormat(vform);
3501 for (int i = 0; i < laneCount; ++i) {
3502 result[i] = src1.Uint(vform, i);
3503 result[laneCount + i] = src2.Uint(vform, i);
3504 }
3505
3506 dst.ClearForWrite(vform);
3507 for (int i = 0; i < laneCount; ++i) {
3508 dst.SetUint(vform, i, result[ (2 * i) + 1]);
3509 }
3510 return dst;
3511 }
3512
3513
3514 template <typename T>
FPAdd(T op1,T op2)3515 T Simulator::FPAdd(T op1, T op2) {
3516 T result = FPProcessNaNs(op1, op2);
3517 if (std::isnan(result)) return result;
3518
3519 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3520 // inf + -inf returns the default NaN.
3521 FPProcessException();
3522 return FPDefaultNaN<T>();
3523 } else {
3524 // Other cases should be handled by standard arithmetic.
3525 return op1 + op2;
3526 }
3527 }
3528
3529
3530 template <typename T>
FPSub(T op1,T op2)3531 T Simulator::FPSub(T op1, T op2) {
3532 // NaNs should be handled elsewhere.
3533 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3534
3535 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3536 // inf - inf returns the default NaN.
3537 FPProcessException();
3538 return FPDefaultNaN<T>();
3539 } else {
3540 // Other cases should be handled by standard arithmetic.
3541 return op1 - op2;
3542 }
3543 }
3544
3545
3546 template <typename T>
FPMul(T op1,T op2)3547 T Simulator::FPMul(T op1, T op2) {
3548 // NaNs should be handled elsewhere.
3549 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3550
3551 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3552 // inf * 0.0 returns the default NaN.
3553 FPProcessException();
3554 return FPDefaultNaN<T>();
3555 } else {
3556 // Other cases should be handled by standard arithmetic.
3557 return op1 * op2;
3558 }
3559 }
3560
3561
3562 template<typename T>
FPMulx(T op1,T op2)3563 T Simulator::FPMulx(T op1, T op2) {
3564 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3565 // inf * 0.0 returns +/-2.0.
3566 T two = 2.0;
3567 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3568 }
3569 return FPMul(op1, op2);
3570 }
3571
3572
3573 template<typename T>
FPMulAdd(T a,T op1,T op2)3574 T Simulator::FPMulAdd(T a, T op1, T op2) {
3575 T result = FPProcessNaNs3(a, op1, op2);
3576
3577 T sign_a = copysign(1.0, a);
3578 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3579 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
3580 bool operation_generates_nan =
3581 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
3582 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
3583 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3584
3585 if (std::isnan(result)) {
3586 // Generated NaNs override quiet NaNs propagated from a.
3587 if (operation_generates_nan && IsQuietNaN(a)) {
3588 FPProcessException();
3589 return FPDefaultNaN<T>();
3590 } else {
3591 return result;
3592 }
3593 }
3594
3595 // If the operation would produce a NaN, return the default NaN.
3596 if (operation_generates_nan) {
3597 FPProcessException();
3598 return FPDefaultNaN<T>();
3599 }
3600
3601 // Work around broken fma implementations for exact zero results: The sign of
3602 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3603 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3604 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3605 }
3606
3607 result = FusedMultiplyAdd(op1, op2, a);
3608 VIXL_ASSERT(!std::isnan(result));
3609
3610 // Work around broken fma implementations for rounded zero results: If a is
3611 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3612 if ((a == 0.0) && (result == 0.0)) {
3613 return copysign(0.0, sign_prod);
3614 }
3615
3616 return result;
3617 }
3618
3619
3620 template <typename T>
FPDiv(T op1,T op2)3621 T Simulator::FPDiv(T op1, T op2) {
3622 // NaNs should be handled elsewhere.
3623 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3624
3625 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3626 // inf / inf and 0.0 / 0.0 return the default NaN.
3627 FPProcessException();
3628 return FPDefaultNaN<T>();
3629 } else {
3630 if (op2 == 0.0) FPProcessException();
3631
3632 // Other cases should be handled by standard arithmetic.
3633 return op1 / op2;
3634 }
3635 }
3636
3637
3638 template <typename T>
FPSqrt(T op)3639 T Simulator::FPSqrt(T op) {
3640 if (std::isnan(op)) {
3641 return FPProcessNaN(op);
3642 } else if (op < 0.0) {
3643 FPProcessException();
3644 return FPDefaultNaN<T>();
3645 } else {
3646 return sqrt(op);
3647 }
3648 }
3649
3650
3651 template <typename T>
FPMax(T a,T b)3652 T Simulator::FPMax(T a, T b) {
3653 T result = FPProcessNaNs(a, b);
3654 if (std::isnan(result)) return result;
3655
3656 if ((a == 0.0) && (b == 0.0) &&
3657 (copysign(1.0, a) != copysign(1.0, b))) {
3658 // a and b are zero, and the sign differs: return +0.0.
3659 return 0.0;
3660 } else {
3661 return (a > b) ? a : b;
3662 }
3663 }
3664
3665
3666 template <typename T>
FPMaxNM(T a,T b)3667 T Simulator::FPMaxNM(T a, T b) {
3668 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3669 a = kFP64NegativeInfinity;
3670 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3671 b = kFP64NegativeInfinity;
3672 }
3673
3674 T result = FPProcessNaNs(a, b);
3675 return std::isnan(result) ? result : FPMax(a, b);
3676 }
3677
3678
3679 template <typename T>
FPMin(T a,T b)3680 T Simulator::FPMin(T a, T b) {
3681 T result = FPProcessNaNs(a, b);
3682 if (std::isnan(result)) return result;
3683
3684 if ((a == 0.0) && (b == 0.0) &&
3685 (copysign(1.0, a) != copysign(1.0, b))) {
3686 // a and b are zero, and the sign differs: return -0.0.
3687 return -0.0;
3688 } else {
3689 return (a < b) ? a : b;
3690 }
3691 }
3692
3693
3694 template <typename T>
FPMinNM(T a,T b)3695 T Simulator::FPMinNM(T a, T b) {
3696 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3697 a = kFP64PositiveInfinity;
3698 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3699 b = kFP64PositiveInfinity;
3700 }
3701
3702 T result = FPProcessNaNs(a, b);
3703 return std::isnan(result) ? result : FPMin(a, b);
3704 }
3705
3706
3707 template <typename T>
FPRecipStepFused(T op1,T op2)3708 T Simulator::FPRecipStepFused(T op1, T op2) {
3709 const T two = 2.0;
3710 if ((std::isinf(op1) && (op2 == 0.0))
3711 || ((op1 == 0.0) && (std::isinf(op2)))) {
3712 return two;
3713 } else if (std::isinf(op1) || std::isinf(op2)) {
3714 // Return +inf if signs match, otherwise -inf.
3715 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3716 : kFP64NegativeInfinity;
3717 } else {
3718 return FusedMultiplyAdd(op1, op2, two);
3719 }
3720 }
3721
3722
3723 template <typename T>
FPRSqrtStepFused(T op1,T op2)3724 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3725 const T one_point_five = 1.5;
3726 const T two = 2.0;
3727
3728 if ((std::isinf(op1) && (op2 == 0.0))
3729 || ((op1 == 0.0) && (std::isinf(op2)))) {
3730 return one_point_five;
3731 } else if (std::isinf(op1) || std::isinf(op2)) {
3732 // Return +inf if signs match, otherwise -inf.
3733 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3734 : kFP64NegativeInfinity;
3735 } else {
3736 // The multiply-add-halve operation must be fully fused, so avoid interim
3737 // rounding by checking which operand can be losslessly divided by two
3738 // before doing the multiply-add.
3739 if (std::isnormal(op1 / two)) {
3740 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3741 } else if (std::isnormal(op2 / two)) {
3742 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3743 } else {
3744 // Neither operand is normal after halving: the result is dominated by
3745 // the addition term, so just return that.
3746 return one_point_five;
3747 }
3748 }
3749 }
3750
3751
FPRoundInt(double value,FPRounding round_mode)3752 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3753 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3754 (value == kFP64NegativeInfinity)) {
3755 return value;
3756 } else if (std::isnan(value)) {
3757 return FPProcessNaN(value);
3758 }
3759
3760 double int_result = std::floor(value);
3761 double error = value - int_result;
3762 switch (round_mode) {
3763 case FPTieAway: {
3764 // Take care of correctly handling the range ]-0.5, -0.0], which must
3765 // yield -0.0.
3766 if ((-0.5 < value) && (value < 0.0)) {
3767 int_result = -0.0;
3768
3769 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3770 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3771 // result is positive, round up.
3772 int_result++;
3773 }
3774 break;
3775 }
3776 case FPTieEven: {
3777 // Take care of correctly handling the range [-0.5, -0.0], which must
3778 // yield -0.0.
3779 if ((-0.5 <= value) && (value < 0.0)) {
3780 int_result = -0.0;
3781
3782 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3783 // result is odd, round up.
3784 } else if ((error > 0.5) ||
3785 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3786 int_result++;
3787 }
3788 break;
3789 }
3790 case FPZero: {
3791 // If value>0 then we take floor(value)
3792 // otherwise, ceil(value).
3793 if (value < 0) {
3794 int_result = ceil(value);
3795 }
3796 break;
3797 }
3798 case FPNegativeInfinity: {
3799 // We always use floor(value).
3800 break;
3801 }
3802 case FPPositiveInfinity: {
3803 // Take care of correctly handling the range ]-1.0, -0.0], which must
3804 // yield -0.0.
3805 if ((-1.0 < value) && (value < 0.0)) {
3806 int_result = -0.0;
3807
3808 // If the error is non-zero, round up.
3809 } else if (error > 0.0) {
3810 int_result++;
3811 }
3812 break;
3813 }
3814 default: VIXL_UNIMPLEMENTED();
3815 }
3816 return int_result;
3817 }
3818
3819
FPToInt32(double value,FPRounding rmode)3820 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3821 value = FPRoundInt(value, rmode);
3822 if (value >= kWMaxInt) {
3823 return kWMaxInt;
3824 } else if (value < kWMinInt) {
3825 return kWMinInt;
3826 }
3827 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3828 }
3829
3830
FPToInt64(double value,FPRounding rmode)3831 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3832 value = FPRoundInt(value, rmode);
3833 if (value >= kXMaxInt) {
3834 return kXMaxInt;
3835 } else if (value < kXMinInt) {
3836 return kXMinInt;
3837 }
3838 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3839 }
3840
3841
FPToUInt32(double value,FPRounding rmode)3842 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3843 value = FPRoundInt(value, rmode);
3844 if (value >= kWMaxUInt) {
3845 return kWMaxUInt;
3846 } else if (value < 0.0) {
3847 return 0;
3848 }
3849 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3850 }
3851
3852
FPToUInt64(double value,FPRounding rmode)3853 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3854 value = FPRoundInt(value, rmode);
3855 if (value >= kXMaxUInt) {
3856 return kXMaxUInt;
3857 } else if (value < 0.0) {
3858 return 0;
3859 }
3860 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3861 }
3862
3863
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this defines:
//  * a lane-wise template FN<T> that applies OP to each pair of source lanes,
//    first running FPProcessNaNs when PROCNAN is true and keeping its result
//    if that result is a NaN;
//  * a non-template FN that selects FN<float> or FN<double> from the lane
//    size of `vform`.
// Comments are kept outside the macro body because a `//` comment would
// swallow the line continuations.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                         \
  template <typename T>                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                        \
                               LogicVRegister dst,                        \
                               const LogicVRegister& src1,                \
                               const LogicVRegister& src2) {              \
    dst.ClearForWrite(vform);                                             \
    const int lane_count = LaneCountFromFormat(vform);                    \
    for (int lane = 0; lane < lane_count; ++lane) {                       \
      T lhs = src1.Float<T>(lane);                                        \
      T rhs = src2.Float<T>(lane);                                        \
      T result = (PROCNAN) ? FPProcessNaNs(lhs, rhs) : static_cast<T>(0); \
      if (!std::isnan(result)) {                                          \
        result = OP(lhs, rhs);                                            \
      }                                                                   \
      dst.SetFloat(lane, result);                                         \
    }                                                                     \
    return dst;                                                           \
  }                                                                       \
                                                                          \
  LogicVRegister Simulator::FN(VectorFormat vform,                        \
                               LogicVRegister dst,                        \
                               const LogicVRegister& src1,                \
                               const LogicVRegister& src2) {              \
    if (LaneSizeInBitsFromFormat(vform) != kSRegSize) {                   \
      VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                                 \
    } else {                                                              \
      FN<float>(vform, dst, src1, src2);                                  \
    }                                                                     \
    return dst;                                                           \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3902
3903
3904 LogicVRegister Simulator::fnmul(VectorFormat vform,
3905 LogicVRegister dst,
3906 const LogicVRegister& src1,
3907 const LogicVRegister& src2) {
3908 SimVRegister temp;
3909 LogicVRegister product = fmul(vform, temp, src1, src2);
3910 return fneg(vform, dst, product);
3911 }
3912
3913
3914 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3915 LogicVRegister Simulator::frecps(VectorFormat vform,
3916 LogicVRegister dst,
3917 const LogicVRegister& src1,
3918 const LogicVRegister& src2) {
3919 dst.ClearForWrite(vform);
3920 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3921 T op1 = -src1.Float<T>(i);
3922 T op2 = src2.Float<T>(i);
3923 T result = FPProcessNaNs(op1, op2);
3924 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3925 }
3926 return dst;
3927 }
3928
3929
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3930 LogicVRegister Simulator::frecps(VectorFormat vform,
3931 LogicVRegister dst,
3932 const LogicVRegister& src1,
3933 const LogicVRegister& src2) {
3934 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3935 frecps<float>(vform, dst, src1, src2);
3936 } else {
3937 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3938 frecps<double>(vform, dst, src1, src2);
3939 }
3940 return dst;
3941 }
3942
3943
3944 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3945 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3946 LogicVRegister dst,
3947 const LogicVRegister& src1,
3948 const LogicVRegister& src2) {
3949 dst.ClearForWrite(vform);
3950 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3951 T op1 = -src1.Float<T>(i);
3952 T op2 = src2.Float<T>(i);
3953 T result = FPProcessNaNs(op1, op2);
3954 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3955 }
3956 return dst;
3957 }
3958
3959
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3960 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3961 LogicVRegister dst,
3962 const LogicVRegister& src1,
3963 const LogicVRegister& src2) {
3964 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3965 frsqrts<float>(vform, dst, src1, src2);
3966 } else {
3967 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3968 frsqrts<double>(vform, dst, src1, src2);
3969 }
3970 return dst;
3971 }
3972
3973
3974 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3975 LogicVRegister Simulator::fcmp(VectorFormat vform,
3976 LogicVRegister dst,
3977 const LogicVRegister& src1,
3978 const LogicVRegister& src2,
3979 Condition cond) {
3980 dst.ClearForWrite(vform);
3981 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3982 bool result = false;
3983 T op1 = src1.Float<T>(i);
3984 T op2 = src2.Float<T>(i);
3985 T nan_result = FPProcessNaNs(op1, op2);
3986 if (!std::isnan(nan_result)) {
3987 switch (cond) {
3988 case eq: result = (op1 == op2); break;
3989 case ge: result = (op1 >= op2); break;
3990 case gt: result = (op1 > op2) ; break;
3991 case le: result = (op1 <= op2); break;
3992 case lt: result = (op1 < op2) ; break;
3993 default: VIXL_UNREACHABLE(); break;
3994 }
3995 }
3996 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3997 }
3998 return dst;
3999 }
4000
4001
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4002 LogicVRegister Simulator::fcmp(VectorFormat vform,
4003 LogicVRegister dst,
4004 const LogicVRegister& src1,
4005 const LogicVRegister& src2,
4006 Condition cond) {
4007 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4008 fcmp<float>(vform, dst, src1, src2, cond);
4009 } else {
4010 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4011 fcmp<double>(vform, dst, src1, src2, cond);
4012 }
4013 return dst;
4014 }
4015
4016
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4017 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4018 LogicVRegister dst,
4019 const LogicVRegister& src,
4020 Condition cond) {
4021 SimVRegister temp;
4022 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4023 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
4024 fcmp<float>(vform, dst, src, zero_reg, cond);
4025 } else {
4026 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4027 LogicVRegister zero_reg = dup_immediate(vform, temp,
4028 double_to_rawbits(0.0));
4029 fcmp<double>(vform, dst, src, zero_reg, cond);
4030 }
4031 return dst;
4032 }
4033
4034
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4035 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4036 LogicVRegister dst,
4037 const LogicVRegister& src1,
4038 const LogicVRegister& src2,
4039 Condition cond) {
4040 SimVRegister temp1, temp2;
4041 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4042 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4043 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4044 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4045 } else {
4046 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4047 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4048 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4049 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4050 }
4051 return dst;
4052 }
4053
4054
4055 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4056 LogicVRegister Simulator::fmla(VectorFormat vform,
4057 LogicVRegister dst,
4058 const LogicVRegister& src1,
4059 const LogicVRegister& src2) {
4060 dst.ClearForWrite(vform);
4061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4062 T op1 = src1.Float<T>(i);
4063 T op2 = src2.Float<T>(i);
4064 T acc = dst.Float<T>(i);
4065 T result = FPMulAdd(acc, op1, op2);
4066 dst.SetFloat(i, result);
4067 }
4068 return dst;
4069 }
4070
4071
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4072 LogicVRegister Simulator::fmla(VectorFormat vform,
4073 LogicVRegister dst,
4074 const LogicVRegister& src1,
4075 const LogicVRegister& src2) {
4076 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4077 fmla<float>(vform, dst, src1, src2);
4078 } else {
4079 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4080 fmla<double>(vform, dst, src1, src2);
4081 }
4082 return dst;
4083 }
4084
4085
4086 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4087 LogicVRegister Simulator::fmls(VectorFormat vform,
4088 LogicVRegister dst,
4089 const LogicVRegister& src1,
4090 const LogicVRegister& src2) {
4091 dst.ClearForWrite(vform);
4092 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4093 T op1 = -src1.Float<T>(i);
4094 T op2 = src2.Float<T>(i);
4095 T acc = dst.Float<T>(i);
4096 T result = FPMulAdd(acc, op1, op2);
4097 dst.SetFloat(i, result);
4098 }
4099 return dst;
4100 }
4101
4102
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4103 LogicVRegister Simulator::fmls(VectorFormat vform,
4104 LogicVRegister dst,
4105 const LogicVRegister& src1,
4106 const LogicVRegister& src2) {
4107 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4108 fmls<float>(vform, dst, src1, src2);
4109 } else {
4110 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111 fmls<double>(vform, dst, src1, src2);
4112 }
4113 return dst;
4114 }
4115
4116
4117 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4118 LogicVRegister Simulator::fneg(VectorFormat vform,
4119 LogicVRegister dst,
4120 const LogicVRegister& src) {
4121 dst.ClearForWrite(vform);
4122 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4123 T op = src.Float<T>(i);
4124 op = -op;
4125 dst.SetFloat(i, op);
4126 }
4127 return dst;
4128 }
4129
4130
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4131 LogicVRegister Simulator::fneg(VectorFormat vform,
4132 LogicVRegister dst,
4133 const LogicVRegister& src) {
4134 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4135 fneg<float>(vform, dst, src);
4136 } else {
4137 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4138 fneg<double>(vform, dst, src);
4139 }
4140 return dst;
4141 }
4142
4143
4144 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4145 LogicVRegister Simulator::fabs_(VectorFormat vform,
4146 LogicVRegister dst,
4147 const LogicVRegister& src) {
4148 dst.ClearForWrite(vform);
4149 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4150 T op = src.Float<T>(i);
4151 if (copysign(1.0, op) < 0.0) {
4152 op = -op;
4153 }
4154 dst.SetFloat(i, op);
4155 }
4156 return dst;
4157 }
4158
4159
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4160 LogicVRegister Simulator::fabs_(VectorFormat vform,
4161 LogicVRegister dst,
4162 const LogicVRegister& src) {
4163 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4164 fabs_<float>(vform, dst, src);
4165 } else {
4166 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4167 fabs_<double>(vform, dst, src);
4168 }
4169 return dst;
4170 }
4171
4172
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4173 LogicVRegister Simulator::fabd(VectorFormat vform,
4174 LogicVRegister dst,
4175 const LogicVRegister& src1,
4176 const LogicVRegister& src2) {
4177 SimVRegister temp;
4178 fsub(vform, temp, src1, src2);
4179 fabs_(vform, dst, temp);
4180 return dst;
4181 }
4182
4183
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4184 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4185 LogicVRegister dst,
4186 const LogicVRegister& src) {
4187 dst.ClearForWrite(vform);
4188 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4189 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4190 float result = FPSqrt(src.Float<float>(i));
4191 dst.SetFloat(i, result);
4192 }
4193 } else {
4194 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4195 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4196 double result = FPSqrt(src.Float<double>(i));
4197 dst.SetFloat(i, result);
4198 }
4199 }
4200 return dst;
4201 }
4202
4203
// For every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this defines two
// overloads of FNP:
//  * the vector form, which builds two scratch registers with uzp1 and uzp2
//    from the sources and applies FN to them;
//  * the scalar form, which applies OP to lanes 0 and 1 of `src` and writes
//    the result to lane 0 of dst (float for kFormatS, double for kFormatD).
// Comments are kept outside the macro body because a `//` comment would
// swallow the line continuations.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                       \
                                LogicVRegister dst,                       \
                                const LogicVRegister& src1,               \
                                const LogicVRegister& src2) {             \
    SimVRegister lhs_lanes, rhs_lanes;                                    \
    uzp1(vform, lhs_lanes, src1, src2);                                   \
    uzp2(vform, rhs_lanes, src1, src2);                                   \
    FN(vform, dst, lhs_lanes, rhs_lanes);                                 \
    return dst;                                                           \
  }                                                                       \
                                                                          \
  LogicVRegister Simulator::FNP(VectorFormat vform,                       \
                                LogicVRegister dst,                       \
                                const LogicVRegister& src) {              \
    if (vform != kFormatS) {                                              \
      VIXL_ASSERT(vform == kFormatD);                                     \
      double combined = OP(src.Float<double>(0), src.Float<double>(1));   \
      dst.SetFloat(0, combined);                                          \
    } else {                                                              \
      float combined = OP(src.Float<float>(0), src.Float<float>(1));      \
      dst.SetFloat(0, combined);                                          \
    }                                                                     \
    dst.ClearForWrite(vform);                                             \
    return dst;                                                           \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4232
4233
4234 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4235 LogicVRegister dst,
4236 const LogicVRegister& src,
4237 FPMinMaxOp Op) {
4238 VIXL_ASSERT(vform == kFormat4S);
4239 USE(vform);
4240 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4241 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4242 float result = (this->*Op)(result1, result2);
4243 dst.ClearForWrite(kFormatS);
4244 dst.SetFloat<float>(0, result);
4245 return dst;
4246 }
4247
4248
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4249 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4250 LogicVRegister dst,
4251 const LogicVRegister& src) {
4252 return fminmaxv(vform, dst, src, &Simulator::FPMax);
4253 }
4254
4255
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4256 LogicVRegister Simulator::fminv(VectorFormat vform,
4257 LogicVRegister dst,
4258 const LogicVRegister& src) {
4259 return fminmaxv(vform, dst, src, &Simulator::FPMin);
4260 }
4261
4262
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4263 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4264 LogicVRegister dst,
4265 const LogicVRegister& src) {
4266 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4267 }
4268
4269
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4270 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4271 LogicVRegister dst,
4272 const LogicVRegister& src) {
4273 return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4274 }
4275
4276
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4277 LogicVRegister Simulator::fmul(VectorFormat vform,
4278 LogicVRegister dst,
4279 const LogicVRegister& src1,
4280 const LogicVRegister& src2,
4281 int index) {
4282 dst.ClearForWrite(vform);
4283 SimVRegister temp;
4284 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4285 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4286 fmul<float>(vform, dst, src1, index_reg);
4287
4288 } else {
4289 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4290 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4291 fmul<double>(vform, dst, src1, index_reg);
4292 }
4293 return dst;
4294 }
4295
4296
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4297 LogicVRegister Simulator::fmla(VectorFormat vform,
4298 LogicVRegister dst,
4299 const LogicVRegister& src1,
4300 const LogicVRegister& src2,
4301 int index) {
4302 dst.ClearForWrite(vform);
4303 SimVRegister temp;
4304 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4305 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4306 fmla<float>(vform, dst, src1, index_reg);
4307
4308 } else {
4309 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4310 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4311 fmla<double>(vform, dst, src1, index_reg);
4312 }
4313 return dst;
4314 }
4315
4316
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4317 LogicVRegister Simulator::fmls(VectorFormat vform,
4318 LogicVRegister dst,
4319 const LogicVRegister& src1,
4320 const LogicVRegister& src2,
4321 int index) {
4322 dst.ClearForWrite(vform);
4323 SimVRegister temp;
4324 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4325 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4326 fmls<float>(vform, dst, src1, index_reg);
4327
4328 } else {
4329 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4330 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4331 fmls<double>(vform, dst, src1, index_reg);
4332 }
4333 return dst;
4334 }
4335
4336
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4337 LogicVRegister Simulator::fmulx(VectorFormat vform,
4338 LogicVRegister dst,
4339 const LogicVRegister& src1,
4340 const LogicVRegister& src2,
4341 int index) {
4342 dst.ClearForWrite(vform);
4343 SimVRegister temp;
4344 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4345 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4346 fmulx<float>(vform, dst, src1, index_reg);
4347
4348 } else {
4349 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4350 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4351 fmulx<double>(vform, dst, src1, index_reg);
4352 }
4353 return dst;
4354 }
4355
4356
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4357 LogicVRegister Simulator::frint(VectorFormat vform,
4358 LogicVRegister dst,
4359 const LogicVRegister& src,
4360 FPRounding rounding_mode,
4361 bool inexact_exception) {
4362 dst.ClearForWrite(vform);
4363 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4364 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4365 float input = src.Float<float>(i);
4366 float rounded = FPRoundInt(input, rounding_mode);
4367 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4368 FPProcessException();
4369 }
4370 dst.SetFloat<float>(i, rounded);
4371 }
4372 } else {
4373 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4374 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4375 double input = src.Float<double>(i);
4376 double rounded = FPRoundInt(input, rounding_mode);
4377 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4378 FPProcessException();
4379 }
4380 dst.SetFloat<double>(i, rounded);
4381 }
4382 }
4383 return dst;
4384 }
4385
4386
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4387 LogicVRegister Simulator::fcvts(VectorFormat vform,
4388 LogicVRegister dst,
4389 const LogicVRegister& src,
4390 FPRounding rounding_mode,
4391 int fbits) {
4392 dst.ClearForWrite(vform);
4393 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4394 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4395 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4396 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4397 }
4398 } else {
4399 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4400 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4401 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4402 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4403 }
4404 }
4405 return dst;
4406 }
4407
4408
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4409 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4410 LogicVRegister dst,
4411 const LogicVRegister& src,
4412 FPRounding rounding_mode,
4413 int fbits) {
4414 dst.ClearForWrite(vform);
4415 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4416 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4417 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4418 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4419 }
4420 } else {
4421 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4422 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4423 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4424 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4425 }
4426 }
4427 return dst;
4428 }
4429
4430
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4431 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4432 LogicVRegister dst,
4433 const LogicVRegister& src) {
4434 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4435 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4436 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4437 }
4438 } else {
4439 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4440 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4441 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4442 }
4443 }
4444 return dst;
4445 }
4446
4447
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4448 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4449 LogicVRegister dst,
4450 const LogicVRegister& src) {
4451 int lane_count = LaneCountFromFormat(vform);
4452 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4453 for (int i = 0; i < lane_count; i++) {
4454 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4455 }
4456 } else {
4457 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4458 for (int i = 0; i < lane_count; i++) {
4459 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4460 }
4461 }
4462 return dst;
4463 }
4464
4465
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4466 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4467 LogicVRegister dst,
4468 const LogicVRegister& src) {
4469 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4470 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4471 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4472 }
4473 } else {
4474 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4475 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4476 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4477 }
4478 }
4479 return dst;
4480 }
4481
4482
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4483 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4484 LogicVRegister dst,
4485 const LogicVRegister& src) {
4486 int lane_count = LaneCountFromFormat(vform) / 2;
4487 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4488 for (int i = lane_count - 1; i >= 0; i--) {
4489 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4490 }
4491 } else {
4492 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4493 for (int i = lane_count - 1; i >= 0; i--) {
4494 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4495 }
4496 }
4497 return dst;
4498 }
4499
4500
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4501 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4502 LogicVRegister dst,
4503 const LogicVRegister& src) {
4504 dst.ClearForWrite(vform);
4505 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4506 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4507 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4508 }
4509 return dst;
4510 }
4511
4512
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4513 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4514 LogicVRegister dst,
4515 const LogicVRegister& src) {
4516 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4517 int lane_count = LaneCountFromFormat(vform) / 2;
4518 for (int i = lane_count - 1; i >= 0; i--) {
4519 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4520 }
4521 return dst;
4522 }
4523
4524
4525 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4526 double Simulator::recip_sqrt_estimate(double a) {
4527 int q0, q1, s;
4528 double r;
4529 if (a < 0.5) {
4530 q0 = static_cast<int>(a * 512.0);
4531 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4532 } else {
4533 q1 = static_cast<int>(a * 256.0);
4534 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4535 }
4536 s = static_cast<int>(256.0 * r + 0.5);
4537 return static_cast<double>(s) / 256.0;
4538 }
4539
4540
Bits(uint64_t val,int start_bit,int end_bit)4541 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4542 return unsigned_bitextract_64(start_bit, end_bit, val);
4543 }
4544
4545
4546 template <typename T>
FPRecipSqrtEstimate(T op)4547 T Simulator::FPRecipSqrtEstimate(T op) {
4548 if (std::isnan(op)) {
4549 return FPProcessNaN(op);
4550 } else if (op == 0.0) {
4551 if (copysign(1.0, op) < 0.0) {
4552 return kFP64NegativeInfinity;
4553 } else {
4554 return kFP64PositiveInfinity;
4555 }
4556 } else if (copysign(1.0, op) < 0.0) {
4557 FPProcessException();
4558 return FPDefaultNaN<T>();
4559 } else if (std::isinf(op)) {
4560 return 0.0;
4561 } else {
4562 uint64_t fraction;
4563 int exp, result_exp;
4564
4565 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4566 exp = float_exp(op);
4567 fraction = float_mantissa(op);
4568 fraction <<= 29;
4569 } else {
4570 exp = double_exp(op);
4571 fraction = double_mantissa(op);
4572 }
4573
4574 if (exp == 0) {
4575 while (Bits(fraction, 51, 51) == 0) {
4576 fraction = Bits(fraction, 50, 0) << 1;
4577 exp -= 1;
4578 }
4579 fraction = Bits(fraction, 50, 0) << 1;
4580 }
4581
4582 double scaled;
4583 if (Bits(exp, 0, 0) == 0) {
4584 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4585 } else {
4586 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4587 }
4588
4589 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4590 result_exp = (380 - exp) / 2;
4591 } else {
4592 result_exp = (3068 - exp) / 2;
4593 }
4594
4595 uint64_t estimate = double_to_rawbits(recip_sqrt_estimate(scaled));
4596
4597 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4598 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4599 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4600 return float_pack(0, exp_bits, est_bits);
4601 } else {
4602 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4603 }
4604 }
4605 }
4606
4607
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4608 LogicVRegister Simulator::frsqrte(VectorFormat vform,
4609 LogicVRegister dst,
4610 const LogicVRegister& src) {
4611 dst.ClearForWrite(vform);
4612 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4613 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4614 float input = src.Float<float>(i);
4615 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4616 }
4617 } else {
4618 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4619 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4620 double input = src.Float<double>(i);
4621 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4622 }
4623 }
4624 return dst;
4625 }
4626
4627 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4628 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4629 uint32_t sign;
4630
4631 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4632 sign = float_sign(op);
4633 } else {
4634 sign = double_sign(op);
4635 }
4636
4637 if (std::isnan(op)) {
4638 return FPProcessNaN(op);
4639 } else if (std::isinf(op)) {
4640 return (sign == 1) ? -0.0 : 0.0;
4641 } else if (op == 0.0) {
4642 FPProcessException(); // FPExc_DivideByZero exception.
4643 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4644 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
4645 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4646 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
4647 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4648 bool overflow_to_inf = false;
4649 switch (rounding) {
4650 case FPTieEven: overflow_to_inf = true; break;
4651 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
4652 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
4653 case FPZero: overflow_to_inf = false; break;
4654 default: break;
4655 }
4656 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4657 if (overflow_to_inf) {
4658 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4659 } else {
4660 // Return FPMaxNormal(sign).
4661 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4662 return float_pack(sign, 0xfe, 0x07fffff);
4663 } else {
4664 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
4665 }
4666 }
4667 } else {
4668 uint64_t fraction;
4669 int exp, result_exp;
4670 uint32_t sign;
4671
4672 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4673 sign = float_sign(op);
4674 exp = float_exp(op);
4675 fraction = float_mantissa(op);
4676 fraction <<= 29;
4677 } else {
4678 sign = double_sign(op);
4679 exp = double_exp(op);
4680 fraction = double_mantissa(op);
4681 }
4682
4683 if (exp == 0) {
4684 if (Bits(fraction, 51, 51) == 0) {
4685 exp -= 1;
4686 fraction = Bits(fraction, 49, 0) << 2;
4687 } else {
4688 fraction = Bits(fraction, 50, 0) << 1;
4689 }
4690 }
4691
4692 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4693
4694 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4695 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
4696 } else {
4697 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
4698 }
4699
4700 double estimate = recip_estimate(scaled);
4701
4702 fraction = double_mantissa(estimate);
4703 if (result_exp == 0) {
4704 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4705 } else if (result_exp == -1) {
4706 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4707 result_exp = 0;
4708 }
4709 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4710 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4711 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4712 return float_pack(sign, exp_bits, frac_bits);
4713 } else {
4714 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4715 }
4716 }
4717 }
4718
4719
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4720 LogicVRegister Simulator::frecpe(VectorFormat vform,
4721 LogicVRegister dst,
4722 const LogicVRegister& src,
4723 FPRounding round) {
4724 dst.ClearForWrite(vform);
4725 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4726 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4727 float input = src.Float<float>(i);
4728 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4729 }
4730 } else {
4731 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4732 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4733 double input = src.Float<double>(i);
4734 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4735 }
4736 }
4737 return dst;
4738 }
4739
4740
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4741 LogicVRegister Simulator::ursqrte(VectorFormat vform,
4742 LogicVRegister dst,
4743 const LogicVRegister& src) {
4744 dst.ClearForWrite(vform);
4745 uint64_t operand;
4746 uint32_t result;
4747 double dp_operand, dp_result;
4748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4749 operand = src.Uint(vform, i);
4750 if (operand <= 0x3FFFFFFF) {
4751 result = 0xFFFFFFFF;
4752 } else {
4753 dp_operand = operand * std::pow(2.0, -32);
4754 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4755 result = static_cast<uint32_t>(dp_result);
4756 }
4757 dst.SetUint(vform, i, result);
4758 }
4759 return dst;
4760 }
4761
4762
4763 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4764 double Simulator::recip_estimate(double a) {
4765 int q, s;
4766 double r;
4767 q = static_cast<int>(a * 512.0);
4768 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4769 s = static_cast<int>(256.0 * r + 0.5);
4770 return static_cast<double>(s) / 256.0;
4771 }
4772
4773
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4774 LogicVRegister Simulator::urecpe(VectorFormat vform,
4775 LogicVRegister dst,
4776 const LogicVRegister& src) {
4777 dst.ClearForWrite(vform);
4778 uint64_t operand;
4779 uint32_t result;
4780 double dp_operand, dp_result;
4781 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4782 operand = src.Uint(vform, i);
4783 if (operand <= 0x7FFFFFFF) {
4784 result = 0xFFFFFFFF;
4785 } else {
4786 dp_operand = operand * std::pow(2.0, -32);
4787 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4788 result = static_cast<uint32_t>(dp_result);
4789 }
4790 dst.SetUint(vform, i, result);
4791 }
4792 return dst;
4793 }
4794
4795 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4796 LogicVRegister Simulator::frecpx(VectorFormat vform,
4797 LogicVRegister dst,
4798 const LogicVRegister& src) {
4799 dst.ClearForWrite(vform);
4800 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4801 T op = src.Float<T>(i);
4802 T result;
4803 if (std::isnan(op)) {
4804 result = FPProcessNaN(op);
4805 } else {
4806 int exp;
4807 uint32_t sign;
4808 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4809 sign = float_sign(op);
4810 exp = float_exp(op);
4811 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4812 result = float_pack(sign, exp, 0);
4813 } else {
4814 sign = double_sign(op);
4815 exp = double_exp(op);
4816 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4817 result = double_pack(sign, exp, 0);
4818 }
4819 }
4820 dst.SetFloat(i, result);
4821 }
4822 return dst;
4823 }
4824
4825
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4826 LogicVRegister Simulator::frecpx(VectorFormat vform,
4827 LogicVRegister dst,
4828 const LogicVRegister& src) {
4829 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4830 frecpx<float>(vform, dst, src);
4831 } else {
4832 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4833 frecpx<double>(vform, dst, src);
4834 }
4835 return dst;
4836 }
4837
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4838 LogicVRegister Simulator::scvtf(VectorFormat vform,
4839 LogicVRegister dst,
4840 const LogicVRegister& src,
4841 int fbits,
4842 FPRounding round) {
4843 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4844 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4845 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4846 dst.SetFloat<float>(i, result);
4847 } else {
4848 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4849 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4850 dst.SetFloat<double>(i, result);
4851 }
4852 }
4853 return dst;
4854 }
4855
4856
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4857 LogicVRegister Simulator::ucvtf(VectorFormat vform,
4858 LogicVRegister dst,
4859 const LogicVRegister& src,
4860 int fbits,
4861 FPRounding round) {
4862 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4863 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4864 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4865 dst.SetFloat<float>(i, result);
4866 } else {
4867 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4868 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4869 dst.SetFloat<double>(i, result);
4870 }
4871 }
4872 return dst;
4873 }
4874
4875
4876 } // namespace vixl
4877
4878 #endif // JS_SIMULATOR_ARM64
4879