1 // Copyright 2015, ARM Limited
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cmath>
28
29 #include "jit/arm64/vixl/Simulator-vixl.h"
30
31 namespace vixl {
32
FPDefaultNaN()33 template<> double Simulator::FPDefaultNaN<double>() {
34 return kFP64DefaultNaN;
35 }
36
37
FPDefaultNaN()38 template<> float Simulator::FPDefaultNaN<float>() {
39 return kFP32DefaultNaN;
40 }
41
42 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)43 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
44 uint64_t mantissa, FPRounding round_mode) {
45 int64_t bits =
46 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
47 exponent,
48 mantissa,
49 round_mode);
50 return rawbits_to_double(bits);
51 }
52
53
54 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)55 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
56 uint64_t mantissa, FPRounding round_mode) {
57 int32_t bits =
58 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
59 exponent,
60 mantissa,
61 round_mode);
62 return rawbits_to_float(bits);
63 }
64
65
66 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)67 static inline float16 FPRoundToFloat16(int64_t sign,
68 int64_t exponent,
69 uint64_t mantissa,
70 FPRounding round_mode) {
71 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
72 sign, exponent, mantissa, round_mode);
73 }
74
75
FixedToDouble(int64_t src,int fbits,FPRounding round)76 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
77 if (src >= 0) {
78 return UFixedToDouble(src, fbits, round);
79 } else {
80 // This works for all negative values, including INT64_MIN.
81 return -UFixedToDouble(-src, fbits, round);
82 }
83 }
84
85
UFixedToDouble(uint64_t src,int fbits,FPRounding round)86 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
87 // An input of 0 is a special case because the result is effectively
88 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
89 if (src == 0) {
90 return 0.0;
91 }
92
93 // Calculate the exponent. The highest significant bit will have the value
94 // 2^exponent.
95 const int highest_significant_bit = 63 - CountLeadingZeros(src);
96 const int64_t exponent = highest_significant_bit - fbits;
97
98 return FPRoundToDouble(0, exponent, src, round);
99 }
100
101
FixedToFloat(int64_t src,int fbits,FPRounding round)102 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
103 if (src >= 0) {
104 return UFixedToFloat(src, fbits, round);
105 } else {
106 // This works for all negative values, including INT64_MIN.
107 return -UFixedToFloat(-src, fbits, round);
108 }
109 }
110
111
UFixedToFloat(uint64_t src,int fbits,FPRounding round)112 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
113 // An input of 0 is a special case because the result is effectively
114 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
115 if (src == 0) {
116 return 0.0f;
117 }
118
119 // Calculate the exponent. The highest significant bit will have the value
120 // 2^exponent.
121 const int highest_significant_bit = 63 - CountLeadingZeros(src);
122 const int32_t exponent = highest_significant_bit - fbits;
123
124 return FPRoundToFloat(0, exponent, src, round);
125 }
126
127
FPToDouble(float value)128 double Simulator::FPToDouble(float value) {
129 switch (std::fpclassify(value)) {
130 case FP_NAN: {
131 if (IsSignallingNaN(value)) {
132 FPProcessException();
133 }
134 if (DN()) return kFP64DefaultNaN;
135
136 // Convert NaNs as the processor would:
137 // - The sign is propagated.
138 // - The payload (mantissa) is transferred entirely, except that the top
139 // bit is forced to '1', making the result a quiet NaN. The unused
140 // (low-order) payload bits are set to 0.
141 uint32_t raw = float_to_rawbits(value);
142
143 uint64_t sign = raw >> 31;
144 uint64_t exponent = (1 << 11) - 1;
145 uint64_t payload = unsigned_bitextract_64(21, 0, raw);
146 payload <<= (52 - 23); // The unused low-order bits should be 0.
147 payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
148
149 return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
150 }
151
152 case FP_ZERO:
153 case FP_NORMAL:
154 case FP_SUBNORMAL:
155 case FP_INFINITE: {
156 // All other inputs are preserved in a standard cast, because every value
157 // representable using an IEEE-754 float is also representable using an
158 // IEEE-754 double.
159 return static_cast<double>(value);
160 }
161 }
162
163 VIXL_UNREACHABLE();
164 return static_cast<double>(value);
165 }
166
167
FPToFloat(float16 value)168 float Simulator::FPToFloat(float16 value) {
169 uint32_t sign = value >> 15;
170 uint32_t exponent = unsigned_bitextract_32(
171 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
172 value);
173 uint32_t mantissa = unsigned_bitextract_32(
174 kFloat16MantissaBits - 1, 0, value);
175
176 switch (float16classify(value)) {
177 case FP_ZERO:
178 return (sign == 0) ? 0.0f : -0.0f;
179
180 case FP_INFINITE:
181 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
182
183 case FP_SUBNORMAL: {
184 // Calculate shift required to put mantissa into the most-significant bits
185 // of the destination mantissa.
186 int shift = CountLeadingZeros(mantissa << (32 - 10));
187
188 // Shift mantissa and discard implicit '1'.
189 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
190 mantissa &= (1 << kFloatMantissaBits) - 1;
191
192 // Adjust the exponent for the shift applied, and rebias.
193 exponent = exponent - shift + (-15 + 127);
194 break;
195 }
196
197 case FP_NAN:
198 if (IsSignallingNaN(value)) {
199 FPProcessException();
200 }
201 if (DN()) return kFP32DefaultNaN;
202
203 // Convert NaNs as the processor would:
204 // - The sign is propagated.
205 // - The payload (mantissa) is transferred entirely, except that the top
206 // bit is forced to '1', making the result a quiet NaN. The unused
207 // (low-order) payload bits are set to 0.
208 exponent = (1 << kFloatExponentBits) - 1;
209
210 // Increase bits in mantissa, making low-order bits 0.
211 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
212 mantissa |= 1 << 22; // Force a quiet NaN.
213 break;
214
215 case FP_NORMAL:
216 // Increase bits in mantissa, making low-order bits 0.
217 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
218
219 // Change exponent bias.
220 exponent += (-15 + 127);
221 break;
222
223 default: VIXL_UNREACHABLE();
224 }
225 return rawbits_to_float((sign << 31) |
226 (exponent << kFloatMantissaBits) |
227 mantissa);
228 }
229
230
FPToFloat16(float value,FPRounding round_mode)231 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
232 // Only the FPTieEven rounding mode is implemented.
233 VIXL_ASSERT(round_mode == FPTieEven);
234 USE(round_mode);
235
236 uint32_t raw = float_to_rawbits(value);
237 int32_t sign = raw >> 31;
238 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
239 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
240
241 switch (std::fpclassify(value)) {
242 case FP_NAN: {
243 if (IsSignallingNaN(value)) {
244 FPProcessException();
245 }
246 if (DN()) return kFP16DefaultNaN;
247
248 // Convert NaNs as the processor would:
249 // - The sign is propagated.
250 // - The payload (mantissa) is transferred as much as possible, except
251 // that the top bit is forced to '1', making the result a quiet NaN.
252 float16 result = (sign == 0) ? kFP16PositiveInfinity
253 : kFP16NegativeInfinity;
254 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
255 result |= (1 << 9); // Force a quiet NaN;
256 return result;
257 }
258
259 case FP_ZERO:
260 return (sign == 0) ? 0 : 0x8000;
261
262 case FP_INFINITE:
263 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
264
265 case FP_NORMAL:
266 case FP_SUBNORMAL: {
267 // Convert float-to-half as the processor would, assuming that FPCR.FZ
268 // (flush-to-zero) is not set.
269
270 // Add the implicit '1' bit to the mantissa.
271 mantissa += (1 << 23);
272 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
273 }
274 }
275
276 VIXL_UNREACHABLE();
277 return 0;
278 }
279
280
FPToFloat16(double value,FPRounding round_mode)281 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
282 // Only the FPTieEven rounding mode is implemented.
283 VIXL_ASSERT(round_mode == FPTieEven);
284 USE(round_mode);
285
286 uint64_t raw = double_to_rawbits(value);
287 int32_t sign = raw >> 63;
288 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
289 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
290
291 switch (std::fpclassify(value)) {
292 case FP_NAN: {
293 if (IsSignallingNaN(value)) {
294 FPProcessException();
295 }
296 if (DN()) return kFP16DefaultNaN;
297
298 // Convert NaNs as the processor would:
299 // - The sign is propagated.
300 // - The payload (mantissa) is transferred as much as possible, except
301 // that the top bit is forced to '1', making the result a quiet NaN.
302 float16 result = (sign == 0) ? kFP16PositiveInfinity
303 : kFP16NegativeInfinity;
304 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
305 result |= (1 << 9); // Force a quiet NaN;
306 return result;
307 }
308
309 case FP_ZERO:
310 return (sign == 0) ? 0 : 0x8000;
311
312 case FP_INFINITE:
313 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
314
315 case FP_NORMAL:
316 case FP_SUBNORMAL: {
317 // Convert double-to-half as the processor would, assuming that FPCR.FZ
318 // (flush-to-zero) is not set.
319
320 // Add the implicit '1' bit to the mantissa.
321 mantissa += (UINT64_C(1) << 52);
322 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
323 }
324 }
325
326 VIXL_UNREACHABLE();
327 return 0;
328 }
329
330
FPToFloat(double value,FPRounding round_mode)331 float Simulator::FPToFloat(double value, FPRounding round_mode) {
332 // Only the FPTieEven rounding mode is implemented.
333 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
334 USE(round_mode);
335
336 switch (std::fpclassify(value)) {
337 case FP_NAN: {
338 if (IsSignallingNaN(value)) {
339 FPProcessException();
340 }
341 if (DN()) return kFP32DefaultNaN;
342
343 // Convert NaNs as the processor would:
344 // - The sign is propagated.
345 // - The payload (mantissa) is transferred as much as possible, except
346 // that the top bit is forced to '1', making the result a quiet NaN.
347 uint64_t raw = double_to_rawbits(value);
348
349 uint32_t sign = raw >> 63;
350 uint32_t exponent = (1 << 8) - 1;
351 uint32_t payload =
352 static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw));
353 payload |= (1 << 22); // Force a quiet NaN.
354
355 return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
356 }
357
358 case FP_ZERO:
359 case FP_INFINITE: {
360 // In a C++ cast, any value representable in the target type will be
361 // unchanged. This is always the case for +/-0.0 and infinities.
362 return static_cast<float>(value);
363 }
364
365 case FP_NORMAL:
366 case FP_SUBNORMAL: {
367 // Convert double-to-float as the processor would, assuming that FPCR.FZ
368 // (flush-to-zero) is not set.
369 uint64_t raw = double_to_rawbits(value);
370 // Extract the IEEE-754 double components.
371 uint32_t sign = raw >> 63;
372 // Extract the exponent and remove the IEEE-754 encoding bias.
373 int32_t exponent =
374 static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023;
375 // Extract the mantissa and add the implicit '1' bit.
376 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
377 if (std::fpclassify(value) == FP_NORMAL) {
378 mantissa |= (UINT64_C(1) << 52);
379 }
380 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
381 }
382 }
383
384 VIXL_UNREACHABLE();
385 return value;
386 }
387
388
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)389 void Simulator::ld1(VectorFormat vform,
390 LogicVRegister dst,
391 uint64_t addr) {
392 dst.ClearForWrite(vform);
393 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
394 dst.ReadUintFromMem(vform, i, addr);
395 addr += LaneSizeInBytesFromFormat(vform);
396 }
397 }
398
399
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)400 void Simulator::ld1(VectorFormat vform,
401 LogicVRegister dst,
402 int index,
403 uint64_t addr) {
404 dst.ReadUintFromMem(vform, index, addr);
405 }
406
407
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)408 void Simulator::ld1r(VectorFormat vform,
409 LogicVRegister dst,
410 uint64_t addr) {
411 dst.ClearForWrite(vform);
412 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
413 dst.ReadUintFromMem(vform, i, addr);
414 }
415 }
416
417
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)418 void Simulator::ld2(VectorFormat vform,
419 LogicVRegister dst1,
420 LogicVRegister dst2,
421 uint64_t addr1) {
422 dst1.ClearForWrite(vform);
423 dst2.ClearForWrite(vform);
424 int esize = LaneSizeInBytesFromFormat(vform);
425 uint64_t addr2 = addr1 + esize;
426 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
427 dst1.ReadUintFromMem(vform, i, addr1);
428 dst2.ReadUintFromMem(vform, i, addr2);
429 addr1 += 2 * esize;
430 addr2 += 2 * esize;
431 }
432 }
433
434
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)435 void Simulator::ld2(VectorFormat vform,
436 LogicVRegister dst1,
437 LogicVRegister dst2,
438 int index,
439 uint64_t addr1) {
440 dst1.ClearForWrite(vform);
441 dst2.ClearForWrite(vform);
442 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
443 dst1.ReadUintFromMem(vform, index, addr1);
444 dst2.ReadUintFromMem(vform, index, addr2);
445 }
446
447
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)448 void Simulator::ld2r(VectorFormat vform,
449 LogicVRegister dst1,
450 LogicVRegister dst2,
451 uint64_t addr) {
452 dst1.ClearForWrite(vform);
453 dst2.ClearForWrite(vform);
454 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
455 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
456 dst1.ReadUintFromMem(vform, i, addr);
457 dst2.ReadUintFromMem(vform, i, addr2);
458 }
459 }
460
461
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)462 void Simulator::ld3(VectorFormat vform,
463 LogicVRegister dst1,
464 LogicVRegister dst2,
465 LogicVRegister dst3,
466 uint64_t addr1) {
467 dst1.ClearForWrite(vform);
468 dst2.ClearForWrite(vform);
469 dst3.ClearForWrite(vform);
470 int esize = LaneSizeInBytesFromFormat(vform);
471 uint64_t addr2 = addr1 + esize;
472 uint64_t addr3 = addr2 + esize;
473 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
474 dst1.ReadUintFromMem(vform, i, addr1);
475 dst2.ReadUintFromMem(vform, i, addr2);
476 dst3.ReadUintFromMem(vform, i, addr3);
477 addr1 += 3 * esize;
478 addr2 += 3 * esize;
479 addr3 += 3 * esize;
480 }
481 }
482
483
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)484 void Simulator::ld3(VectorFormat vform,
485 LogicVRegister dst1,
486 LogicVRegister dst2,
487 LogicVRegister dst3,
488 int index,
489 uint64_t addr1) {
490 dst1.ClearForWrite(vform);
491 dst2.ClearForWrite(vform);
492 dst3.ClearForWrite(vform);
493 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
494 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
495 dst1.ReadUintFromMem(vform, index, addr1);
496 dst2.ReadUintFromMem(vform, index, addr2);
497 dst3.ReadUintFromMem(vform, index, addr3);
498 }
499
500
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)501 void Simulator::ld3r(VectorFormat vform,
502 LogicVRegister dst1,
503 LogicVRegister dst2,
504 LogicVRegister dst3,
505 uint64_t addr) {
506 dst1.ClearForWrite(vform);
507 dst2.ClearForWrite(vform);
508 dst3.ClearForWrite(vform);
509 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
510 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
511 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
512 dst1.ReadUintFromMem(vform, i, addr);
513 dst2.ReadUintFromMem(vform, i, addr2);
514 dst3.ReadUintFromMem(vform, i, addr3);
515 }
516 }
517
518
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)519 void Simulator::ld4(VectorFormat vform,
520 LogicVRegister dst1,
521 LogicVRegister dst2,
522 LogicVRegister dst3,
523 LogicVRegister dst4,
524 uint64_t addr1) {
525 dst1.ClearForWrite(vform);
526 dst2.ClearForWrite(vform);
527 dst3.ClearForWrite(vform);
528 dst4.ClearForWrite(vform);
529 int esize = LaneSizeInBytesFromFormat(vform);
530 uint64_t addr2 = addr1 + esize;
531 uint64_t addr3 = addr2 + esize;
532 uint64_t addr4 = addr3 + esize;
533 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
534 dst1.ReadUintFromMem(vform, i, addr1);
535 dst2.ReadUintFromMem(vform, i, addr2);
536 dst3.ReadUintFromMem(vform, i, addr3);
537 dst4.ReadUintFromMem(vform, i, addr4);
538 addr1 += 4 * esize;
539 addr2 += 4 * esize;
540 addr3 += 4 * esize;
541 addr4 += 4 * esize;
542 }
543 }
544
545
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)546 void Simulator::ld4(VectorFormat vform,
547 LogicVRegister dst1,
548 LogicVRegister dst2,
549 LogicVRegister dst3,
550 LogicVRegister dst4,
551 int index,
552 uint64_t addr1) {
553 dst1.ClearForWrite(vform);
554 dst2.ClearForWrite(vform);
555 dst3.ClearForWrite(vform);
556 dst4.ClearForWrite(vform);
557 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
558 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
559 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
560 dst1.ReadUintFromMem(vform, index, addr1);
561 dst2.ReadUintFromMem(vform, index, addr2);
562 dst3.ReadUintFromMem(vform, index, addr3);
563 dst4.ReadUintFromMem(vform, index, addr4);
564 }
565
566
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)567 void Simulator::ld4r(VectorFormat vform,
568 LogicVRegister dst1,
569 LogicVRegister dst2,
570 LogicVRegister dst3,
571 LogicVRegister dst4,
572 uint64_t addr) {
573 dst1.ClearForWrite(vform);
574 dst2.ClearForWrite(vform);
575 dst3.ClearForWrite(vform);
576 dst4.ClearForWrite(vform);
577 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
578 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
579 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
580 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
581 dst1.ReadUintFromMem(vform, i, addr);
582 dst2.ReadUintFromMem(vform, i, addr2);
583 dst3.ReadUintFromMem(vform, i, addr3);
584 dst4.ReadUintFromMem(vform, i, addr4);
585 }
586 }
587
588
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)589 void Simulator::st1(VectorFormat vform,
590 LogicVRegister src,
591 uint64_t addr) {
592 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
593 src.WriteUintToMem(vform, i, addr);
594 addr += LaneSizeInBytesFromFormat(vform);
595 }
596 }
597
598
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)599 void Simulator::st1(VectorFormat vform,
600 LogicVRegister src,
601 int index,
602 uint64_t addr) {
603 src.WriteUintToMem(vform, index, addr);
604 }
605
606
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)607 void Simulator::st2(VectorFormat vform,
608 LogicVRegister dst,
609 LogicVRegister dst2,
610 uint64_t addr) {
611 int esize = LaneSizeInBytesFromFormat(vform);
612 uint64_t addr2 = addr + esize;
613 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
614 dst.WriteUintToMem(vform, i, addr);
615 dst2.WriteUintToMem(vform, i, addr2);
616 addr += 2 * esize;
617 addr2 += 2 * esize;
618 }
619 }
620
621
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)622 void Simulator::st2(VectorFormat vform,
623 LogicVRegister dst,
624 LogicVRegister dst2,
625 int index,
626 uint64_t addr) {
627 int esize = LaneSizeInBytesFromFormat(vform);
628 dst.WriteUintToMem(vform, index, addr);
629 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
630 }
631
632
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)633 void Simulator::st3(VectorFormat vform,
634 LogicVRegister dst,
635 LogicVRegister dst2,
636 LogicVRegister dst3,
637 uint64_t addr) {
638 int esize = LaneSizeInBytesFromFormat(vform);
639 uint64_t addr2 = addr + esize;
640 uint64_t addr3 = addr2 + esize;
641 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
642 dst.WriteUintToMem(vform, i, addr);
643 dst2.WriteUintToMem(vform, i, addr2);
644 dst3.WriteUintToMem(vform, i, addr3);
645 addr += 3 * esize;
646 addr2 += 3 * esize;
647 addr3 += 3 * esize;
648 }
649 }
650
651
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)652 void Simulator::st3(VectorFormat vform,
653 LogicVRegister dst,
654 LogicVRegister dst2,
655 LogicVRegister dst3,
656 int index,
657 uint64_t addr) {
658 int esize = LaneSizeInBytesFromFormat(vform);
659 dst.WriteUintToMem(vform, index, addr);
660 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
661 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
662 }
663
664
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)665 void Simulator::st4(VectorFormat vform,
666 LogicVRegister dst,
667 LogicVRegister dst2,
668 LogicVRegister dst3,
669 LogicVRegister dst4,
670 uint64_t addr) {
671 int esize = LaneSizeInBytesFromFormat(vform);
672 uint64_t addr2 = addr + esize;
673 uint64_t addr3 = addr2 + esize;
674 uint64_t addr4 = addr3 + esize;
675 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
676 dst.WriteUintToMem(vform, i, addr);
677 dst2.WriteUintToMem(vform, i, addr2);
678 dst3.WriteUintToMem(vform, i, addr3);
679 dst4.WriteUintToMem(vform, i, addr4);
680 addr += 4 * esize;
681 addr2 += 4 * esize;
682 addr3 += 4 * esize;
683 addr4 += 4 * esize;
684 }
685 }
686
687
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)688 void Simulator::st4(VectorFormat vform,
689 LogicVRegister dst,
690 LogicVRegister dst2,
691 LogicVRegister dst3,
692 LogicVRegister dst4,
693 int index,
694 uint64_t addr) {
695 int esize = LaneSizeInBytesFromFormat(vform);
696 dst.WriteUintToMem(vform, index, addr);
697 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
698 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
699 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
700 }
701
702
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)703 LogicVRegister Simulator::cmp(VectorFormat vform,
704 LogicVRegister dst,
705 const LogicVRegister& src1,
706 const LogicVRegister& src2,
707 Condition cond) {
708 dst.ClearForWrite(vform);
709 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
710 int64_t sa = src1.Int(vform, i);
711 int64_t sb = src2.Int(vform, i);
712 uint64_t ua = src1.Uint(vform, i);
713 uint64_t ub = src2.Uint(vform, i);
714 bool result = false;
715 switch (cond) {
716 case eq: result = (ua == ub); break;
717 case ge: result = (sa >= sb); break;
718 case gt: result = (sa > sb) ; break;
719 case hi: result = (ua > ub) ; break;
720 case hs: result = (ua >= ub); break;
721 case lt: result = (sa < sb) ; break;
722 case le: result = (sa <= sb); break;
723 default: VIXL_UNREACHABLE(); break;
724 }
725 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
726 }
727 return dst;
728 }
729
730
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)731 LogicVRegister Simulator::cmp(VectorFormat vform,
732 LogicVRegister dst,
733 const LogicVRegister& src1,
734 int imm,
735 Condition cond) {
736 SimVRegister temp;
737 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
738 return cmp(vform, dst, src1, imm_reg, cond);
739 }
740
741
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)742 LogicVRegister Simulator::cmptst(VectorFormat vform,
743 LogicVRegister dst,
744 const LogicVRegister& src1,
745 const LogicVRegister& src2) {
746 dst.ClearForWrite(vform);
747 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
748 uint64_t ua = src1.Uint(vform, i);
749 uint64_t ub = src2.Uint(vform, i);
750 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
751 }
752 return dst;
753 }
754
755
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)756 LogicVRegister Simulator::add(VectorFormat vform,
757 LogicVRegister dst,
758 const LogicVRegister& src1,
759 const LogicVRegister& src2) {
760 dst.ClearForWrite(vform);
761 // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
762 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
763 // Test for unsigned saturation.
764 uint64_t ua = src1.UintLeftJustified(vform, i);
765 uint64_t ub = src2.UintLeftJustified(vform, i);
766 uint64_t ur = ua + ub;
767 if (ur < ua) {
768 dst.SetUnsignedSat(i, true);
769 }
770
771 // Test for signed saturation.
772 int64_t sa = src1.IntLeftJustified(vform, i);
773 int64_t sb = src2.IntLeftJustified(vform, i);
774 int64_t sr = sa + sb;
775 // If the signs of the operands are the same, but different from the result,
776 // there was an overflow.
777 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
778 dst.SetSignedSat(i, sa >= 0);
779 }
780
781 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
782 }
783 return dst;
784 }
785
786
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)787 LogicVRegister Simulator::addp(VectorFormat vform,
788 LogicVRegister dst,
789 const LogicVRegister& src1,
790 const LogicVRegister& src2) {
791 SimVRegister temp1, temp2;
792 uzp1(vform, temp1, src1, src2);
793 uzp2(vform, temp2, src1, src2);
794 add(vform, dst, temp1, temp2);
795 return dst;
796 }
797
798
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)799 LogicVRegister Simulator::mla(VectorFormat vform,
800 LogicVRegister dst,
801 const LogicVRegister& src1,
802 const LogicVRegister& src2) {
803 SimVRegister temp;
804 mul(vform, temp, src1, src2);
805 add(vform, dst, dst, temp);
806 return dst;
807 }
808
809
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)810 LogicVRegister Simulator::mls(VectorFormat vform,
811 LogicVRegister dst,
812 const LogicVRegister& src1,
813 const LogicVRegister& src2) {
814 SimVRegister temp;
815 mul(vform, temp, src1, src2);
816 sub(vform, dst, dst, temp);
817 return dst;
818 }
819
820
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)821 LogicVRegister Simulator::mul(VectorFormat vform,
822 LogicVRegister dst,
823 const LogicVRegister& src1,
824 const LogicVRegister& src2) {
825 dst.ClearForWrite(vform);
826 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
827 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
828 }
829 return dst;
830 }
831
832
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)833 LogicVRegister Simulator::mul(VectorFormat vform,
834 LogicVRegister dst,
835 const LogicVRegister& src1,
836 const LogicVRegister& src2,
837 int index) {
838 SimVRegister temp;
839 VectorFormat indexform = VectorFormatFillQ(vform);
840 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
841 }
842
843
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)844 LogicVRegister Simulator::mla(VectorFormat vform,
845 LogicVRegister dst,
846 const LogicVRegister& src1,
847 const LogicVRegister& src2,
848 int index) {
849 SimVRegister temp;
850 VectorFormat indexform = VectorFormatFillQ(vform);
851 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
852 }
853
854
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)855 LogicVRegister Simulator::mls(VectorFormat vform,
856 LogicVRegister dst,
857 const LogicVRegister& src1,
858 const LogicVRegister& src2,
859 int index) {
860 SimVRegister temp;
861 VectorFormat indexform = VectorFormatFillQ(vform);
862 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
863 }
864
865
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)866 LogicVRegister Simulator::smull(VectorFormat vform,
867 LogicVRegister dst,
868 const LogicVRegister& src1,
869 const LogicVRegister& src2,
870 int index) {
871 SimVRegister temp;
872 VectorFormat indexform =
873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
874 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
875 }
876
877
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)878 LogicVRegister Simulator::smull2(VectorFormat vform,
879 LogicVRegister dst,
880 const LogicVRegister& src1,
881 const LogicVRegister& src2,
882 int index) {
883 SimVRegister temp;
884 VectorFormat indexform =
885 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
886 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
887 }
888
889
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)890 LogicVRegister Simulator::umull(VectorFormat vform,
891 LogicVRegister dst,
892 const LogicVRegister& src1,
893 const LogicVRegister& src2,
894 int index) {
895 SimVRegister temp;
896 VectorFormat indexform =
897 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
898 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
899 }
900
901
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)902 LogicVRegister Simulator::umull2(VectorFormat vform,
903 LogicVRegister dst,
904 const LogicVRegister& src1,
905 const LogicVRegister& src2,
906 int index) {
907 SimVRegister temp;
908 VectorFormat indexform =
909 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
910 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
911 }
912
913
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)914 LogicVRegister Simulator::smlal(VectorFormat vform,
915 LogicVRegister dst,
916 const LogicVRegister& src1,
917 const LogicVRegister& src2,
918 int index) {
919 SimVRegister temp;
920 VectorFormat indexform =
921 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
922 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
923 }
924
925
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)926 LogicVRegister Simulator::smlal2(VectorFormat vform,
927 LogicVRegister dst,
928 const LogicVRegister& src1,
929 const LogicVRegister& src2,
930 int index) {
931 SimVRegister temp;
932 VectorFormat indexform =
933 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
934 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
935 }
936
937
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)938 LogicVRegister Simulator::umlal(VectorFormat vform,
939 LogicVRegister dst,
940 const LogicVRegister& src1,
941 const LogicVRegister& src2,
942 int index) {
943 SimVRegister temp;
944 VectorFormat indexform =
945 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
946 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
947 }
948
949
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)950 LogicVRegister Simulator::umlal2(VectorFormat vform,
951 LogicVRegister dst,
952 const LogicVRegister& src1,
953 const LogicVRegister& src2,
954 int index) {
955 SimVRegister temp;
956 VectorFormat indexform =
957 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
958 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
959 }
960
961
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)962 LogicVRegister Simulator::smlsl(VectorFormat vform,
963 LogicVRegister dst,
964 const LogicVRegister& src1,
965 const LogicVRegister& src2,
966 int index) {
967 SimVRegister temp;
968 VectorFormat indexform =
969 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
970 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
971 }
972
973
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)974 LogicVRegister Simulator::smlsl2(VectorFormat vform,
975 LogicVRegister dst,
976 const LogicVRegister& src1,
977 const LogicVRegister& src2,
978 int index) {
979 SimVRegister temp;
980 VectorFormat indexform =
981 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
982 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
983 }
984
985
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)986 LogicVRegister Simulator::umlsl(VectorFormat vform,
987 LogicVRegister dst,
988 const LogicVRegister& src1,
989 const LogicVRegister& src2,
990 int index) {
991 SimVRegister temp;
992 VectorFormat indexform =
993 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
994 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
995 }
996
997
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)998 LogicVRegister Simulator::umlsl2(VectorFormat vform,
999 LogicVRegister dst,
1000 const LogicVRegister& src1,
1001 const LogicVRegister& src2,
1002 int index) {
1003 SimVRegister temp;
1004 VectorFormat indexform =
1005 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1006 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1007 }
1008
1009
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1010 LogicVRegister Simulator::sqdmull(VectorFormat vform,
1011 LogicVRegister dst,
1012 const LogicVRegister& src1,
1013 const LogicVRegister& src2,
1014 int index) {
1015 SimVRegister temp;
1016 VectorFormat indexform =
1017 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1018 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
1019 }
1020
1021
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1022 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
1023 LogicVRegister dst,
1024 const LogicVRegister& src1,
1025 const LogicVRegister& src2,
1026 int index) {
1027 SimVRegister temp;
1028 VectorFormat indexform =
1029 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1030 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1031 }
1032
1033
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1034 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
1035 LogicVRegister dst,
1036 const LogicVRegister& src1,
1037 const LogicVRegister& src2,
1038 int index) {
1039 SimVRegister temp;
1040 VectorFormat indexform =
1041 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1042 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
1043 }
1044
1045
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1046 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
1047 LogicVRegister dst,
1048 const LogicVRegister& src1,
1049 const LogicVRegister& src2,
1050 int index) {
1051 SimVRegister temp;
1052 VectorFormat indexform =
1053 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1054 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1055 }
1056
1057
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1058 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1059 LogicVRegister dst,
1060 const LogicVRegister& src1,
1061 const LogicVRegister& src2,
1062 int index) {
1063 SimVRegister temp;
1064 VectorFormat indexform =
1065 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1066 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1067 }
1068
1069
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1070 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1071 LogicVRegister dst,
1072 const LogicVRegister& src1,
1073 const LogicVRegister& src2,
1074 int index) {
1075 SimVRegister temp;
1076 VectorFormat indexform =
1077 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1078 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1079 }
1080
1081
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1082 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1083 LogicVRegister dst,
1084 const LogicVRegister& src1,
1085 const LogicVRegister& src2,
1086 int index) {
1087 SimVRegister temp;
1088 VectorFormat indexform = VectorFormatFillQ(vform);
1089 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1090 }
1091
1092
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1093 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1094 LogicVRegister dst,
1095 const LogicVRegister& src1,
1096 const LogicVRegister& src2,
1097 int index) {
1098 SimVRegister temp;
1099 VectorFormat indexform = VectorFormatFillQ(vform);
1100 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1101 }
1102
1103
PolynomialMult(uint8_t op1,uint8_t op2)1104 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
1105 uint16_t result = 0;
1106 uint16_t extended_op2 = op2;
1107 for (int i = 0; i < 8; ++i) {
1108 if ((op1 >> i) & 1) {
1109 result = result ^ (extended_op2 << i);
1110 }
1111 }
1112 return result;
1113 }
1114
1115
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1116 LogicVRegister Simulator::pmul(VectorFormat vform,
1117 LogicVRegister dst,
1118 const LogicVRegister& src1,
1119 const LogicVRegister& src2) {
1120 dst.ClearForWrite(vform);
1121 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1122 dst.SetUint(vform, i,
1123 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1124 }
1125 return dst;
1126 }
1127
1128
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1129 LogicVRegister Simulator::pmull(VectorFormat vform,
1130 LogicVRegister dst,
1131 const LogicVRegister& src1,
1132 const LogicVRegister& src2) {
1133 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1134 dst.ClearForWrite(vform);
1135 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1136 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
1137 src2.Uint(vform_src, i)));
1138 }
1139 return dst;
1140 }
1141
1142
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1143 LogicVRegister Simulator::pmull2(VectorFormat vform,
1144 LogicVRegister dst,
1145 const LogicVRegister& src1,
1146 const LogicVRegister& src2) {
1147 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1148 dst.ClearForWrite(vform);
1149 int lane_count = LaneCountFromFormat(vform);
1150 for (int i = 0; i < lane_count; i++) {
1151 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
1152 src2.Uint(vform_src, lane_count + i)));
1153 }
1154 return dst;
1155 }
1156
1157
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1158 LogicVRegister Simulator::sub(VectorFormat vform,
1159 LogicVRegister dst,
1160 const LogicVRegister& src1,
1161 const LogicVRegister& src2) {
1162 dst.ClearForWrite(vform);
1163 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1164 // Test for unsigned saturation.
1165 if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
1166 dst.SetUnsignedSat(i, false);
1167 }
1168
1169 // Test for signed saturation.
1170 int64_t sa = src1.IntLeftJustified(vform, i);
1171 int64_t sb = src2.IntLeftJustified(vform, i);
1172 int64_t sr = sa - sb;
1173 // If the signs of the operands are different, and the sign of the first
1174 // operand doesn't match the result, there was an overflow.
1175 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
1176 dst.SetSignedSat(i, sr < 0);
1177 }
1178
1179 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
1180 }
1181 return dst;
1182 }
1183
1184
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1185 LogicVRegister Simulator::and_(VectorFormat vform,
1186 LogicVRegister dst,
1187 const LogicVRegister& src1,
1188 const LogicVRegister& src2) {
1189 dst.ClearForWrite(vform);
1190 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1191 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1192 }
1193 return dst;
1194 }
1195
1196
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1197 LogicVRegister Simulator::orr(VectorFormat vform,
1198 LogicVRegister dst,
1199 const LogicVRegister& src1,
1200 const LogicVRegister& src2) {
1201 dst.ClearForWrite(vform);
1202 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1203 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1204 }
1205 return dst;
1206 }
1207
1208
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1209 LogicVRegister Simulator::orn(VectorFormat vform,
1210 LogicVRegister dst,
1211 const LogicVRegister& src1,
1212 const LogicVRegister& src2) {
1213 dst.ClearForWrite(vform);
1214 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1215 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1216 }
1217 return dst;
1218 }
1219
1220
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1221 LogicVRegister Simulator::eor(VectorFormat vform,
1222 LogicVRegister dst,
1223 const LogicVRegister& src1,
1224 const LogicVRegister& src2) {
1225 dst.ClearForWrite(vform);
1226 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1227 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1228 }
1229 return dst;
1230 }
1231
1232
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1233 LogicVRegister Simulator::bic(VectorFormat vform,
1234 LogicVRegister dst,
1235 const LogicVRegister& src1,
1236 const LogicVRegister& src2) {
1237 dst.ClearForWrite(vform);
1238 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1239 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1240 }
1241 return dst;
1242 }
1243
1244
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1245 LogicVRegister Simulator::bic(VectorFormat vform,
1246 LogicVRegister dst,
1247 const LogicVRegister& src,
1248 uint64_t imm) {
1249 uint64_t result[16];
1250 int laneCount = LaneCountFromFormat(vform);
1251 for (int i = 0; i < laneCount; ++i) {
1252 result[i] = src.Uint(vform, i) & ~imm;
1253 }
1254 dst.ClearForWrite(vform);
1255 for (int i = 0; i < laneCount; ++i) {
1256 dst.SetUint(vform, i, result[i]);
1257 }
1258 return dst;
1259 }
1260
1261
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1262 LogicVRegister Simulator::bif(VectorFormat vform,
1263 LogicVRegister dst,
1264 const LogicVRegister& src1,
1265 const LogicVRegister& src2) {
1266 dst.ClearForWrite(vform);
1267 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1268 uint64_t operand1 = dst.Uint(vform, i);
1269 uint64_t operand2 = ~src2.Uint(vform, i);
1270 uint64_t operand3 = src1.Uint(vform, i);
1271 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1272 dst.SetUint(vform, i, result);
1273 }
1274 return dst;
1275 }
1276
1277
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1278 LogicVRegister Simulator::bit(VectorFormat vform,
1279 LogicVRegister dst,
1280 const LogicVRegister& src1,
1281 const LogicVRegister& src2) {
1282 dst.ClearForWrite(vform);
1283 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1284 uint64_t operand1 = dst.Uint(vform, i);
1285 uint64_t operand2 = src2.Uint(vform, i);
1286 uint64_t operand3 = src1.Uint(vform, i);
1287 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1288 dst.SetUint(vform, i, result);
1289 }
1290 return dst;
1291 }
1292
1293
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1294 LogicVRegister Simulator::bsl(VectorFormat vform,
1295 LogicVRegister dst,
1296 const LogicVRegister& src1,
1297 const LogicVRegister& src2) {
1298 dst.ClearForWrite(vform);
1299 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1300 uint64_t operand1 = src2.Uint(vform, i);
1301 uint64_t operand2 = dst.Uint(vform, i);
1302 uint64_t operand3 = src1.Uint(vform, i);
1303 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1304 dst.SetUint(vform, i, result);
1305 }
1306 return dst;
1307 }
1308
1309
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1310 LogicVRegister Simulator::sminmax(VectorFormat vform,
1311 LogicVRegister dst,
1312 const LogicVRegister& src1,
1313 const LogicVRegister& src2,
1314 bool max) {
1315 dst.ClearForWrite(vform);
1316 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1317 int64_t src1_val = src1.Int(vform, i);
1318 int64_t src2_val = src2.Int(vform, i);
1319 int64_t dst_val;
1320 if (max == true) {
1321 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1322 } else {
1323 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1324 }
1325 dst.SetInt(vform, i, dst_val);
1326 }
1327 return dst;
1328 }
1329
1330
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1331 LogicVRegister Simulator::smax(VectorFormat vform,
1332 LogicVRegister dst,
1333 const LogicVRegister& src1,
1334 const LogicVRegister& src2) {
1335 return sminmax(vform, dst, src1, src2, true);
1336 }
1337
1338
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1339 LogicVRegister Simulator::smin(VectorFormat vform,
1340 LogicVRegister dst,
1341 const LogicVRegister& src1,
1342 const LogicVRegister& src2) {
1343 return sminmax(vform, dst, src1, src2, false);
1344 }
1345
1346
sminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1347 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1348 LogicVRegister dst,
1349 int dst_index,
1350 const LogicVRegister& src,
1351 bool max) {
1352 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1353 int64_t src1_val = src.Int(vform, i);
1354 int64_t src2_val = src.Int(vform, i + 1);
1355 int64_t dst_val;
1356 if (max == true) {
1357 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1358 } else {
1359 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1360 }
1361 dst.SetInt(vform, dst_index + (i >> 1), dst_val);
1362 }
1363 return dst;
1364 }
1365
1366
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1367 LogicVRegister Simulator::smaxp(VectorFormat vform,
1368 LogicVRegister dst,
1369 const LogicVRegister& src1,
1370 const LogicVRegister& src2) {
1371 dst.ClearForWrite(vform);
1372 sminmaxp(vform, dst, 0, src1, true);
1373 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1374 return dst;
1375 }
1376
1377
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1378 LogicVRegister Simulator::sminp(VectorFormat vform,
1379 LogicVRegister dst,
1380 const LogicVRegister& src1,
1381 const LogicVRegister& src2) {
1382 dst.ClearForWrite(vform);
1383 sminmaxp(vform, dst, 0, src1, false);
1384 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1385 return dst;
1386 }
1387
1388
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1389 LogicVRegister Simulator::addp(VectorFormat vform,
1390 LogicVRegister dst,
1391 const LogicVRegister& src) {
1392 VIXL_ASSERT(vform == kFormatD);
1393
1394 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
1395 dst.ClearForWrite(vform);
1396 dst.SetInt(vform, 0, dst_val);
1397 return dst;
1398 }
1399
1400
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1401 LogicVRegister Simulator::addv(VectorFormat vform,
1402 LogicVRegister dst,
1403 const LogicVRegister& src) {
1404 VectorFormat vform_dst
1405 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1406
1407
1408 int64_t dst_val = 0;
1409 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1410 dst_val += src.Int(vform, i);
1411 }
1412
1413 dst.ClearForWrite(vform_dst);
1414 dst.SetInt(vform_dst, 0, dst_val);
1415 return dst;
1416 }
1417
1418
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1419 LogicVRegister Simulator::saddlv(VectorFormat vform,
1420 LogicVRegister dst,
1421 const LogicVRegister& src) {
1422 VectorFormat vform_dst
1423 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1424
1425 int64_t dst_val = 0;
1426 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1427 dst_val += src.Int(vform, i);
1428 }
1429
1430 dst.ClearForWrite(vform_dst);
1431 dst.SetInt(vform_dst, 0, dst_val);
1432 return dst;
1433 }
1434
1435
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1436 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1437 LogicVRegister dst,
1438 const LogicVRegister& src) {
1439 VectorFormat vform_dst
1440 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1441
1442 uint64_t dst_val = 0;
1443 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1444 dst_val += src.Uint(vform, i);
1445 }
1446
1447 dst.ClearForWrite(vform_dst);
1448 dst.SetUint(vform_dst, 0, dst_val);
1449 return dst;
1450 }
1451
1452
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1453 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1454 LogicVRegister dst,
1455 const LogicVRegister& src,
1456 bool max) {
1457 dst.ClearForWrite(vform);
1458 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1459 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1460 dst.SetInt(vform, i, 0);
1461 int64_t src_val = src.Int(vform, i);
1462 if (max == true) {
1463 dst_val = (src_val > dst_val) ? src_val : dst_val;
1464 } else {
1465 dst_val = (src_val < dst_val) ? src_val : dst_val;
1466 }
1467 }
1468 dst.SetInt(vform, 0, dst_val);
1469 return dst;
1470 }
1471
1472
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1473 LogicVRegister Simulator::smaxv(VectorFormat vform,
1474 LogicVRegister dst,
1475 const LogicVRegister& src) {
1476 sminmaxv(vform, dst, src, true);
1477 return dst;
1478 }
1479
1480
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1481 LogicVRegister Simulator::sminv(VectorFormat vform,
1482 LogicVRegister dst,
1483 const LogicVRegister& src) {
1484 sminmaxv(vform, dst, src, false);
1485 return dst;
1486 }
1487
1488
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1489 LogicVRegister Simulator::uminmax(VectorFormat vform,
1490 LogicVRegister dst,
1491 const LogicVRegister& src1,
1492 const LogicVRegister& src2,
1493 bool max) {
1494 dst.ClearForWrite(vform);
1495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1496 uint64_t src1_val = src1.Uint(vform, i);
1497 uint64_t src2_val = src2.Uint(vform, i);
1498 uint64_t dst_val;
1499 if (max == true) {
1500 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1501 } else {
1502 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1503 }
1504 dst.SetUint(vform, i, dst_val);
1505 }
1506 return dst;
1507 }
1508
1509
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1510 LogicVRegister Simulator::umax(VectorFormat vform,
1511 LogicVRegister dst,
1512 const LogicVRegister& src1,
1513 const LogicVRegister& src2) {
1514 return uminmax(vform, dst, src1, src2, true);
1515 }
1516
1517
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1518 LogicVRegister Simulator::umin(VectorFormat vform,
1519 LogicVRegister dst,
1520 const LogicVRegister& src1,
1521 const LogicVRegister& src2) {
1522 return uminmax(vform, dst, src1, src2, false);
1523 }
1524
1525
uminmaxp(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,bool max)1526 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1527 LogicVRegister dst,
1528 int dst_index,
1529 const LogicVRegister& src,
1530 bool max) {
1531 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1532 uint64_t src1_val = src.Uint(vform, i);
1533 uint64_t src2_val = src.Uint(vform, i + 1);
1534 uint64_t dst_val;
1535 if (max == true) {
1536 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1537 } else {
1538 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1539 }
1540 dst.SetUint(vform, dst_index + (i >> 1), dst_val);
1541 }
1542 return dst;
1543 }
1544
1545
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1546 LogicVRegister Simulator::umaxp(VectorFormat vform,
1547 LogicVRegister dst,
1548 const LogicVRegister& src1,
1549 const LogicVRegister& src2) {
1550 dst.ClearForWrite(vform);
1551 uminmaxp(vform, dst, 0, src1, true);
1552 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
1553 return dst;
1554 }
1555
1556
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1557 LogicVRegister Simulator::uminp(VectorFormat vform,
1558 LogicVRegister dst,
1559 const LogicVRegister& src1,
1560 const LogicVRegister& src2) {
1561 dst.ClearForWrite(vform);
1562 uminmaxp(vform, dst, 0, src1, false);
1563 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
1564 return dst;
1565 }
1566
1567
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1568 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1569 LogicVRegister dst,
1570 const LogicVRegister& src,
1571 bool max) {
1572 dst.ClearForWrite(vform);
1573 uint64_t dst_val = max ? 0 : UINT64_MAX;
1574 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1575 dst.SetUint(vform, i, 0);
1576 uint64_t src_val = src.Uint(vform, i);
1577 if (max == true) {
1578 dst_val = (src_val > dst_val) ? src_val : dst_val;
1579 } else {
1580 dst_val = (src_val < dst_val) ? src_val : dst_val;
1581 }
1582 }
1583 dst.SetUint(vform, 0, dst_val);
1584 return dst;
1585 }
1586
1587
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1588 LogicVRegister Simulator::umaxv(VectorFormat vform,
1589 LogicVRegister dst,
1590 const LogicVRegister& src) {
1591 uminmaxv(vform, dst, src, true);
1592 return dst;
1593 }
1594
1595
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1596 LogicVRegister Simulator::uminv(VectorFormat vform,
1597 LogicVRegister dst,
1598 const LogicVRegister& src) {
1599 uminmaxv(vform, dst, src, false);
1600 return dst;
1601 }
1602
1603
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1604 LogicVRegister Simulator::shl(VectorFormat vform,
1605 LogicVRegister dst,
1606 const LogicVRegister& src,
1607 int shift) {
1608 VIXL_ASSERT(shift >= 0);
1609 SimVRegister temp;
1610 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1611 return ushl(vform, dst, src, shiftreg);
1612 }
1613
1614
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1615 LogicVRegister Simulator::sshll(VectorFormat vform,
1616 LogicVRegister dst,
1617 const LogicVRegister& src,
1618 int shift) {
1619 VIXL_ASSERT(shift >= 0);
1620 SimVRegister temp1, temp2;
1621 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1622 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1623 return sshl(vform, dst, extendedreg, shiftreg);
1624 }
1625
1626
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1627 LogicVRegister Simulator::sshll2(VectorFormat vform,
1628 LogicVRegister dst,
1629 const LogicVRegister& src,
1630 int shift) {
1631 VIXL_ASSERT(shift >= 0);
1632 SimVRegister temp1, temp2;
1633 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1634 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1635 return sshl(vform, dst, extendedreg, shiftreg);
1636 }
1637
1638
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1639 LogicVRegister Simulator::shll(VectorFormat vform,
1640 LogicVRegister dst,
1641 const LogicVRegister& src) {
1642 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1643 return sshll(vform, dst, src, shift);
1644 }
1645
1646
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1647 LogicVRegister Simulator::shll2(VectorFormat vform,
1648 LogicVRegister dst,
1649 const LogicVRegister& src) {
1650 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1651 return sshll2(vform, dst, src, shift);
1652 }
1653
1654
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1655 LogicVRegister Simulator::ushll(VectorFormat vform,
1656 LogicVRegister dst,
1657 const LogicVRegister& src,
1658 int shift) {
1659 VIXL_ASSERT(shift >= 0);
1660 SimVRegister temp1, temp2;
1661 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1662 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1663 return ushl(vform, dst, extendedreg, shiftreg);
1664 }
1665
1666
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667 LogicVRegister Simulator::ushll2(VectorFormat vform,
1668 LogicVRegister dst,
1669 const LogicVRegister& src,
1670 int shift) {
1671 VIXL_ASSERT(shift >= 0);
1672 SimVRegister temp1, temp2;
1673 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1674 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1675 return ushl(vform, dst, extendedreg, shiftreg);
1676 }
1677
1678
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1679 LogicVRegister Simulator::sli(VectorFormat vform,
1680 LogicVRegister dst,
1681 const LogicVRegister& src,
1682 int shift) {
1683 dst.ClearForWrite(vform);
1684 int laneCount = LaneCountFromFormat(vform);
1685 for (int i = 0; i < laneCount; i++) {
1686 uint64_t src_lane = src.Uint(vform, i);
1687 uint64_t dst_lane = dst.Uint(vform, i);
1688 uint64_t shifted = src_lane << shift;
1689 uint64_t mask = MaxUintFromFormat(vform) << shift;
1690 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1691 }
1692 return dst;
1693 }
1694
1695
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1696 LogicVRegister Simulator::sqshl(VectorFormat vform,
1697 LogicVRegister dst,
1698 const LogicVRegister& src,
1699 int shift) {
1700 VIXL_ASSERT(shift >= 0);
1701 SimVRegister temp;
1702 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1703 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1704 }
1705
1706
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1707 LogicVRegister Simulator::uqshl(VectorFormat vform,
1708 LogicVRegister dst,
1709 const LogicVRegister& src,
1710 int shift) {
1711 VIXL_ASSERT(shift >= 0);
1712 SimVRegister temp;
1713 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1714 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1715 }
1716
1717
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1718 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1719 LogicVRegister dst,
1720 const LogicVRegister& src,
1721 int shift) {
1722 VIXL_ASSERT(shift >= 0);
1723 SimVRegister temp;
1724 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1725 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1726 }
1727
1728
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1729 LogicVRegister Simulator::sri(VectorFormat vform,
1730 LogicVRegister dst,
1731 const LogicVRegister& src,
1732 int shift) {
1733 dst.ClearForWrite(vform);
1734 int laneCount = LaneCountFromFormat(vform);
1735 VIXL_ASSERT((shift > 0) &&
1736 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1737 for (int i = 0; i < laneCount; i++) {
1738 uint64_t src_lane = src.Uint(vform, i);
1739 uint64_t dst_lane = dst.Uint(vform, i);
1740 uint64_t shifted;
1741 uint64_t mask;
1742 if (shift == 64) {
1743 shifted = 0;
1744 mask = 0;
1745 } else {
1746 shifted = src_lane >> shift;
1747 mask = MaxUintFromFormat(vform) >> shift;
1748 }
1749 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1750 }
1751 return dst;
1752 }
1753
1754
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1755 LogicVRegister Simulator::ushr(VectorFormat vform,
1756 LogicVRegister dst,
1757 const LogicVRegister& src,
1758 int shift) {
1759 VIXL_ASSERT(shift >= 0);
1760 SimVRegister temp;
1761 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1762 return ushl(vform, dst, src, shiftreg);
1763 }
1764
1765
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1766 LogicVRegister Simulator::sshr(VectorFormat vform,
1767 LogicVRegister dst,
1768 const LogicVRegister& src,
1769 int shift) {
1770 VIXL_ASSERT(shift >= 0);
1771 SimVRegister temp;
1772 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1773 return sshl(vform, dst, src, shiftreg);
1774 }
1775
1776
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1777 LogicVRegister Simulator::ssra(VectorFormat vform,
1778 LogicVRegister dst,
1779 const LogicVRegister& src,
1780 int shift) {
1781 SimVRegister temp;
1782 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1783 return add(vform, dst, dst, shifted_reg);
1784 }
1785
1786
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1787 LogicVRegister Simulator::usra(VectorFormat vform,
1788 LogicVRegister dst,
1789 const LogicVRegister& src,
1790 int shift) {
1791 SimVRegister temp;
1792 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1793 return add(vform, dst, dst, shifted_reg);
1794 }
1795
1796
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1797 LogicVRegister Simulator::srsra(VectorFormat vform,
1798 LogicVRegister dst,
1799 const LogicVRegister& src,
1800 int shift) {
1801 SimVRegister temp;
1802 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1803 return add(vform, dst, dst, shifted_reg);
1804 }
1805
1806
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1807 LogicVRegister Simulator::ursra(VectorFormat vform,
1808 LogicVRegister dst,
1809 const LogicVRegister& src,
1810 int shift) {
1811 SimVRegister temp;
1812 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1813 return add(vform, dst, dst, shifted_reg);
1814 }
1815
1816
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1817 LogicVRegister Simulator::cls(VectorFormat vform,
1818 LogicVRegister dst,
1819 const LogicVRegister& src) {
1820 uint64_t result[16];
1821 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1822 int laneCount = LaneCountFromFormat(vform);
1823 for (int i = 0; i < laneCount; i++) {
1824 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1825 }
1826
1827 dst.ClearForWrite(vform);
1828 for (int i = 0; i < laneCount; ++i) {
1829 dst.SetUint(vform, i, result[i]);
1830 }
1831 return dst;
1832 }
1833
1834
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1835 LogicVRegister Simulator::clz(VectorFormat vform,
1836 LogicVRegister dst,
1837 const LogicVRegister& src) {
1838 uint64_t result[16];
1839 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1840 int laneCount = LaneCountFromFormat(vform);
1841 for (int i = 0; i < laneCount; i++) {
1842 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1843 }
1844
1845 dst.ClearForWrite(vform);
1846 for (int i = 0; i < laneCount; ++i) {
1847 dst.SetUint(vform, i, result[i]);
1848 }
1849 return dst;
1850 }
1851
1852
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1853 LogicVRegister Simulator::cnt(VectorFormat vform,
1854 LogicVRegister dst,
1855 const LogicVRegister& src) {
1856 uint64_t result[16];
1857 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1858 int laneCount = LaneCountFromFormat(vform);
1859 for (int i = 0; i < laneCount; i++) {
1860 uint64_t value = src.Uint(vform, i);
1861 result[i] = 0;
1862 for (int j = 0; j < laneSizeInBits; j++) {
1863 result[i] += (value & 1);
1864 value >>= 1;
1865 }
1866 }
1867
1868 dst.ClearForWrite(vform);
1869 for (int i = 0; i < laneCount; ++i) {
1870 dst.SetUint(vform, i, result[i]);
1871 }
1872 return dst;
1873 }
1874
1875
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1876 LogicVRegister Simulator::sshl(VectorFormat vform,
1877 LogicVRegister dst,
1878 const LogicVRegister& src1,
1879 const LogicVRegister& src2) {
1880 dst.ClearForWrite(vform);
1881 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1882 int8_t shift_val = src2.Int(vform, i);
1883 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1884
1885 // Set signed saturation state.
1886 if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
1887 (lj_src_val != 0)) {
1888 dst.SetSignedSat(i, lj_src_val >= 0);
1889 }
1890
1891 // Set unsigned saturation state.
1892 if (lj_src_val < 0) {
1893 dst.SetUnsignedSat(i, false);
1894 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1895 (lj_src_val != 0)) {
1896 dst.SetUnsignedSat(i, true);
1897 }
1898
1899 int64_t src_val = src1.Int(vform, i);
1900 if (shift_val > 63) {
1901 dst.SetInt(vform, i, 0);
1902 } else if (shift_val < -63) {
1903 dst.SetRounding(i, src_val < 0);
1904 dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
1905 } else {
1906 if (shift_val < 0) {
1907 // Set rounding state. Rounding only needed on right shifts.
1908 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1909 dst.SetRounding(i, true);
1910 }
1911 src_val >>= -shift_val;
1912 } else {
1913 src_val <<= shift_val;
1914 }
1915 dst.SetInt(vform, i, src_val);
1916 }
1917 }
1918 return dst;
1919 }
1920
1921
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1922 LogicVRegister Simulator::ushl(VectorFormat vform,
1923 LogicVRegister dst,
1924 const LogicVRegister& src1,
1925 const LogicVRegister& src2) {
1926 dst.ClearForWrite(vform);
1927 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1928 int8_t shift_val = src2.Int(vform, i);
1929 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1930
1931 // Set saturation state.
1932 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1933 dst.SetUnsignedSat(i, true);
1934 }
1935
1936 uint64_t src_val = src1.Uint(vform, i);
1937 if ((shift_val > 63) || (shift_val < -64)) {
1938 dst.SetUint(vform, i, 0);
1939 } else {
1940 if (shift_val < 0) {
1941 // Set rounding state. Rounding only needed on right shifts.
1942 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1943 dst.SetRounding(i, true);
1944 }
1945
1946 if (shift_val == -64) {
1947 src_val = 0;
1948 } else {
1949 src_val >>= -shift_val;
1950 }
1951 } else {
1952 src_val <<= shift_val;
1953 }
1954 dst.SetUint(vform, i, src_val);
1955 }
1956 }
1957 return dst;
1958 }
1959
1960
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1961 LogicVRegister Simulator::neg(VectorFormat vform,
1962 LogicVRegister dst,
1963 const LogicVRegister& src) {
1964 dst.ClearForWrite(vform);
1965 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1966 // Test for signed saturation.
1967 int64_t sa = src.Int(vform, i);
1968 if (sa == MinIntFromFormat(vform)) {
1969 dst.SetSignedSat(i, true);
1970 }
1971 dst.SetInt(vform, i, -sa);
1972 }
1973 return dst;
1974 }
1975
1976
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1977 LogicVRegister Simulator::suqadd(VectorFormat vform,
1978 LogicVRegister dst,
1979 const LogicVRegister& src) {
1980 dst.ClearForWrite(vform);
1981 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1982 int64_t sa = dst.IntLeftJustified(vform, i);
1983 uint64_t ub = src.UintLeftJustified(vform, i);
1984 int64_t sr = sa + ub;
1985
1986 if (sr < sa) { // Test for signed positive saturation.
1987 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1988 } else {
1989 dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
1990 }
1991 }
1992 return dst;
1993 }
1994
1995
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1996 LogicVRegister Simulator::usqadd(VectorFormat vform,
1997 LogicVRegister dst,
1998 const LogicVRegister& src) {
1999 dst.ClearForWrite(vform);
2000 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2001 uint64_t ua = dst.UintLeftJustified(vform, i);
2002 int64_t sb = src.IntLeftJustified(vform, i);
2003 uint64_t ur = ua + sb;
2004
2005 if ((sb > 0) && (ur <= ua)) {
2006 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2007 } else if ((sb < 0) && (ur >= ua)) {
2008 dst.SetUint(vform, i, 0); // Negative saturation.
2009 } else {
2010 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2011 }
2012 }
2013 return dst;
2014 }
2015
2016
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2017 LogicVRegister Simulator::abs(VectorFormat vform,
2018 LogicVRegister dst,
2019 const LogicVRegister& src) {
2020 dst.ClearForWrite(vform);
2021 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2022 // Test for signed saturation.
2023 int64_t sa = src.Int(vform, i);
2024 if (sa == MinIntFromFormat(vform)) {
2025 dst.SetSignedSat(i, true);
2026 }
2027 if (sa < 0) {
2028 dst.SetInt(vform, i, -sa);
2029 } else {
2030 dst.SetInt(vform, i, sa);
2031 }
2032 }
2033 return dst;
2034 }
2035
2036
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)2037 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2038 LogicVRegister dst,
2039 bool dstIsSigned,
2040 const LogicVRegister& src,
2041 bool srcIsSigned) {
2042 bool upperhalf = false;
2043 VectorFormat srcform = kFormatUndefined;
2044 int64_t ssrc[8];
2045 uint64_t usrc[8];
2046
2047 switch (dstform) {
2048 case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
2049 case kFormat16B: upperhalf = true; srcform = kFormat8H; break;
2050 case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
2051 case kFormat8H : upperhalf = true; srcform = kFormat4S; break;
2052 case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
2053 case kFormat4S : upperhalf = true; srcform = kFormat2D; break;
2054 case kFormatB : upperhalf = false; srcform = kFormatH; break;
2055 case kFormatH : upperhalf = false; srcform = kFormatS; break;
2056 case kFormatS : upperhalf = false; srcform = kFormatD; break;
2057 default:VIXL_UNIMPLEMENTED();
2058 }
2059
2060 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2061 ssrc[i] = src.Int(srcform, i);
2062 usrc[i] = src.Uint(srcform, i);
2063 }
2064
2065 int offset;
2066 if (upperhalf) {
2067 offset = LaneCountFromFormat(dstform) / 2;
2068 } else {
2069 offset = 0;
2070 dst.ClearForWrite(dstform);
2071 }
2072
2073 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2074 // Test for signed saturation
2075 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2076 dst.SetSignedSat(offset + i, true);
2077 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2078 dst.SetSignedSat(offset + i, false);
2079 }
2080
2081 // Test for unsigned saturation
2082 if (srcIsSigned) {
2083 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2084 dst.SetUnsignedSat(offset + i, true);
2085 } else if (ssrc[i] < 0) {
2086 dst.SetUnsignedSat(offset + i, false);
2087 }
2088 } else {
2089 if (usrc[i] > MaxUintFromFormat(dstform)) {
2090 dst.SetUnsignedSat(offset + i, true);
2091 }
2092 }
2093
2094 int64_t result;
2095 if (srcIsSigned) {
2096 result = ssrc[i] & MaxUintFromFormat(dstform);
2097 } else {
2098 result = usrc[i] & MaxUintFromFormat(dstform);
2099 }
2100
2101 if (dstIsSigned) {
2102 dst.SetInt(dstform, offset + i, result);
2103 } else {
2104 dst.SetUint(dstform, offset + i, result);
2105 }
2106 }
2107 return dst;
2108 }
2109
2110
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2111 LogicVRegister Simulator::xtn(VectorFormat vform,
2112 LogicVRegister dst,
2113 const LogicVRegister& src) {
2114 return extractnarrow(vform, dst, true, src, true);
2115 }
2116
2117
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2118 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2119 LogicVRegister dst,
2120 const LogicVRegister& src) {
2121 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2122 }
2123
2124
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2125 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2126 LogicVRegister dst,
2127 const LogicVRegister& src) {
2128 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2129 }
2130
2131
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2132 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2133 LogicVRegister dst,
2134 const LogicVRegister& src) {
2135 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2136 }
2137
2138
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2139 LogicVRegister Simulator::absdiff(VectorFormat vform,
2140 LogicVRegister dst,
2141 const LogicVRegister& src1,
2142 const LogicVRegister& src2,
2143 bool issigned) {
2144 dst.ClearForWrite(vform);
2145 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2146 if (issigned) {
2147 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2148 sr = sr > 0 ? sr : -sr;
2149 dst.SetInt(vform, i, sr);
2150 } else {
2151 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2152 sr = sr > 0 ? sr : -sr;
2153 dst.SetUint(vform, i, sr);
2154 }
2155 }
2156 return dst;
2157 }
2158
2159
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2160 LogicVRegister Simulator::saba(VectorFormat vform,
2161 LogicVRegister dst,
2162 const LogicVRegister& src1,
2163 const LogicVRegister& src2) {
2164 SimVRegister temp;
2165 dst.ClearForWrite(vform);
2166 absdiff(vform, temp, src1, src2, true);
2167 add(vform, dst, dst, temp);
2168 return dst;
2169 }
2170
2171
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2172 LogicVRegister Simulator::uaba(VectorFormat vform,
2173 LogicVRegister dst,
2174 const LogicVRegister& src1,
2175 const LogicVRegister& src2) {
2176 SimVRegister temp;
2177 dst.ClearForWrite(vform);
2178 absdiff(vform, temp, src1, src2, false);
2179 add(vform, dst, dst, temp);
2180 return dst;
2181 }
2182
2183
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2184 LogicVRegister Simulator::not_(VectorFormat vform,
2185 LogicVRegister dst,
2186 const LogicVRegister& src) {
2187 dst.ClearForWrite(vform);
2188 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2189 dst.SetUint(vform, i, ~src.Uint(vform, i));
2190 }
2191 return dst;
2192 }
2193
2194
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2195 LogicVRegister Simulator::rbit(VectorFormat vform,
2196 LogicVRegister dst,
2197 const LogicVRegister& src) {
2198 uint64_t result[16];
2199 int laneCount = LaneCountFromFormat(vform);
2200 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2201 uint64_t reversed_value;
2202 uint64_t value;
2203 for (int i = 0; i < laneCount; i++) {
2204 value = src.Uint(vform, i);
2205 reversed_value = 0;
2206 for (int j = 0; j < laneSizeInBits; j++) {
2207 reversed_value = (reversed_value << 1) | (value & 1);
2208 value >>= 1;
2209 }
2210 result[i] = reversed_value;
2211 }
2212
2213 dst.ClearForWrite(vform);
2214 for (int i = 0; i < laneCount; ++i) {
2215 dst.SetUint(vform, i, result[i]);
2216 }
2217 return dst;
2218 }
2219
2220
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2221 LogicVRegister Simulator::rev(VectorFormat vform,
2222 LogicVRegister dst,
2223 const LogicVRegister& src,
2224 int revSize) {
2225 uint64_t result[16];
2226 int laneCount = LaneCountFromFormat(vform);
2227 int laneSize = LaneSizeInBytesFromFormat(vform);
2228 int lanesPerLoop = revSize / laneSize;
2229 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2230 for (int j = 0; j < lanesPerLoop; j++) {
2231 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2232 }
2233 }
2234 dst.ClearForWrite(vform);
2235 for (int i = 0; i < laneCount; ++i) {
2236 dst.SetUint(vform, i, result[i]);
2237 }
2238 return dst;
2239 }
2240
2241
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2242 LogicVRegister Simulator::rev16(VectorFormat vform,
2243 LogicVRegister dst,
2244 const LogicVRegister& src) {
2245 return rev(vform, dst, src, 2);
2246 }
2247
2248
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249 LogicVRegister Simulator::rev32(VectorFormat vform,
2250 LogicVRegister dst,
2251 const LogicVRegister& src) {
2252 return rev(vform, dst, src, 4);
2253 }
2254
2255
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256 LogicVRegister Simulator::rev64(VectorFormat vform,
2257 LogicVRegister dst,
2258 const LogicVRegister& src) {
2259 return rev(vform, dst, src, 8);
2260 }
2261
2262
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2263 LogicVRegister Simulator::addlp(VectorFormat vform,
2264 LogicVRegister dst,
2265 const LogicVRegister& src,
2266 bool is_signed,
2267 bool do_accumulate) {
2268 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2269
2270 int64_t sr[16];
2271 uint64_t ur[16];
2272
2273 int laneCount = LaneCountFromFormat(vform);
2274 for (int i = 0; i < laneCount; ++i) {
2275 if (is_signed) {
2276 sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
2277 } else {
2278 ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2279 }
2280 }
2281
2282 dst.ClearForWrite(vform);
2283 for (int i = 0; i < laneCount; ++i) {
2284 if (do_accumulate) {
2285 if (is_signed) {
2286 dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
2287 } else {
2288 dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
2289 }
2290 } else {
2291 if (is_signed) {
2292 dst.SetInt(vform, i, sr[i]);
2293 } else {
2294 dst.SetUint(vform, i, ur[i]);
2295 }
2296 }
2297 }
2298
2299 return dst;
2300 }
2301
2302
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2303 LogicVRegister Simulator::saddlp(VectorFormat vform,
2304 LogicVRegister dst,
2305 const LogicVRegister& src) {
2306 return addlp(vform, dst, src, true, false);
2307 }
2308
2309
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2310 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2311 LogicVRegister dst,
2312 const LogicVRegister& src) {
2313 return addlp(vform, dst, src, false, false);
2314 }
2315
2316
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2317 LogicVRegister Simulator::sadalp(VectorFormat vform,
2318 LogicVRegister dst,
2319 const LogicVRegister& src) {
2320 return addlp(vform, dst, src, true, true);
2321 }
2322
2323
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2324 LogicVRegister Simulator::uadalp(VectorFormat vform,
2325 LogicVRegister dst,
2326 const LogicVRegister& src) {
2327 return addlp(vform, dst, src, false, true);
2328 }
2329
2330
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2331 LogicVRegister Simulator::ext(VectorFormat vform,
2332 LogicVRegister dst,
2333 const LogicVRegister& src1,
2334 const LogicVRegister& src2,
2335 int index) {
2336 uint8_t result[16];
2337 int laneCount = LaneCountFromFormat(vform);
2338 for (int i = 0; i < laneCount - index; ++i) {
2339 result[i] = src1.Uint(vform, i + index);
2340 }
2341 for (int i = 0; i < index; ++i) {
2342 result[laneCount - index + i] = src2.Uint(vform, i);
2343 }
2344 dst.ClearForWrite(vform);
2345 for (int i = 0; i < laneCount; ++i) {
2346 dst.SetUint(vform, i, result[i]);
2347 }
2348 return dst;
2349 }
2350
2351
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2352 LogicVRegister Simulator::dup_element(VectorFormat vform,
2353 LogicVRegister dst,
2354 const LogicVRegister& src,
2355 int src_index) {
2356 int laneCount = LaneCountFromFormat(vform);
2357 uint64_t value = src.Uint(vform, src_index);
2358 dst.ClearForWrite(vform);
2359 for (int i = 0; i < laneCount; ++i) {
2360 dst.SetUint(vform, i, value);
2361 }
2362 return dst;
2363 }
2364
2365
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2366 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2367 LogicVRegister dst,
2368 uint64_t imm) {
2369 int laneCount = LaneCountFromFormat(vform);
2370 uint64_t value = imm & MaxUintFromFormat(vform);
2371 dst.ClearForWrite(vform);
2372 for (int i = 0; i < laneCount; ++i) {
2373 dst.SetUint(vform, i, value);
2374 }
2375 return dst;
2376 }
2377
2378
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2379 LogicVRegister Simulator::ins_element(VectorFormat vform,
2380 LogicVRegister dst,
2381 int dst_index,
2382 const LogicVRegister& src,
2383 int src_index) {
2384 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2385 return dst;
2386 }
2387
2388
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2389 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2390 LogicVRegister dst,
2391 int dst_index,
2392 uint64_t imm) {
2393 uint64_t value = imm & MaxUintFromFormat(vform);
2394 dst.SetUint(vform, dst_index, value);
2395 return dst;
2396 }
2397
2398
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2399 LogicVRegister Simulator::movi(VectorFormat vform,
2400 LogicVRegister dst,
2401 uint64_t imm) {
2402 int laneCount = LaneCountFromFormat(vform);
2403 dst.ClearForWrite(vform);
2404 for (int i = 0; i < laneCount; ++i) {
2405 dst.SetUint(vform, i, imm);
2406 }
2407 return dst;
2408 }
2409
2410
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2411 LogicVRegister Simulator::mvni(VectorFormat vform,
2412 LogicVRegister dst,
2413 uint64_t imm) {
2414 int laneCount = LaneCountFromFormat(vform);
2415 dst.ClearForWrite(vform);
2416 for (int i = 0; i < laneCount; ++i) {
2417 dst.SetUint(vform, i, ~imm);
2418 }
2419 return dst;
2420 }
2421
2422
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2423 LogicVRegister Simulator::orr(VectorFormat vform,
2424 LogicVRegister dst,
2425 const LogicVRegister& src,
2426 uint64_t imm) {
2427 uint64_t result[16];
2428 int laneCount = LaneCountFromFormat(vform);
2429 for (int i = 0; i < laneCount; ++i) {
2430 result[i] = src.Uint(vform, i) | imm;
2431 }
2432 dst.ClearForWrite(vform);
2433 for (int i = 0; i < laneCount; ++i) {
2434 dst.SetUint(vform, i, result[i]);
2435 }
2436 return dst;
2437 }
2438
2439
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2440 LogicVRegister Simulator::uxtl(VectorFormat vform,
2441 LogicVRegister dst,
2442 const LogicVRegister& src) {
2443 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2444
2445 dst.ClearForWrite(vform);
2446 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2447 dst.SetUint(vform, i, src.Uint(vform_half, i));
2448 }
2449 return dst;
2450 }
2451
2452
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2453 LogicVRegister Simulator::sxtl(VectorFormat vform,
2454 LogicVRegister dst,
2455 const LogicVRegister& src) {
2456 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2457
2458 dst.ClearForWrite(vform);
2459 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2460 dst.SetInt(vform, i, src.Int(vform_half, i));
2461 }
2462 return dst;
2463 }
2464
2465
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2467 LogicVRegister dst,
2468 const LogicVRegister& src) {
2469 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2470 int lane_count = LaneCountFromFormat(vform);
2471
2472 dst.ClearForWrite(vform);
2473 for (int i = 0; i < lane_count; i++) {
2474 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2475 }
2476 return dst;
2477 }
2478
2479
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2480 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2481 LogicVRegister dst,
2482 const LogicVRegister& src) {
2483 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2484 int lane_count = LaneCountFromFormat(vform);
2485
2486 dst.ClearForWrite(vform);
2487 for (int i = 0; i < lane_count; i++) {
2488 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2489 }
2490 return dst;
2491 }
2492
2493
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2494 LogicVRegister Simulator::shrn(VectorFormat vform,
2495 LogicVRegister dst,
2496 const LogicVRegister& src,
2497 int shift) {
2498 SimVRegister temp;
2499 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2500 VectorFormat vform_dst = vform;
2501 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2502 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2503 }
2504
2505
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2506 LogicVRegister Simulator::shrn2(VectorFormat vform,
2507 LogicVRegister dst,
2508 const LogicVRegister& src,
2509 int shift) {
2510 SimVRegister temp;
2511 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2512 VectorFormat vformdst = vform;
2513 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2514 return extractnarrow(vformdst, dst, false, shifted_src, false);
2515 }
2516
2517
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2518 LogicVRegister Simulator::rshrn(VectorFormat vform,
2519 LogicVRegister dst,
2520 const LogicVRegister& src,
2521 int shift) {
2522 SimVRegister temp;
2523 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2524 VectorFormat vformdst = vform;
2525 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2526 return extractnarrow(vformdst, dst, false, shifted_src, false);
2527 }
2528
2529
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2530 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2531 LogicVRegister dst,
2532 const LogicVRegister& src,
2533 int shift) {
2534 SimVRegister temp;
2535 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2536 VectorFormat vformdst = vform;
2537 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2538 return extractnarrow(vformdst, dst, false, shifted_src, false);
2539 }
2540
2541
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2542 LogicVRegister Simulator::tbl(VectorFormat vform,
2543 LogicVRegister dst,
2544 const LogicVRegister& tab,
2545 const LogicVRegister& ind) {
2546 movi(vform, dst, 0);
2547 return tbx(vform, dst, tab, ind);
2548 }
2549
2550
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2551 LogicVRegister Simulator::tbl(VectorFormat vform,
2552 LogicVRegister dst,
2553 const LogicVRegister& tab,
2554 const LogicVRegister& tab2,
2555 const LogicVRegister& ind) {
2556 movi(vform, dst, 0);
2557 return tbx(vform, dst, tab, tab2, ind);
2558 }
2559
2560
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2561 LogicVRegister Simulator::tbl(VectorFormat vform,
2562 LogicVRegister dst,
2563 const LogicVRegister& tab,
2564 const LogicVRegister& tab2,
2565 const LogicVRegister& tab3,
2566 const LogicVRegister& ind) {
2567 movi(vform, dst, 0);
2568 return tbx(vform, dst, tab, tab2, tab3, ind);
2569 }
2570
2571
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2572 LogicVRegister Simulator::tbl(VectorFormat vform,
2573 LogicVRegister dst,
2574 const LogicVRegister& tab,
2575 const LogicVRegister& tab2,
2576 const LogicVRegister& tab3,
2577 const LogicVRegister& tab4,
2578 const LogicVRegister& ind) {
2579 movi(vform, dst, 0);
2580 return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
2581 }
2582
2583
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2584 LogicVRegister Simulator::tbx(VectorFormat vform,
2585 LogicVRegister dst,
2586 const LogicVRegister& tab,
2587 const LogicVRegister& ind) {
2588 dst.ClearForWrite(vform);
2589 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2590 uint64_t j = ind.Uint(vform, i);
2591 switch (j >> 4) {
2592 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2593 }
2594 }
2595 return dst;
2596 }
2597
2598
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2599 LogicVRegister Simulator::tbx(VectorFormat vform,
2600 LogicVRegister dst,
2601 const LogicVRegister& tab,
2602 const LogicVRegister& tab2,
2603 const LogicVRegister& ind) {
2604 dst.ClearForWrite(vform);
2605 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2606 uint64_t j = ind.Uint(vform, i);
2607 switch (j >> 4) {
2608 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2609 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2610 }
2611 }
2612 return dst;
2613 }
2614
2615
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2616 LogicVRegister Simulator::tbx(VectorFormat vform,
2617 LogicVRegister dst,
2618 const LogicVRegister& tab,
2619 const LogicVRegister& tab2,
2620 const LogicVRegister& tab3,
2621 const LogicVRegister& ind) {
2622 dst.ClearForWrite(vform);
2623 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2624 uint64_t j = ind.Uint(vform, i);
2625 switch (j >> 4) {
2626 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2627 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2628 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2629 }
2630 }
2631 return dst;
2632 }
2633
2634
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2635 LogicVRegister Simulator::tbx(VectorFormat vform,
2636 LogicVRegister dst,
2637 const LogicVRegister& tab,
2638 const LogicVRegister& tab2,
2639 const LogicVRegister& tab3,
2640 const LogicVRegister& tab4,
2641 const LogicVRegister& ind) {
2642 dst.ClearForWrite(vform);
2643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2644 uint64_t j = ind.Uint(vform, i);
2645 switch (j >> 4) {
2646 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
2647 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
2648 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
2649 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
2650 }
2651 }
2652 return dst;
2653 }
2654
2655
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2656 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2657 LogicVRegister dst,
2658 const LogicVRegister& src,
2659 int shift) {
2660 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2661 }
2662
2663
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2664 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2665 LogicVRegister dst,
2666 const LogicVRegister& src,
2667 int shift) {
2668 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2669 }
2670
2671
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2672 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2673 LogicVRegister dst,
2674 const LogicVRegister& src,
2675 int shift) {
2676 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2677 }
2678
2679
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2680 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2681 LogicVRegister dst,
2682 const LogicVRegister& src,
2683 int shift) {
2684 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2685 }
2686
2687
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2688 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2689 LogicVRegister dst,
2690 const LogicVRegister& src,
2691 int shift) {
2692 SimVRegister temp;
2693 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2694 VectorFormat vformdst = vform;
2695 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2696 return sqxtn(vformdst, dst, shifted_src);
2697 }
2698
2699
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2700 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2701 LogicVRegister dst,
2702 const LogicVRegister& src,
2703 int shift) {
2704 SimVRegister temp;
2705 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2706 VectorFormat vformdst = vform;
2707 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2708 return sqxtn(vformdst, dst, shifted_src);
2709 }
2710
2711
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2712 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2713 LogicVRegister dst,
2714 const LogicVRegister& src,
2715 int shift) {
2716 SimVRegister temp;
2717 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2718 VectorFormat vformdst = vform;
2719 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2720 return sqxtn(vformdst, dst, shifted_src);
2721 }
2722
2723
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2724 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2725 LogicVRegister dst,
2726 const LogicVRegister& src,
2727 int shift) {
2728 SimVRegister temp;
2729 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2730 VectorFormat vformdst = vform;
2731 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2732 return sqxtn(vformdst, dst, shifted_src);
2733 }
2734
2735
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2736 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2737 LogicVRegister dst,
2738 const LogicVRegister& src,
2739 int shift) {
2740 SimVRegister temp;
2741 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2742 VectorFormat vformdst = vform;
2743 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2744 return sqxtun(vformdst, dst, shifted_src);
2745 }
2746
2747
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2748 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2749 LogicVRegister dst,
2750 const LogicVRegister& src,
2751 int shift) {
2752 SimVRegister temp;
2753 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2754 VectorFormat vformdst = vform;
2755 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2756 return sqxtun(vformdst, dst, shifted_src);
2757 }
2758
2759
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2760 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2761 LogicVRegister dst,
2762 const LogicVRegister& src,
2763 int shift) {
2764 SimVRegister temp;
2765 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2766 VectorFormat vformdst = vform;
2767 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2768 return sqxtun(vformdst, dst, shifted_src);
2769 }
2770
2771
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2772 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2773 LogicVRegister dst,
2774 const LogicVRegister& src,
2775 int shift) {
2776 SimVRegister temp;
2777 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2778 VectorFormat vformdst = vform;
2779 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2780 return sqxtun(vformdst, dst, shifted_src);
2781 }
2782
2783
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2784 LogicVRegister Simulator::uaddl(VectorFormat vform,
2785 LogicVRegister dst,
2786 const LogicVRegister& src1,
2787 const LogicVRegister& src2) {
2788 SimVRegister temp1, temp2;
2789 uxtl(vform, temp1, src1);
2790 uxtl(vform, temp2, src2);
2791 add(vform, dst, temp1, temp2);
2792 return dst;
2793 }
2794
2795
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2796 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2797 LogicVRegister dst,
2798 const LogicVRegister& src1,
2799 const LogicVRegister& src2) {
2800 SimVRegister temp1, temp2;
2801 uxtl2(vform, temp1, src1);
2802 uxtl2(vform, temp2, src2);
2803 add(vform, dst, temp1, temp2);
2804 return dst;
2805 }
2806
2807
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2808 LogicVRegister Simulator::uaddw(VectorFormat vform,
2809 LogicVRegister dst,
2810 const LogicVRegister& src1,
2811 const LogicVRegister& src2) {
2812 SimVRegister temp;
2813 uxtl(vform, temp, src2);
2814 add(vform, dst, src1, temp);
2815 return dst;
2816 }
2817
2818
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2819 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2820 LogicVRegister dst,
2821 const LogicVRegister& src1,
2822 const LogicVRegister& src2) {
2823 SimVRegister temp;
2824 uxtl2(vform, temp, src2);
2825 add(vform, dst, src1, temp);
2826 return dst;
2827 }
2828
2829
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830 LogicVRegister Simulator::saddl(VectorFormat vform,
2831 LogicVRegister dst,
2832 const LogicVRegister& src1,
2833 const LogicVRegister& src2) {
2834 SimVRegister temp1, temp2;
2835 sxtl(vform, temp1, src1);
2836 sxtl(vform, temp2, src2);
2837 add(vform, dst, temp1, temp2);
2838 return dst;
2839 }
2840
2841
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2842 LogicVRegister Simulator::saddl2(VectorFormat vform,
2843 LogicVRegister dst,
2844 const LogicVRegister& src1,
2845 const LogicVRegister& src2) {
2846 SimVRegister temp1, temp2;
2847 sxtl2(vform, temp1, src1);
2848 sxtl2(vform, temp2, src2);
2849 add(vform, dst, temp1, temp2);
2850 return dst;
2851 }
2852
2853
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2854 LogicVRegister Simulator::saddw(VectorFormat vform,
2855 LogicVRegister dst,
2856 const LogicVRegister& src1,
2857 const LogicVRegister& src2) {
2858 SimVRegister temp;
2859 sxtl(vform, temp, src2);
2860 add(vform, dst, src1, temp);
2861 return dst;
2862 }
2863
2864
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2865 LogicVRegister Simulator::saddw2(VectorFormat vform,
2866 LogicVRegister dst,
2867 const LogicVRegister& src1,
2868 const LogicVRegister& src2) {
2869 SimVRegister temp;
2870 sxtl2(vform, temp, src2);
2871 add(vform, dst, src1, temp);
2872 return dst;
2873 }
2874
2875
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2876 LogicVRegister Simulator::usubl(VectorFormat vform,
2877 LogicVRegister dst,
2878 const LogicVRegister& src1,
2879 const LogicVRegister& src2) {
2880 SimVRegister temp1, temp2;
2881 uxtl(vform, temp1, src1);
2882 uxtl(vform, temp2, src2);
2883 sub(vform, dst, temp1, temp2);
2884 return dst;
2885 }
2886
2887
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2888 LogicVRegister Simulator::usubl2(VectorFormat vform,
2889 LogicVRegister dst,
2890 const LogicVRegister& src1,
2891 const LogicVRegister& src2) {
2892 SimVRegister temp1, temp2;
2893 uxtl2(vform, temp1, src1);
2894 uxtl2(vform, temp2, src2);
2895 sub(vform, dst, temp1, temp2);
2896 return dst;
2897 }
2898
2899
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2900 LogicVRegister Simulator::usubw(VectorFormat vform,
2901 LogicVRegister dst,
2902 const LogicVRegister& src1,
2903 const LogicVRegister& src2) {
2904 SimVRegister temp;
2905 uxtl(vform, temp, src2);
2906 sub(vform, dst, src1, temp);
2907 return dst;
2908 }
2909
2910
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2911 LogicVRegister Simulator::usubw2(VectorFormat vform,
2912 LogicVRegister dst,
2913 const LogicVRegister& src1,
2914 const LogicVRegister& src2) {
2915 SimVRegister temp;
2916 uxtl2(vform, temp, src2);
2917 sub(vform, dst, src1, temp);
2918 return dst;
2919 }
2920
2921
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2922 LogicVRegister Simulator::ssubl(VectorFormat vform,
2923 LogicVRegister dst,
2924 const LogicVRegister& src1,
2925 const LogicVRegister& src2) {
2926 SimVRegister temp1, temp2;
2927 sxtl(vform, temp1, src1);
2928 sxtl(vform, temp2, src2);
2929 sub(vform, dst, temp1, temp2);
2930 return dst;
2931 }
2932
2933
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2934 LogicVRegister Simulator::ssubl2(VectorFormat vform,
2935 LogicVRegister dst,
2936 const LogicVRegister& src1,
2937 const LogicVRegister& src2) {
2938 SimVRegister temp1, temp2;
2939 sxtl2(vform, temp1, src1);
2940 sxtl2(vform, temp2, src2);
2941 sub(vform, dst, temp1, temp2);
2942 return dst;
2943 }
2944
2945
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2946 LogicVRegister Simulator::ssubw(VectorFormat vform,
2947 LogicVRegister dst,
2948 const LogicVRegister& src1,
2949 const LogicVRegister& src2) {
2950 SimVRegister temp;
2951 sxtl(vform, temp, src2);
2952 sub(vform, dst, src1, temp);
2953 return dst;
2954 }
2955
2956
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2957 LogicVRegister Simulator::ssubw2(VectorFormat vform,
2958 LogicVRegister dst,
2959 const LogicVRegister& src1,
2960 const LogicVRegister& src2) {
2961 SimVRegister temp;
2962 sxtl2(vform, temp, src2);
2963 sub(vform, dst, src1, temp);
2964 return dst;
2965 }
2966
2967
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2968 LogicVRegister Simulator::uabal(VectorFormat vform,
2969 LogicVRegister dst,
2970 const LogicVRegister& src1,
2971 const LogicVRegister& src2) {
2972 SimVRegister temp1, temp2;
2973 uxtl(vform, temp1, src1);
2974 uxtl(vform, temp2, src2);
2975 uaba(vform, dst, temp1, temp2);
2976 return dst;
2977 }
2978
2979
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2980 LogicVRegister Simulator::uabal2(VectorFormat vform,
2981 LogicVRegister dst,
2982 const LogicVRegister& src1,
2983 const LogicVRegister& src2) {
2984 SimVRegister temp1, temp2;
2985 uxtl2(vform, temp1, src1);
2986 uxtl2(vform, temp2, src2);
2987 uaba(vform, dst, temp1, temp2);
2988 return dst;
2989 }
2990
2991
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2992 LogicVRegister Simulator::sabal(VectorFormat vform,
2993 LogicVRegister dst,
2994 const LogicVRegister& src1,
2995 const LogicVRegister& src2) {
2996 SimVRegister temp1, temp2;
2997 sxtl(vform, temp1, src1);
2998 sxtl(vform, temp2, src2);
2999 saba(vform, dst, temp1, temp2);
3000 return dst;
3001 }
3002
3003
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3004 LogicVRegister Simulator::sabal2(VectorFormat vform,
3005 LogicVRegister dst,
3006 const LogicVRegister& src1,
3007 const LogicVRegister& src2) {
3008 SimVRegister temp1, temp2;
3009 sxtl2(vform, temp1, src1);
3010 sxtl2(vform, temp2, src2);
3011 saba(vform, dst, temp1, temp2);
3012 return dst;
3013 }
3014
3015
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3016 LogicVRegister Simulator::uabdl(VectorFormat vform,
3017 LogicVRegister dst,
3018 const LogicVRegister& src1,
3019 const LogicVRegister& src2) {
3020 SimVRegister temp1, temp2;
3021 uxtl(vform, temp1, src1);
3022 uxtl(vform, temp2, src2);
3023 absdiff(vform, dst, temp1, temp2, false);
3024 return dst;
3025 }
3026
3027
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3028 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3029 LogicVRegister dst,
3030 const LogicVRegister& src1,
3031 const LogicVRegister& src2) {
3032 SimVRegister temp1, temp2;
3033 uxtl2(vform, temp1, src1);
3034 uxtl2(vform, temp2, src2);
3035 absdiff(vform, dst, temp1, temp2, false);
3036 return dst;
3037 }
3038
3039
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3040 LogicVRegister Simulator::sabdl(VectorFormat vform,
3041 LogicVRegister dst,
3042 const LogicVRegister& src1,
3043 const LogicVRegister& src2) {
3044 SimVRegister temp1, temp2;
3045 sxtl(vform, temp1, src1);
3046 sxtl(vform, temp2, src2);
3047 absdiff(vform, dst, temp1, temp2, true);
3048 return dst;
3049 }
3050
3051
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3052 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3053 LogicVRegister dst,
3054 const LogicVRegister& src1,
3055 const LogicVRegister& src2) {
3056 SimVRegister temp1, temp2;
3057 sxtl2(vform, temp1, src1);
3058 sxtl2(vform, temp2, src2);
3059 absdiff(vform, dst, temp1, temp2, true);
3060 return dst;
3061 }
3062
3063
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3064 LogicVRegister Simulator::umull(VectorFormat vform,
3065 LogicVRegister dst,
3066 const LogicVRegister& src1,
3067 const LogicVRegister& src2) {
3068 SimVRegister temp1, temp2;
3069 uxtl(vform, temp1, src1);
3070 uxtl(vform, temp2, src2);
3071 mul(vform, dst, temp1, temp2);
3072 return dst;
3073 }
3074
3075
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3076 LogicVRegister Simulator::umull2(VectorFormat vform,
3077 LogicVRegister dst,
3078 const LogicVRegister& src1,
3079 const LogicVRegister& src2) {
3080 SimVRegister temp1, temp2;
3081 uxtl2(vform, temp1, src1);
3082 uxtl2(vform, temp2, src2);
3083 mul(vform, dst, temp1, temp2);
3084 return dst;
3085 }
3086
3087
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3088 LogicVRegister Simulator::smull(VectorFormat vform,
3089 LogicVRegister dst,
3090 const LogicVRegister& src1,
3091 const LogicVRegister& src2) {
3092 SimVRegister temp1, temp2;
3093 sxtl(vform, temp1, src1);
3094 sxtl(vform, temp2, src2);
3095 mul(vform, dst, temp1, temp2);
3096 return dst;
3097 }
3098
3099
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3100 LogicVRegister Simulator::smull2(VectorFormat vform,
3101 LogicVRegister dst,
3102 const LogicVRegister& src1,
3103 const LogicVRegister& src2) {
3104 SimVRegister temp1, temp2;
3105 sxtl2(vform, temp1, src1);
3106 sxtl2(vform, temp2, src2);
3107 mul(vform, dst, temp1, temp2);
3108 return dst;
3109 }
3110
3111
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3112 LogicVRegister Simulator::umlsl(VectorFormat vform,
3113 LogicVRegister dst,
3114 const LogicVRegister& src1,
3115 const LogicVRegister& src2) {
3116 SimVRegister temp1, temp2;
3117 uxtl(vform, temp1, src1);
3118 uxtl(vform, temp2, src2);
3119 mls(vform, dst, temp1, temp2);
3120 return dst;
3121 }
3122
3123
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3124 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3125 LogicVRegister dst,
3126 const LogicVRegister& src1,
3127 const LogicVRegister& src2) {
3128 SimVRegister temp1, temp2;
3129 uxtl2(vform, temp1, src1);
3130 uxtl2(vform, temp2, src2);
3131 mls(vform, dst, temp1, temp2);
3132 return dst;
3133 }
3134
3135
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3136 LogicVRegister Simulator::smlsl(VectorFormat vform,
3137 LogicVRegister dst,
3138 const LogicVRegister& src1,
3139 const LogicVRegister& src2) {
3140 SimVRegister temp1, temp2;
3141 sxtl(vform, temp1, src1);
3142 sxtl(vform, temp2, src2);
3143 mls(vform, dst, temp1, temp2);
3144 return dst;
3145 }
3146
3147
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3148 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3149 LogicVRegister dst,
3150 const LogicVRegister& src1,
3151 const LogicVRegister& src2) {
3152 SimVRegister temp1, temp2;
3153 sxtl2(vform, temp1, src1);
3154 sxtl2(vform, temp2, src2);
3155 mls(vform, dst, temp1, temp2);
3156 return dst;
3157 }
3158
3159
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3160 LogicVRegister Simulator::umlal(VectorFormat vform,
3161 LogicVRegister dst,
3162 const LogicVRegister& src1,
3163 const LogicVRegister& src2) {
3164 SimVRegister temp1, temp2;
3165 uxtl(vform, temp1, src1);
3166 uxtl(vform, temp2, src2);
3167 mla(vform, dst, temp1, temp2);
3168 return dst;
3169 }
3170
3171
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3172 LogicVRegister Simulator::umlal2(VectorFormat vform,
3173 LogicVRegister dst,
3174 const LogicVRegister& src1,
3175 const LogicVRegister& src2) {
3176 SimVRegister temp1, temp2;
3177 uxtl2(vform, temp1, src1);
3178 uxtl2(vform, temp2, src2);
3179 mla(vform, dst, temp1, temp2);
3180 return dst;
3181 }
3182
3183
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3184 LogicVRegister Simulator::smlal(VectorFormat vform,
3185 LogicVRegister dst,
3186 const LogicVRegister& src1,
3187 const LogicVRegister& src2) {
3188 SimVRegister temp1, temp2;
3189 sxtl(vform, temp1, src1);
3190 sxtl(vform, temp2, src2);
3191 mla(vform, dst, temp1, temp2);
3192 return dst;
3193 }
3194
3195
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3196 LogicVRegister Simulator::smlal2(VectorFormat vform,
3197 LogicVRegister dst,
3198 const LogicVRegister& src1,
3199 const LogicVRegister& src2) {
3200 SimVRegister temp1, temp2;
3201 sxtl2(vform, temp1, src1);
3202 sxtl2(vform, temp2, src2);
3203 mla(vform, dst, temp1, temp2);
3204 return dst;
3205 }
3206
3207
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3208 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3209 LogicVRegister dst,
3210 const LogicVRegister& src1,
3211 const LogicVRegister& src2) {
3212 SimVRegister temp;
3213 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3214 return add(vform, dst, dst, product).SignedSaturate(vform);
3215 }
3216
3217
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3218 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3219 LogicVRegister dst,
3220 const LogicVRegister& src1,
3221 const LogicVRegister& src2) {
3222 SimVRegister temp;
3223 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3224 return add(vform, dst, dst, product).SignedSaturate(vform);
3225 }
3226
3227
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3228 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3229 LogicVRegister dst,
3230 const LogicVRegister& src1,
3231 const LogicVRegister& src2) {
3232 SimVRegister temp;
3233 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3234 return sub(vform, dst, dst, product).SignedSaturate(vform);
3235 }
3236
3237
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3238 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3239 LogicVRegister dst,
3240 const LogicVRegister& src1,
3241 const LogicVRegister& src2) {
3242 SimVRegister temp;
3243 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3244 return sub(vform, dst, dst, product).SignedSaturate(vform);
3245 }
3246
3247
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3248 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3249 LogicVRegister dst,
3250 const LogicVRegister& src1,
3251 const LogicVRegister& src2) {
3252 SimVRegister temp;
3253 LogicVRegister product = smull(vform, temp, src1, src2);
3254 return add(vform, dst, product, product).SignedSaturate(vform);
3255 }
3256
3257
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3258 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3259 LogicVRegister dst,
3260 const LogicVRegister& src1,
3261 const LogicVRegister& src2) {
3262 SimVRegister temp;
3263 LogicVRegister product = smull2(vform, temp, src1, src2);
3264 return add(vform, dst, product, product).SignedSaturate(vform);
3265 }
3266
3267
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3268 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3269 LogicVRegister dst,
3270 const LogicVRegister& src1,
3271 const LogicVRegister& src2,
3272 bool round) {
3273 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3274 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3275 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3276
3277 int esize = LaneSizeInBitsFromFormat(vform);
3278 int round_const = round ? (1 << (esize - 2)) : 0;
3279 int64_t product;
3280
3281 dst.ClearForWrite(vform);
3282 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3283 product = src1.Int(vform, i) * src2.Int(vform, i);
3284 product += round_const;
3285 product = product >> (esize - 1);
3286
3287 if (product > MaxIntFromFormat(vform)) {
3288 product = MaxIntFromFormat(vform);
3289 } else if (product < MinIntFromFormat(vform)) {
3290 product = MinIntFromFormat(vform);
3291 }
3292 dst.SetInt(vform, i, product);
3293 }
3294 return dst;
3295 }
3296
3297
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3298 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3299 LogicVRegister dst,
3300 const LogicVRegister& src1,
3301 const LogicVRegister& src2) {
3302 return sqrdmulh(vform, dst, src1, src2, false);
3303 }
3304
3305
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3306 LogicVRegister Simulator::addhn(VectorFormat vform,
3307 LogicVRegister dst,
3308 const LogicVRegister& src1,
3309 const LogicVRegister& src2) {
3310 SimVRegister temp;
3311 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3312 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3313 return dst;
3314 }
3315
3316
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3317 LogicVRegister Simulator::addhn2(VectorFormat vform,
3318 LogicVRegister dst,
3319 const LogicVRegister& src1,
3320 const LogicVRegister& src2) {
3321 SimVRegister temp;
3322 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3323 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3324 return dst;
3325 }
3326
3327
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3328 LogicVRegister Simulator::raddhn(VectorFormat vform,
3329 LogicVRegister dst,
3330 const LogicVRegister& src1,
3331 const LogicVRegister& src2) {
3332 SimVRegister temp;
3333 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3334 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3335 return dst;
3336 }
3337
3338
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3339 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3340 LogicVRegister dst,
3341 const LogicVRegister& src1,
3342 const LogicVRegister& src2) {
3343 SimVRegister temp;
3344 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3345 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3346 return dst;
3347 }
3348
3349
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3350 LogicVRegister Simulator::subhn(VectorFormat vform,
3351 LogicVRegister dst,
3352 const LogicVRegister& src1,
3353 const LogicVRegister& src2) {
3354 SimVRegister temp;
3355 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3356 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3357 return dst;
3358 }
3359
3360
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3361 LogicVRegister Simulator::subhn2(VectorFormat vform,
3362 LogicVRegister dst,
3363 const LogicVRegister& src1,
3364 const LogicVRegister& src2) {
3365 SimVRegister temp;
3366 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3367 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3368 return dst;
3369 }
3370
3371
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3372 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3373 LogicVRegister dst,
3374 const LogicVRegister& src1,
3375 const LogicVRegister& src2) {
3376 SimVRegister temp;
3377 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3378 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3379 return dst;
3380 }
3381
3382
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3383 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3384 LogicVRegister dst,
3385 const LogicVRegister& src1,
3386 const LogicVRegister& src2) {
3387 SimVRegister temp;
3388 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3389 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3390 return dst;
3391 }
3392
3393
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3394 LogicVRegister Simulator::trn1(VectorFormat vform,
3395 LogicVRegister dst,
3396 const LogicVRegister& src1,
3397 const LogicVRegister& src2) {
3398 uint64_t result[16];
3399 int laneCount = LaneCountFromFormat(vform);
3400 int pairs = laneCount / 2;
3401 for (int i = 0; i < pairs; ++i) {
3402 result[2 * i] = src1.Uint(vform, 2 * i);
3403 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3404 }
3405
3406 dst.ClearForWrite(vform);
3407 for (int i = 0; i < laneCount; ++i) {
3408 dst.SetUint(vform, i, result[i]);
3409 }
3410 return dst;
3411 }
3412
3413
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3414 LogicVRegister Simulator::trn2(VectorFormat vform,
3415 LogicVRegister dst,
3416 const LogicVRegister& src1,
3417 const LogicVRegister& src2) {
3418 uint64_t result[16];
3419 int laneCount = LaneCountFromFormat(vform);
3420 int pairs = laneCount / 2;
3421 for (int i = 0; i < pairs; ++i) {
3422 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3423 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3424 }
3425
3426 dst.ClearForWrite(vform);
3427 for (int i = 0; i < laneCount; ++i) {
3428 dst.SetUint(vform, i, result[i]);
3429 }
3430 return dst;
3431 }
3432
3433
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3434 LogicVRegister Simulator::zip1(VectorFormat vform,
3435 LogicVRegister dst,
3436 const LogicVRegister& src1,
3437 const LogicVRegister& src2) {
3438 uint64_t result[16];
3439 int laneCount = LaneCountFromFormat(vform);
3440 int pairs = laneCount / 2;
3441 for (int i = 0; i < pairs; ++i) {
3442 result[2 * i] = src1.Uint(vform, i);
3443 result[(2 * i) + 1] = src2.Uint(vform, i);
3444 }
3445
3446 dst.ClearForWrite(vform);
3447 for (int i = 0; i < laneCount; ++i) {
3448 dst.SetUint(vform, i, result[i]);
3449 }
3450 return dst;
3451 }
3452
3453
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3454 LogicVRegister Simulator::zip2(VectorFormat vform,
3455 LogicVRegister dst,
3456 const LogicVRegister& src1,
3457 const LogicVRegister& src2) {
3458 uint64_t result[16];
3459 int laneCount = LaneCountFromFormat(vform);
3460 int pairs = laneCount / 2;
3461 for (int i = 0; i < pairs; ++i) {
3462 result[2 * i] = src1.Uint(vform, pairs + i);
3463 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3464 }
3465
3466 dst.ClearForWrite(vform);
3467 for (int i = 0; i < laneCount; ++i) {
3468 dst.SetUint(vform, i, result[i]);
3469 }
3470 return dst;
3471 }
3472
3473
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474 LogicVRegister Simulator::uzp1(VectorFormat vform,
3475 LogicVRegister dst,
3476 const LogicVRegister& src1,
3477 const LogicVRegister& src2) {
3478 uint64_t result[32];
3479 int laneCount = LaneCountFromFormat(vform);
3480 for (int i = 0; i < laneCount; ++i) {
3481 result[i] = src1.Uint(vform, i);
3482 result[laneCount + i] = src2.Uint(vform, i);
3483 }
3484
3485 dst.ClearForWrite(vform);
3486 for (int i = 0; i < laneCount; ++i) {
3487 dst.SetUint(vform, i, result[2 * i]);
3488 }
3489 return dst;
3490 }
3491
3492
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3493 LogicVRegister Simulator::uzp2(VectorFormat vform,
3494 LogicVRegister dst,
3495 const LogicVRegister& src1,
3496 const LogicVRegister& src2) {
3497 uint64_t result[32];
3498 int laneCount = LaneCountFromFormat(vform);
3499 for (int i = 0; i < laneCount; ++i) {
3500 result[i] = src1.Uint(vform, i);
3501 result[laneCount + i] = src2.Uint(vform, i);
3502 }
3503
3504 dst.ClearForWrite(vform);
3505 for (int i = 0; i < laneCount; ++i) {
3506 dst.SetUint(vform, i, result[ (2 * i) + 1]);
3507 }
3508 return dst;
3509 }
3510
3511
3512 template <typename T>
FPAdd(T op1,T op2)3513 T Simulator::FPAdd(T op1, T op2) {
3514 T result = FPProcessNaNs(op1, op2);
3515 if (std::isnan(result)) return result;
3516
3517 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
3518 // inf + -inf returns the default NaN.
3519 FPProcessException();
3520 return FPDefaultNaN<T>();
3521 } else {
3522 // Other cases should be handled by standard arithmetic.
3523 return op1 + op2;
3524 }
3525 }
3526
3527
3528 template <typename T>
FPSub(T op1,T op2)3529 T Simulator::FPSub(T op1, T op2) {
3530 // NaNs should be handled elsewhere.
3531 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3532
3533 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
3534 // inf - inf returns the default NaN.
3535 FPProcessException();
3536 return FPDefaultNaN<T>();
3537 } else {
3538 // Other cases should be handled by standard arithmetic.
3539 return op1 - op2;
3540 }
3541 }
3542
3543
3544 template <typename T>
FPMul(T op1,T op2)3545 T Simulator::FPMul(T op1, T op2) {
3546 // NaNs should be handled elsewhere.
3547 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3548
3549 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3550 // inf * 0.0 returns the default NaN.
3551 FPProcessException();
3552 return FPDefaultNaN<T>();
3553 } else {
3554 // Other cases should be handled by standard arithmetic.
3555 return op1 * op2;
3556 }
3557 }
3558
3559
3560 template<typename T>
FPMulx(T op1,T op2)3561 T Simulator::FPMulx(T op1, T op2) {
3562 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
3563 // inf * 0.0 returns +/-2.0.
3564 T two = 2.0;
3565 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3566 }
3567 return FPMul(op1, op2);
3568 }
3569
3570
// Computes a + (op1 * op2) through FusedMultiplyAdd(), with explicit handling
// of NaN operands, NaN-generating operand combinations and the sign of zero
// results.
//
// Order of checks, as implemented below:
//   1. If FPProcessNaNs3() produced a NaN, return it - unless the operation
//      itself would generate a NaN and `a` is a quiet NaN, in which case
//      FPProcessException() is called and the default NaN is returned.
//   2. If the operation would generate a NaN (inf * 0, 0 * inf, or an
//      infinite `a` combined with an infinite product of opposite sign), call
//      FPProcessException() and return the default NaN.
//   3. If `a` is zero and either multiplicand is zero, return a zero whose
//      sign is computed here rather than by the host fma.
//   4. Otherwise return the FusedMultiplyAdd() result, fixing up the sign of a
//      zero result when `a` is zero.
template<typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  // Holds the NaN selected by FPProcessNaNs3(), if any; examined below only
  // after the NaN-generation predicate has been computed.
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs expressed as +/-1.0 so they can be multiplied and compared.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  // The product is treated as infinite if either multiplicand is infinite.
  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
  bool operation_generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (std::isnan(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
  }

  // Note the argument order: the multiplicands come first, the addend last.
  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!std::isnan(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}
3616
3617
3618 template <typename T>
FPDiv(T op1,T op2)3619 T Simulator::FPDiv(T op1, T op2) {
3620 // NaNs should be handled elsewhere.
3621 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
3622
3623 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3624 // inf / inf and 0.0 / 0.0 return the default NaN.
3625 FPProcessException();
3626 return FPDefaultNaN<T>();
3627 } else {
3628 if (op2 == 0.0) FPProcessException();
3629
3630 // Other cases should be handled by standard arithmetic.
3631 return op1 / op2;
3632 }
3633 }
3634
3635
3636 template <typename T>
FPSqrt(T op)3637 T Simulator::FPSqrt(T op) {
3638 if (std::isnan(op)) {
3639 return FPProcessNaN(op);
3640 } else if (op < 0.0) {
3641 FPProcessException();
3642 return FPDefaultNaN<T>();
3643 } else {
3644 return sqrt(op);
3645 }
3646 }
3647
3648
3649 template <typename T>
FPMax(T a,T b)3650 T Simulator::FPMax(T a, T b) {
3651 T result = FPProcessNaNs(a, b);
3652 if (std::isnan(result)) return result;
3653
3654 if ((a == 0.0) && (b == 0.0) &&
3655 (copysign(1.0, a) != copysign(1.0, b))) {
3656 // a and b are zero, and the sign differs: return +0.0.
3657 return 0.0;
3658 } else {
3659 return (a > b) ? a : b;
3660 }
3661 }
3662
3663
3664 template <typename T>
FPMaxNM(T a,T b)3665 T Simulator::FPMaxNM(T a, T b) {
3666 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3667 a = kFP64NegativeInfinity;
3668 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3669 b = kFP64NegativeInfinity;
3670 }
3671
3672 T result = FPProcessNaNs(a, b);
3673 return std::isnan(result) ? result : FPMax(a, b);
3674 }
3675
3676
3677 template <typename T>
FPMin(T a,T b)3678 T Simulator::FPMin(T a, T b) {
3679 T result = FPProcessNaNs(a, b);
3680 if (std::isnan(result)) return result;
3681
3682 if ((a == 0.0) && (b == 0.0) &&
3683 (copysign(1.0, a) != copysign(1.0, b))) {
3684 // a and b are zero, and the sign differs: return -0.0.
3685 return -0.0;
3686 } else {
3687 return (a < b) ? a : b;
3688 }
3689 }
3690
3691
3692 template <typename T>
FPMinNM(T a,T b)3693 T Simulator::FPMinNM(T a, T b) {
3694 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3695 a = kFP64PositiveInfinity;
3696 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3697 b = kFP64PositiveInfinity;
3698 }
3699
3700 T result = FPProcessNaNs(a, b);
3701 return std::isnan(result) ? result : FPMin(a, b);
3702 }
3703
3704
3705 template <typename T>
FPRecipStepFused(T op1,T op2)3706 T Simulator::FPRecipStepFused(T op1, T op2) {
3707 const T two = 2.0;
3708 if ((std::isinf(op1) && (op2 == 0.0))
3709 || ((op1 == 0.0) && (std::isinf(op2)))) {
3710 return two;
3711 } else if (std::isinf(op1) || std::isinf(op2)) {
3712 // Return +inf if signs match, otherwise -inf.
3713 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3714 : kFP64NegativeInfinity;
3715 } else {
3716 return FusedMultiplyAdd(op1, op2, two);
3717 }
3718 }
3719
3720
3721 template <typename T>
FPRSqrtStepFused(T op1,T op2)3722 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3723 const T one_point_five = 1.5;
3724 const T two = 2.0;
3725
3726 if ((std::isinf(op1) && (op2 == 0.0))
3727 || ((op1 == 0.0) && (std::isinf(op2)))) {
3728 return one_point_five;
3729 } else if (std::isinf(op1) || std::isinf(op2)) {
3730 // Return +inf if signs match, otherwise -inf.
3731 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3732 : kFP64NegativeInfinity;
3733 } else {
3734 // The multiply-add-halve operation must be fully fused, so avoid interim
3735 // rounding by checking which operand can be losslessly divided by two
3736 // before doing the multiply-add.
3737 if (std::isnormal(op1 / two)) {
3738 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3739 } else if (std::isnormal(op2 / two)) {
3740 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3741 } else {
3742 // Neither operand is normal after halving: the result is dominated by
3743 // the addition term, so just return that.
3744 return one_point_five;
3745 }
3746 }
3747 }
3748
3749
FPRoundInt(double value,FPRounding round_mode)3750 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3751 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3752 (value == kFP64NegativeInfinity)) {
3753 return value;
3754 } else if (std::isnan(value)) {
3755 return FPProcessNaN(value);
3756 }
3757
3758 double int_result = std::floor(value);
3759 double error = value - int_result;
3760 switch (round_mode) {
3761 case FPTieAway: {
3762 // Take care of correctly handling the range ]-0.5, -0.0], which must
3763 // yield -0.0.
3764 if ((-0.5 < value) && (value < 0.0)) {
3765 int_result = -0.0;
3766
3767 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3768 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3769 // result is positive, round up.
3770 int_result++;
3771 }
3772 break;
3773 }
3774 case FPTieEven: {
3775 // Take care of correctly handling the range [-0.5, -0.0], which must
3776 // yield -0.0.
3777 if ((-0.5 <= value) && (value < 0.0)) {
3778 int_result = -0.0;
3779
3780 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3781 // result is odd, round up.
3782 } else if ((error > 0.5) ||
3783 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3784 int_result++;
3785 }
3786 break;
3787 }
3788 case FPZero: {
3789 // If value>0 then we take floor(value)
3790 // otherwise, ceil(value).
3791 if (value < 0) {
3792 int_result = ceil(value);
3793 }
3794 break;
3795 }
3796 case FPNegativeInfinity: {
3797 // We always use floor(value).
3798 break;
3799 }
3800 case FPPositiveInfinity: {
3801 // Take care of correctly handling the range ]-1.0, -0.0], which must
3802 // yield -0.0.
3803 if ((-1.0 < value) && (value < 0.0)) {
3804 int_result = -0.0;
3805
3806 // If the error is non-zero, round up.
3807 } else if (error > 0.0) {
3808 int_result++;
3809 }
3810 break;
3811 }
3812 default: VIXL_UNIMPLEMENTED();
3813 }
3814 return int_result;
3815 }
3816
3817
FPToInt32(double value,FPRounding rmode)3818 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3819 value = FPRoundInt(value, rmode);
3820 if (value >= kWMaxInt) {
3821 return kWMaxInt;
3822 } else if (value < kWMinInt) {
3823 return kWMinInt;
3824 }
3825 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3826 }
3827
3828
FPToInt64(double value,FPRounding rmode)3829 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3830 value = FPRoundInt(value, rmode);
3831 if (value >= kXMaxInt) {
3832 return kXMaxInt;
3833 } else if (value < kXMinInt) {
3834 return kXMinInt;
3835 }
3836 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3837 }
3838
3839
FPToUInt32(double value,FPRounding rmode)3840 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3841 value = FPRoundInt(value, rmode);
3842 if (value >= kWMaxUInt) {
3843 return kWMaxUInt;
3844 } else if (value < 0.0) {
3845 return 0;
3846 }
3847 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3848 }
3849
3850
FPToUInt64(double value,FPRounding rmode)3851 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3852 value = FPRoundInt(value, rmode);
3853 if (value >= kXMaxUInt) {
3854 return kXMaxUInt;
3855 } else if (value < 0.0) {
3856 return 0;
3857 }
3858 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3859 }
3860
3861
// Defines, for each (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST, two
// Simulator members named FN that apply the scalar operation OP lane by lane:
//
//  - FN<T>: the element-wise implementation for lane type T. When PROCNAN is
//    true the operands are first passed to FPProcessNaNs and OP only runs if
//    that did not produce a NaN; when it is false OP is called directly.
//  - FN: picks FN<float> or FN<double> from the lane size of vform.
//
// Comments are kept out of the macro body because of the line continuations.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
template <typename T>                                            \
LogicVRegister Simulator::FN(VectorFormat vform,                 \
                             LogicVRegister dst,                 \
                             const LogicVRegister& src1,         \
                             const LogicVRegister& src2) {       \
  dst.ClearForWrite(vform);                                      \
  const int lane_count = LaneCountFromFormat(vform);             \
  for (int lane = 0; lane < lane_count; ++lane) {                \
    T lhs = src1.Float<T>(lane);                                 \
    T rhs = src2.Float<T>(lane);                                 \
    T lane_result;                                               \
    if (!(PROCNAN)) {                                            \
      lane_result = OP(lhs, rhs);                                \
    } else {                                                     \
      lane_result = FPProcessNaNs(lhs, rhs);                     \
      if (!std::isnan(lane_result)) {                            \
        lane_result = OP(lhs, rhs);                              \
      }                                                          \
    }                                                            \
    dst.SetFloat(lane, lane_result);                             \
  }                                                              \
  return dst;                                                    \
}                                                                \
                                                                 \
LogicVRegister Simulator::FN(VectorFormat vform,                 \
                             LogicVRegister dst,                 \
                             const LogicVRegister& src1,         \
                             const LogicVRegister& src2) {       \
  if (LaneSizeInBitsFromFormat(vform) != kSRegSize) {            \
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);   \
    FN<double>(vform, dst, src1, src2);                          \
    return dst;                                                  \
  }                                                              \
  FN<float>(vform, dst, src1, src2);                             \
  return dst;                                                    \
}
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3900
3901
3902 LogicVRegister Simulator::fnmul(VectorFormat vform,
3903 LogicVRegister dst,
3904 const LogicVRegister& src1,
3905 const LogicVRegister& src2) {
3906 SimVRegister temp;
3907 LogicVRegister product = fmul(vform, temp, src1, src2);
3908 return fneg(vform, dst, product);
3909 }
3910
3911
3912 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3913 LogicVRegister Simulator::frecps(VectorFormat vform,
3914 LogicVRegister dst,
3915 const LogicVRegister& src1,
3916 const LogicVRegister& src2) {
3917 dst.ClearForWrite(vform);
3918 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3919 T op1 = -src1.Float<T>(i);
3920 T op2 = src2.Float<T>(i);
3921 T result = FPProcessNaNs(op1, op2);
3922 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3923 }
3924 return dst;
3925 }
3926
3927
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3928 LogicVRegister Simulator::frecps(VectorFormat vform,
3929 LogicVRegister dst,
3930 const LogicVRegister& src1,
3931 const LogicVRegister& src2) {
3932 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3933 frecps<float>(vform, dst, src1, src2);
3934 } else {
3935 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3936 frecps<double>(vform, dst, src1, src2);
3937 }
3938 return dst;
3939 }
3940
3941
3942 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3943 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3944 LogicVRegister dst,
3945 const LogicVRegister& src1,
3946 const LogicVRegister& src2) {
3947 dst.ClearForWrite(vform);
3948 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3949 T op1 = -src1.Float<T>(i);
3950 T op2 = src2.Float<T>(i);
3951 T result = FPProcessNaNs(op1, op2);
3952 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3953 }
3954 return dst;
3955 }
3956
3957
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3958 LogicVRegister Simulator::frsqrts(VectorFormat vform,
3959 LogicVRegister dst,
3960 const LogicVRegister& src1,
3961 const LogicVRegister& src2) {
3962 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
3963 frsqrts<float>(vform, dst, src1, src2);
3964 } else {
3965 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
3966 frsqrts<double>(vform, dst, src1, src2);
3967 }
3968 return dst;
3969 }
3970
3971
3972 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3973 LogicVRegister Simulator::fcmp(VectorFormat vform,
3974 LogicVRegister dst,
3975 const LogicVRegister& src1,
3976 const LogicVRegister& src2,
3977 Condition cond) {
3978 dst.ClearForWrite(vform);
3979 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3980 bool result = false;
3981 T op1 = src1.Float<T>(i);
3982 T op2 = src2.Float<T>(i);
3983 T nan_result = FPProcessNaNs(op1, op2);
3984 if (!std::isnan(nan_result)) {
3985 switch (cond) {
3986 case eq: result = (op1 == op2); break;
3987 case ge: result = (op1 >= op2); break;
3988 case gt: result = (op1 > op2) ; break;
3989 case le: result = (op1 <= op2); break;
3990 case lt: result = (op1 < op2) ; break;
3991 default: VIXL_UNREACHABLE(); break;
3992 }
3993 }
3994 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3995 }
3996 return dst;
3997 }
3998
3999
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4000 LogicVRegister Simulator::fcmp(VectorFormat vform,
4001 LogicVRegister dst,
4002 const LogicVRegister& src1,
4003 const LogicVRegister& src2,
4004 Condition cond) {
4005 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4006 fcmp<float>(vform, dst, src1, src2, cond);
4007 } else {
4008 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4009 fcmp<double>(vform, dst, src1, src2, cond);
4010 }
4011 return dst;
4012 }
4013
4014
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4015 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4016 LogicVRegister dst,
4017 const LogicVRegister& src,
4018 Condition cond) {
4019 SimVRegister temp;
4020 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4021 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
4022 fcmp<float>(vform, dst, src, zero_reg, cond);
4023 } else {
4024 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4025 LogicVRegister zero_reg = dup_immediate(vform, temp,
4026 double_to_rawbits(0.0));
4027 fcmp<double>(vform, dst, src, zero_reg, cond);
4028 }
4029 return dst;
4030 }
4031
4032
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4033 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4034 LogicVRegister dst,
4035 const LogicVRegister& src1,
4036 const LogicVRegister& src2,
4037 Condition cond) {
4038 SimVRegister temp1, temp2;
4039 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4040 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4041 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4042 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4043 } else {
4044 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4045 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4046 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4047 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4048 }
4049 return dst;
4050 }
4051
4052
4053 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4054 LogicVRegister Simulator::fmla(VectorFormat vform,
4055 LogicVRegister dst,
4056 const LogicVRegister& src1,
4057 const LogicVRegister& src2) {
4058 dst.ClearForWrite(vform);
4059 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4060 T op1 = src1.Float<T>(i);
4061 T op2 = src2.Float<T>(i);
4062 T acc = dst.Float<T>(i);
4063 T result = FPMulAdd(acc, op1, op2);
4064 dst.SetFloat(i, result);
4065 }
4066 return dst;
4067 }
4068
4069
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4070 LogicVRegister Simulator::fmla(VectorFormat vform,
4071 LogicVRegister dst,
4072 const LogicVRegister& src1,
4073 const LogicVRegister& src2) {
4074 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4075 fmla<float>(vform, dst, src1, src2);
4076 } else {
4077 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4078 fmla<double>(vform, dst, src1, src2);
4079 }
4080 return dst;
4081 }
4082
4083
4084 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4085 LogicVRegister Simulator::fmls(VectorFormat vform,
4086 LogicVRegister dst,
4087 const LogicVRegister& src1,
4088 const LogicVRegister& src2) {
4089 dst.ClearForWrite(vform);
4090 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4091 T op1 = -src1.Float<T>(i);
4092 T op2 = src2.Float<T>(i);
4093 T acc = dst.Float<T>(i);
4094 T result = FPMulAdd(acc, op1, op2);
4095 dst.SetFloat(i, result);
4096 }
4097 return dst;
4098 }
4099
4100
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4101 LogicVRegister Simulator::fmls(VectorFormat vform,
4102 LogicVRegister dst,
4103 const LogicVRegister& src1,
4104 const LogicVRegister& src2) {
4105 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4106 fmls<float>(vform, dst, src1, src2);
4107 } else {
4108 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4109 fmls<double>(vform, dst, src1, src2);
4110 }
4111 return dst;
4112 }
4113
4114
4115 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4116 LogicVRegister Simulator::fneg(VectorFormat vform,
4117 LogicVRegister dst,
4118 const LogicVRegister& src) {
4119 dst.ClearForWrite(vform);
4120 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4121 T op = src.Float<T>(i);
4122 op = -op;
4123 dst.SetFloat(i, op);
4124 }
4125 return dst;
4126 }
4127
4128
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4129 LogicVRegister Simulator::fneg(VectorFormat vform,
4130 LogicVRegister dst,
4131 const LogicVRegister& src) {
4132 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4133 fneg<float>(vform, dst, src);
4134 } else {
4135 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4136 fneg<double>(vform, dst, src);
4137 }
4138 return dst;
4139 }
4140
4141
4142 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4143 LogicVRegister Simulator::fabs_(VectorFormat vform,
4144 LogicVRegister dst,
4145 const LogicVRegister& src) {
4146 dst.ClearForWrite(vform);
4147 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4148 T op = src.Float<T>(i);
4149 if (copysign(1.0, op) < 0.0) {
4150 op = -op;
4151 }
4152 dst.SetFloat(i, op);
4153 }
4154 return dst;
4155 }
4156
4157
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4158 LogicVRegister Simulator::fabs_(VectorFormat vform,
4159 LogicVRegister dst,
4160 const LogicVRegister& src) {
4161 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4162 fabs_<float>(vform, dst, src);
4163 } else {
4164 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4165 fabs_<double>(vform, dst, src);
4166 }
4167 return dst;
4168 }
4169
4170
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4171 LogicVRegister Simulator::fabd(VectorFormat vform,
4172 LogicVRegister dst,
4173 const LogicVRegister& src1,
4174 const LogicVRegister& src2) {
4175 SimVRegister temp;
4176 fsub(vform, temp, src1, src2);
4177 fabs_(vform, dst, temp);
4178 return dst;
4179 }
4180
4181
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4182 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4183 LogicVRegister dst,
4184 const LogicVRegister& src) {
4185 dst.ClearForWrite(vform);
4186 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4187 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4188 float result = FPSqrt(src.Float<float>(i));
4189 dst.SetFloat(i, result);
4190 }
4191 } else {
4192 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4193 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4194 double result = FPSqrt(src.Float<double>(i));
4195 dst.SetFloat(i, result);
4196 }
4197 }
4198 return dst;
4199 }
4200
4201
// Defines, for each (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST, two Simulator
// members named FNP:
//
//  - the vector form, which rearranges src1/src2 with uzp1 and uzp2 into two
//    scratch registers and then applies the element-wise operation FN to them;
//  - the scalar form, which applies OP to lanes 0 and 1 of src, stores the
//    result in lane 0 of dst and then calls ClearForWrite(vform). vform must
//    be kFormatS or kFormatD.
//
// Comments are kept out of the macro body because of the line continuations.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                            \
LogicVRegister Simulator::FNP(VectorFormat vform,                      \
                              LogicVRegister dst,                      \
                              const LogicVRegister& src1,              \
                              const LogicVRegister& src2) {            \
  SimVRegister unzipped1, unzipped2;                                   \
  uzp1(vform, unzipped1, src1, src2);                                  \
  uzp2(vform, unzipped2, src1, src2);                                  \
  FN(vform, dst, unzipped1, unzipped2);                                \
  return dst;                                                          \
}                                                                      \
                                                                       \
LogicVRegister Simulator::FNP(VectorFormat vform,                      \
                              LogicVRegister dst,                      \
                              const LogicVRegister& src) {             \
  if (vform != kFormatS) {                                             \
    VIXL_ASSERT(vform == kFormatD);                                    \
    double pair_result = OP(src.Float<double>(0), src.Float<double>(1)); \
    dst.SetFloat(0, pair_result);                                      \
  } else {                                                             \
    float pair_result = OP(src.Float<float>(0), src.Float<float>(1));  \
    dst.SetFloat(0, pair_result);                                      \
  }                                                                    \
  dst.ClearForWrite(vform);                                            \
  return dst;                                                          \
}
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4230
4231
4232 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4233 LogicVRegister dst,
4234 const LogicVRegister& src,
4235 FPMinMaxOp Op) {
4236 VIXL_ASSERT(vform == kFormat4S);
4237 USE(vform);
4238 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
4239 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
4240 float result = (this->*Op)(result1, result2);
4241 dst.ClearForWrite(kFormatS);
4242 dst.SetFloat<float>(0, result);
4243 return dst;
4244 }
4245
4246
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4247 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4248 LogicVRegister dst,
4249 const LogicVRegister& src) {
4250 return fminmaxv(vform, dst, src, &Simulator::FPMax);
4251 }
4252
4253
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4254 LogicVRegister Simulator::fminv(VectorFormat vform,
4255 LogicVRegister dst,
4256 const LogicVRegister& src) {
4257 return fminmaxv(vform, dst, src, &Simulator::FPMin);
4258 }
4259
4260
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4261 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4262 LogicVRegister dst,
4263 const LogicVRegister& src) {
4264 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
4265 }
4266
4267
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4268 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4269 LogicVRegister dst,
4270 const LogicVRegister& src) {
4271 return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
4272 }
4273
4274
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4275 LogicVRegister Simulator::fmul(VectorFormat vform,
4276 LogicVRegister dst,
4277 const LogicVRegister& src1,
4278 const LogicVRegister& src2,
4279 int index) {
4280 dst.ClearForWrite(vform);
4281 SimVRegister temp;
4282 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4283 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4284 fmul<float>(vform, dst, src1, index_reg);
4285
4286 } else {
4287 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4288 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4289 fmul<double>(vform, dst, src1, index_reg);
4290 }
4291 return dst;
4292 }
4293
4294
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4295 LogicVRegister Simulator::fmla(VectorFormat vform,
4296 LogicVRegister dst,
4297 const LogicVRegister& src1,
4298 const LogicVRegister& src2,
4299 int index) {
4300 dst.ClearForWrite(vform);
4301 SimVRegister temp;
4302 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4303 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4304 fmla<float>(vform, dst, src1, index_reg);
4305
4306 } else {
4307 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4308 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4309 fmla<double>(vform, dst, src1, index_reg);
4310 }
4311 return dst;
4312 }
4313
4314
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4315 LogicVRegister Simulator::fmls(VectorFormat vform,
4316 LogicVRegister dst,
4317 const LogicVRegister& src1,
4318 const LogicVRegister& src2,
4319 int index) {
4320 dst.ClearForWrite(vform);
4321 SimVRegister temp;
4322 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4323 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4324 fmls<float>(vform, dst, src1, index_reg);
4325
4326 } else {
4327 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4328 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4329 fmls<double>(vform, dst, src1, index_reg);
4330 }
4331 return dst;
4332 }
4333
4334
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4335 LogicVRegister Simulator::fmulx(VectorFormat vform,
4336 LogicVRegister dst,
4337 const LogicVRegister& src1,
4338 const LogicVRegister& src2,
4339 int index) {
4340 dst.ClearForWrite(vform);
4341 SimVRegister temp;
4342 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4343 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4344 fmulx<float>(vform, dst, src1, index_reg);
4345
4346 } else {
4347 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4348 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4349 fmulx<double>(vform, dst, src1, index_reg);
4350 }
4351 return dst;
4352 }
4353
4354
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4355 LogicVRegister Simulator::frint(VectorFormat vform,
4356 LogicVRegister dst,
4357 const LogicVRegister& src,
4358 FPRounding rounding_mode,
4359 bool inexact_exception) {
4360 dst.ClearForWrite(vform);
4361 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4362 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4363 float input = src.Float<float>(i);
4364 float rounded = FPRoundInt(input, rounding_mode);
4365 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4366 FPProcessException();
4367 }
4368 dst.SetFloat<float>(i, rounded);
4369 }
4370 } else {
4371 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4372 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4373 double input = src.Float<double>(i);
4374 double rounded = FPRoundInt(input, rounding_mode);
4375 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
4376 FPProcessException();
4377 }
4378 dst.SetFloat<double>(i, rounded);
4379 }
4380 }
4381 return dst;
4382 }
4383
4384
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4385 LogicVRegister Simulator::fcvts(VectorFormat vform,
4386 LogicVRegister dst,
4387 const LogicVRegister& src,
4388 FPRounding rounding_mode,
4389 int fbits) {
4390 dst.ClearForWrite(vform);
4391 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4392 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4393 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4394 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4395 }
4396 } else {
4397 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4398 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4399 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4400 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4401 }
4402 }
4403 return dst;
4404 }
4405
4406
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4407 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4408 LogicVRegister dst,
4409 const LogicVRegister& src,
4410 FPRounding rounding_mode,
4411 int fbits) {
4412 dst.ClearForWrite(vform);
4413 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4415 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4416 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4417 }
4418 } else {
4419 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4420 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4421 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4422 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4423 }
4424 }
4425 return dst;
4426 }
4427
4428
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4430 LogicVRegister dst,
4431 const LogicVRegister& src) {
4432 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4433 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4434 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
4435 }
4436 } else {
4437 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4438 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4439 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
4440 }
4441 }
4442 return dst;
4443 }
4444
4445
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4446 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4447 LogicVRegister dst,
4448 const LogicVRegister& src) {
4449 int lane_count = LaneCountFromFormat(vform);
4450 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4451 for (int i = 0; i < lane_count; i++) {
4452 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
4453 }
4454 } else {
4455 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4456 for (int i = 0; i < lane_count; i++) {
4457 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
4458 }
4459 }
4460 return dst;
4461 }
4462
4463
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4464 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4465 LogicVRegister dst,
4466 const LogicVRegister& src) {
4467 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4468 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4469 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
4470 }
4471 } else {
4472 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4473 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4474 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
4475 }
4476 }
4477 return dst;
4478 }
4479
4480
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4481 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4482 LogicVRegister dst,
4483 const LogicVRegister& src) {
4484 int lane_count = LaneCountFromFormat(vform) / 2;
4485 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4486 for (int i = lane_count - 1; i >= 0; i--) {
4487 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
4488 }
4489 } else {
4490 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4491 for (int i = lane_count - 1; i >= 0; i--) {
4492 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
4493 }
4494 }
4495 return dst;
4496 }
4497
4498
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4499 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4500 LogicVRegister dst,
4501 const LogicVRegister& src) {
4502 dst.ClearForWrite(vform);
4503 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4504 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4505 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
4506 }
4507 return dst;
4508 }
4509
4510
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4511 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4512 LogicVRegister dst,
4513 const LogicVRegister& src) {
4514 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4515 int lane_count = LaneCountFromFormat(vform) / 2;
4516 for (int i = lane_count - 1; i >= 0; i--) {
4517 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
4518 }
4519 return dst;
4520 }
4521
4522
4523 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4524 double Simulator::recip_sqrt_estimate(double a) {
4525 int q0, q1, s;
4526 double r;
4527 if (a < 0.5) {
4528 q0 = static_cast<int>(a * 512.0);
4529 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4530 } else {
4531 q1 = static_cast<int>(a * 256.0);
4532 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4533 }
4534 s = static_cast<int>(256.0 * r + 0.5);
4535 return static_cast<double>(s) / 256.0;
4536 }
4537
4538
Bits(uint64_t val,int start_bit,int end_bit)4539 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4540 return unsigned_bitextract_64(start_bit, end_bit, val);
4541 }
4542
4543
4544 template <typename T>
FPRecipSqrtEstimate(T op)4545 T Simulator::FPRecipSqrtEstimate(T op) {
4546 if (std::isnan(op)) {
4547 return FPProcessNaN(op);
4548 } else if (op == 0.0) {
4549 if (copysign(1.0, op) < 0.0) {
4550 return kFP64NegativeInfinity;
4551 } else {
4552 return kFP64PositiveInfinity;
4553 }
4554 } else if (copysign(1.0, op) < 0.0) {
4555 FPProcessException();
4556 return FPDefaultNaN<T>();
4557 } else if (std::isinf(op)) {
4558 return 0.0;
4559 } else {
4560 uint64_t fraction;
4561 int exp, result_exp;
4562
4563 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4564 exp = float_exp(op);
4565 fraction = float_mantissa(op);
4566 fraction <<= 29;
4567 } else {
4568 exp = double_exp(op);
4569 fraction = double_mantissa(op);
4570 }
4571
4572 if (exp == 0) {
4573 while (Bits(fraction, 51, 51) == 0) {
4574 fraction = Bits(fraction, 50, 0) << 1;
4575 exp -= 1;
4576 }
4577 fraction = Bits(fraction, 50, 0) << 1;
4578 }
4579
4580 double scaled;
4581 if (Bits(exp, 0, 0) == 0) {
4582 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4583 } else {
4584 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
4585 }
4586
4587 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4588 result_exp = (380 - exp) / 2;
4589 } else {
4590 result_exp = (3068 - exp) / 2;
4591 }
4592
4593 uint64_t estimate = double_to_rawbits(recip_sqrt_estimate(scaled));
4594
4595 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4596 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4597 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
4598 return float_pack(0, exp_bits, est_bits);
4599 } else {
4600 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
4601 }
4602 }
4603 }
4604
4605
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4606 LogicVRegister Simulator::frsqrte(VectorFormat vform,
4607 LogicVRegister dst,
4608 const LogicVRegister& src) {
4609 dst.ClearForWrite(vform);
4610 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4611 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4612 float input = src.Float<float>(i);
4613 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
4614 }
4615 } else {
4616 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4617 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4618 double input = src.Float<double>(i);
4619 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
4620 }
4621 }
4622 return dst;
4623 }
4624
4625 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)4626 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
4627 uint32_t sign;
4628
4629 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4630 sign = float_sign(op);
4631 } else {
4632 sign = double_sign(op);
4633 }
4634
4635 if (std::isnan(op)) {
4636 return FPProcessNaN(op);
4637 } else if (std::isinf(op)) {
4638 return (sign == 1) ? -0.0 : 0.0;
4639 } else if (op == 0.0) {
4640 FPProcessException(); // FPExc_DivideByZero exception.
4641 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4642 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
4643 (std::fabs(op) < std::pow(2.0, -128.0))) ||
4644 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
4645 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4646 bool overflow_to_inf = false;
4647 switch (rounding) {
4648 case FPTieEven: overflow_to_inf = true; break;
4649 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
4650 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
4651 case FPZero: overflow_to_inf = false; break;
4652 default: break;
4653 }
4654 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
4655 if (overflow_to_inf) {
4656 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4657 } else {
4658 // Return FPMaxNormal(sign).
4659 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4660 return float_pack(sign, 0xfe, 0x07fffff);
4661 } else {
4662 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
4663 }
4664 }
4665 } else {
4666 uint64_t fraction;
4667 int exp, result_exp;
4668 uint32_t sign;
4669
4670 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4671 sign = float_sign(op);
4672 exp = float_exp(op);
4673 fraction = float_mantissa(op);
4674 fraction <<= 29;
4675 } else {
4676 sign = double_sign(op);
4677 exp = double_exp(op);
4678 fraction = double_mantissa(op);
4679 }
4680
4681 if (exp == 0) {
4682 if (Bits(fraction, 51, 51) == 0) {
4683 exp -= 1;
4684 fraction = Bits(fraction, 49, 0) << 2;
4685 } else {
4686 fraction = Bits(fraction, 50, 0) << 1;
4687 }
4688 }
4689
4690 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4691
4692 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4693 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
4694 } else {
4695 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
4696 }
4697
4698 double estimate = recip_estimate(scaled);
4699
4700 fraction = double_mantissa(estimate);
4701 if (result_exp == 0) {
4702 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4703 } else if (result_exp == -1) {
4704 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4705 result_exp = 0;
4706 }
4707 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4708 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4709 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4710 return float_pack(sign, exp_bits, frac_bits);
4711 } else {
4712 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4713 }
4714 }
4715 }
4716
4717
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4718 LogicVRegister Simulator::frecpe(VectorFormat vform,
4719 LogicVRegister dst,
4720 const LogicVRegister& src,
4721 FPRounding round) {
4722 dst.ClearForWrite(vform);
4723 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4724 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4725 float input = src.Float<float>(i);
4726 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4727 }
4728 } else {
4729 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4730 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4731 double input = src.Float<double>(i);
4732 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4733 }
4734 }
4735 return dst;
4736 }
4737
4738
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4739 LogicVRegister Simulator::ursqrte(VectorFormat vform,
4740 LogicVRegister dst,
4741 const LogicVRegister& src) {
4742 dst.ClearForWrite(vform);
4743 uint64_t operand;
4744 uint32_t result;
4745 double dp_operand, dp_result;
4746 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4747 operand = src.Uint(vform, i);
4748 if (operand <= 0x3FFFFFFF) {
4749 result = 0xFFFFFFFF;
4750 } else {
4751 dp_operand = operand * std::pow(2.0, -32);
4752 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4753 result = static_cast<uint32_t>(dp_result);
4754 }
4755 dst.SetUint(vform, i, result);
4756 }
4757 return dst;
4758 }
4759
4760
4761 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4762 double Simulator::recip_estimate(double a) {
4763 int q, s;
4764 double r;
4765 q = static_cast<int>(a * 512.0);
4766 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4767 s = static_cast<int>(256.0 * r + 0.5);
4768 return static_cast<double>(s) / 256.0;
4769 }
4770
4771
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4772 LogicVRegister Simulator::urecpe(VectorFormat vform,
4773 LogicVRegister dst,
4774 const LogicVRegister& src) {
4775 dst.ClearForWrite(vform);
4776 uint64_t operand;
4777 uint32_t result;
4778 double dp_operand, dp_result;
4779 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4780 operand = src.Uint(vform, i);
4781 if (operand <= 0x7FFFFFFF) {
4782 result = 0xFFFFFFFF;
4783 } else {
4784 dp_operand = operand * std::pow(2.0, -32);
4785 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4786 result = static_cast<uint32_t>(dp_result);
4787 }
4788 dst.SetUint(vform, i, result);
4789 }
4790 return dst;
4791 }
4792
4793 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4794 LogicVRegister Simulator::frecpx(VectorFormat vform,
4795 LogicVRegister dst,
4796 const LogicVRegister& src) {
4797 dst.ClearForWrite(vform);
4798 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4799 T op = src.Float<T>(i);
4800 T result;
4801 if (std::isnan(op)) {
4802 result = FPProcessNaN(op);
4803 } else {
4804 int exp;
4805 uint32_t sign;
4806 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
4807 sign = float_sign(op);
4808 exp = float_exp(op);
4809 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4810 result = float_pack(sign, exp, 0);
4811 } else {
4812 sign = double_sign(op);
4813 exp = double_exp(op);
4814 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4815 result = double_pack(sign, exp, 0);
4816 }
4817 }
4818 dst.SetFloat(i, result);
4819 }
4820 return dst;
4821 }
4822
4823
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4824 LogicVRegister Simulator::frecpx(VectorFormat vform,
4825 LogicVRegister dst,
4826 const LogicVRegister& src) {
4827 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4828 frecpx<float>(vform, dst, src);
4829 } else {
4830 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4831 frecpx<double>(vform, dst, src);
4832 }
4833 return dst;
4834 }
4835
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4836 LogicVRegister Simulator::scvtf(VectorFormat vform,
4837 LogicVRegister dst,
4838 const LogicVRegister& src,
4839 int fbits,
4840 FPRounding round) {
4841 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4842 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4843 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4844 dst.SetFloat<float>(i, result);
4845 } else {
4846 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4847 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4848 dst.SetFloat<double>(i, result);
4849 }
4850 }
4851 return dst;
4852 }
4853
4854
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4855 LogicVRegister Simulator::ucvtf(VectorFormat vform,
4856 LogicVRegister dst,
4857 const LogicVRegister& src,
4858 int fbits,
4859 FPRounding round) {
4860 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4861 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4862 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4863 dst.SetFloat<float>(i, result);
4864 } else {
4865 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4866 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4867 dst.SetFloat<double>(i, result);
4868 }
4869 }
4870 return dst;
4871 }
4872
4873
4874 } // namespace vixl
4875