1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/execution/arm64/simulator-arm64.h"
6 
7 #if defined(USE_SIMULATOR)
8 
9 #include <cmath>
10 
11 namespace v8 {
12 namespace internal {
13 
14 namespace {
15 
16 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)17 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
18                               FPRounding round_mode) {
19   uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
20       sign, exponent, mantissa, round_mode);
21   return bit_cast<double>(bits);
22 }
23 
24 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)25 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
26                             FPRounding round_mode) {
27   uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
28       sign, exponent, mantissa, round_mode);
29   return bit_cast<float>(bits);
30 }
31 
32 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)33 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
34                                 uint64_t mantissa, FPRounding round_mode) {
35   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
36       sign, exponent, mantissa, round_mode);
37 }
38 
39 }  // namespace
40 
FixedToDouble(int64_t src,int fbits,FPRounding round)41 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
42   if (src >= 0) {
43     return UFixedToDouble(src, fbits, round);
44   } else if (src == INT64_MIN) {
45     return -UFixedToDouble(src, fbits, round);
46   } else {
47     return -UFixedToDouble(-src, fbits, round);
48   }
49 }
50 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)51 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
52   // An input of 0 is a special case because the result is effectively
53   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
54   if (src == 0) {
55     return 0.0;
56   }
57 
58   // Calculate the exponent. The highest significant bit will have the value
59   // 2^exponent.
60   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
61   const int64_t exponent = highest_significant_bit - fbits;
62 
63   return FPRoundToDouble(0, exponent, src, round);
64 }
65 
FixedToFloat(int64_t src,int fbits,FPRounding round)66 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
67   if (src >= 0) {
68     return UFixedToFloat(src, fbits, round);
69   } else if (src == INT64_MIN) {
70     return -UFixedToFloat(src, fbits, round);
71   } else {
72     return -UFixedToFloat(-src, fbits, round);
73   }
74 }
75 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)76 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
77   // An input of 0 is a special case because the result is effectively
78   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
79   if (src == 0) {
80     return 0.0f;
81   }
82 
83   // Calculate the exponent. The highest significant bit will have the value
84   // 2^exponent.
85   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
86   const int32_t exponent = highest_significant_bit - fbits;
87 
88   return FPRoundToFloat(0, exponent, src, round);
89 }
90 
FPToDouble(float value)91 double Simulator::FPToDouble(float value) {
92   switch (std::fpclassify(value)) {
93     case FP_NAN: {
94       if (IsSignallingNaN(value)) {
95         FPProcessException();
96       }
97       if (DN()) return kFP64DefaultNaN;
98 
99       // Convert NaNs as the processor would:
100       //  - The sign is propagated.
101       //  - The mantissa is transferred entirely, except that the top bit is
102       //    forced to '1', making the result a quiet NaN. The unused (low-order)
103       //    mantissa bits are set to 0.
104       uint32_t raw = bit_cast<uint32_t>(value);
105 
106       uint64_t sign = raw >> 31;
107       uint64_t exponent = (1 << kDoubleExponentBits) - 1;
108       uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
109 
110       // Unused low-order bits remain zero.
111       mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
112 
113       // Force a quiet NaN.
114       mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
115 
116       return double_pack(sign, exponent, mantissa);
117     }
118 
119     case FP_ZERO:
120     case FP_NORMAL:
121     case FP_SUBNORMAL:
122     case FP_INFINITE: {
123       // All other inputs are preserved in a standard cast, because every value
124       // representable using an IEEE-754 float is also representable using an
125       // IEEE-754 double.
126       return static_cast<double>(value);
127     }
128   }
129 
130   UNREACHABLE();
131 }
132 
FPToFloat(float16 value)133 float Simulator::FPToFloat(float16 value) {
134   uint32_t sign = value >> 15;
135   uint32_t exponent =
136       unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
137                              kFloat16MantissaBits, value);
138   uint32_t mantissa =
139       unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
140 
141   switch (float16classify(value)) {
142     case FP_ZERO:
143       return (sign == 0) ? 0.0f : -0.0f;
144 
145     case FP_INFINITE:
146       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
147 
148     case FP_SUBNORMAL: {
149       // Calculate shift required to put mantissa into the most-significant bits
150       // of the destination mantissa.
151       int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
152 
153       // Shift mantissa and discard implicit '1'.
154       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
155       mantissa &= (1 << kFloatMantissaBits) - 1;
156 
157       // Adjust the exponent for the shift applied, and rebias.
158       exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
159       break;
160     }
161 
162     case FP_NAN: {
163       if (IsSignallingNaN(value)) {
164         FPProcessException();
165       }
166       if (DN()) return kFP32DefaultNaN;
167 
168       // Convert NaNs as the processor would:
169       //  - The sign is propagated.
170       //  - The mantissa is transferred entirely, except that the top bit is
171       //    forced to '1', making the result a quiet NaN. The unused (low-order)
172       //    mantissa bits are set to 0.
173       exponent = (1 << kFloatExponentBits) - 1;
174 
175       // Increase bits in mantissa, making low-order bits 0.
176       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
177       mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
178       break;
179     }
180 
181     case FP_NORMAL: {
182       // Increase bits in mantissa, making low-order bits 0.
183       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
184 
185       // Change exponent bias.
186       exponent += (kFloatExponentBias - kFloat16ExponentBias);
187       break;
188     }
189 
190     default:
191       UNREACHABLE();
192   }
193   return float_pack(sign, exponent, mantissa);
194 }
195 
FPToFloat16(float value,FPRounding round_mode)196 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
197   // Only the FPTieEven rounding mode is implemented.
198   DCHECK_EQ(round_mode, FPTieEven);
199   USE(round_mode);
200 
201   int64_t sign = float_sign(value);
202   int64_t exponent =
203       static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
204   uint32_t mantissa = float_mantissa(value);
205 
206   switch (std::fpclassify(value)) {
207     case FP_NAN: {
208       if (IsSignallingNaN(value)) {
209         FPProcessException();
210       }
211       if (DN()) return kFP16DefaultNaN;
212 
213       // Convert NaNs as the processor would:
214       //  - The sign is propagated.
215       //  - The mantissa is transferred as much as possible, except that the top
216       //    bit is forced to '1', making the result a quiet NaN.
217       float16 result =
218           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
219       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
220       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
221       return result;
222     }
223 
224     case FP_ZERO:
225       return (sign == 0) ? 0 : 0x8000;
226 
227     case FP_INFINITE:
228       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
229 
230     case FP_NORMAL:
231     case FP_SUBNORMAL: {
232       // Convert float-to-half as the processor would, assuming that FPCR.FZ
233       // (flush-to-zero) is not set.
234 
235       // Add the implicit '1' bit to the mantissa.
236       mantissa += (1 << kFloatMantissaBits);
237       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
238     }
239   }
240 
241   UNREACHABLE();
242 }
243 
FPToFloat16(double value,FPRounding round_mode)244 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
245   // Only the FPTieEven rounding mode is implemented.
246   DCHECK_EQ(round_mode, FPTieEven);
247   USE(round_mode);
248 
249   int64_t sign = double_sign(value);
250   int64_t exponent =
251       static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
252   uint64_t mantissa = double_mantissa(value);
253 
254   switch (std::fpclassify(value)) {
255     case FP_NAN: {
256       if (IsSignallingNaN(value)) {
257         FPProcessException();
258       }
259       if (DN()) return kFP16DefaultNaN;
260 
261       // Convert NaNs as the processor would:
262       //  - The sign is propagated.
263       //  - The mantissa is transferred as much as possible, except that the top
264       //    bit is forced to '1', making the result a quiet NaN.
265       float16 result =
266           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
268       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
269       return result;
270     }
271 
272     case FP_ZERO:
273       return (sign == 0) ? 0 : 0x8000;
274 
275     case FP_INFINITE:
276       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277 
278     case FP_NORMAL:
279     case FP_SUBNORMAL: {
280       // Convert double-to-half as the processor would, assuming that FPCR.FZ
281       // (flush-to-zero) is not set.
282 
283       // Add the implicit '1' bit to the mantissa.
284       mantissa += (UINT64_C(1) << kDoubleMantissaBits);
285       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286     }
287   }
288 
289   UNREACHABLE();
290 }
291 
FPToFloat(double value,FPRounding round_mode)292 float Simulator::FPToFloat(double value, FPRounding round_mode) {
293   // Only the FPTieEven rounding mode is implemented.
294   DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
295   USE(round_mode);
296 
297   switch (std::fpclassify(value)) {
298     case FP_NAN: {
299       if (IsSignallingNaN(value)) {
300         FPProcessException();
301       }
302       if (DN()) return kFP32DefaultNaN;
303 
304       // Convert NaNs as the processor would:
305       //  - The sign is propagated.
306       //  - The mantissa is transferred as much as possible, except that the
307       //    top bit is forced to '1', making the result a quiet NaN.
308 
309       uint64_t raw = bit_cast<uint64_t>(value);
310 
311       uint32_t sign = raw >> 63;
312       uint32_t exponent = (1 << 8) - 1;
313       uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
314           50, kDoubleMantissaBits - kFloatMantissaBits, raw));
315       mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.
316 
317       return float_pack(sign, exponent, mantissa);
318     }
319 
320     case FP_ZERO:
321     case FP_INFINITE: {
322       // In a C++ cast, any value representable in the target type will be
323       // unchanged. This is always the case for +/-0.0 and infinities.
324       return static_cast<float>(value);
325     }
326 
327     case FP_NORMAL:
328     case FP_SUBNORMAL: {
329       // Convert double-to-float as the processor would, assuming that FPCR.FZ
330       // (flush-to-zero) is not set.
331       uint32_t sign = double_sign(value);
332       int64_t exponent =
333           static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
334       uint64_t mantissa = double_mantissa(value);
335       if (std::fpclassify(value) == FP_NORMAL) {
336         // For normal FP values, add the hidden bit.
337         mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
338       }
339       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
340     }
341   }
342 
343   UNREACHABLE();
344 }
345 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)346 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
347   dst.ClearForWrite(vform);
348   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
349     dst.ReadUintFromMem(vform, i, addr);
350     addr += LaneSizeInBytesFromFormat(vform);
351   }
352 }
353 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)354 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
355                     uint64_t addr) {
356   dst.ReadUintFromMem(vform, index, addr);
357 }
358 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)359 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
360   dst.ClearForWrite(vform);
361   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
362     dst.ReadUintFromMem(vform, i, addr);
363   }
364 }
365 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)366 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
367                     LogicVRegister dst2, uint64_t addr1) {
368   dst1.ClearForWrite(vform);
369   dst2.ClearForWrite(vform);
370   int esize = LaneSizeInBytesFromFormat(vform);
371   uint64_t addr2 = addr1 + esize;
372   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
373     dst1.ReadUintFromMem(vform, i, addr1);
374     dst2.ReadUintFromMem(vform, i, addr2);
375     addr1 += 2 * esize;
376     addr2 += 2 * esize;
377   }
378 }
379 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)380 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
381                     LogicVRegister dst2, int index, uint64_t addr1) {
382   dst1.ClearForWrite(vform);
383   dst2.ClearForWrite(vform);
384   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
385   dst1.ReadUintFromMem(vform, index, addr1);
386   dst2.ReadUintFromMem(vform, index, addr2);
387 }
388 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)389 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
390                      LogicVRegister dst2, uint64_t addr) {
391   dst1.ClearForWrite(vform);
392   dst2.ClearForWrite(vform);
393   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
394   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
395     dst1.ReadUintFromMem(vform, i, addr);
396     dst2.ReadUintFromMem(vform, i, addr2);
397   }
398 }
399 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)400 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
401                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
402   dst1.ClearForWrite(vform);
403   dst2.ClearForWrite(vform);
404   dst3.ClearForWrite(vform);
405   int esize = LaneSizeInBytesFromFormat(vform);
406   uint64_t addr2 = addr1 + esize;
407   uint64_t addr3 = addr2 + esize;
408   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
409     dst1.ReadUintFromMem(vform, i, addr1);
410     dst2.ReadUintFromMem(vform, i, addr2);
411     dst3.ReadUintFromMem(vform, i, addr3);
412     addr1 += 3 * esize;
413     addr2 += 3 * esize;
414     addr3 += 3 * esize;
415   }
416 }
417 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)418 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
419                     LogicVRegister dst2, LogicVRegister dst3, int index,
420                     uint64_t addr1) {
421   dst1.ClearForWrite(vform);
422   dst2.ClearForWrite(vform);
423   dst3.ClearForWrite(vform);
424   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
425   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
426   dst1.ReadUintFromMem(vform, index, addr1);
427   dst2.ReadUintFromMem(vform, index, addr2);
428   dst3.ReadUintFromMem(vform, index, addr3);
429 }
430 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)431 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
432                      LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
433   dst1.ClearForWrite(vform);
434   dst2.ClearForWrite(vform);
435   dst3.ClearForWrite(vform);
436   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
437   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
438   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439     dst1.ReadUintFromMem(vform, i, addr);
440     dst2.ReadUintFromMem(vform, i, addr2);
441     dst3.ReadUintFromMem(vform, i, addr3);
442   }
443 }
444 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)445 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
446                     LogicVRegister dst2, LogicVRegister dst3,
447                     LogicVRegister dst4, uint64_t addr1) {
448   dst1.ClearForWrite(vform);
449   dst2.ClearForWrite(vform);
450   dst3.ClearForWrite(vform);
451   dst4.ClearForWrite(vform);
452   int esize = LaneSizeInBytesFromFormat(vform);
453   uint64_t addr2 = addr1 + esize;
454   uint64_t addr3 = addr2 + esize;
455   uint64_t addr4 = addr3 + esize;
456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
457     dst1.ReadUintFromMem(vform, i, addr1);
458     dst2.ReadUintFromMem(vform, i, addr2);
459     dst3.ReadUintFromMem(vform, i, addr3);
460     dst4.ReadUintFromMem(vform, i, addr4);
461     addr1 += 4 * esize;
462     addr2 += 4 * esize;
463     addr3 += 4 * esize;
464     addr4 += 4 * esize;
465   }
466 }
467 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)468 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
469                     LogicVRegister dst2, LogicVRegister dst3,
470                     LogicVRegister dst4, int index, uint64_t addr1) {
471   dst1.ClearForWrite(vform);
472   dst2.ClearForWrite(vform);
473   dst3.ClearForWrite(vform);
474   dst4.ClearForWrite(vform);
475   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
476   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
477   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
478   dst1.ReadUintFromMem(vform, index, addr1);
479   dst2.ReadUintFromMem(vform, index, addr2);
480   dst3.ReadUintFromMem(vform, index, addr3);
481   dst4.ReadUintFromMem(vform, index, addr4);
482 }
483 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)484 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
485                      LogicVRegister dst2, LogicVRegister dst3,
486                      LogicVRegister dst4, uint64_t addr) {
487   dst1.ClearForWrite(vform);
488   dst2.ClearForWrite(vform);
489   dst3.ClearForWrite(vform);
490   dst4.ClearForWrite(vform);
491   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
492   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
493   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
494   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
495     dst1.ReadUintFromMem(vform, i, addr);
496     dst2.ReadUintFromMem(vform, i, addr2);
497     dst3.ReadUintFromMem(vform, i, addr3);
498     dst4.ReadUintFromMem(vform, i, addr4);
499   }
500 }
501 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)502 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
503   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504     src.WriteUintToMem(vform, i, addr);
505     addr += LaneSizeInBytesFromFormat(vform);
506   }
507 }
508 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)509 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
510                     uint64_t addr) {
511   src.WriteUintToMem(vform, index, addr);
512 }
513 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)514 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
515                     uint64_t addr) {
516   int esize = LaneSizeInBytesFromFormat(vform);
517   uint64_t addr2 = addr + esize;
518   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519     dst.WriteUintToMem(vform, i, addr);
520     dst2.WriteUintToMem(vform, i, addr2);
521     addr += 2 * esize;
522     addr2 += 2 * esize;
523   }
524 }
525 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)526 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
527                     int index, uint64_t addr) {
528   int esize = LaneSizeInBytesFromFormat(vform);
529   dst.WriteUintToMem(vform, index, addr);
530   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
531 }
532 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)533 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
534                     LogicVRegister dst3, uint64_t addr) {
535   int esize = LaneSizeInBytesFromFormat(vform);
536   uint64_t addr2 = addr + esize;
537   uint64_t addr3 = addr2 + esize;
538   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
539     dst.WriteUintToMem(vform, i, addr);
540     dst2.WriteUintToMem(vform, i, addr2);
541     dst3.WriteUintToMem(vform, i, addr3);
542     addr += 3 * esize;
543     addr2 += 3 * esize;
544     addr3 += 3 * esize;
545   }
546 }
547 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)548 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
549                     LogicVRegister dst3, int index, uint64_t addr) {
550   int esize = LaneSizeInBytesFromFormat(vform);
551   dst.WriteUintToMem(vform, index, addr);
552   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
553   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
554 }
555 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)556 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
557                     LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
558   int esize = LaneSizeInBytesFromFormat(vform);
559   uint64_t addr2 = addr + esize;
560   uint64_t addr3 = addr2 + esize;
561   uint64_t addr4 = addr3 + esize;
562   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
563     dst.WriteUintToMem(vform, i, addr);
564     dst2.WriteUintToMem(vform, i, addr2);
565     dst3.WriteUintToMem(vform, i, addr3);
566     dst4.WriteUintToMem(vform, i, addr4);
567     addr += 4 * esize;
568     addr2 += 4 * esize;
569     addr3 += 4 * esize;
570     addr4 += 4 * esize;
571   }
572 }
573 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)574 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
575                     LogicVRegister dst3, LogicVRegister dst4, int index,
576                     uint64_t addr) {
577   int esize = LaneSizeInBytesFromFormat(vform);
578   dst.WriteUintToMem(vform, index, addr);
579   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
580   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
581   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
582 }
583 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)584 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
585                               const LogicVRegister& src1,
586                               const LogicVRegister& src2, Condition cond) {
587   dst.ClearForWrite(vform);
588   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
589     int64_t sa = src1.Int(vform, i);
590     int64_t sb = src2.Int(vform, i);
591     uint64_t ua = src1.Uint(vform, i);
592     uint64_t ub = src2.Uint(vform, i);
593     bool result = false;
594     switch (cond) {
595       case eq:
596         result = (ua == ub);
597         break;
598       case ge:
599         result = (sa >= sb);
600         break;
601       case gt:
602         result = (sa > sb);
603         break;
604       case hi:
605         result = (ua > ub);
606         break;
607       case hs:
608         result = (ua >= ub);
609         break;
610       case lt:
611         result = (sa < sb);
612         break;
613       case le:
614         result = (sa <= sb);
615         break;
616       default:
617         UNREACHABLE();
618     }
619     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
620   }
621   return dst;
622 }
623 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)624 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
625                               const LogicVRegister& src1, int imm,
626                               Condition cond) {
627   SimVRegister temp;
628   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
629   return cmp(vform, dst, src1, imm_reg, cond);
630 }
631 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)632 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
633                                  const LogicVRegister& src1,
634                                  const LogicVRegister& src2) {
635   dst.ClearForWrite(vform);
636   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
637     uint64_t ua = src1.Uint(vform, i);
638     uint64_t ub = src2.Uint(vform, i);
639     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
640   }
641   return dst;
642 }
643 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)644 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
645                               const LogicVRegister& src1,
646                               const LogicVRegister& src2) {
647   int lane_size = LaneSizeInBitsFromFormat(vform);
648   dst.ClearForWrite(vform);
649   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
650     // Test for unsigned saturation.
651     uint64_t ua = src1.UintLeftJustified(vform, i);
652     uint64_t ub = src2.UintLeftJustified(vform, i);
653     uint64_t ur = ua + ub;
654     if (ur < ua) {
655       dst.SetUnsignedSat(i, true);
656     }
657 
658     // Test for signed saturation.
659     bool pos_a = (ua >> 63) == 0;
660     bool pos_b = (ub >> 63) == 0;
661     bool pos_r = (ur >> 63) == 0;
662     // If the signs of the operands are the same, but different from the result,
663     // there was an overflow.
664     if ((pos_a == pos_b) && (pos_a != pos_r)) {
665       dst.SetSignedSat(i, pos_a);
666     }
667 
668     dst.SetInt(vform, i, ur >> (64 - lane_size));
669   }
670   return dst;
671 }
672 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)673 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
674                                const LogicVRegister& src1,
675                                const LogicVRegister& src2) {
676   SimVRegister temp1, temp2;
677   uzp1(vform, temp1, src1, src2);
678   uzp2(vform, temp2, src1, src2);
679   add(vform, dst, temp1, temp2);
680   return dst;
681 }
682 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)683 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
684                               const LogicVRegister& src1,
685                               const LogicVRegister& src2) {
686   SimVRegister temp;
687   mul(vform, temp, src1, src2);
688   add(vform, dst, dst, temp);
689   return dst;
690 }
691 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)692 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
693                               const LogicVRegister& src1,
694                               const LogicVRegister& src2) {
695   SimVRegister temp;
696   mul(vform, temp, src1, src2);
697   sub(vform, dst, dst, temp);
698   return dst;
699 }
700 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)701 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
702                               const LogicVRegister& src1,
703                               const LogicVRegister& src2) {
704   dst.ClearForWrite(vform);
705   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
706     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
707   }
708   return dst;
709 }
710 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)711 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
712                               const LogicVRegister& src1,
713                               const LogicVRegister& src2, int index) {
714   SimVRegister temp;
715   VectorFormat indexform = VectorFormatFillQ(vform);
716   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
717 }
718 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)719 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
720                               const LogicVRegister& src1,
721                               const LogicVRegister& src2, int index) {
722   SimVRegister temp;
723   VectorFormat indexform = VectorFormatFillQ(vform);
724   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
725 }
726 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)727 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
728                               const LogicVRegister& src1,
729                               const LogicVRegister& src2, int index) {
730   SimVRegister temp;
731   VectorFormat indexform = VectorFormatFillQ(vform);
732   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
733 }
734 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)735 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
736                                 const LogicVRegister& src1,
737                                 const LogicVRegister& src2, int index) {
738   SimVRegister temp;
739   VectorFormat indexform =
740       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
741   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
742 }
743 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)744 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
745                                  const LogicVRegister& src1,
746                                  const LogicVRegister& src2, int index) {
747   SimVRegister temp;
748   VectorFormat indexform =
749       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
750   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
751 }
752 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)753 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
754                                 const LogicVRegister& src1,
755                                 const LogicVRegister& src2, int index) {
756   SimVRegister temp;
757   VectorFormat indexform =
758       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
759   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
760 }
761 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)762 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
763                                  const LogicVRegister& src1,
764                                  const LogicVRegister& src2, int index) {
765   SimVRegister temp;
766   VectorFormat indexform =
767       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
768   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
769 }
770 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)771 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
772                                 const LogicVRegister& src1,
773                                 const LogicVRegister& src2, int index) {
774   SimVRegister temp;
775   VectorFormat indexform =
776       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
777   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
778 }
779 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)780 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
781                                  const LogicVRegister& src1,
782                                  const LogicVRegister& src2, int index) {
783   SimVRegister temp;
784   VectorFormat indexform =
785       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
786   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
787 }
788 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)789 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
790                                 const LogicVRegister& src1,
791                                 const LogicVRegister& src2, int index) {
792   SimVRegister temp;
793   VectorFormat indexform =
794       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
795   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
796 }
797 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)798 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
799                                  const LogicVRegister& src1,
800                                  const LogicVRegister& src2, int index) {
801   SimVRegister temp;
802   VectorFormat indexform =
803       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
804   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
808                                 const LogicVRegister& src1,
809                                 const LogicVRegister& src2, int index) {
810   SimVRegister temp;
811   VectorFormat indexform =
812       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
813   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
814 }
815 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)816 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
817                                  const LogicVRegister& src1,
818                                  const LogicVRegister& src2, int index) {
819   SimVRegister temp;
820   VectorFormat indexform =
821       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
822   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
823 }
824 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)825 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
826                                 const LogicVRegister& src1,
827                                 const LogicVRegister& src2, int index) {
828   SimVRegister temp;
829   VectorFormat indexform =
830       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
831   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
832 }
833 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)834 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
835                                  const LogicVRegister& src1,
836                                  const LogicVRegister& src2, int index) {
837   SimVRegister temp;
838   VectorFormat indexform =
839       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
840   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
841 }
842 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)843 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
844                                   const LogicVRegister& src1,
845                                   const LogicVRegister& src2, int index) {
846   SimVRegister temp;
847   VectorFormat indexform =
848       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
849   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
850 }
851 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)852 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
853                                    const LogicVRegister& src1,
854                                    const LogicVRegister& src2, int index) {
855   SimVRegister temp;
856   VectorFormat indexform =
857       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
858   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)861 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
862                                   const LogicVRegister& src1,
863                                   const LogicVRegister& src2, int index) {
864   SimVRegister temp;
865   VectorFormat indexform =
866       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
867   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
868 }
869 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)870 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
871                                    const LogicVRegister& src1,
872                                    const LogicVRegister& src2, int index) {
873   SimVRegister temp;
874   VectorFormat indexform =
875       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
876   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
877 }
878 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
880                                   const LogicVRegister& src1,
881                                   const LogicVRegister& src2, int index) {
882   SimVRegister temp;
883   VectorFormat indexform =
884       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
885   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
886 }
887 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)888 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
889                                    const LogicVRegister& src1,
890                                    const LogicVRegister& src2, int index) {
891   SimVRegister temp;
892   VectorFormat indexform =
893       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
894   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
895 }
896 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)897 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
898                                   const LogicVRegister& src1,
899                                   const LogicVRegister& src2, int index) {
900   SimVRegister temp;
901   VectorFormat indexform = VectorFormatFillQ(vform);
902   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
903 }
904 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)905 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
906                                    const LogicVRegister& src1,
907                                    const LogicVRegister& src2, int index) {
908   SimVRegister temp;
909   VectorFormat indexform = VectorFormatFillQ(vform);
910   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
911 }
912 
PolynomialMult(uint8_t op1,uint8_t op2)913 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
914   uint16_t result = 0;
915   uint16_t extended_op2 = op2;
916   for (int i = 0; i < 8; ++i) {
917     if ((op1 >> i) & 1) {
918       result = result ^ (extended_op2 << i);
919     }
920   }
921   return result;
922 }
923 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)924 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
925                                const LogicVRegister& src1,
926                                const LogicVRegister& src2) {
927   dst.ClearForWrite(vform);
928   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
929     dst.SetUint(vform, i,
930                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
931   }
932   return dst;
933 }
934 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
936                                 const LogicVRegister& src1,
937                                 const LogicVRegister& src2) {
938   VectorFormat vform_src = VectorFormatHalfWidth(vform);
939   dst.ClearForWrite(vform);
940   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
941     dst.SetUint(
942         vform, i,
943         PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
944   }
945   return dst;
946 }
947 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)948 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
949                                  const LogicVRegister& src1,
950                                  const LogicVRegister& src2) {
951   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
952   dst.ClearForWrite(vform);
953   int lane_count = LaneCountFromFormat(vform);
954   for (int i = 0; i < lane_count; i++) {
955     dst.SetUint(vform, i,
956                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
957                                src2.Uint(vform_src, lane_count + i)));
958   }
959   return dst;
960 }
961 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)962 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
963                               const LogicVRegister& src1,
964                               const LogicVRegister& src2) {
965   int lane_size = LaneSizeInBitsFromFormat(vform);
966   dst.ClearForWrite(vform);
967   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
968     // Test for unsigned saturation.
969     uint64_t ua = src1.UintLeftJustified(vform, i);
970     uint64_t ub = src2.UintLeftJustified(vform, i);
971     uint64_t ur = ua - ub;
972     if (ub > ua) {
973       dst.SetUnsignedSat(i, false);
974     }
975 
976     // Test for signed saturation.
977     bool pos_a = (ua >> 63) == 0;
978     bool pos_b = (ub >> 63) == 0;
979     bool pos_r = (ur >> 63) == 0;
980     // If the signs of the operands are different, and the sign of the first
981     // operand doesn't match the result, there was an overflow.
982     if ((pos_a != pos_b) && (pos_a != pos_r)) {
983       dst.SetSignedSat(i, pos_a);
984     }
985 
986     dst.SetInt(vform, i, ur >> (64 - lane_size));
987   }
988   return dst;
989 }
990 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)991 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
992                                const LogicVRegister& src1,
993                                const LogicVRegister& src2) {
994   dst.ClearForWrite(vform);
995   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
996     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
997   }
998   return dst;
999 }
1000 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1001 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1002                               const LogicVRegister& src1,
1003                               const LogicVRegister& src2) {
1004   dst.ClearForWrite(vform);
1005   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1006     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1007   }
1008   return dst;
1009 }
1010 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1011 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1012                               const LogicVRegister& src1,
1013                               const LogicVRegister& src2) {
1014   dst.ClearForWrite(vform);
1015   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1016     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1017   }
1018   return dst;
1019 }
1020 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1021 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1022                               const LogicVRegister& src1,
1023                               const LogicVRegister& src2) {
1024   dst.ClearForWrite(vform);
1025   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1026     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1027   }
1028   return dst;
1029 }
1030 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1031 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1032                               const LogicVRegister& src1,
1033                               const LogicVRegister& src2) {
1034   dst.ClearForWrite(vform);
1035   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1036     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1037   }
1038   return dst;
1039 }
1040 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1041 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1042                               const LogicVRegister& src, uint64_t imm) {
1043   uint64_t result[16];
1044   int laneCount = LaneCountFromFormat(vform);
1045   for (int i = 0; i < laneCount; ++i) {
1046     result[i] = src.Uint(vform, i) & ~imm;
1047   }
1048   dst.SetUintArray(vform, result);
1049   return dst;
1050 }
1051 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1052 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1053                               const LogicVRegister& src1,
1054                               const LogicVRegister& src2) {
1055   dst.ClearForWrite(vform);
1056   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057     uint64_t operand1 = dst.Uint(vform, i);
1058     uint64_t operand2 = ~src2.Uint(vform, i);
1059     uint64_t operand3 = src1.Uint(vform, i);
1060     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1061     dst.SetUint(vform, i, result);
1062   }
1063   return dst;
1064 }
1065 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1066 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1067                               const LogicVRegister& src1,
1068                               const LogicVRegister& src2) {
1069   dst.ClearForWrite(vform);
1070   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1071     uint64_t operand1 = dst.Uint(vform, i);
1072     uint64_t operand2 = src2.Uint(vform, i);
1073     uint64_t operand3 = src1.Uint(vform, i);
1074     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1075     dst.SetUint(vform, i, result);
1076   }
1077   return dst;
1078 }
1079 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1080 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1081                               const LogicVRegister& src1,
1082                               const LogicVRegister& src2) {
1083   dst.ClearForWrite(vform);
1084   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1085     uint64_t operand1 = src2.Uint(vform, i);
1086     uint64_t operand2 = dst.Uint(vform, i);
1087     uint64_t operand3 = src1.Uint(vform, i);
1088     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1089     dst.SetUint(vform, i, result);
1090   }
1091   return dst;
1092 }
1093 
SMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1094 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1095                                   const LogicVRegister& src1,
1096                                   const LogicVRegister& src2, bool max) {
1097   dst.ClearForWrite(vform);
1098   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1099     int64_t src1_val = src1.Int(vform, i);
1100     int64_t src2_val = src2.Int(vform, i);
1101     int64_t dst_val;
1102     if (max) {
1103       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1104     } else {
1105       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1106     }
1107     dst.SetInt(vform, i, dst_val);
1108   }
1109   return dst;
1110 }
1111 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1112 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1113                                const LogicVRegister& src1,
1114                                const LogicVRegister& src2) {
1115   return SMinMax(vform, dst, src1, src2, true);
1116 }
1117 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1118 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1119                                const LogicVRegister& src1,
1120                                const LogicVRegister& src2) {
1121   return SMinMax(vform, dst, src1, src2, false);
1122 }
1123 
SMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1124 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1125                                    const LogicVRegister& src1,
1126                                    const LogicVRegister& src2, bool max) {
1127   int lanes = LaneCountFromFormat(vform);
1128   int64_t result[kMaxLanesPerVector];
1129   const LogicVRegister* src = &src1;
1130   for (int j = 0; j < 2; j++) {
1131     for (int i = 0; i < lanes; i += 2) {
1132       int64_t first_val = src->Int(vform, i);
1133       int64_t second_val = src->Int(vform, i + 1);
1134       int64_t dst_val;
1135       if (max) {
1136         dst_val = (first_val > second_val) ? first_val : second_val;
1137       } else {
1138         dst_val = (first_val < second_val) ? first_val : second_val;
1139       }
1140       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1141       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1142     }
1143     src = &src2;
1144   }
1145   dst.SetIntArray(vform, result);
1146   return dst;
1147 }
1148 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1149 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1150                                 const LogicVRegister& src1,
1151                                 const LogicVRegister& src2) {
1152   return SMinMaxP(vform, dst, src1, src2, true);
1153 }
1154 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1155 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1156                                 const LogicVRegister& src1,
1157                                 const LogicVRegister& src2) {
1158   return SMinMaxP(vform, dst, src1, src2, false);
1159 }
1160 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1161 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1162                                const LogicVRegister& src) {
1163   DCHECK_EQ(vform, kFormatD);
1164 
1165   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1166   dst.ClearForWrite(vform);
1167   dst.SetUint(vform, 0, dst_val);
1168   return dst;
1169 }
1170 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1171 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1172                                const LogicVRegister& src) {
1173   VectorFormat vform_dst =
1174       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1175 
1176   int64_t dst_val = 0;
1177   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1178     dst_val += src.Int(vform, i);
1179   }
1180 
1181   dst.ClearForWrite(vform_dst);
1182   dst.SetInt(vform_dst, 0, dst_val);
1183   return dst;
1184 }
1185 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1186 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1187                                  const LogicVRegister& src) {
1188   VectorFormat vform_dst =
1189       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1190 
1191   int64_t dst_val = 0;
1192   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193     dst_val += src.Int(vform, i);
1194   }
1195 
1196   dst.ClearForWrite(vform_dst);
1197   dst.SetInt(vform_dst, 0, dst_val);
1198   return dst;
1199 }
1200 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1201 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1202                                  const LogicVRegister& src) {
1203   VectorFormat vform_dst =
1204       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1205 
1206   uint64_t dst_val = 0;
1207   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1208     dst_val += src.Uint(vform, i);
1209   }
1210 
1211   dst.ClearForWrite(vform_dst);
1212   dst.SetUint(vform_dst, 0, dst_val);
1213   return dst;
1214 }
1215 
SMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1216 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1217                                    const LogicVRegister& src, bool max) {
1218   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1219   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1220     int64_t src_val = src.Int(vform, i);
1221     if (max) {
1222       dst_val = (src_val > dst_val) ? src_val : dst_val;
1223     } else {
1224       dst_val = (src_val < dst_val) ? src_val : dst_val;
1225     }
1226   }
1227   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1228   dst.SetInt(vform, 0, dst_val);
1229   return dst;
1230 }
1231 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1232 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1233                                 const LogicVRegister& src) {
1234   SMinMaxV(vform, dst, src, true);
1235   return dst;
1236 }
1237 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1238 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1239                                 const LogicVRegister& src) {
1240   SMinMaxV(vform, dst, src, false);
1241   return dst;
1242 }
1243 
UMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1244 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1245                                   const LogicVRegister& src1,
1246                                   const LogicVRegister& src2, bool max) {
1247   dst.ClearForWrite(vform);
1248   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249     uint64_t src1_val = src1.Uint(vform, i);
1250     uint64_t src2_val = src2.Uint(vform, i);
1251     uint64_t dst_val;
1252     if (max) {
1253       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1254     } else {
1255       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1256     }
1257     dst.SetUint(vform, i, dst_val);
1258   }
1259   return dst;
1260 }
1261 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1262 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1263                                const LogicVRegister& src1,
1264                                const LogicVRegister& src2) {
1265   return UMinMax(vform, dst, src1, src2, true);
1266 }
1267 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1268 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1269                                const LogicVRegister& src1,
1270                                const LogicVRegister& src2) {
1271   return UMinMax(vform, dst, src1, src2, false);
1272 }
1273 
UMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1274 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1275                                    const LogicVRegister& src1,
1276                                    const LogicVRegister& src2, bool max) {
1277   int lanes = LaneCountFromFormat(vform);
1278   uint64_t result[kMaxLanesPerVector];
1279   const LogicVRegister* src = &src1;
1280   for (int j = 0; j < 2; j++) {
1281     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1282       uint64_t first_val = src->Uint(vform, i);
1283       uint64_t second_val = src->Uint(vform, i + 1);
1284       uint64_t dst_val;
1285       if (max) {
1286         dst_val = (first_val > second_val) ? first_val : second_val;
1287       } else {
1288         dst_val = (first_val < second_val) ? first_val : second_val;
1289       }
1290       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1291       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1292     }
1293     src = &src2;
1294   }
1295   dst.SetUintArray(vform, result);
1296   return dst;
1297 }
1298 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1299 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1300                                 const LogicVRegister& src1,
1301                                 const LogicVRegister& src2) {
1302   return UMinMaxP(vform, dst, src1, src2, true);
1303 }
1304 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1305 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1306                                 const LogicVRegister& src1,
1307                                 const LogicVRegister& src2) {
1308   return UMinMaxP(vform, dst, src1, src2, false);
1309 }
1310 
UMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1311 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1312                                    const LogicVRegister& src, bool max) {
1313   uint64_t dst_val = max ? 0 : UINT64_MAX;
1314   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1315     uint64_t src_val = src.Uint(vform, i);
1316     if (max) {
1317       dst_val = (src_val > dst_val) ? src_val : dst_val;
1318     } else {
1319       dst_val = (src_val < dst_val) ? src_val : dst_val;
1320     }
1321   }
1322   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1323   dst.SetUint(vform, 0, dst_val);
1324   return dst;
1325 }
1326 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1327 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1328                                 const LogicVRegister& src) {
1329   UMinMaxV(vform, dst, src, true);
1330   return dst;
1331 }
1332 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1333 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1334                                 const LogicVRegister& src) {
1335   UMinMaxV(vform, dst, src, false);
1336   return dst;
1337 }
1338 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1339 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1340                               const LogicVRegister& src, int shift) {
1341   DCHECK_GE(shift, 0);
1342   SimVRegister temp;
1343   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1344   return ushl(vform, dst, src, shiftreg);
1345 }
1346 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1347 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1348                                 const LogicVRegister& src, int shift) {
1349   DCHECK_GE(shift, 0);
1350   SimVRegister temp1, temp2;
1351   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1352   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1353   return sshl(vform, dst, extendedreg, shiftreg);
1354 }
1355 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1356 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1357                                  const LogicVRegister& src, int shift) {
1358   DCHECK_GE(shift, 0);
1359   SimVRegister temp1, temp2;
1360   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1361   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1362   return sshl(vform, dst, extendedreg, shiftreg);
1363 }
1364 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1365 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1366                                const LogicVRegister& src) {
1367   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1368   return sshll(vform, dst, src, shift);
1369 }
1370 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1371 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1372                                 const LogicVRegister& src) {
1373   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1374   return sshll2(vform, dst, src, shift);
1375 }
1376 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1377 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1378                                 const LogicVRegister& src, int shift) {
1379   DCHECK_GE(shift, 0);
1380   SimVRegister temp1, temp2;
1381   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1382   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1383   return ushl(vform, dst, extendedreg, shiftreg);
1384 }
1385 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1386 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1387                                  const LogicVRegister& src, int shift) {
1388   DCHECK_GE(shift, 0);
1389   SimVRegister temp1, temp2;
1390   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1391   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1392   return ushl(vform, dst, extendedreg, shiftreg);
1393 }
1394 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1395 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1396                               const LogicVRegister& src, int shift) {
1397   dst.ClearForWrite(vform);
1398   int laneCount = LaneCountFromFormat(vform);
1399   for (int i = 0; i < laneCount; i++) {
1400     uint64_t src_lane = src.Uint(vform, i);
1401     uint64_t dst_lane = dst.Uint(vform, i);
1402     uint64_t shifted = src_lane << shift;
1403     uint64_t mask = MaxUintFromFormat(vform) << shift;
1404     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1405   }
1406   return dst;
1407 }
1408 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1409 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1410                                 const LogicVRegister& src, int shift) {
1411   DCHECK_GE(shift, 0);
1412   SimVRegister temp;
1413   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1414   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1415 }
1416 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1417 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1418                                 const LogicVRegister& src, int shift) {
1419   DCHECK_GE(shift, 0);
1420   SimVRegister temp;
1421   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1422   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1423 }
1424 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1425 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1426                                  const LogicVRegister& src, int shift) {
1427   DCHECK_GE(shift, 0);
1428   SimVRegister temp;
1429   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1430   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1431 }
1432 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1433 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1434                               const LogicVRegister& src, int shift) {
1435   dst.ClearForWrite(vform);
1436   int laneCount = LaneCountFromFormat(vform);
1437   DCHECK((shift > 0) &&
1438          (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1439   for (int i = 0; i < laneCount; i++) {
1440     uint64_t src_lane = src.Uint(vform, i);
1441     uint64_t dst_lane = dst.Uint(vform, i);
1442     uint64_t shifted;
1443     uint64_t mask;
1444     if (shift == 64) {
1445       shifted = 0;
1446       mask = 0;
1447     } else {
1448       shifted = src_lane >> shift;
1449       mask = MaxUintFromFormat(vform) >> shift;
1450     }
1451     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1452   }
1453   return dst;
1454 }
1455 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1456 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1457                                const LogicVRegister& src, int shift) {
1458   DCHECK_GE(shift, 0);
1459   SimVRegister temp;
1460   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1461   return ushl(vform, dst, src, shiftreg);
1462 }
1463 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1464 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1465                                const LogicVRegister& src, int shift) {
1466   DCHECK_GE(shift, 0);
1467   SimVRegister temp;
1468   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1469   return sshl(vform, dst, src, shiftreg);
1470 }
1471 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1472 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1473                                const LogicVRegister& src, int shift) {
1474   SimVRegister temp;
1475   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1476   return add(vform, dst, dst, shifted_reg);
1477 }
1478 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1479 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1480                                const LogicVRegister& src, int shift) {
1481   SimVRegister temp;
1482   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1483   return add(vform, dst, dst, shifted_reg);
1484 }
1485 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1487                                 const LogicVRegister& src, int shift) {
1488   SimVRegister temp;
1489   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1490   return add(vform, dst, dst, shifted_reg);
1491 }
1492 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1493 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1494                                 const LogicVRegister& src, int shift) {
1495   SimVRegister temp;
1496   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1497   return add(vform, dst, dst, shifted_reg);
1498 }
1499 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1500 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1501                               const LogicVRegister& src) {
1502   uint64_t result[16];
1503   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1504   int laneCount = LaneCountFromFormat(vform);
1505   for (int i = 0; i < laneCount; i++) {
1506     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1507   }
1508 
1509   dst.SetUintArray(vform, result);
1510   return dst;
1511 }
1512 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1513 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1514                               const LogicVRegister& src) {
1515   uint64_t result[16];
1516   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1517   int laneCount = LaneCountFromFormat(vform);
1518   for (int i = 0; i < laneCount; i++) {
1519     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1520   }
1521 
1522   dst.SetUintArray(vform, result);
1523   return dst;
1524 }
1525 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1526 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1527                               const LogicVRegister& src) {
1528   uint64_t result[16];
1529   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1530   int laneCount = LaneCountFromFormat(vform);
1531   for (int i = 0; i < laneCount; i++) {
1532     uint64_t value = src.Uint(vform, i);
1533     result[i] = 0;
1534     for (int j = 0; j < laneSizeInBits; j++) {
1535       result[i] += (value & 1);
1536       value >>= 1;
1537     }
1538   }
1539 
1540   dst.SetUintArray(vform, result);
1541   return dst;
1542 }
1543 
// Per-lane signed shift of src1 by a signed, per-lane amount taken from src2;
// the result is written to dst and dst is returned.
//
// For each lane the shift amount is the src2 lane narrowed to int8_t.
// Positive amounts shift left; negative amounts shift right with sign
// extension. While computing the result, the lane's signed-saturation,
// unsigned-saturation and rounding state is recorded on dst, so that callers
// can follow up with SignedSaturate(), UnsignedSaturate() or Round().
LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Only the low byte of the src2 lane is used as the (signed) shift.
    int8_t shift_val = src2.Int(vform, i);
    // Left-justified copy of the source lane, used for the saturation tests,
    // which are carried out on 64-bit quantities regardless of lane size.
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state.
    // Shifting a non-zero value left by more than its count of leading sign
    // bits overflows; the flag passed is true for non-negative sources.
    if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
        (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state.
    // A negative source is always flagged (with false); a non-negative,
    // non-zero source is flagged (with true) when the shift exceeds its count
    // of leading zeros.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    bool src_is_negative = src_val < 0;
    if (shift_val > 63) {
      // Left shift by 64 or more: every bit is shifted out.
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      // Right shift by 64 or more: only copies of the sign bit remain. The
      // rounding flag is set exactly when the source is negative.
      dst.SetRounding(i, src_is_negative);
      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
    } else {
      // Use unsigned types for shifts, as behaviour is undefined for signed
      // lhs.
      uint64_t usrc_val = static_cast<uint64_t>(src_val);

      if (shift_val < 0) {
        // Convert to right shift.
        // shift_val is in [-63, -1] here, so the negation is in [1, 63].
        shift_val = -shift_val;

        // Set rounding state by testing most-significant bit shifted out.
        // Rounding only needed on right shifts.
        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        usrc_val >>= shift_val;

        if (src_is_negative) {
          // Simulate sign-extension.
          // Fill the top shift_val bits, vacated by the logical shift, with
          // ones.
          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
        }
      } else {
        usrc_val <<= shift_val;
      }
      dst.SetUint(vform, i, usrc_val);
    }
  }
  return dst;
}
1602 
// Per-lane unsigned shift of src1 by a signed, per-lane amount taken from
// src2; the result is written to dst and dst is returned.
//
// For each lane the shift amount is the src2 lane narrowed to int8_t.
// Positive amounts shift left; negative amounts shift right (logical). The
// lane's unsigned-saturation and rounding state is recorded on dst, so that
// callers can follow up with UnsignedSaturate() or Round().
LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Only the low byte of the src2 lane is used as the (signed) shift.
    int8_t shift_val = src2.Int(vform, i);
    // Left-justified copy of the source lane, used for the saturation test,
    // which is carried out on a 64-bit quantity regardless of lane size.
    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

    // Set saturation state.
    // Shifting a non-zero value left by more than its count of leading zeros
    // loses set bits.
    if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    uint64_t src_val = src1.Uint(vform, i);
    if ((shift_val > 63) || (shift_val < -64)) {
      // Shifts this large move every bit out of the lane. No rounding state
      // is recorded on this path.
      dst.SetUint(vform, i, 0);
    } else {
      if (shift_val < 0) {
        // Set rounding state. Rounding only needed on right shifts.
        // Tests the most significant bit that will be shifted out;
        // -shift_val - 1 is in [0, 63] here.
        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        if (shift_val == -64) {
          // A 64-bit value cannot be shifted by 64 in C++; the result is zero.
          src_val = 0;
        } else {
          src_val >>= -shift_val;
        }
      } else {
        src_val <<= shift_val;
      }
      dst.SetUint(vform, i, src_val);
    }
  }
  return dst;
}
1639 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1640 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1641                               const LogicVRegister& src) {
1642   dst.ClearForWrite(vform);
1643   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1644     // Test for signed saturation.
1645     int64_t sa = src.Int(vform, i);
1646     if (sa == MinIntFromFormat(vform)) {
1647       dst.SetSignedSat(i, true);
1648     }
1649     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1650   }
1651   return dst;
1652 }
1653 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1654 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1655                                  const LogicVRegister& src) {
1656   dst.ClearForWrite(vform);
1657   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1658     int64_t sa = dst.IntLeftJustified(vform, i);
1659     uint64_t ub = src.UintLeftJustified(vform, i);
1660     uint64_t ur = sa + ub;
1661 
1662     int64_t sr = bit_cast<int64_t>(ur);
1663     if (sr < sa) {  // Test for signed positive saturation.
1664       dst.SetInt(vform, i, MaxIntFromFormat(vform));
1665     } else {
1666       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1667     }
1668   }
1669   return dst;
1670 }
1671 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1672 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1673                                  const LogicVRegister& src) {
1674   dst.ClearForWrite(vform);
1675   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1676     uint64_t ua = dst.UintLeftJustified(vform, i);
1677     int64_t sb = src.IntLeftJustified(vform, i);
1678     uint64_t ur = ua + sb;
1679 
1680     if ((sb > 0) && (ur <= ua)) {
1681       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
1682     } else if ((sb < 0) && (ur >= ua)) {
1683       dst.SetUint(vform, i, 0);  // Negative saturation.
1684     } else {
1685       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1686     }
1687   }
1688   return dst;
1689 }
1690 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1691 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1692                               const LogicVRegister& src) {
1693   dst.ClearForWrite(vform);
1694   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1695     // Test for signed saturation.
1696     int64_t sa = src.Int(vform, i);
1697     if (sa == MinIntFromFormat(vform)) {
1698       dst.SetSignedSat(i, true);
1699     }
1700     if (sa < 0) {
1701       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1702     } else {
1703       dst.SetInt(vform, i, sa);
1704     }
1705   }
1706   return dst;
1707 }
1708 
// Narrows each lane of src to half its width and writes the results to dst,
// recording per-lane saturation state along the way.
//
//   dstform      Format of the destination. It selects the (double-width)
//                source format and whether the lower or the upper half of dst
//                is written.
//   dstIsSigned  Selects SetInt (true) or SetUint (false) for the final store.
//   srcIsSigned  Selects whether the source lanes are read as signed or
//                unsigned values, for both the unsigned-saturation test and
//                the truncated result.
//
// The stored value is always the truncated source lane; callers apply
// SignedSaturate() or UnsignedSaturate() to the returned register if a
// saturated result is wanted. Formats not listed in the switch hit
// UNIMPLEMENTED().
LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
                                        LogicVRegister dst, bool dstIsSigned,
                                        const LogicVRegister& src,
                                        bool srcIsSigned) {
  bool upperhalf = false;
  VectorFormat srcform = kFormatUndefined;
  // Local copies of the source lanes, in signed and unsigned form. The widest
  // source format selected below is kFormat8H.
  int64_t ssrc[8];
  uint64_t usrc[8];

  // Map the destination format to its source format. The 128-bit destination
  // formats (16B, 8H, 4S) write the upper half of dst; all others write the
  // lower half.
  switch (dstform) {
    case kFormat8B:
      upperhalf = false;
      srcform = kFormat8H;
      break;
    case kFormat16B:
      upperhalf = true;
      srcform = kFormat8H;
      break;
    case kFormat4H:
      upperhalf = false;
      srcform = kFormat4S;
      break;
    case kFormat8H:
      upperhalf = true;
      srcform = kFormat4S;
      break;
    case kFormat2S:
      upperhalf = false;
      srcform = kFormat2D;
      break;
    case kFormat4S:
      upperhalf = true;
      srcform = kFormat2D;
      break;
    case kFormatB:
      upperhalf = false;
      srcform = kFormatH;
      break;
    case kFormatH:
      upperhalf = false;
      srcform = kFormatS;
      break;
    case kFormatS:
      upperhalf = false;
      srcform = kFormatD;
      break;
    default:
      UNIMPLEMENTED();
  }

  // Read every source lane before dst is modified (presumably so that dst and
  // src may refer to the same register).
  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    ssrc[i] = src.Int(srcform, i);
    usrc[i] = src.Uint(srcform, i);
  }

  // When writing the upper half, results start at the middle lane of dst and
  // dst is not cleared; otherwise results start at lane 0 and ClearForWrite
  // is applied.
  int offset;
  if (upperhalf) {
    offset = LaneCountFromFormat(dstform) / 2;
  } else {
    offset = 0;
    dst.ClearForWrite(dstform);
  }

  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    // Test for signed saturation
    // The flag is true when the value is above the destination's signed
    // range and false when it is below it.
    if (ssrc[i] > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, true);
    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, false);
    }

    // Test for unsigned saturation
    // A signed source can fall outside the unsigned range in either
    // direction; an unsigned source can only exceed the maximum.
    if (srcIsSigned) {
      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(offset + i, true);
      } else if (ssrc[i] < 0) {
        dst.SetUnsignedSat(offset + i, false);
      }
    } else {
      if (usrc[i] > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(offset + i, true);
      }
    }

    // Truncate the source lane to the destination lane width.
    int64_t result;
    if (srcIsSigned) {
      result = ssrc[i] & MaxUintFromFormat(dstform);
    } else {
      result = usrc[i] & MaxUintFromFormat(dstform);
    }

    if (dstIsSigned) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
    }
  }
  return dst;
}
1808 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1810                               const LogicVRegister& src) {
1811   return ExtractNarrow(vform, dst, true, src, true);
1812 }
1813 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1814 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1815                                 const LogicVRegister& src) {
1816   return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1817 }
1818 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1819 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1820                                  const LogicVRegister& src) {
1821   return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1822 }
1823 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1824 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1825                                 const LogicVRegister& src) {
1826   return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1827 }
1828 
AbsDiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)1829 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1830                                   const LogicVRegister& src1,
1831                                   const LogicVRegister& src2, bool issigned) {
1832   dst.ClearForWrite(vform);
1833   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1834     if (issigned) {
1835       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1836       sr = sr > 0 ? sr : -sr;
1837       dst.SetInt(vform, i, sr);
1838     } else {
1839       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1840       sr = sr > 0 ? sr : -sr;
1841       dst.SetUint(vform, i, sr);
1842     }
1843   }
1844   return dst;
1845 }
1846 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1847 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1848                                const LogicVRegister& src1,
1849                                const LogicVRegister& src2) {
1850   SimVRegister temp;
1851   dst.ClearForWrite(vform);
1852   AbsDiff(vform, temp, src1, src2, true);
1853   add(vform, dst, dst, temp);
1854   return dst;
1855 }
1856 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1857 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1858                                const LogicVRegister& src1,
1859                                const LogicVRegister& src2) {
1860   SimVRegister temp;
1861   dst.ClearForWrite(vform);
1862   AbsDiff(vform, temp, src1, src2, false);
1863   add(vform, dst, dst, temp);
1864   return dst;
1865 }
1866 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1867 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1868                                const LogicVRegister& src) {
1869   dst.ClearForWrite(vform);
1870   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1871     dst.SetUint(vform, i, ~src.Uint(vform, i));
1872   }
1873   return dst;
1874 }
1875 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1876 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1877                                const LogicVRegister& src) {
1878   uint64_t result[16];
1879   int laneCount = LaneCountFromFormat(vform);
1880   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1881   uint64_t reversed_value;
1882   uint64_t value;
1883   for (int i = 0; i < laneCount; i++) {
1884     value = src.Uint(vform, i);
1885     reversed_value = 0;
1886     for (int j = 0; j < laneSizeInBits; j++) {
1887       reversed_value = (reversed_value << 1) | (value & 1);
1888       value >>= 1;
1889     }
1890     result[i] = reversed_value;
1891   }
1892 
1893   dst.SetUintArray(vform, result);
1894   return dst;
1895 }
1896 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)1897 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1898                               const LogicVRegister& src, int revSize) {
1899   uint64_t result[16];
1900   int laneCount = LaneCountFromFormat(vform);
1901   int laneSize = LaneSizeInBytesFromFormat(vform);
1902   int lanesPerLoop = revSize / laneSize;
1903   for (int i = 0; i < laneCount; i += lanesPerLoop) {
1904     for (int j = 0; j < lanesPerLoop; j++) {
1905       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1906     }
1907   }
1908   dst.SetUintArray(vform, result);
1909   return dst;
1910 }
1911 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1912 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1913                                 const LogicVRegister& src) {
1914   return rev(vform, dst, src, 2);
1915 }
1916 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1917 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1918                                 const LogicVRegister& src) {
1919   return rev(vform, dst, src, 4);
1920 }
1921 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1922 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1923                                 const LogicVRegister& src) {
1924   return rev(vform, dst, src, 8);
1925 }
1926 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)1927 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1928                                 const LogicVRegister& src, bool is_signed,
1929                                 bool do_accumulate) {
1930   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1931   DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1932   DCHECK_LE(LaneCountFromFormat(vform), 8);
1933 
1934   uint64_t result[8];
1935   int lane_count = LaneCountFromFormat(vform);
1936   for (int i = 0; i < lane_count; i++) {
1937     if (is_signed) {
1938       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1939                                         src.Int(vformsrc, 2 * i + 1));
1940     } else {
1941       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1942     }
1943   }
1944 
1945   dst.ClearForWrite(vform);
1946   for (int i = 0; i < lane_count; ++i) {
1947     if (do_accumulate) {
1948       result[i] += dst.Uint(vform, i);
1949     }
1950     dst.SetUint(vform, i, result[i]);
1951   }
1952 
1953   return dst;
1954 }
1955 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1956 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1957                                  const LogicVRegister& src) {
1958   return addlp(vform, dst, src, true, false);
1959 }
1960 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1961 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1962                                  const LogicVRegister& src) {
1963   return addlp(vform, dst, src, false, false);
1964 }
1965 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1966 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1967                                  const LogicVRegister& src) {
1968   return addlp(vform, dst, src, true, true);
1969 }
1970 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1971 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1972                                  const LogicVRegister& src) {
1973   return addlp(vform, dst, src, false, true);
1974 }
1975 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1976 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1977                               const LogicVRegister& src1,
1978                               const LogicVRegister& src2, int index) {
1979   uint8_t result[16];
1980   int laneCount = LaneCountFromFormat(vform);
1981   for (int i = 0; i < laneCount - index; ++i) {
1982     result[i] = src1.Uint(vform, i + index);
1983   }
1984   for (int i = 0; i < index; ++i) {
1985     result[laneCount - index + i] = src2.Uint(vform, i);
1986   }
1987   dst.ClearForWrite(vform);
1988   for (int i = 0; i < laneCount; ++i) {
1989     dst.SetUint(vform, i, result[i]);
1990   }
1991   return dst;
1992 }
1993 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)1994 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1995                                       const LogicVRegister& src,
1996                                       int src_index) {
1997   int laneCount = LaneCountFromFormat(vform);
1998   uint64_t value = src.Uint(vform, src_index);
1999   dst.ClearForWrite(vform);
2000   for (int i = 0; i < laneCount; ++i) {
2001     dst.SetUint(vform, i, value);
2002   }
2003   return dst;
2004 }
2005 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2006 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2007                                         uint64_t imm) {
2008   int laneCount = LaneCountFromFormat(vform);
2009   uint64_t value = imm & MaxUintFromFormat(vform);
2010   dst.ClearForWrite(vform);
2011   for (int i = 0; i < laneCount; ++i) {
2012     dst.SetUint(vform, i, value);
2013   }
2014   return dst;
2015 }
2016 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2017 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2018                                       int dst_index, const LogicVRegister& src,
2019                                       int src_index) {
2020   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2021   return dst;
2022 }
2023 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2024 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2025                                         int dst_index, uint64_t imm) {
2026   uint64_t value = imm & MaxUintFromFormat(vform);
2027   dst.SetUint(vform, dst_index, value);
2028   return dst;
2029 }
2030 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2031 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2032                                uint64_t imm) {
2033   int laneCount = LaneCountFromFormat(vform);
2034   dst.ClearForWrite(vform);
2035   for (int i = 0; i < laneCount; ++i) {
2036     dst.SetUint(vform, i, imm);
2037   }
2038   return dst;
2039 }
2040 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2041 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2042                                uint64_t imm) {
2043   int laneCount = LaneCountFromFormat(vform);
2044   dst.ClearForWrite(vform);
2045   for (int i = 0; i < laneCount; ++i) {
2046     dst.SetUint(vform, i, ~imm);
2047   }
2048   return dst;
2049 }
2050 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2051 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2052                               const LogicVRegister& src, uint64_t imm) {
2053   uint64_t result[16];
2054   int laneCount = LaneCountFromFormat(vform);
2055   for (int i = 0; i < laneCount; ++i) {
2056     result[i] = src.Uint(vform, i) | imm;
2057   }
2058   dst.SetUintArray(vform, result);
2059   return dst;
2060 }
2061 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2062 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2063                                const LogicVRegister& src) {
2064   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2065 
2066   dst.ClearForWrite(vform);
2067   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2068     dst.SetUint(vform, i, src.Uint(vform_half, i));
2069   }
2070   return dst;
2071 }
2072 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2073 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2074                                const LogicVRegister& src) {
2075   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2076 
2077   dst.ClearForWrite(vform);
2078   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2079     dst.SetInt(vform, i, src.Int(vform_half, i));
2080   }
2081   return dst;
2082 }
2083 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2084 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2085                                 const LogicVRegister& src) {
2086   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2087   int lane_count = LaneCountFromFormat(vform);
2088 
2089   dst.ClearForWrite(vform);
2090   for (int i = 0; i < lane_count; i++) {
2091     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2092   }
2093   return dst;
2094 }
2095 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2096 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2097                                 const LogicVRegister& src) {
2098   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2099   int lane_count = LaneCountFromFormat(vform);
2100 
2101   dst.ClearForWrite(vform);
2102   for (int i = 0; i < lane_count; i++) {
2103     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2104   }
2105   return dst;
2106 }
2107 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2108 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2109                                const LogicVRegister& src, int shift) {
2110   SimVRegister temp;
2111   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2112   VectorFormat vform_dst = vform;
2113   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2114   return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2115 }
2116 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2117 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2118                                 const LogicVRegister& src, int shift) {
2119   SimVRegister temp;
2120   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2121   VectorFormat vformdst = vform;
2122   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2123   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2124 }
2125 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2126 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2127                                 const LogicVRegister& src, int shift) {
2128   SimVRegister temp;
2129   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2130   VectorFormat vformdst = vform;
2131   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2132   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2133 }
2134 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2135 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2136                                  const LogicVRegister& src, int shift) {
2137   SimVRegister temp;
2138   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2139   VectorFormat vformdst = vform;
2140   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2141   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2142 }
2143 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2144 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2145                                 const LogicVRegister& ind,
2146                                 bool zero_out_of_bounds,
2147                                 const LogicVRegister* tab1,
2148                                 const LogicVRegister* tab2,
2149                                 const LogicVRegister* tab3,
2150                                 const LogicVRegister* tab4) {
2151   DCHECK_NOT_NULL(tab1);
2152   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2153   uint64_t result[kMaxLanesPerVector];
2154   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2155     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2156   }
2157   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2158     uint64_t j = ind.Uint(vform, i);
2159     int tab_idx = static_cast<int>(j >> 4);
2160     int j_idx = static_cast<int>(j & 15);
2161     if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2162       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2163     }
2164   }
2165   dst.SetUintArray(vform, result);
2166   return dst;
2167 }
2168 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2169 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2170                               const LogicVRegister& tab,
2171                               const LogicVRegister& ind) {
2172   return Table(vform, dst, ind, true, &tab);
2173 }
2174 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2175 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2176                               const LogicVRegister& tab,
2177                               const LogicVRegister& tab2,
2178                               const LogicVRegister& ind) {
2179   return Table(vform, dst, ind, true, &tab, &tab2);
2180 }
2181 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2183                               const LogicVRegister& tab,
2184                               const LogicVRegister& tab2,
2185                               const LogicVRegister& tab3,
2186                               const LogicVRegister& ind) {
2187   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2188 }
2189 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2191                               const LogicVRegister& tab,
2192                               const LogicVRegister& tab2,
2193                               const LogicVRegister& tab3,
2194                               const LogicVRegister& tab4,
2195                               const LogicVRegister& ind) {
2196   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2197 }
2198 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2199 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2200                               const LogicVRegister& tab,
2201                               const LogicVRegister& ind) {
2202   return Table(vform, dst, ind, false, &tab);
2203 }
2204 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2205 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2206                               const LogicVRegister& tab,
2207                               const LogicVRegister& tab2,
2208                               const LogicVRegister& ind) {
2209   return Table(vform, dst, ind, false, &tab, &tab2);
2210 }
2211 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2213                               const LogicVRegister& tab,
2214                               const LogicVRegister& tab2,
2215                               const LogicVRegister& tab3,
2216                               const LogicVRegister& ind) {
2217   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2218 }
2219 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2221                               const LogicVRegister& tab,
2222                               const LogicVRegister& tab2,
2223                               const LogicVRegister& tab3,
2224                               const LogicVRegister& tab4,
2225                               const LogicVRegister& ind) {
2226   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2227 }
2228 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2229 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2230                                  const LogicVRegister& src, int shift) {
2231   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2232 }
2233 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2234 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2235                                   const LogicVRegister& src, int shift) {
2236   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2237 }
2238 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2239 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2240                                   const LogicVRegister& src, int shift) {
2241   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2242 }
2243 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2244 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2245                                    const LogicVRegister& src, int shift) {
2246   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2247 }
2248 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2249 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2250                                  const LogicVRegister& src, int shift) {
2251   SimVRegister temp;
2252   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2253   VectorFormat vformdst = vform;
2254   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2255   return sqxtn(vformdst, dst, shifted_src);
2256 }
2257 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2258 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2259                                   const LogicVRegister& src, int shift) {
2260   SimVRegister temp;
2261   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2262   VectorFormat vformdst = vform;
2263   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2264   return sqxtn(vformdst, dst, shifted_src);
2265 }
2266 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2267 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2268                                   const LogicVRegister& src, int shift) {
2269   SimVRegister temp;
2270   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2271   VectorFormat vformdst = vform;
2272   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2273   return sqxtn(vformdst, dst, shifted_src);
2274 }
2275 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2276 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2277                                    const LogicVRegister& src, int shift) {
2278   SimVRegister temp;
2279   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2280   VectorFormat vformdst = vform;
2281   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2282   return sqxtn(vformdst, dst, shifted_src);
2283 }
2284 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2285 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2286                                   const LogicVRegister& src, int shift) {
2287   SimVRegister temp;
2288   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2289   VectorFormat vformdst = vform;
2290   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2291   return sqxtun(vformdst, dst, shifted_src);
2292 }
2293 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2294 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2295                                    const LogicVRegister& src, int shift) {
2296   SimVRegister temp;
2297   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2298   VectorFormat vformdst = vform;
2299   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2300   return sqxtun(vformdst, dst, shifted_src);
2301 }
2302 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2303 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2304                                    const LogicVRegister& src, int shift) {
2305   SimVRegister temp;
2306   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2307   VectorFormat vformdst = vform;
2308   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2309   return sqxtun(vformdst, dst, shifted_src);
2310 }
2311 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2312 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2313                                     const LogicVRegister& src, int shift) {
2314   SimVRegister temp;
2315   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2316   VectorFormat vformdst = vform;
2317   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2318   return sqxtun(vformdst, dst, shifted_src);
2319 }
2320 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2321 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2322                                 const LogicVRegister& src1,
2323                                 const LogicVRegister& src2) {
2324   SimVRegister temp1, temp2;
2325   uxtl(vform, temp1, src1);
2326   uxtl(vform, temp2, src2);
2327   add(vform, dst, temp1, temp2);
2328   return dst;
2329 }
2330 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2331 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2332                                  const LogicVRegister& src1,
2333                                  const LogicVRegister& src2) {
2334   SimVRegister temp1, temp2;
2335   uxtl2(vform, temp1, src1);
2336   uxtl2(vform, temp2, src2);
2337   add(vform, dst, temp1, temp2);
2338   return dst;
2339 }
2340 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2341 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2342                                 const LogicVRegister& src1,
2343                                 const LogicVRegister& src2) {
2344   SimVRegister temp;
2345   uxtl(vform, temp, src2);
2346   add(vform, dst, src1, temp);
2347   return dst;
2348 }
2349 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2350 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2351                                  const LogicVRegister& src1,
2352                                  const LogicVRegister& src2) {
2353   SimVRegister temp;
2354   uxtl2(vform, temp, src2);
2355   add(vform, dst, src1, temp);
2356   return dst;
2357 }
2358 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2359 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2360                                 const LogicVRegister& src1,
2361                                 const LogicVRegister& src2) {
2362   SimVRegister temp1, temp2;
2363   sxtl(vform, temp1, src1);
2364   sxtl(vform, temp2, src2);
2365   add(vform, dst, temp1, temp2);
2366   return dst;
2367 }
2368 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2369 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2370                                  const LogicVRegister& src1,
2371                                  const LogicVRegister& src2) {
2372   SimVRegister temp1, temp2;
2373   sxtl2(vform, temp1, src1);
2374   sxtl2(vform, temp2, src2);
2375   add(vform, dst, temp1, temp2);
2376   return dst;
2377 }
2378 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2379 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2380                                 const LogicVRegister& src1,
2381                                 const LogicVRegister& src2) {
2382   SimVRegister temp;
2383   sxtl(vform, temp, src2);
2384   add(vform, dst, src1, temp);
2385   return dst;
2386 }
2387 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2388 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2389                                  const LogicVRegister& src1,
2390                                  const LogicVRegister& src2) {
2391   SimVRegister temp;
2392   sxtl2(vform, temp, src2);
2393   add(vform, dst, src1, temp);
2394   return dst;
2395 }
2396 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2397 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2398                                 const LogicVRegister& src1,
2399                                 const LogicVRegister& src2) {
2400   SimVRegister temp1, temp2;
2401   uxtl(vform, temp1, src1);
2402   uxtl(vform, temp2, src2);
2403   sub(vform, dst, temp1, temp2);
2404   return dst;
2405 }
2406 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2407 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2408                                  const LogicVRegister& src1,
2409                                  const LogicVRegister& src2) {
2410   SimVRegister temp1, temp2;
2411   uxtl2(vform, temp1, src1);
2412   uxtl2(vform, temp2, src2);
2413   sub(vform, dst, temp1, temp2);
2414   return dst;
2415 }
2416 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2417 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2418                                 const LogicVRegister& src1,
2419                                 const LogicVRegister& src2) {
2420   SimVRegister temp;
2421   uxtl(vform, temp, src2);
2422   sub(vform, dst, src1, temp);
2423   return dst;
2424 }
2425 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2426 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2427                                  const LogicVRegister& src1,
2428                                  const LogicVRegister& src2) {
2429   SimVRegister temp;
2430   uxtl2(vform, temp, src2);
2431   sub(vform, dst, src1, temp);
2432   return dst;
2433 }
2434 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2435 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2436                                 const LogicVRegister& src1,
2437                                 const LogicVRegister& src2) {
2438   SimVRegister temp1, temp2;
2439   sxtl(vform, temp1, src1);
2440   sxtl(vform, temp2, src2);
2441   sub(vform, dst, temp1, temp2);
2442   return dst;
2443 }
2444 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2445 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2446                                  const LogicVRegister& src1,
2447                                  const LogicVRegister& src2) {
2448   SimVRegister temp1, temp2;
2449   sxtl2(vform, temp1, src1);
2450   sxtl2(vform, temp2, src2);
2451   sub(vform, dst, temp1, temp2);
2452   return dst;
2453 }
2454 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2455 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2456                                 const LogicVRegister& src1,
2457                                 const LogicVRegister& src2) {
2458   SimVRegister temp;
2459   sxtl(vform, temp, src2);
2460   sub(vform, dst, src1, temp);
2461   return dst;
2462 }
2463 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2464 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2465                                  const LogicVRegister& src1,
2466                                  const LogicVRegister& src2) {
2467   SimVRegister temp;
2468   sxtl2(vform, temp, src2);
2469   sub(vform, dst, src1, temp);
2470   return dst;
2471 }
2472 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2473 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2474                                 const LogicVRegister& src1,
2475                                 const LogicVRegister& src2) {
2476   SimVRegister temp1, temp2;
2477   uxtl(vform, temp1, src1);
2478   uxtl(vform, temp2, src2);
2479   uaba(vform, dst, temp1, temp2);
2480   return dst;
2481 }
2482 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2483 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2484                                  const LogicVRegister& src1,
2485                                  const LogicVRegister& src2) {
2486   SimVRegister temp1, temp2;
2487   uxtl2(vform, temp1, src1);
2488   uxtl2(vform, temp2, src2);
2489   uaba(vform, dst, temp1, temp2);
2490   return dst;
2491 }
2492 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2493 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2494                                 const LogicVRegister& src1,
2495                                 const LogicVRegister& src2) {
2496   SimVRegister temp1, temp2;
2497   sxtl(vform, temp1, src1);
2498   sxtl(vform, temp2, src2);
2499   saba(vform, dst, temp1, temp2);
2500   return dst;
2501 }
2502 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2503 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2504                                  const LogicVRegister& src1,
2505                                  const LogicVRegister& src2) {
2506   SimVRegister temp1, temp2;
2507   sxtl2(vform, temp1, src1);
2508   sxtl2(vform, temp2, src2);
2509   saba(vform, dst, temp1, temp2);
2510   return dst;
2511 }
2512 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2513 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2514                                 const LogicVRegister& src1,
2515                                 const LogicVRegister& src2) {
2516   SimVRegister temp1, temp2;
2517   uxtl(vform, temp1, src1);
2518   uxtl(vform, temp2, src2);
2519   AbsDiff(vform, dst, temp1, temp2, false);
2520   return dst;
2521 }
2522 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2523 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2524                                  const LogicVRegister& src1,
2525                                  const LogicVRegister& src2) {
2526   SimVRegister temp1, temp2;
2527   uxtl2(vform, temp1, src1);
2528   uxtl2(vform, temp2, src2);
2529   AbsDiff(vform, dst, temp1, temp2, false);
2530   return dst;
2531 }
2532 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2533 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2534                                 const LogicVRegister& src1,
2535                                 const LogicVRegister& src2) {
2536   SimVRegister temp1, temp2;
2537   sxtl(vform, temp1, src1);
2538   sxtl(vform, temp2, src2);
2539   AbsDiff(vform, dst, temp1, temp2, true);
2540   return dst;
2541 }
2542 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2543 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2544                                  const LogicVRegister& src1,
2545                                  const LogicVRegister& src2) {
2546   SimVRegister temp1, temp2;
2547   sxtl2(vform, temp1, src1);
2548   sxtl2(vform, temp2, src2);
2549   AbsDiff(vform, dst, temp1, temp2, true);
2550   return dst;
2551 }
2552 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2553 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2554                                 const LogicVRegister& src1,
2555                                 const LogicVRegister& src2) {
2556   SimVRegister temp1, temp2;
2557   uxtl(vform, temp1, src1);
2558   uxtl(vform, temp2, src2);
2559   mul(vform, dst, temp1, temp2);
2560   return dst;
2561 }
2562 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2563 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2564                                  const LogicVRegister& src1,
2565                                  const LogicVRegister& src2) {
2566   SimVRegister temp1, temp2;
2567   uxtl2(vform, temp1, src1);
2568   uxtl2(vform, temp2, src2);
2569   mul(vform, dst, temp1, temp2);
2570   return dst;
2571 }
2572 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2573 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2574                                 const LogicVRegister& src1,
2575                                 const LogicVRegister& src2) {
2576   SimVRegister temp1, temp2;
2577   sxtl(vform, temp1, src1);
2578   sxtl(vform, temp2, src2);
2579   mul(vform, dst, temp1, temp2);
2580   return dst;
2581 }
2582 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2583 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2584                                  const LogicVRegister& src1,
2585                                  const LogicVRegister& src2) {
2586   SimVRegister temp1, temp2;
2587   sxtl2(vform, temp1, src1);
2588   sxtl2(vform, temp2, src2);
2589   mul(vform, dst, temp1, temp2);
2590   return dst;
2591 }
2592 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2593 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2594                                 const LogicVRegister& src1,
2595                                 const LogicVRegister& src2) {
2596   SimVRegister temp1, temp2;
2597   uxtl(vform, temp1, src1);
2598   uxtl(vform, temp2, src2);
2599   mls(vform, dst, temp1, temp2);
2600   return dst;
2601 }
2602 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2603 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2604                                  const LogicVRegister& src1,
2605                                  const LogicVRegister& src2) {
2606   SimVRegister temp1, temp2;
2607   uxtl2(vform, temp1, src1);
2608   uxtl2(vform, temp2, src2);
2609   mls(vform, dst, temp1, temp2);
2610   return dst;
2611 }
2612 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2613 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2614                                 const LogicVRegister& src1,
2615                                 const LogicVRegister& src2) {
2616   SimVRegister temp1, temp2;
2617   sxtl(vform, temp1, src1);
2618   sxtl(vform, temp2, src2);
2619   mls(vform, dst, temp1, temp2);
2620   return dst;
2621 }
2622 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2623 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2624                                  const LogicVRegister& src1,
2625                                  const LogicVRegister& src2) {
2626   SimVRegister temp1, temp2;
2627   sxtl2(vform, temp1, src1);
2628   sxtl2(vform, temp2, src2);
2629   mls(vform, dst, temp1, temp2);
2630   return dst;
2631 }
2632 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2633 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2634                                 const LogicVRegister& src1,
2635                                 const LogicVRegister& src2) {
2636   SimVRegister temp1, temp2;
2637   uxtl(vform, temp1, src1);
2638   uxtl(vform, temp2, src2);
2639   mla(vform, dst, temp1, temp2);
2640   return dst;
2641 }
2642 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2643 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2644                                  const LogicVRegister& src1,
2645                                  const LogicVRegister& src2) {
2646   SimVRegister temp1, temp2;
2647   uxtl2(vform, temp1, src1);
2648   uxtl2(vform, temp2, src2);
2649   mla(vform, dst, temp1, temp2);
2650   return dst;
2651 }
2652 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2653 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2654                                 const LogicVRegister& src1,
2655                                 const LogicVRegister& src2) {
2656   SimVRegister temp1, temp2;
2657   sxtl(vform, temp1, src1);
2658   sxtl(vform, temp2, src2);
2659   mla(vform, dst, temp1, temp2);
2660   return dst;
2661 }
2662 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2663 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2664                                  const LogicVRegister& src1,
2665                                  const LogicVRegister& src2) {
2666   SimVRegister temp1, temp2;
2667   sxtl2(vform, temp1, src1);
2668   sxtl2(vform, temp2, src2);
2669   mla(vform, dst, temp1, temp2);
2670   return dst;
2671 }
2672 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2673 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2674                                   const LogicVRegister& src1,
2675                                   const LogicVRegister& src2) {
2676   SimVRegister temp;
2677   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2678   return add(vform, dst, dst, product).SignedSaturate(vform);
2679 }
2680 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2681 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2682                                    const LogicVRegister& src1,
2683                                    const LogicVRegister& src2) {
2684   SimVRegister temp;
2685   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2686   return add(vform, dst, dst, product).SignedSaturate(vform);
2687 }
2688 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2689 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2690                                   const LogicVRegister& src1,
2691                                   const LogicVRegister& src2) {
2692   SimVRegister temp;
2693   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2694   return sub(vform, dst, dst, product).SignedSaturate(vform);
2695 }
2696 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2697 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2698                                    const LogicVRegister& src1,
2699                                    const LogicVRegister& src2) {
2700   SimVRegister temp;
2701   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2702   return sub(vform, dst, dst, product).SignedSaturate(vform);
2703 }
2704 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2705 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2706                                   const LogicVRegister& src1,
2707                                   const LogicVRegister& src2) {
2708   SimVRegister temp;
2709   LogicVRegister product = smull(vform, temp, src1, src2);
2710   return add(vform, dst, product, product).SignedSaturate(vform);
2711 }
2712 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2713 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2714                                    const LogicVRegister& src1,
2715                                    const LogicVRegister& src2) {
2716   SimVRegister temp;
2717   LogicVRegister product = smull2(vform, temp, src1, src2);
2718   return add(vform, dst, product, product).SignedSaturate(vform);
2719 }
2720 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)2721 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2722                                    const LogicVRegister& src1,
2723                                    const LogicVRegister& src2, bool round) {
2724   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2725   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2726   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2727 
2728   int esize = LaneSizeInBitsFromFormat(vform);
2729   int round_const = round ? (1 << (esize - 2)) : 0;
2730   int64_t product;
2731 
2732   dst.ClearForWrite(vform);
2733   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2734     product = src1.Int(vform, i) * src2.Int(vform, i);
2735     product += round_const;
2736     product = product >> (esize - 1);
2737 
2738     if (product > MaxIntFromFormat(vform)) {
2739       product = MaxIntFromFormat(vform);
2740     } else if (product < MinIntFromFormat(vform)) {
2741       product = MinIntFromFormat(vform);
2742     }
2743     dst.SetInt(vform, i, product);
2744   }
2745   return dst;
2746 }
2747 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2748 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2749                                   const LogicVRegister& src1,
2750                                   const LogicVRegister& src2) {
2751   return sqrdmulh(vform, dst, src1, src2, false);
2752 }
2753 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2754 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2755                                 const LogicVRegister& src1,
2756                                 const LogicVRegister& src2) {
2757   SimVRegister temp;
2758   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2759   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2760   return dst;
2761 }
2762 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2763 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2764                                  const LogicVRegister& src1,
2765                                  const LogicVRegister& src2) {
2766   SimVRegister temp;
2767   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2768   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2769   return dst;
2770 }
2771 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2772 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2773                                  const LogicVRegister& src1,
2774                                  const LogicVRegister& src2) {
2775   SimVRegister temp;
2776   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2777   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2778   return dst;
2779 }
2780 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2781 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2782                                   const LogicVRegister& src1,
2783                                   const LogicVRegister& src2) {
2784   SimVRegister temp;
2785   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2786   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2787   return dst;
2788 }
2789 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2790 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2791                                 const LogicVRegister& src1,
2792                                 const LogicVRegister& src2) {
2793   SimVRegister temp;
2794   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2795   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2796   return dst;
2797 }
2798 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2799 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2800                                  const LogicVRegister& src1,
2801                                  const LogicVRegister& src2) {
2802   SimVRegister temp;
2803   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2804   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2805   return dst;
2806 }
2807 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2808 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2809                                  const LogicVRegister& src1,
2810                                  const LogicVRegister& src2) {
2811   SimVRegister temp;
2812   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2813   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2814   return dst;
2815 }
2816 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2817 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2818                                   const LogicVRegister& src1,
2819                                   const LogicVRegister& src2) {
2820   SimVRegister temp;
2821   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2822   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2823   return dst;
2824 }
2825 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2826 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2827                                const LogicVRegister& src1,
2828                                const LogicVRegister& src2) {
2829   uint64_t result[16];
2830   int laneCount = LaneCountFromFormat(vform);
2831   int pairs = laneCount / 2;
2832   for (int i = 0; i < pairs; ++i) {
2833     result[2 * i] = src1.Uint(vform, 2 * i);
2834     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2835   }
2836 
2837   dst.SetUintArray(vform, result);
2838   return dst;
2839 }
2840 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2841 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2842                                const LogicVRegister& src1,
2843                                const LogicVRegister& src2) {
2844   uint64_t result[16];
2845   int laneCount = LaneCountFromFormat(vform);
2846   int pairs = laneCount / 2;
2847   for (int i = 0; i < pairs; ++i) {
2848     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2849     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2850   }
2851 
2852   dst.SetUintArray(vform, result);
2853   return dst;
2854 }
2855 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2856 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2857                                const LogicVRegister& src1,
2858                                const LogicVRegister& src2) {
2859   uint64_t result[16];
2860   int laneCount = LaneCountFromFormat(vform);
2861   int pairs = laneCount / 2;
2862   for (int i = 0; i < pairs; ++i) {
2863     result[2 * i] = src1.Uint(vform, i);
2864     result[(2 * i) + 1] = src2.Uint(vform, i);
2865   }
2866 
2867   dst.SetUintArray(vform, result);
2868   return dst;
2869 }
2870 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2871 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2872                                const LogicVRegister& src1,
2873                                const LogicVRegister& src2) {
2874   uint64_t result[16];
2875   int laneCount = LaneCountFromFormat(vform);
2876   int pairs = laneCount / 2;
2877   for (int i = 0; i < pairs; ++i) {
2878     result[2 * i] = src1.Uint(vform, pairs + i);
2879     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2880   }
2881 
2882   dst.SetUintArray(vform, result);
2883   return dst;
2884 }
2885 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2886 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2887                                const LogicVRegister& src1,
2888                                const LogicVRegister& src2) {
2889   uint64_t result[32];
2890   int laneCount = LaneCountFromFormat(vform);
2891   for (int i = 0; i < laneCount; ++i) {
2892     result[i] = src1.Uint(vform, i);
2893     result[laneCount + i] = src2.Uint(vform, i);
2894   }
2895 
2896   dst.ClearForWrite(vform);
2897   for (int i = 0; i < laneCount; ++i) {
2898     dst.SetUint(vform, i, result[2 * i]);
2899   }
2900   return dst;
2901 }
2902 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2903 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2904                                const LogicVRegister& src1,
2905                                const LogicVRegister& src2) {
2906   uint64_t result[32];
2907   int laneCount = LaneCountFromFormat(vform);
2908   for (int i = 0; i < laneCount; ++i) {
2909     result[i] = src1.Uint(vform, i);
2910     result[laneCount + i] = src2.Uint(vform, i);
2911   }
2912 
2913   dst.ClearForWrite(vform);
2914   for (int i = 0; i < laneCount; ++i) {
2915     dst.SetUint(vform, i, result[(2 * i) + 1]);
2916   }
2917   return dst;
2918 }
2919 
2920 template <typename T>
FPAdd(T op1,T op2)2921 T Simulator::FPAdd(T op1, T op2) {
2922   T result = FPProcessNaNs(op1, op2);
2923   if (std::isnan(result)) return result;
2924 
2925   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2926     // inf + -inf returns the default NaN.
2927     FPProcessException();
2928     return FPDefaultNaN<T>();
2929   } else {
2930     // Other cases should be handled by standard arithmetic.
2931     return op1 + op2;
2932   }
2933 }
2934 
2935 template <typename T>
FPSub(T op1,T op2)2936 T Simulator::FPSub(T op1, T op2) {
2937   // NaNs should be handled elsewhere.
2938   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2939 
2940   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2941     // inf - inf returns the default NaN.
2942     FPProcessException();
2943     return FPDefaultNaN<T>();
2944   } else {
2945     // Other cases should be handled by standard arithmetic.
2946     return op1 - op2;
2947   }
2948 }
2949 
2950 template <typename T>
FPMul(T op1,T op2)2951 T Simulator::FPMul(T op1, T op2) {
2952   // NaNs should be handled elsewhere.
2953   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2954 
2955   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2956     // inf * 0.0 returns the default NaN.
2957     FPProcessException();
2958     return FPDefaultNaN<T>();
2959   } else {
2960     // Other cases should be handled by standard arithmetic.
2961     return op1 * op2;
2962   }
2963 }
2964 
2965 template <typename T>
FPMulx(T op1,T op2)2966 T Simulator::FPMulx(T op1, T op2) {
2967   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2968     // inf * 0.0 returns +/-2.0.
2969     T two = 2.0;
2970     return std::copysign(1.0, op1) * std::copysign(1.0, op2) * two;
2971   }
2972   return FPMul(op1, op2);
2973 }
2974 
2975 template <typename T>
FPMulAdd(T a,T op1,T op2)2976 T Simulator::FPMulAdd(T a, T op1, T op2) {
2977   T result = FPProcessNaNs3(a, op1, op2);
2978 
2979   T sign_a = std::copysign(1.0, a);
2980   T sign_prod = std::copysign(1.0, op1) * std::copysign(1.0, op2);
2981   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2982   bool operation_generates_nan =
2983       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
2984       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
2985       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
2986 
2987   if (std::isnan(result)) {
2988     // Generated NaNs override quiet NaNs propagated from a.
2989     if (operation_generates_nan && IsQuietNaN(a)) {
2990       FPProcessException();
2991       return FPDefaultNaN<T>();
2992     } else {
2993       return result;
2994     }
2995   }
2996 
2997   // If the operation would produce a NaN, return the default NaN.
2998   if (operation_generates_nan) {
2999     FPProcessException();
3000     return FPDefaultNaN<T>();
3001   }
3002 
3003   // Work around broken fma implementations for exact zero results: The sign of
3004   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3005   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3006     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3007   }
3008 
3009   result = FusedMultiplyAdd(op1, op2, a);
3010   DCHECK(!std::isnan(result));
3011 
3012   // Work around broken fma implementations for rounded zero results: If a is
3013   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3014   if ((a == 0.0) && (result == 0.0)) {
3015     return std::copysign(0.0, sign_prod);
3016   }
3017 
3018   return result;
3019 }
3020 
3021 template <typename T>
FPDiv(T op1,T op2)3022 T Simulator::FPDiv(T op1, T op2) {
3023   // NaNs should be handled elsewhere.
3024   DCHECK(!std::isnan(op1) && !std::isnan(op2));
3025 
3026   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3027     // inf / inf and 0.0 / 0.0 return the default NaN.
3028     FPProcessException();
3029     return FPDefaultNaN<T>();
3030   } else {
3031     if (op2 == 0.0) {
3032       FPProcessException();
3033       if (!std::isnan(op1)) {
3034         double op1_sign = std::copysign(1.0, op1);
3035         double op2_sign = std::copysign(1.0, op2);
3036         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3037       }
3038     }
3039 
3040     // Other cases should be handled by standard arithmetic.
3041     return op1 / op2;
3042   }
3043 }
3044 
3045 template <typename T>
FPSqrt(T op)3046 T Simulator::FPSqrt(T op) {
3047   if (std::isnan(op)) {
3048     return FPProcessNaN(op);
3049   } else if (op < 0.0) {
3050     FPProcessException();
3051     return FPDefaultNaN<T>();
3052   } else {
3053     return std::sqrt(op);
3054   }
3055 }
3056 
3057 template <typename T>
FPMax(T a,T b)3058 T Simulator::FPMax(T a, T b) {
3059   T result = FPProcessNaNs(a, b);
3060   if (std::isnan(result)) return result;
3061 
3062   if ((a == 0.0) && (b == 0.0) &&
3063       (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3064     // a and b are zero, and the sign differs: return +0.0.
3065     return 0.0;
3066   } else {
3067     return (a > b) ? a : b;
3068   }
3069 }
3070 
3071 template <typename T>
FPMaxNM(T a,T b)3072 T Simulator::FPMaxNM(T a, T b) {
3073   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074     a = kFP64NegativeInfinity;
3075   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076     b = kFP64NegativeInfinity;
3077   }
3078 
3079   T result = FPProcessNaNs(a, b);
3080   return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082 
3083 template <typename T>
FPMin(T a,T b)3084 T Simulator::FPMin(T a, T b) {
3085   T result = FPProcessNaNs(a, b);
3086   if (std::isnan(result)) return result;
3087 
3088   if ((a == 0.0) && (b == 0.0) &&
3089       (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3090     // a and b are zero, and the sign differs: return -0.0.
3091     return -0.0;
3092   } else {
3093     return (a < b) ? a : b;
3094   }
3095 }
3096 
3097 template <typename T>
FPMinNM(T a,T b)3098 T Simulator::FPMinNM(T a, T b) {
3099   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3100     a = kFP64PositiveInfinity;
3101   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3102     b = kFP64PositiveInfinity;
3103   }
3104 
3105   T result = FPProcessNaNs(a, b);
3106   return std::isnan(result) ? result : FPMin(a, b);
3107 }
3108 
3109 template <typename T>
FPRecipStepFused(T op1,T op2)3110 T Simulator::FPRecipStepFused(T op1, T op2) {
3111   const T two = 2.0;
3112   if ((std::isinf(op1) && (op2 == 0.0)) ||
3113       ((op1 == 0.0) && (std::isinf(op2)))) {
3114     return two;
3115   } else if (std::isinf(op1) || std::isinf(op2)) {
3116     // Return +inf if signs match, otherwise -inf.
3117     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3118                                           : kFP64NegativeInfinity;
3119   } else {
3120     return FusedMultiplyAdd(op1, op2, two);
3121   }
3122 }
3123 
3124 template <typename T>
FPRSqrtStepFused(T op1,T op2)3125 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3126   const T one_point_five = 1.5;
3127   const T two = 2.0;
3128 
3129   if ((std::isinf(op1) && (op2 == 0.0)) ||
3130       ((op1 == 0.0) && (std::isinf(op2)))) {
3131     return one_point_five;
3132   } else if (std::isinf(op1) || std::isinf(op2)) {
3133     // Return +inf if signs match, otherwise -inf.
3134     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3135                                           : kFP64NegativeInfinity;
3136   } else {
3137     // The multiply-add-halve operation must be fully fused, so avoid interim
3138     // rounding by checking which operand can be losslessly divided by two
3139     // before doing the multiply-add.
3140     if (std::isnormal(op1 / two)) {
3141       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3142     } else if (std::isnormal(op2 / two)) {
3143       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3144     } else {
3145       // Neither operand is normal after halving: the result is dominated by
3146       // the addition term, so just return that.
3147       return one_point_five;
3148     }
3149   }
3150 }
3151 
FPRoundInt(double value,FPRounding round_mode)3152 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3153   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3154       (value == kFP64NegativeInfinity)) {
3155     return value;
3156   } else if (std::isnan(value)) {
3157     return FPProcessNaN(value);
3158   }
3159 
3160   double int_result = std::floor(value);
3161   double error = value - int_result;
3162   switch (round_mode) {
3163     case FPTieAway: {
3164       // Take care of correctly handling the range ]-0.5, -0.0], which must
3165       // yield -0.0.
3166       if ((-0.5 < value) && (value < 0.0)) {
3167         int_result = -0.0;
3168 
3169       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3170         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3171         // result is positive, round up.
3172         int_result++;
3173       }
3174       break;
3175     }
3176     case FPTieEven: {
3177       // Take care of correctly handling the range [-0.5, -0.0], which must
3178       // yield -0.0.
3179       if ((-0.5 <= value) && (value < 0.0)) {
3180         int_result = -0.0;
3181 
3182         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3183         // result is odd, round up.
3184       } else if ((error > 0.5) ||
3185                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3186         int_result++;
3187       }
3188       break;
3189     }
3190     case FPZero: {
3191       // If value>0 then we take floor(value)
3192       // otherwise, ceil(value).
3193       if (value < 0) {
3194         int_result = ceil(value);
3195       }
3196       break;
3197     }
3198     case FPNegativeInfinity: {
3199       // We always use floor(value).
3200       break;
3201     }
3202     case FPPositiveInfinity: {
3203       // Take care of correctly handling the range ]-1.0, -0.0], which must
3204       // yield -0.0.
3205       if ((-1.0 < value) && (value < 0.0)) {
3206         int_result = -0.0;
3207 
3208         // If the error is non-zero, round up.
3209       } else if (error > 0.0) {
3210         int_result++;
3211       }
3212       break;
3213     }
3214     default:
3215       UNIMPLEMENTED();
3216   }
3217   return int_result;
3218 }
3219 
FPToInt32(double value,FPRounding rmode)3220 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3221   value = FPRoundInt(value, rmode);
3222   return base::saturated_cast<int32_t>(value);
3223 }
3224 
FPToInt64(double value,FPRounding rmode)3225 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3226   value = FPRoundInt(value, rmode);
3227   return base::saturated_cast<int64_t>(value);
3228 }
3229 
FPToUInt32(double value,FPRounding rmode)3230 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3231   value = FPRoundInt(value, rmode);
3232   return base::saturated_cast<uint32_t>(value);
3233 }
3234 
FPToUInt64(double value,FPRounding rmode)3235 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3236   value = FPRoundInt(value, rmode);
3237   return base::saturated_cast<uint64_t>(value);
3238 }
3239 
// Generates two Simulator member functions named FN for every entry that
// NEON_FP3SAME_LIST passes to this macro:
//
//  * A templated FN<T> that walks every lane of vform, reads the lane from
//    src1 and src2 as T, and stores OP(op1, op2) in the matching lane of dst.
//    When PROCNAN is true, FPProcessNaNs(op1, op2) is evaluated first and OP is
//    only applied if that result is not a NaN; otherwise the NaN is stored.
//
//  * A non-template FN that dispatches to FN<float> when the lane size equals
//    kSRegSize and to FN<double> otherwise (a DCHECK asserts kDRegSize).
//
// The macro is undefined again immediately after the list has been expanded.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
      T op1 = src1.Float<T>(i);                                        \
      T op2 = src2.Float<T>(i);                                        \
      T result;                                                        \
      if (PROCNAN) {                                                   \
        result = FPProcessNaNs(op1, op2);                              \
        if (!std::isnan(result)) {                                     \
          result = OP(op1, op2);                                       \
        }                                                              \
      } else {                                                         \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(i, result);                                         \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
      FN<float>(vform, dst, src1, src2);                               \
    } else {                                                           \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3276 
3277 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3278                                 const LogicVRegister& src1,
3279                                 const LogicVRegister& src2) {
3280   SimVRegister temp;
3281   LogicVRegister product = fmul(vform, temp, src1, src2);
3282   return fneg(vform, dst, product);
3283 }
3284 
3285 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3286 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3287                                  const LogicVRegister& src1,
3288                                  const LogicVRegister& src2) {
3289   dst.ClearForWrite(vform);
3290   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3291     T op1 = -src1.Float<T>(i);
3292     T op2 = src2.Float<T>(i);
3293     T result = FPProcessNaNs(op1, op2);
3294     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3295   }
3296   return dst;
3297 }
3298 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3299 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3300                                  const LogicVRegister& src1,
3301                                  const LogicVRegister& src2) {
3302   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3303     frecps<float>(vform, dst, src1, src2);
3304   } else {
3305     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3306     frecps<double>(vform, dst, src1, src2);
3307   }
3308   return dst;
3309 }
3310 
3311 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3312 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3313                                   const LogicVRegister& src1,
3314                                   const LogicVRegister& src2) {
3315   dst.ClearForWrite(vform);
3316   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3317     T op1 = -src1.Float<T>(i);
3318     T op2 = src2.Float<T>(i);
3319     T result = FPProcessNaNs(op1, op2);
3320     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3321   }
3322   return dst;
3323 }
3324 
FPToFixedJS(double value)3325 int32_t Simulator::FPToFixedJS(double value) {
3326   // The Z-flag is set when the conversion from double precision floating-point
3327   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
3328   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
3329   // Z-flag is unset.
3330   int Z = 1;
3331   int32_t result;
3332   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3333       (value == kFP64NegativeInfinity)) {
3334     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
3335     // unset the Z-flag.
3336     result = 0.0;
3337     if ((value != 0.0) || std::signbit(value)) {
3338       Z = 0;
3339     }
3340   } else if (std::isnan(value)) {
3341     // NaN values unset the Z-flag and set the result to 0.
3342     result = 0;
3343     Z = 0;
3344   } else {
3345     // All other values are converted to an integer representation, rounded
3346     // toward zero.
3347     double int_result = std::floor(value);
3348     double error = value - int_result;
3349     if ((error != 0.0) && (int_result < 0.0)) {
3350       int_result++;
3351     }
3352     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
3353     // write a one-liner with std::round, but the behaviour on ties is incorrect
3354     // for our purposes.
3355     double mod_const = static_cast<double>(UINT64_C(1) << 32);
3356     double mod_error =
3357         (int_result / mod_const) - std::floor(int_result / mod_const);
3358     double constrained;
3359     if (mod_error == 0.5) {
3360       constrained = INT32_MIN;
3361     } else {
3362       constrained = int_result - mod_const * round(int_result / mod_const);
3363     }
3364     DCHECK(std::floor(constrained) == constrained);
3365     DCHECK(constrained >= INT32_MIN);
3366     DCHECK(constrained <= INT32_MAX);
3367     // Take the bottom 32 bits of the result as a 32-bit integer.
3368     result = static_cast<int32_t>(constrained);
3369     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3370         (error != 0.0)) {
3371       // If the integer result is out of range or the conversion isn't exact,
3372       // take exception and unset the Z-flag.
3373       FPProcessException();
3374       Z = 0;
3375     }
3376   }
3377   nzcv().SetN(0);
3378   nzcv().SetZ(Z);
3379   nzcv().SetC(0);
3380   nzcv().SetV(0);
3381   return result;
3382 }
3383 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3384 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3385                                   const LogicVRegister& src1,
3386                                   const LogicVRegister& src2) {
3387   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3388     frsqrts<float>(vform, dst, src1, src2);
3389   } else {
3390     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3391     frsqrts<double>(vform, dst, src1, src2);
3392   }
3393   return dst;
3394 }
3395 
3396 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3397 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3398                                const LogicVRegister& src1,
3399                                const LogicVRegister& src2, Condition cond) {
3400   dst.ClearForWrite(vform);
3401   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3402     bool result = false;
3403     T op1 = src1.Float<T>(i);
3404     T op2 = src2.Float<T>(i);
3405     T nan_result = FPProcessNaNs(op1, op2);
3406     if (!std::isnan(nan_result)) {
3407       switch (cond) {
3408         case eq:
3409           result = (op1 == op2);
3410           break;
3411         case ge:
3412           result = (op1 >= op2);
3413           break;
3414         case gt:
3415           result = (op1 > op2);
3416           break;
3417         case le:
3418           result = (op1 <= op2);
3419           break;
3420         case lt:
3421           result = (op1 < op2);
3422           break;
3423         default:
3424           UNREACHABLE();
3425       }
3426     }
3427     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3428   }
3429   return dst;
3430 }
3431 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3432 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3433                                const LogicVRegister& src1,
3434                                const LogicVRegister& src2, Condition cond) {
3435   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3436     fcmp<float>(vform, dst, src1, src2, cond);
3437   } else {
3438     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3439     fcmp<double>(vform, dst, src1, src2, cond);
3440   }
3441   return dst;
3442 }
3443 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)3444 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3445                                     const LogicVRegister& src, Condition cond) {
3446   SimVRegister temp;
3447   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3448     LogicVRegister zero_reg =
3449         dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3450     fcmp<float>(vform, dst, src, zero_reg, cond);
3451   } else {
3452     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3453     LogicVRegister zero_reg =
3454         dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3455     fcmp<double>(vform, dst, src, zero_reg, cond);
3456   }
3457   return dst;
3458 }
3459 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3460 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3461                                   const LogicVRegister& src1,
3462                                   const LogicVRegister& src2, Condition cond) {
3463   SimVRegister temp1, temp2;
3464   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3465     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3466     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3467     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3468   } else {
3469     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3470     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3471     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3472     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3473   }
3474   return dst;
3475 }
3476 
3477 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3478 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3479                                const LogicVRegister& src1,
3480                                const LogicVRegister& src2) {
3481   dst.ClearForWrite(vform);
3482   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3483     T op1 = src1.Float<T>(i);
3484     T op2 = src2.Float<T>(i);
3485     T acc = dst.Float<T>(i);
3486     T result = FPMulAdd(acc, op1, op2);
3487     dst.SetFloat(i, result);
3488   }
3489   return dst;
3490 }
3491 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3492 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3493                                const LogicVRegister& src1,
3494                                const LogicVRegister& src2) {
3495   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3496     fmla<float>(vform, dst, src1, src2);
3497   } else {
3498     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3499     fmla<double>(vform, dst, src1, src2);
3500   }
3501   return dst;
3502 }
3503 
3504 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3505 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3506                                const LogicVRegister& src1,
3507                                const LogicVRegister& src2) {
3508   dst.ClearForWrite(vform);
3509   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3510     T op1 = -src1.Float<T>(i);
3511     T op2 = src2.Float<T>(i);
3512     T acc = dst.Float<T>(i);
3513     T result = FPMulAdd(acc, op1, op2);
3514     dst.SetFloat(i, result);
3515   }
3516   return dst;
3517 }
3518 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3519 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3520                                const LogicVRegister& src1,
3521                                const LogicVRegister& src2) {
3522   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3523     fmls<float>(vform, dst, src1, src2);
3524   } else {
3525     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3526     fmls<double>(vform, dst, src1, src2);
3527   }
3528   return dst;
3529 }
3530 
3531 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3532 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3533                                const LogicVRegister& src) {
3534   dst.ClearForWrite(vform);
3535   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3536     T op = src.Float<T>(i);
3537     op = -op;
3538     dst.SetFloat(i, op);
3539   }
3540   return dst;
3541 }
3542 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3543 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3544                                const LogicVRegister& src) {
3545   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3546     fneg<float>(vform, dst, src);
3547   } else {
3548     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3549     fneg<double>(vform, dst, src);
3550   }
3551   return dst;
3552 }
3553 
3554 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3555 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3556                                 const LogicVRegister& src) {
3557   dst.ClearForWrite(vform);
3558   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3559     T op = src.Float<T>(i);
3560     if (std::copysign(1.0, op) < 0.0) {
3561       op = -op;
3562     }
3563     dst.SetFloat(i, op);
3564   }
3565   return dst;
3566 }
3567 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3568 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3569                                 const LogicVRegister& src) {
3570   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3571     fabs_<float>(vform, dst, src);
3572   } else {
3573     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3574     fabs_<double>(vform, dst, src);
3575   }
3576   return dst;
3577 }
3578 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3579 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3580                                const LogicVRegister& src1,
3581                                const LogicVRegister& src2) {
3582   SimVRegister temp;
3583   fsub(vform, temp, src1, src2);
3584   fabs_(vform, dst, temp);
3585   return dst;
3586 }
3587 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3588 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3589                                 const LogicVRegister& src) {
3590   dst.ClearForWrite(vform);
3591   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3592     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3593       float result = FPSqrt(src.Float<float>(i));
3594       dst.SetFloat(i, result);
3595     }
3596   } else {
3597     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3598     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3599       double result = FPSqrt(src.Float<double>(i));
3600       dst.SetFloat(i, result);
3601     }
3602   }
3603   return dst;
3604 }
3605 
// Generates two Simulator::FNP overloads for every entry of
// NEON_FPPAIRWISE_LIST:
//  - A vector pairwise form taking two sources. uzp1/uzp2 split the sources
//    into two scratch registers, which are then combined lane-wise by FN.
//  - A scalar pairwise form taking one source. OP is applied to lanes 0 and 1
//    of src (as float for kFormatS, otherwise as double for kFormatD) and the
//    result is stored in lane 0 of dst. ClearForWrite() is only called after
//    the source lanes have been read.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister temp1, temp2;                                          \
    uzp1(vform, temp1, src1, src2);                                     \
    uzp2(vform, temp2, src1, src2);                                     \
    FN(vform, dst, temp1, temp2);                                       \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src) {            \
    if (vform == kFormatS) {                                            \
      float result = OP(src.Float<float>(0), src.Float<float>(1));      \
      dst.SetFloat(0, result);                                          \
    } else {                                                            \
      DCHECK_EQ(vform, kFormatD);                                       \
      double result = OP(src.Float<double>(0), src.Float<double>(1));   \
      dst.SetFloat(0, result);                                          \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
3632 
3633 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3634                                    const LogicVRegister& src, FPMinMaxOp Op) {
3635   DCHECK_EQ(vform, kFormat4S);
3636   USE(vform);
3637   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3638   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3639   float result = (this->*Op)(result1, result2);
3640   dst.ClearForWrite(kFormatS);
3641   dst.SetFloat<float>(0, result);
3642   return dst;
3643 }
3644 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3645 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3646                                 const LogicVRegister& src) {
3647   return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3648 }
3649 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3650 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3651                                 const LogicVRegister& src) {
3652   return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3653 }
3654 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3655 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3656                                   const LogicVRegister& src) {
3657   return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3658 }
3659 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3660 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3661                                   const LogicVRegister& src) {
3662   return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3663 }
3664 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3665 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3666                                const LogicVRegister& src1,
3667                                const LogicVRegister& src2, int index) {
3668   dst.ClearForWrite(vform);
3669   SimVRegister temp;
3670   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3671     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3672     fmul<float>(vform, dst, src1, index_reg);
3673   } else {
3674     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3675     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3676     fmul<double>(vform, dst, src1, index_reg);
3677   }
3678   return dst;
3679 }
3680 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3681 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3682                                const LogicVRegister& src1,
3683                                const LogicVRegister& src2, int index) {
3684   dst.ClearForWrite(vform);
3685   SimVRegister temp;
3686   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3687     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3688     fmla<float>(vform, dst, src1, index_reg);
3689   } else {
3690     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3691     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3692     fmla<double>(vform, dst, src1, index_reg);
3693   }
3694   return dst;
3695 }
3696 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3697 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3698                                const LogicVRegister& src1,
3699                                const LogicVRegister& src2, int index) {
3700   dst.ClearForWrite(vform);
3701   SimVRegister temp;
3702   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3703     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3704     fmls<float>(vform, dst, src1, index_reg);
3705   } else {
3706     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3707     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3708     fmls<double>(vform, dst, src1, index_reg);
3709   }
3710   return dst;
3711 }
3712 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3713 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3714                                 const LogicVRegister& src1,
3715                                 const LogicVRegister& src2, int index) {
3716   dst.ClearForWrite(vform);
3717   SimVRegister temp;
3718   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3719     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3720     fmulx<float>(vform, dst, src1, index_reg);
3721 
3722   } else {
3723     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3724     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3725     fmulx<double>(vform, dst, src1, index_reg);
3726   }
3727   return dst;
3728 }
3729 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)3730 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3731                                 const LogicVRegister& src,
3732                                 FPRounding rounding_mode,
3733                                 bool inexact_exception) {
3734   dst.ClearForWrite(vform);
3735   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3736     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3737       float input = src.Float<float>(i);
3738       float rounded = FPRoundInt(input, rounding_mode);
3739       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3740         FPProcessException();
3741       }
3742       dst.SetFloat<float>(i, rounded);
3743     }
3744   } else {
3745     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3746     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3747       double input = src.Float<double>(i);
3748       double rounded = FPRoundInt(input, rounding_mode);
3749       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3750         FPProcessException();
3751       }
3752       dst.SetFloat<double>(i, rounded);
3753     }
3754   }
3755   return dst;
3756 }
3757 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3758 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3759                                 const LogicVRegister& src,
3760                                 FPRounding rounding_mode, int fbits) {
3761   dst.ClearForWrite(vform);
3762   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3763     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3764       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3765       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3766     }
3767   } else {
3768     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3769     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3770       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3771       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3772     }
3773   }
3774   return dst;
3775 }
3776 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3777 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3778                                 const LogicVRegister& src,
3779                                 FPRounding rounding_mode, int fbits) {
3780   dst.ClearForWrite(vform);
3781   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3782     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3783       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3784       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3785     }
3786   } else {
3787     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3788     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3789       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3790       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3791     }
3792   }
3793   return dst;
3794 }
3795 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3796 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3797                                 const LogicVRegister& src) {
3798   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3799     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3800       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3801     }
3802   } else {
3803     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3804     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3805       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3806     }
3807   }
3808   return dst;
3809 }
3810 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3811 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3812                                  const LogicVRegister& src) {
3813   int lane_count = LaneCountFromFormat(vform);
3814   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3815     for (int i = 0; i < lane_count; i++) {
3816       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3817     }
3818   } else {
3819     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3820     for (int i = 0; i < lane_count; i++) {
3821       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3822     }
3823   }
3824   return dst;
3825 }
3826 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3827 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3828                                 const LogicVRegister& src) {
3829   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3830     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3831       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3832     }
3833   } else {
3834     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3835     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3836       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3837     }
3838   }
3839   dst.ClearForWrite(vform);
3840   return dst;
3841 }
3842 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3843 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3844                                  const LogicVRegister& src) {
3845   int lane_count = LaneCountFromFormat(vform) / 2;
3846   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3847     for (int i = lane_count - 1; i >= 0; i--) {
3848       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3849     }
3850   } else {
3851     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3852     for (int i = lane_count - 1; i >= 0; i--) {
3853       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3854     }
3855   }
3856   return dst;
3857 }
3858 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3859 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3860                                  const LogicVRegister& src) {
3861   dst.ClearForWrite(vform);
3862   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3863   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3864     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3865   }
3866   return dst;
3867 }
3868 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3869 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3870                                   const LogicVRegister& src) {
3871   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3872   int lane_count = LaneCountFromFormat(vform) / 2;
3873   for (int i = lane_count - 1; i >= 0; i--) {
3874     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3875   }
3876   return dst;
3877 }
3878 
3879 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)3880 double Simulator::recip_sqrt_estimate(double a) {
3881   int q0, q1, s;
3882   double r;
3883   if (a < 0.5) {
3884     q0 = static_cast<int>(a * 512.0);
3885     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3886   } else {
3887     q1 = static_cast<int>(a * 256.0);
3888     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3889   }
3890   s = static_cast<int>(256.0 * r + 0.5);
3891   return static_cast<double>(s) / 256.0;
3892 }
3893 
3894 namespace {
3895 
Bits(uint64_t val,int start_bit,int end_bit)3896 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3897   return unsigned_bitextract_64(start_bit, end_bit, val);
3898 }
3899 
3900 }  // anonymous namespace
3901 
3902 template <typename T>
FPRecipSqrtEstimate(T op)3903 T Simulator::FPRecipSqrtEstimate(T op) {
3904   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3905                 "T must be a float or double");
3906 
3907   if (std::isnan(op)) {
3908     return FPProcessNaN(op);
3909   } else if (op == 0.0) {
3910     if (std::copysign(1.0, op) < 0.0) {
3911       return kFP64NegativeInfinity;
3912     } else {
3913       return kFP64PositiveInfinity;
3914     }
3915   } else if (std::copysign(1.0, op) < 0.0) {
3916     FPProcessException();
3917     return FPDefaultNaN<T>();
3918   } else if (std::isinf(op)) {
3919     return 0.0;
3920   } else {
3921     uint64_t fraction;
3922     int32_t exp, result_exp;
3923 
3924     if (sizeof(T) == sizeof(float)) {
3925       exp = static_cast<int32_t>(float_exp(op));
3926       fraction = float_mantissa(op);
3927       fraction <<= 29;
3928     } else {
3929       exp = static_cast<int32_t>(double_exp(op));
3930       fraction = double_mantissa(op);
3931     }
3932 
3933     if (exp == 0) {
3934       while (Bits(fraction, 51, 51) == 0) {
3935         fraction = Bits(fraction, 50, 0) << 1;
3936         exp -= 1;
3937       }
3938       fraction = Bits(fraction, 50, 0) << 1;
3939     }
3940 
3941     double scaled;
3942     if (Bits(exp, 0, 0) == 0) {
3943       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3944     } else {
3945       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3946     }
3947 
3948     if (sizeof(T) == sizeof(float)) {
3949       result_exp = (380 - exp) / 2;
3950     } else {
3951       result_exp = (3068 - exp) / 2;
3952     }
3953 
3954     uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3955 
3956     if (sizeof(T) == sizeof(float)) {
3957       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3958       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3959       return float_pack(0, exp_bits, est_bits);
3960     } else {
3961       return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3962     }
3963   }
3964 }
3965 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3966 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3967                                   const LogicVRegister& src) {
3968   dst.ClearForWrite(vform);
3969   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3970     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3971       float input = src.Float<float>(i);
3972       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3973     }
3974   } else {
3975     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3976     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3977       double input = src.Float<double>(i);
3978       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3979     }
3980   }
3981   return dst;
3982 }
3983 
3984 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)3985 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3986   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3987                 "T must be a float or double");
3988   uint32_t sign;
3989 
3990   if (sizeof(T) == sizeof(float)) {
3991     sign = float_sign(op);
3992   } else {
3993     sign = double_sign(op);
3994   }
3995 
3996   if (std::isnan(op)) {
3997     return FPProcessNaN(op);
3998   } else if (std::isinf(op)) {
3999     return (sign == 1) ? -0.0 : 0.0;
4000   } else if (op == 0.0) {
4001     FPProcessException();  // FPExc_DivideByZero exception.
4002     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4003   } else if (((sizeof(T) == sizeof(float)) &&
4004               (std::fabs(op) < std::pow(2.0, -128.0))) ||
4005              ((sizeof(T) == sizeof(double)) &&
4006               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
4007     bool overflow_to_inf = false;
4008     switch (rounding) {
4009       case FPTieEven:
4010         overflow_to_inf = true;
4011         break;
4012       case FPPositiveInfinity:
4013         overflow_to_inf = (sign == 0);
4014         break;
4015       case FPNegativeInfinity:
4016         overflow_to_inf = (sign == 1);
4017         break;
4018       case FPZero:
4019         overflow_to_inf = false;
4020         break;
4021       default:
4022         break;
4023     }
4024     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
4025     if (overflow_to_inf) {
4026       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4027     } else {
4028       // Return FPMaxNormal(sign).
4029       if (sizeof(T) == sizeof(float)) {
4030         return float_pack(sign, 0xFE, 0x07FFFFF);
4031       } else {
4032         return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
4033       }
4034     }
4035   } else {
4036     uint64_t fraction;
4037     int32_t exp, result_exp;
4038     uint32_t sign;
4039 
4040     if (sizeof(T) == sizeof(float)) {
4041       sign = float_sign(op);
4042       exp = static_cast<int32_t>(float_exp(op));
4043       fraction = float_mantissa(op);
4044       fraction <<= 29;
4045     } else {
4046       sign = double_sign(op);
4047       exp = static_cast<int32_t>(double_exp(op));
4048       fraction = double_mantissa(op);
4049     }
4050 
4051     if (exp == 0) {
4052       if (Bits(fraction, 51, 51) == 0) {
4053         exp -= 1;
4054         fraction = Bits(fraction, 49, 0) << 2;
4055       } else {
4056         fraction = Bits(fraction, 50, 0) << 1;
4057       }
4058     }
4059 
4060     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4061 
4062     if (sizeof(T) == sizeof(float)) {
4063       result_exp = 253 - exp;
4064     } else {
4065       result_exp = 2045 - exp;
4066     }
4067 
4068     double estimate = recip_estimate(scaled);
4069 
4070     fraction = double_mantissa(estimate);
4071     if (result_exp == 0) {
4072       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4073     } else if (result_exp == -1) {
4074       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4075       result_exp = 0;
4076     }
4077     if (sizeof(T) == sizeof(float)) {
4078       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4079       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4080       return float_pack(sign, exp_bits, frac_bits);
4081     } else {
4082       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4083     }
4084   }
4085 }
4086 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4087 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4088                                  const LogicVRegister& src, FPRounding round) {
4089   dst.ClearForWrite(vform);
4090   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4091     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4092       float input = src.Float<float>(i);
4093       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4094     }
4095   } else {
4096     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4097     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4098       double input = src.Float<double>(i);
4099       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4100     }
4101   }
4102   return dst;
4103 }
4104 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4105 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4106                                   const LogicVRegister& src) {
4107   dst.ClearForWrite(vform);
4108   uint64_t operand;
4109   uint32_t result;
4110   double dp_operand, dp_result;
4111   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4112     operand = src.Uint(vform, i);
4113     if (operand <= 0x3FFFFFFF) {
4114       result = 0xFFFFFFFF;
4115     } else {
4116       dp_operand = operand * std::pow(2.0, -32);
4117       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4118       result = static_cast<uint32_t>(dp_result);
4119     }
4120     dst.SetUint(vform, i, result);
4121   }
4122   return dst;
4123 }
4124 
4125 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4126 double Simulator::recip_estimate(double a) {
4127   int q, s;
4128   double r;
4129   q = static_cast<int>(a * 512.0);
4130   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4131   s = static_cast<int>(256.0 * r + 0.5);
4132   return static_cast<double>(s) / 256.0;
4133 }
4134 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4135 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4136                                  const LogicVRegister& src) {
4137   dst.ClearForWrite(vform);
4138   uint64_t operand;
4139   uint32_t result;
4140   double dp_operand, dp_result;
4141   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4142     operand = src.Uint(vform, i);
4143     if (operand <= 0x7FFFFFFF) {
4144       result = 0xFFFFFFFF;
4145     } else {
4146       dp_operand = operand * std::pow(2.0, -32);
4147       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4148       result = static_cast<uint32_t>(dp_result);
4149     }
4150     dst.SetUint(vform, i, result);
4151   }
4152   return dst;
4153 }
4154 
4155 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4156 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4157                                  const LogicVRegister& src) {
4158   dst.ClearForWrite(vform);
4159   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4160     T op = src.Float<T>(i);
4161     T result;
4162     if (std::isnan(op)) {
4163       result = FPProcessNaN(op);
4164     } else {
4165       int exp;
4166       uint32_t sign;
4167       if (sizeof(T) == sizeof(float)) {
4168         sign = float_sign(op);
4169         exp = static_cast<int>(float_exp(op));
4170         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4171         result = float_pack(sign, exp, 0);
4172       } else {
4173         sign = double_sign(op);
4174         exp = static_cast<int>(double_exp(op));
4175         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4176         result = double_pack(sign, exp, 0);
4177       }
4178     }
4179     dst.SetFloat(i, result);
4180   }
4181   return dst;
4182 }
4183 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4184 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4185                                  const LogicVRegister& src) {
4186   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4187     frecpx<float>(vform, dst, src);
4188   } else {
4189     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4190     frecpx<double>(vform, dst, src);
4191   }
4192   return dst;
4193 }
4194 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4195 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4196                                 const LogicVRegister& src, int fbits,
4197                                 FPRounding round) {
4198   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4199     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4200       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4201       dst.SetFloat<float>(i, result);
4202     } else {
4203       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4204       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4205       dst.SetFloat<double>(i, result);
4206     }
4207   }
4208   return dst;
4209 }
4210 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4211 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4212                                 const LogicVRegister& src, int fbits,
4213                                 FPRounding round) {
4214   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4215     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4216       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4217       dst.SetFloat<float>(i, result);
4218     } else {
4219       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4220       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4221       dst.SetFloat<double>(i, result);
4222     }
4223   }
4224   return dst;
4225 }
4226 
4227 }  // namespace internal
4228 }  // namespace v8
4229 
4230 #endif  // USE_SIMULATOR
4231