1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/execution/arm64/simulator-arm64.h"
6 
7 #if defined(USE_SIMULATOR)
8 
9 #include <cmath>
10 
11 namespace v8 {
12 namespace internal {
13 
14 namespace {
15 
16 // See FPRound for a description of this function.
FPRoundToDouble(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)17 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
18                               FPRounding round_mode) {
19   uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
20       sign, exponent, mantissa, round_mode);
21   return bit_cast<double>(bits);
22 }
23 
24 // See FPRound for a description of this function.
FPRoundToFloat(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)25 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
26                             FPRounding round_mode) {
27   uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
28       sign, exponent, mantissa, round_mode);
29   return bit_cast<float>(bits);
30 }
31 
32 // See FPRound for a description of this function.
FPRoundToFloat16(int64_t sign,int64_t exponent,uint64_t mantissa,FPRounding round_mode)33 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
34                                 uint64_t mantissa, FPRounding round_mode) {
35   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
36       sign, exponent, mantissa, round_mode);
37 }
38 
39 }  // namespace
40 
FixedToDouble(int64_t src,int fbits,FPRounding round)41 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
42   if (src >= 0) {
43     return UFixedToDouble(src, fbits, round);
44   } else if (src == INT64_MIN) {
45     return -UFixedToDouble(src, fbits, round);
46   } else {
47     return -UFixedToDouble(-src, fbits, round);
48   }
49 }
50 
UFixedToDouble(uint64_t src,int fbits,FPRounding round)51 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
52   // An input of 0 is a special case because the result is effectively
53   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
54   if (src == 0) {
55     return 0.0;
56   }
57 
58   // Calculate the exponent. The highest significant bit will have the value
59   // 2^exponent.
60   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
61   const int64_t exponent = highest_significant_bit - fbits;
62 
63   return FPRoundToDouble(0, exponent, src, round);
64 }
65 
FixedToFloat(int64_t src,int fbits,FPRounding round)66 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
67   if (src >= 0) {
68     return UFixedToFloat(src, fbits, round);
69   } else if (src == INT64_MIN) {
70     return -UFixedToFloat(src, fbits, round);
71   } else {
72     return -UFixedToFloat(-src, fbits, round);
73   }
74 }
75 
UFixedToFloat(uint64_t src,int fbits,FPRounding round)76 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
77   // An input of 0 is a special case because the result is effectively
78   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
79   if (src == 0) {
80     return 0.0f;
81   }
82 
83   // Calculate the exponent. The highest significant bit will have the value
84   // 2^exponent.
85   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
86   const int32_t exponent = highest_significant_bit - fbits;
87 
88   return FPRoundToFloat(0, exponent, src, round);
89 }
90 
FPToDouble(float value)91 double Simulator::FPToDouble(float value) {
92   switch (std::fpclassify(value)) {
93     case FP_NAN: {
94       if (IsSignallingNaN(value)) {
95         FPProcessException();
96       }
97       if (DN()) return kFP64DefaultNaN;
98 
99       // Convert NaNs as the processor would:
100       //  - The sign is propagated.
101       //  - The mantissa is transferred entirely, except that the top bit is
102       //    forced to '1', making the result a quiet NaN. The unused (low-order)
103       //    mantissa bits are set to 0.
104       uint32_t raw = bit_cast<uint32_t>(value);
105 
106       uint64_t sign = raw >> 31;
107       uint64_t exponent = (1 << kDoubleExponentBits) - 1;
108       uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
109 
110       // Unused low-order bits remain zero.
111       mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
112 
113       // Force a quiet NaN.
114       mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
115 
116       return double_pack(sign, exponent, mantissa);
117     }
118 
119     case FP_ZERO:
120     case FP_NORMAL:
121     case FP_SUBNORMAL:
122     case FP_INFINITE: {
123       // All other inputs are preserved in a standard cast, because every value
124       // representable using an IEEE-754 float is also representable using an
125       // IEEE-754 double.
126       return static_cast<double>(value);
127     }
128   }
129 
130   UNREACHABLE();
131 }
132 
FPToFloat(float16 value)133 float Simulator::FPToFloat(float16 value) {
134   uint32_t sign = value >> 15;
135   uint32_t exponent =
136       unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
137                              kFloat16MantissaBits, value);
138   uint32_t mantissa =
139       unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
140 
141   switch (float16classify(value)) {
142     case FP_ZERO:
143       return (sign == 0) ? 0.0f : -0.0f;
144 
145     case FP_INFINITE:
146       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
147 
148     case FP_SUBNORMAL: {
149       // Calculate shift required to put mantissa into the most-significant bits
150       // of the destination mantissa.
151       int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
152 
153       // Shift mantissa and discard implicit '1'.
154       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
155       mantissa &= (1 << kFloatMantissaBits) - 1;
156 
157       // Adjust the exponent for the shift applied, and rebias.
158       exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
159       break;
160     }
161 
162     case FP_NAN: {
163       if (IsSignallingNaN(value)) {
164         FPProcessException();
165       }
166       if (DN()) return kFP32DefaultNaN;
167 
168       // Convert NaNs as the processor would:
169       //  - The sign is propagated.
170       //  - The mantissa is transferred entirely, except that the top bit is
171       //    forced to '1', making the result a quiet NaN. The unused (low-order)
172       //    mantissa bits are set to 0.
173       exponent = (1 << kFloatExponentBits) - 1;
174 
175       // Increase bits in mantissa, making low-order bits 0.
176       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
177       mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
178       break;
179     }
180 
181     case FP_NORMAL: {
182       // Increase bits in mantissa, making low-order bits 0.
183       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
184 
185       // Change exponent bias.
186       exponent += (kFloatExponentBias - kFloat16ExponentBias);
187       break;
188     }
189 
190     default:
191       UNREACHABLE();
192   }
193   return float_pack(sign, exponent, mantissa);
194 }
195 
FPToFloat16(float value,FPRounding round_mode)196 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
197   // Only the FPTieEven rounding mode is implemented.
198   DCHECK_EQ(round_mode, FPTieEven);
199   USE(round_mode);
200 
201   int64_t sign = float_sign(value);
202   int64_t exponent =
203       static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
204   uint32_t mantissa = float_mantissa(value);
205 
206   switch (std::fpclassify(value)) {
207     case FP_NAN: {
208       if (IsSignallingNaN(value)) {
209         FPProcessException();
210       }
211       if (DN()) return kFP16DefaultNaN;
212 
213       // Convert NaNs as the processor would:
214       //  - The sign is propagated.
215       //  - The mantissa is transferred as much as possible, except that the top
216       //    bit is forced to '1', making the result a quiet NaN.
217       float16 result =
218           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
219       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
220       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
221       return result;
222     }
223 
224     case FP_ZERO:
225       return (sign == 0) ? 0 : 0x8000;
226 
227     case FP_INFINITE:
228       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
229 
230     case FP_NORMAL:
231     case FP_SUBNORMAL: {
232       // Convert float-to-half as the processor would, assuming that FPCR.FZ
233       // (flush-to-zero) is not set.
234 
235       // Add the implicit '1' bit to the mantissa.
236       mantissa += (1 << kFloatMantissaBits);
237       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
238     }
239   }
240 
241   UNREACHABLE();
242 }
243 
FPToFloat16(double value,FPRounding round_mode)244 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
245   // Only the FPTieEven rounding mode is implemented.
246   DCHECK_EQ(round_mode, FPTieEven);
247   USE(round_mode);
248 
249   int64_t sign = double_sign(value);
250   int64_t exponent =
251       static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
252   uint64_t mantissa = double_mantissa(value);
253 
254   switch (std::fpclassify(value)) {
255     case FP_NAN: {
256       if (IsSignallingNaN(value)) {
257         FPProcessException();
258       }
259       if (DN()) return kFP16DefaultNaN;
260 
261       // Convert NaNs as the processor would:
262       //  - The sign is propagated.
263       //  - The mantissa is transferred as much as possible, except that the top
264       //    bit is forced to '1', making the result a quiet NaN.
265       float16 result =
266           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
267       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
268       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
269       return result;
270     }
271 
272     case FP_ZERO:
273       return (sign == 0) ? 0 : 0x8000;
274 
275     case FP_INFINITE:
276       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
277 
278     case FP_NORMAL:
279     case FP_SUBNORMAL: {
280       // Convert double-to-half as the processor would, assuming that FPCR.FZ
281       // (flush-to-zero) is not set.
282 
283       // Add the implicit '1' bit to the mantissa.
284       mantissa += (UINT64_C(1) << kDoubleMantissaBits);
285       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
286     }
287   }
288 
289   UNREACHABLE();
290 }
291 
FPToFloat(double value,FPRounding round_mode)292 float Simulator::FPToFloat(double value, FPRounding round_mode) {
293   // Only the FPTieEven rounding mode is implemented.
294   DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
295   USE(round_mode);
296 
297   switch (std::fpclassify(value)) {
298     case FP_NAN: {
299       if (IsSignallingNaN(value)) {
300         FPProcessException();
301       }
302       if (DN()) return kFP32DefaultNaN;
303 
304       // Convert NaNs as the processor would:
305       //  - The sign is propagated.
306       //  - The mantissa is transferred as much as possible, except that the
307       //    top bit is forced to '1', making the result a quiet NaN.
308 
309       uint64_t raw = bit_cast<uint64_t>(value);
310 
311       uint32_t sign = raw >> 63;
312       uint32_t exponent = (1 << 8) - 1;
313       uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
314           50, kDoubleMantissaBits - kFloatMantissaBits, raw));
315       mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.
316 
317       return float_pack(sign, exponent, mantissa);
318     }
319 
320     case FP_ZERO:
321     case FP_INFINITE: {
322       // In a C++ cast, any value representable in the target type will be
323       // unchanged. This is always the case for +/-0.0 and infinities.
324       return static_cast<float>(value);
325     }
326 
327     case FP_NORMAL:
328     case FP_SUBNORMAL: {
329       // Convert double-to-float as the processor would, assuming that FPCR.FZ
330       // (flush-to-zero) is not set.
331       uint32_t sign = double_sign(value);
332       int64_t exponent =
333           static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
334       uint64_t mantissa = double_mantissa(value);
335       if (std::fpclassify(value) == FP_NORMAL) {
336         // For normal FP values, add the hidden bit.
337         mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
338       }
339       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
340     }
341   }
342 
343   UNREACHABLE();
344 }
345 
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)346 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
347   dst.ClearForWrite(vform);
348   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
349     dst.ReadUintFromMem(vform, i, addr);
350     addr += LaneSizeInBytesFromFormat(vform);
351   }
352 }
353 
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)354 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
355                     uint64_t addr) {
356   dst.ReadUintFromMem(vform, index, addr);
357 }
358 
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)359 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
360   dst.ClearForWrite(vform);
361   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
362     dst.ReadUintFromMem(vform, i, addr);
363   }
364 }
365 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)366 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
367                     LogicVRegister dst2, uint64_t addr1) {
368   dst1.ClearForWrite(vform);
369   dst2.ClearForWrite(vform);
370   int esize = LaneSizeInBytesFromFormat(vform);
371   uint64_t addr2 = addr1 + esize;
372   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
373     dst1.ReadUintFromMem(vform, i, addr1);
374     dst2.ReadUintFromMem(vform, i, addr2);
375     addr1 += 2 * esize;
376     addr2 += 2 * esize;
377   }
378 }
379 
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)380 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
381                     LogicVRegister dst2, int index, uint64_t addr1) {
382   dst1.ClearForWrite(vform);
383   dst2.ClearForWrite(vform);
384   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
385   dst1.ReadUintFromMem(vform, index, addr1);
386   dst2.ReadUintFromMem(vform, index, addr2);
387 }
388 
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)389 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
390                      LogicVRegister dst2, uint64_t addr) {
391   dst1.ClearForWrite(vform);
392   dst2.ClearForWrite(vform);
393   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
394   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
395     dst1.ReadUintFromMem(vform, i, addr);
396     dst2.ReadUintFromMem(vform, i, addr2);
397   }
398 }
399 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)400 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
401                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
402   dst1.ClearForWrite(vform);
403   dst2.ClearForWrite(vform);
404   dst3.ClearForWrite(vform);
405   int esize = LaneSizeInBytesFromFormat(vform);
406   uint64_t addr2 = addr1 + esize;
407   uint64_t addr3 = addr2 + esize;
408   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
409     dst1.ReadUintFromMem(vform, i, addr1);
410     dst2.ReadUintFromMem(vform, i, addr2);
411     dst3.ReadUintFromMem(vform, i, addr3);
412     addr1 += 3 * esize;
413     addr2 += 3 * esize;
414     addr3 += 3 * esize;
415   }
416 }
417 
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)418 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
419                     LogicVRegister dst2, LogicVRegister dst3, int index,
420                     uint64_t addr1) {
421   dst1.ClearForWrite(vform);
422   dst2.ClearForWrite(vform);
423   dst3.ClearForWrite(vform);
424   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
425   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
426   dst1.ReadUintFromMem(vform, index, addr1);
427   dst2.ReadUintFromMem(vform, index, addr2);
428   dst3.ReadUintFromMem(vform, index, addr3);
429 }
430 
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)431 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
432                      LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
433   dst1.ClearForWrite(vform);
434   dst2.ClearForWrite(vform);
435   dst3.ClearForWrite(vform);
436   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
437   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
438   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
439     dst1.ReadUintFromMem(vform, i, addr);
440     dst2.ReadUintFromMem(vform, i, addr2);
441     dst3.ReadUintFromMem(vform, i, addr3);
442   }
443 }
444 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)445 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
446                     LogicVRegister dst2, LogicVRegister dst3,
447                     LogicVRegister dst4, uint64_t addr1) {
448   dst1.ClearForWrite(vform);
449   dst2.ClearForWrite(vform);
450   dst3.ClearForWrite(vform);
451   dst4.ClearForWrite(vform);
452   int esize = LaneSizeInBytesFromFormat(vform);
453   uint64_t addr2 = addr1 + esize;
454   uint64_t addr3 = addr2 + esize;
455   uint64_t addr4 = addr3 + esize;
456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
457     dst1.ReadUintFromMem(vform, i, addr1);
458     dst2.ReadUintFromMem(vform, i, addr2);
459     dst3.ReadUintFromMem(vform, i, addr3);
460     dst4.ReadUintFromMem(vform, i, addr4);
461     addr1 += 4 * esize;
462     addr2 += 4 * esize;
463     addr3 += 4 * esize;
464     addr4 += 4 * esize;
465   }
466 }
467 
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)468 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
469                     LogicVRegister dst2, LogicVRegister dst3,
470                     LogicVRegister dst4, int index, uint64_t addr1) {
471   dst1.ClearForWrite(vform);
472   dst2.ClearForWrite(vform);
473   dst3.ClearForWrite(vform);
474   dst4.ClearForWrite(vform);
475   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
476   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
477   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
478   dst1.ReadUintFromMem(vform, index, addr1);
479   dst2.ReadUintFromMem(vform, index, addr2);
480   dst3.ReadUintFromMem(vform, index, addr3);
481   dst4.ReadUintFromMem(vform, index, addr4);
482 }
483 
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)484 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
485                      LogicVRegister dst2, LogicVRegister dst3,
486                      LogicVRegister dst4, uint64_t addr) {
487   dst1.ClearForWrite(vform);
488   dst2.ClearForWrite(vform);
489   dst3.ClearForWrite(vform);
490   dst4.ClearForWrite(vform);
491   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
492   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
493   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
494   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
495     dst1.ReadUintFromMem(vform, i, addr);
496     dst2.ReadUintFromMem(vform, i, addr2);
497     dst3.ReadUintFromMem(vform, i, addr3);
498     dst4.ReadUintFromMem(vform, i, addr4);
499   }
500 }
501 
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)502 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
503   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
504     src.WriteUintToMem(vform, i, addr);
505     addr += LaneSizeInBytesFromFormat(vform);
506   }
507 }
508 
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)509 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
510                     uint64_t addr) {
511   src.WriteUintToMem(vform, index, addr);
512 }
513 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)514 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
515                     uint64_t addr) {
516   int esize = LaneSizeInBytesFromFormat(vform);
517   uint64_t addr2 = addr + esize;
518   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
519     dst.WriteUintToMem(vform, i, addr);
520     dst2.WriteUintToMem(vform, i, addr2);
521     addr += 2 * esize;
522     addr2 += 2 * esize;
523   }
524 }
525 
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)526 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
527                     int index, uint64_t addr) {
528   int esize = LaneSizeInBytesFromFormat(vform);
529   dst.WriteUintToMem(vform, index, addr);
530   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
531 }
532 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)533 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
534                     LogicVRegister dst3, uint64_t addr) {
535   int esize = LaneSizeInBytesFromFormat(vform);
536   uint64_t addr2 = addr + esize;
537   uint64_t addr3 = addr2 + esize;
538   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
539     dst.WriteUintToMem(vform, i, addr);
540     dst2.WriteUintToMem(vform, i, addr2);
541     dst3.WriteUintToMem(vform, i, addr3);
542     addr += 3 * esize;
543     addr2 += 3 * esize;
544     addr3 += 3 * esize;
545   }
546 }
547 
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)548 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
549                     LogicVRegister dst3, int index, uint64_t addr) {
550   int esize = LaneSizeInBytesFromFormat(vform);
551   dst.WriteUintToMem(vform, index, addr);
552   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
553   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
554 }
555 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)556 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
557                     LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
558   int esize = LaneSizeInBytesFromFormat(vform);
559   uint64_t addr2 = addr + esize;
560   uint64_t addr3 = addr2 + esize;
561   uint64_t addr4 = addr3 + esize;
562   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
563     dst.WriteUintToMem(vform, i, addr);
564     dst2.WriteUintToMem(vform, i, addr2);
565     dst3.WriteUintToMem(vform, i, addr3);
566     dst4.WriteUintToMem(vform, i, addr4);
567     addr += 4 * esize;
568     addr2 += 4 * esize;
569     addr3 += 4 * esize;
570     addr4 += 4 * esize;
571   }
572 }
573 
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)574 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
575                     LogicVRegister dst3, LogicVRegister dst4, int index,
576                     uint64_t addr) {
577   int esize = LaneSizeInBytesFromFormat(vform);
578   dst.WriteUintToMem(vform, index, addr);
579   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
580   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
581   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
582 }
583 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)584 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
585                               const LogicVRegister& src1,
586                               const LogicVRegister& src2, Condition cond) {
587   dst.ClearForWrite(vform);
588   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
589     int64_t sa = src1.Int(vform, i);
590     int64_t sb = src2.Int(vform, i);
591     uint64_t ua = src1.Uint(vform, i);
592     uint64_t ub = src2.Uint(vform, i);
593     bool result = false;
594     switch (cond) {
595       case eq:
596         result = (ua == ub);
597         break;
598       case ge:
599         result = (sa >= sb);
600         break;
601       case gt:
602         result = (sa > sb);
603         break;
604       case hi:
605         result = (ua > ub);
606         break;
607       case hs:
608         result = (ua >= ub);
609         break;
610       case lt:
611         result = (sa < sb);
612         break;
613       case le:
614         result = (sa <= sb);
615         break;
616       default:
617         UNREACHABLE();
618     }
619     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
620   }
621   return dst;
622 }
623 
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)624 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
625                               const LogicVRegister& src1, int imm,
626                               Condition cond) {
627   SimVRegister temp;
628   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
629   return cmp(vform, dst, src1, imm_reg, cond);
630 }
631 
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)632 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
633                                  const LogicVRegister& src1,
634                                  const LogicVRegister& src2) {
635   dst.ClearForWrite(vform);
636   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
637     uint64_t ua = src1.Uint(vform, i);
638     uint64_t ub = src2.Uint(vform, i);
639     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
640   }
641   return dst;
642 }
643 
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)644 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
645                               const LogicVRegister& src1,
646                               const LogicVRegister& src2) {
647   int lane_size = LaneSizeInBitsFromFormat(vform);
648   dst.ClearForWrite(vform);
649   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
650     // Test for unsigned saturation.
651     uint64_t ua = src1.UintLeftJustified(vform, i);
652     uint64_t ub = src2.UintLeftJustified(vform, i);
653     uint64_t ur = ua + ub;
654     if (ur < ua) {
655       dst.SetUnsignedSat(i, true);
656     }
657 
658     // Test for signed saturation.
659     bool pos_a = (ua >> 63) == 0;
660     bool pos_b = (ub >> 63) == 0;
661     bool pos_r = (ur >> 63) == 0;
662     // If the signs of the operands are the same, but different from the result,
663     // there was an overflow.
664     if ((pos_a == pos_b) && (pos_a != pos_r)) {
665       dst.SetSignedSat(i, pos_a);
666     }
667 
668     dst.SetInt(vform, i, ur >> (64 - lane_size));
669   }
670   return dst;
671 }
672 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)673 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
674                                const LogicVRegister& src1,
675                                const LogicVRegister& src2) {
676   SimVRegister temp1, temp2;
677   uzp1(vform, temp1, src1, src2);
678   uzp2(vform, temp2, src1, src2);
679   add(vform, dst, temp1, temp2);
680   return dst;
681 }
682 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)683 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
684                               const LogicVRegister& src1,
685                               const LogicVRegister& src2) {
686   SimVRegister temp;
687   mul(vform, temp, src1, src2);
688   add(vform, dst, dst, temp);
689   return dst;
690 }
691 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)692 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
693                               const LogicVRegister& src1,
694                               const LogicVRegister& src2) {
695   SimVRegister temp;
696   mul(vform, temp, src1, src2);
697   sub(vform, dst, dst, temp);
698   return dst;
699 }
700 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)701 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
702                               const LogicVRegister& src1,
703                               const LogicVRegister& src2) {
704   dst.ClearForWrite(vform);
705   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
706     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
707   }
708   return dst;
709 }
710 
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)711 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
712                               const LogicVRegister& src1,
713                               const LogicVRegister& src2, int index) {
714   SimVRegister temp;
715   VectorFormat indexform = VectorFormatFillQ(vform);
716   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
717 }
718 
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)719 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
720                               const LogicVRegister& src1,
721                               const LogicVRegister& src2, int index) {
722   SimVRegister temp;
723   VectorFormat indexform = VectorFormatFillQ(vform);
724   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
725 }
726 
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)727 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
728                               const LogicVRegister& src1,
729                               const LogicVRegister& src2, int index) {
730   SimVRegister temp;
731   VectorFormat indexform = VectorFormatFillQ(vform);
732   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
733 }
734 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)735 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
736                                 const LogicVRegister& src1,
737                                 const LogicVRegister& src2, int index) {
738   SimVRegister temp;
739   VectorFormat indexform =
740       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
741   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
742 }
743 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)744 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
745                                  const LogicVRegister& src1,
746                                  const LogicVRegister& src2, int index) {
747   SimVRegister temp;
748   VectorFormat indexform =
749       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
750   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
751 }
752 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)753 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
754                                 const LogicVRegister& src1,
755                                 const LogicVRegister& src2, int index) {
756   SimVRegister temp;
757   VectorFormat indexform =
758       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
759   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
760 }
761 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)762 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
763                                  const LogicVRegister& src1,
764                                  const LogicVRegister& src2, int index) {
765   SimVRegister temp;
766   VectorFormat indexform =
767       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
768   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
769 }
770 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)771 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
772                                 const LogicVRegister& src1,
773                                 const LogicVRegister& src2, int index) {
774   SimVRegister temp;
775   VectorFormat indexform =
776       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
777   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
778 }
779 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)780 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
781                                  const LogicVRegister& src1,
782                                  const LogicVRegister& src2, int index) {
783   SimVRegister temp;
784   VectorFormat indexform =
785       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
786   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
787 }
788 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)789 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
790                                 const LogicVRegister& src1,
791                                 const LogicVRegister& src2, int index) {
792   SimVRegister temp;
793   VectorFormat indexform =
794       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
795   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
796 }
797 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)798 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
799                                  const LogicVRegister& src1,
800                                  const LogicVRegister& src2, int index) {
801   SimVRegister temp;
802   VectorFormat indexform =
803       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
804   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
808                                 const LogicVRegister& src1,
809                                 const LogicVRegister& src2, int index) {
810   SimVRegister temp;
811   VectorFormat indexform =
812       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
813   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
814 }
815 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)816 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
817                                  const LogicVRegister& src1,
818                                  const LogicVRegister& src2, int index) {
819   SimVRegister temp;
820   VectorFormat indexform =
821       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
822   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
823 }
824 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)825 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
826                                 const LogicVRegister& src1,
827                                 const LogicVRegister& src2, int index) {
828   SimVRegister temp;
829   VectorFormat indexform =
830       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
831   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
832 }
833 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)834 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
835                                  const LogicVRegister& src1,
836                                  const LogicVRegister& src2, int index) {
837   SimVRegister temp;
838   VectorFormat indexform =
839       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
840   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
841 }
842 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)843 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
844                                   const LogicVRegister& src1,
845                                   const LogicVRegister& src2, int index) {
846   SimVRegister temp;
847   VectorFormat indexform =
848       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
849   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
850 }
851 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)852 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
853                                    const LogicVRegister& src1,
854                                    const LogicVRegister& src2, int index) {
855   SimVRegister temp;
856   VectorFormat indexform =
857       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
858   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)861 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
862                                   const LogicVRegister& src1,
863                                   const LogicVRegister& src2, int index) {
864   SimVRegister temp;
865   VectorFormat indexform =
866       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
867   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
868 }
869 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)870 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
871                                    const LogicVRegister& src1,
872                                    const LogicVRegister& src2, int index) {
873   SimVRegister temp;
874   VectorFormat indexform =
875       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
876   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
877 }
878 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
880                                   const LogicVRegister& src1,
881                                   const LogicVRegister& src2, int index) {
882   SimVRegister temp;
883   VectorFormat indexform =
884       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
885   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
886 }
887 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)888 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
889                                    const LogicVRegister& src1,
890                                    const LogicVRegister& src2, int index) {
891   SimVRegister temp;
892   VectorFormat indexform =
893       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
894   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
895 }
896 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)897 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
898                                   const LogicVRegister& src1,
899                                   const LogicVRegister& src2, int index) {
900   SimVRegister temp;
901   VectorFormat indexform = VectorFormatFillQ(vform);
902   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
903 }
904 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)905 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
906                                    const LogicVRegister& src1,
907                                    const LogicVRegister& src2, int index) {
908   SimVRegister temp;
909   VectorFormat indexform = VectorFormatFillQ(vform);
910   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
911 }
912 
PolynomialMult(uint8_t op1,uint8_t op2)913 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
914   uint16_t result = 0;
915   uint16_t extended_op2 = op2;
916   for (int i = 0; i < 8; ++i) {
917     if ((op1 >> i) & 1) {
918       result = result ^ (extended_op2 << i);
919     }
920   }
921   return result;
922 }
923 
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)924 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
925                                const LogicVRegister& src1,
926                                const LogicVRegister& src2) {
927   dst.ClearForWrite(vform);
928   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
929     dst.SetUint(vform, i,
930                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
931   }
932   return dst;
933 }
934 
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
936                                 const LogicVRegister& src1,
937                                 const LogicVRegister& src2) {
938   VectorFormat vform_src = VectorFormatHalfWidth(vform);
939   dst.ClearForWrite(vform);
940   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
941     dst.SetUint(
942         vform, i,
943         PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
944   }
945   return dst;
946 }
947 
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)948 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
949                                  const LogicVRegister& src1,
950                                  const LogicVRegister& src2) {
951   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
952   dst.ClearForWrite(vform);
953   int lane_count = LaneCountFromFormat(vform);
954   for (int i = 0; i < lane_count; i++) {
955     dst.SetUint(vform, i,
956                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
957                                src2.Uint(vform_src, lane_count + i)));
958   }
959   return dst;
960 }
961 
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)962 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
963                               const LogicVRegister& src1,
964                               const LogicVRegister& src2) {
965   int lane_size = LaneSizeInBitsFromFormat(vform);
966   dst.ClearForWrite(vform);
967   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
968     // Test for unsigned saturation.
969     uint64_t ua = src1.UintLeftJustified(vform, i);
970     uint64_t ub = src2.UintLeftJustified(vform, i);
971     uint64_t ur = ua - ub;
972     if (ub > ua) {
973       dst.SetUnsignedSat(i, false);
974     }
975 
976     // Test for signed saturation.
977     bool pos_a = (ua >> 63) == 0;
978     bool pos_b = (ub >> 63) == 0;
979     bool pos_r = (ur >> 63) == 0;
980     // If the signs of the operands are different, and the sign of the first
981     // operand doesn't match the result, there was an overflow.
982     if ((pos_a != pos_b) && (pos_a != pos_r)) {
983       dst.SetSignedSat(i, pos_a);
984     }
985 
986     dst.SetInt(vform, i, ur >> (64 - lane_size));
987   }
988   return dst;
989 }
990 
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)991 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
992                                const LogicVRegister& src1,
993                                const LogicVRegister& src2) {
994   dst.ClearForWrite(vform);
995   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
996     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
997   }
998   return dst;
999 }
1000 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1001 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1002                               const LogicVRegister& src1,
1003                               const LogicVRegister& src2) {
1004   dst.ClearForWrite(vform);
1005   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1006     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1007   }
1008   return dst;
1009 }
1010 
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1011 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1012                               const LogicVRegister& src1,
1013                               const LogicVRegister& src2) {
1014   dst.ClearForWrite(vform);
1015   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1016     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1017   }
1018   return dst;
1019 }
1020 
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1021 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1022                               const LogicVRegister& src1,
1023                               const LogicVRegister& src2) {
1024   dst.ClearForWrite(vform);
1025   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1026     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1027   }
1028   return dst;
1029 }
1030 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1031 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1032                               const LogicVRegister& src1,
1033                               const LogicVRegister& src2) {
1034   dst.ClearForWrite(vform);
1035   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1036     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1037   }
1038   return dst;
1039 }
1040 
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1041 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1042                               const LogicVRegister& src, uint64_t imm) {
1043   uint64_t result[16];
1044   int laneCount = LaneCountFromFormat(vform);
1045   for (int i = 0; i < laneCount; ++i) {
1046     result[i] = src.Uint(vform, i) & ~imm;
1047   }
1048   dst.SetUintArray(vform, result);
1049   return dst;
1050 }
1051 
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1052 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1053                               const LogicVRegister& src1,
1054                               const LogicVRegister& src2) {
1055   dst.ClearForWrite(vform);
1056   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1057     uint64_t operand1 = dst.Uint(vform, i);
1058     uint64_t operand2 = ~src2.Uint(vform, i);
1059     uint64_t operand3 = src1.Uint(vform, i);
1060     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1061     dst.SetUint(vform, i, result);
1062   }
1063   return dst;
1064 }
1065 
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1066 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1067                               const LogicVRegister& src1,
1068                               const LogicVRegister& src2) {
1069   dst.ClearForWrite(vform);
1070   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1071     uint64_t operand1 = dst.Uint(vform, i);
1072     uint64_t operand2 = src2.Uint(vform, i);
1073     uint64_t operand3 = src1.Uint(vform, i);
1074     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1075     dst.SetUint(vform, i, result);
1076   }
1077   return dst;
1078 }
1079 
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1080 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1081                               const LogicVRegister& src1,
1082                               const LogicVRegister& src2) {
1083   dst.ClearForWrite(vform);
1084   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1085     uint64_t operand1 = src2.Uint(vform, i);
1086     uint64_t operand2 = dst.Uint(vform, i);
1087     uint64_t operand3 = src1.Uint(vform, i);
1088     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1089     dst.SetUint(vform, i, result);
1090   }
1091   return dst;
1092 }
1093 
SMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1094 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1095                                   const LogicVRegister& src1,
1096                                   const LogicVRegister& src2, bool max) {
1097   dst.ClearForWrite(vform);
1098   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1099     int64_t src1_val = src1.Int(vform, i);
1100     int64_t src2_val = src2.Int(vform, i);
1101     int64_t dst_val;
1102     if (max) {
1103       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1104     } else {
1105       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1106     }
1107     dst.SetInt(vform, i, dst_val);
1108   }
1109   return dst;
1110 }
1111 
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1112 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1113                                const LogicVRegister& src1,
1114                                const LogicVRegister& src2) {
1115   return SMinMax(vform, dst, src1, src2, true);
1116 }
1117 
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1118 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1119                                const LogicVRegister& src1,
1120                                const LogicVRegister& src2) {
1121   return SMinMax(vform, dst, src1, src2, false);
1122 }
1123 
SMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1124 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1125                                    const LogicVRegister& src1,
1126                                    const LogicVRegister& src2, bool max) {
1127   int lanes = LaneCountFromFormat(vform);
1128   int64_t result[kMaxLanesPerVector];
1129   const LogicVRegister* src = &src1;
1130   for (int j = 0; j < 2; j++) {
1131     for (int i = 0; i < lanes; i += 2) {
1132       int64_t first_val = src->Int(vform, i);
1133       int64_t second_val = src->Int(vform, i + 1);
1134       int64_t dst_val;
1135       if (max) {
1136         dst_val = (first_val > second_val) ? first_val : second_val;
1137       } else {
1138         dst_val = (first_val < second_val) ? first_val : second_val;
1139       }
1140       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1141       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1142     }
1143     src = &src2;
1144   }
1145   dst.SetIntArray(vform, result);
1146   return dst;
1147 }
1148 
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1149 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1150                                 const LogicVRegister& src1,
1151                                 const LogicVRegister& src2) {
1152   return SMinMaxP(vform, dst, src1, src2, true);
1153 }
1154 
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1155 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1156                                 const LogicVRegister& src1,
1157                                 const LogicVRegister& src2) {
1158   return SMinMaxP(vform, dst, src1, src2, false);
1159 }
1160 
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1161 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1162                                const LogicVRegister& src) {
1163   DCHECK_EQ(vform, kFormatD);
1164 
1165   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1166   dst.ClearForWrite(vform);
1167   dst.SetUint(vform, 0, dst_val);
1168   return dst;
1169 }
1170 
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1171 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1172                                const LogicVRegister& src) {
1173   VectorFormat vform_dst =
1174       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1175 
1176   int64_t dst_val = 0;
1177   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1178     dst_val += src.Int(vform, i);
1179   }
1180 
1181   dst.ClearForWrite(vform_dst);
1182   dst.SetInt(vform_dst, 0, dst_val);
1183   return dst;
1184 }
1185 
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1186 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1187                                  const LogicVRegister& src) {
1188   VectorFormat vform_dst =
1189       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1190 
1191   int64_t dst_val = 0;
1192   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193     dst_val += src.Int(vform, i);
1194   }
1195 
1196   dst.ClearForWrite(vform_dst);
1197   dst.SetInt(vform_dst, 0, dst_val);
1198   return dst;
1199 }
1200 
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1201 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1202                                  const LogicVRegister& src) {
1203   VectorFormat vform_dst =
1204       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1205 
1206   uint64_t dst_val = 0;
1207   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1208     dst_val += src.Uint(vform, i);
1209   }
1210 
1211   dst.ClearForWrite(vform_dst);
1212   dst.SetUint(vform_dst, 0, dst_val);
1213   return dst;
1214 }
1215 
SMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1216 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1217                                    const LogicVRegister& src, bool max) {
1218   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1219   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1220     int64_t src_val = src.Int(vform, i);
1221     if (max) {
1222       dst_val = (src_val > dst_val) ? src_val : dst_val;
1223     } else {
1224       dst_val = (src_val < dst_val) ? src_val : dst_val;
1225     }
1226   }
1227   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1228   dst.SetInt(vform, 0, dst_val);
1229   return dst;
1230 }
1231 
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1232 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1233                                 const LogicVRegister& src) {
1234   SMinMaxV(vform, dst, src, true);
1235   return dst;
1236 }
1237 
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1238 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1239                                 const LogicVRegister& src) {
1240   SMinMaxV(vform, dst, src, false);
1241   return dst;
1242 }
1243 
UMinMax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1244 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1245                                   const LogicVRegister& src1,
1246                                   const LogicVRegister& src2, bool max) {
1247   dst.ClearForWrite(vform);
1248   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249     uint64_t src1_val = src1.Uint(vform, i);
1250     uint64_t src2_val = src2.Uint(vform, i);
1251     uint64_t dst_val;
1252     if (max) {
1253       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1254     } else {
1255       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1256     }
1257     dst.SetUint(vform, i, dst_val);
1258   }
1259   return dst;
1260 }
1261 
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1262 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1263                                const LogicVRegister& src1,
1264                                const LogicVRegister& src2) {
1265   return UMinMax(vform, dst, src1, src2, true);
1266 }
1267 
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1268 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1269                                const LogicVRegister& src1,
1270                                const LogicVRegister& src2) {
1271   return UMinMax(vform, dst, src1, src2, false);
1272 }
1273 
UMinMaxP(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1274 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1275                                    const LogicVRegister& src1,
1276                                    const LogicVRegister& src2, bool max) {
1277   int lanes = LaneCountFromFormat(vform);
1278   uint64_t result[kMaxLanesPerVector];
1279   const LogicVRegister* src = &src1;
1280   for (int j = 0; j < 2; j++) {
1281     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1282       uint64_t first_val = src->Uint(vform, i);
1283       uint64_t second_val = src->Uint(vform, i + 1);
1284       uint64_t dst_val;
1285       if (max) {
1286         dst_val = (first_val > second_val) ? first_val : second_val;
1287       } else {
1288         dst_val = (first_val < second_val) ? first_val : second_val;
1289       }
1290       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1291       result[(i >> 1) + (j * lanes / 2)] = dst_val;
1292     }
1293     src = &src2;
1294   }
1295   dst.SetUintArray(vform, result);
1296   return dst;
1297 }
1298 
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1299 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1300                                 const LogicVRegister& src1,
1301                                 const LogicVRegister& src2) {
1302   return UMinMaxP(vform, dst, src1, src2, true);
1303 }
1304 
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1305 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1306                                 const LogicVRegister& src1,
1307                                 const LogicVRegister& src2) {
1308   return UMinMaxP(vform, dst, src1, src2, false);
1309 }
1310 
UMinMaxV(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1311 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1312                                    const LogicVRegister& src, bool max) {
1313   uint64_t dst_val = max ? 0 : UINT64_MAX;
1314   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1315     uint64_t src_val = src.Uint(vform, i);
1316     if (max) {
1317       dst_val = (src_val > dst_val) ? src_val : dst_val;
1318     } else {
1319       dst_val = (src_val < dst_val) ? src_val : dst_val;
1320     }
1321   }
1322   dst.ClearForWrite(ScalarFormatFromFormat(vform));
1323   dst.SetUint(vform, 0, dst_val);
1324   return dst;
1325 }
1326 
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1327 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1328                                 const LogicVRegister& src) {
1329   UMinMaxV(vform, dst, src, true);
1330   return dst;
1331 }
1332 
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1333 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1334                                 const LogicVRegister& src) {
1335   UMinMaxV(vform, dst, src, false);
1336   return dst;
1337 }
1338 
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1339 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1340                               const LogicVRegister& src, int shift) {
1341   DCHECK_GE(shift, 0);
1342   SimVRegister temp;
1343   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1344   return ushl(vform, dst, src, shiftreg);
1345 }
1346 
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1347 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1348                                 const LogicVRegister& src, int shift) {
1349   DCHECK_GE(shift, 0);
1350   SimVRegister temp1, temp2;
1351   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1352   LogicVRegister extendedreg = sxtl(vform, temp2, src);
1353   return sshl(vform, dst, extendedreg, shiftreg);
1354 }
1355 
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1356 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1357                                  const LogicVRegister& src, int shift) {
1358   DCHECK_GE(shift, 0);
1359   SimVRegister temp1, temp2;
1360   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1361   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1362   return sshl(vform, dst, extendedreg, shiftreg);
1363 }
1364 
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1365 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1366                                const LogicVRegister& src) {
1367   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1368   return sshll(vform, dst, src, shift);
1369 }
1370 
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1371 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1372                                 const LogicVRegister& src) {
1373   int shift = LaneSizeInBitsFromFormat(vform) / 2;
1374   return sshll2(vform, dst, src, shift);
1375 }
1376 
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1377 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1378                                 const LogicVRegister& src, int shift) {
1379   DCHECK_GE(shift, 0);
1380   SimVRegister temp1, temp2;
1381   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1382   LogicVRegister extendedreg = uxtl(vform, temp2, src);
1383   return ushl(vform, dst, extendedreg, shiftreg);
1384 }
1385 
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1386 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1387                                  const LogicVRegister& src, int shift) {
1388   DCHECK_GE(shift, 0);
1389   SimVRegister temp1, temp2;
1390   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1391   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1392   return ushl(vform, dst, extendedreg, shiftreg);
1393 }
1394 
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1395 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1396                               const LogicVRegister& src, int shift) {
1397   dst.ClearForWrite(vform);
1398   int laneCount = LaneCountFromFormat(vform);
1399   for (int i = 0; i < laneCount; i++) {
1400     uint64_t src_lane = src.Uint(vform, i);
1401     uint64_t dst_lane = dst.Uint(vform, i);
1402     uint64_t shifted = src_lane << shift;
1403     uint64_t mask = MaxUintFromFormat(vform) << shift;
1404     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1405   }
1406   return dst;
1407 }
1408 
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1409 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1410                                 const LogicVRegister& src, int shift) {
1411   DCHECK_GE(shift, 0);
1412   SimVRegister temp;
1413   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1414   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1415 }
1416 
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1417 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1418                                 const LogicVRegister& src, int shift) {
1419   DCHECK_GE(shift, 0);
1420   SimVRegister temp;
1421   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1422   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1423 }
1424 
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1425 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1426                                  const LogicVRegister& src, int shift) {
1427   DCHECK_GE(shift, 0);
1428   SimVRegister temp;
1429   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1430   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1431 }
1432 
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1433 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1434                               const LogicVRegister& src, int shift) {
1435   dst.ClearForWrite(vform);
1436   int laneCount = LaneCountFromFormat(vform);
1437   DCHECK((shift > 0) &&
1438          (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1439   for (int i = 0; i < laneCount; i++) {
1440     uint64_t src_lane = src.Uint(vform, i);
1441     uint64_t dst_lane = dst.Uint(vform, i);
1442     uint64_t shifted;
1443     uint64_t mask;
1444     if (shift == 64) {
1445       shifted = 0;
1446       mask = 0;
1447     } else {
1448       shifted = src_lane >> shift;
1449       mask = MaxUintFromFormat(vform) >> shift;
1450     }
1451     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1452   }
1453   return dst;
1454 }
1455 
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1456 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1457                                const LogicVRegister& src, int shift) {
1458   DCHECK_GE(shift, 0);
1459   SimVRegister temp;
1460   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1461   return ushl(vform, dst, src, shiftreg);
1462 }
1463 
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1464 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1465                                const LogicVRegister& src, int shift) {
1466   DCHECK_GE(shift, 0);
1467   SimVRegister temp;
1468   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1469   return sshl(vform, dst, src, shiftreg);
1470 }
1471 
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1472 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1473                                const LogicVRegister& src, int shift) {
1474   SimVRegister temp;
1475   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1476   return add(vform, dst, dst, shifted_reg);
1477 }
1478 
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1479 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1480                                const LogicVRegister& src, int shift) {
1481   SimVRegister temp;
1482   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1483   return add(vform, dst, dst, shifted_reg);
1484 }
1485 
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1487                                 const LogicVRegister& src, int shift) {
1488   SimVRegister temp;
1489   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1490   return add(vform, dst, dst, shifted_reg);
1491 }
1492 
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1493 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1494                                 const LogicVRegister& src, int shift) {
1495   SimVRegister temp;
1496   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1497   return add(vform, dst, dst, shifted_reg);
1498 }
1499 
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1500 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1501                               const LogicVRegister& src) {
1502   uint64_t result[16];
1503   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1504   int laneCount = LaneCountFromFormat(vform);
1505   for (int i = 0; i < laneCount; i++) {
1506     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1507   }
1508 
1509   dst.SetUintArray(vform, result);
1510   return dst;
1511 }
1512 
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1513 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1514                               const LogicVRegister& src) {
1515   uint64_t result[16];
1516   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1517   int laneCount = LaneCountFromFormat(vform);
1518   for (int i = 0; i < laneCount; i++) {
1519     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1520   }
1521 
1522   dst.SetUintArray(vform, result);
1523   return dst;
1524 }
1525 
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1526 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1527                               const LogicVRegister& src) {
1528   uint64_t result[16];
1529   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1530   int laneCount = LaneCountFromFormat(vform);
1531   for (int i = 0; i < laneCount; i++) {
1532     uint64_t value = src.Uint(vform, i);
1533     result[i] = 0;
1534     for (int j = 0; j < laneSizeInBits; j++) {
1535       result[i] += (value & 1);
1536       value >>= 1;
1537     }
1538   }
1539 
1540   dst.SetUintArray(vform, result);
1541   return dst;
1542 }
1543 
// Signed per-lane shift of |src1| by the amounts held in |src2|.
//
// For each lane the shift amount is the lane of |src2| truncated to a signed
// 8-bit value: a positive amount shifts left, a negative amount shifts right
// (arithmetically, i.e. replicating the sign bit). Besides writing the shifted
// value to |dst|, this records per-lane signed/unsigned saturation state and
// rounding state on |dst|; helpers such as SignedSaturate(), UnsignedSaturate()
// and Round() are chained onto the result by callers in this file.
LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Only the low byte of the lane is used as the (signed) shift amount.
    int8_t shift_val = src2.Int(vform, i);
    // The left-justified value is only used for the saturation tests below,
    // which are all done at 64-bit width.
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state.
    // A non-zero value saturates when it is shifted left by more than its
    // count of leading sign bits; the flag argument is true for non-negative
    // sources.
    if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
        (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state.
    // Negative sources always record unsigned saturation with a false flag;
    // non-negative, non-zero sources record it with a true flag when shifted
    // left by more than their count of leading zeros.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    bool src_is_negative = src_val < 0;
    if (shift_val > 63) {
      // Left shift by 64 or more: every bit is shifted out.
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      // Right shift by 64 or more: only copies of the sign bit remain, and
      // the rounding flag is set exactly when the source is negative.
      dst.SetRounding(i, src_is_negative);
      dst.SetInt(vform, i, src_is_negative ? -1 : 0);
    } else {
      // Use unsigned types for shifts, as behaviour is undefined for signed
      // lhs.
      uint64_t usrc_val = static_cast<uint64_t>(src_val);

      if (shift_val < 0) {
        // Convert to right shift.
        // shift_val is in [-63, -1] here, so the negation is in [1, 63].
        shift_val = -shift_val;

        // Set rounding state by testing most-significant bit shifted out.
        // Rounding only needed on right shifts.
        if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        usrc_val >>= shift_val;

        if (src_is_negative) {
          // Simulate sign-extension.
          usrc_val |= (~UINT64_C(0) << (64 - shift_val));
        }
      } else {
        usrc_val <<= shift_val;
      }
      dst.SetUint(vform, i, usrc_val);
    }
  }
  return dst;
}
1602 
// Unsigned per-lane shift of |src1| by the amounts held in |src2|.
//
// For each lane the shift amount is the lane of |src2| truncated to a signed
// 8-bit value: a positive amount shifts left, a negative amount shifts right
// (logically, filling with zeros). Besides writing the shifted value to |dst|,
// this records per-lane unsigned saturation state and rounding state on |dst|;
// helpers such as UnsignedSaturate() and Round() are chained onto the result
// by callers in this file.
LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Only the low byte of the lane is used as the (signed) shift amount.
    int8_t shift_val = src2.Int(vform, i);
    // The left-justified value is only used for the saturation test below,
    // which is done at 64-bit width.
    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

    // Set saturation state.
    // A non-zero value saturates when it is shifted left by more than its
    // count of leading zeros.
    if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    uint64_t src_val = src1.Uint(vform, i);
    if ((shift_val > 63) || (shift_val < -64)) {
      // Shifts this large move every bit out of the lane; no rounding state
      // is recorded for them.
      dst.SetUint(vform, i, 0);
    } else {
      if (shift_val < 0) {
        // Set rounding state. Rounding only needed on right shifts.
        // The bit tested is the most significant one shifted out; the shift
        // distance here is at most 63.
        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        if (shift_val == -64) {
          // A 64-bit shift by 64 is undefined in C++, so produce the zero
          // result directly.
          src_val = 0;
        } else {
          src_val >>= -shift_val;
        }
      } else {
        src_val <<= shift_val;
      }
      dst.SetUint(vform, i, src_val);
    }
  }
  return dst;
}
1639 
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1640 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1641                               const LogicVRegister& src) {
1642   dst.ClearForWrite(vform);
1643   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1644     // Test for signed saturation.
1645     int64_t sa = src.Int(vform, i);
1646     if (sa == MinIntFromFormat(vform)) {
1647       dst.SetSignedSat(i, true);
1648     }
1649     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1650   }
1651   return dst;
1652 }
1653 
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1654 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1655                                  const LogicVRegister& src) {
1656   dst.ClearForWrite(vform);
1657   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1658     int64_t sa = dst.IntLeftJustified(vform, i);
1659     uint64_t ub = src.UintLeftJustified(vform, i);
1660     uint64_t ur = sa + ub;
1661 
1662     int64_t sr = bit_cast<int64_t>(ur);
1663     if (sr < sa) {  // Test for signed positive saturation.
1664       dst.SetInt(vform, i, MaxIntFromFormat(vform));
1665     } else {
1666       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1667     }
1668   }
1669   return dst;
1670 }
1671 
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1672 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1673                                  const LogicVRegister& src) {
1674   dst.ClearForWrite(vform);
1675   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1676     uint64_t ua = dst.UintLeftJustified(vform, i);
1677     int64_t sb = src.IntLeftJustified(vform, i);
1678     uint64_t ur = ua + sb;
1679 
1680     if ((sb > 0) && (ur <= ua)) {
1681       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
1682     } else if ((sb < 0) && (ur >= ua)) {
1683       dst.SetUint(vform, i, 0);  // Negative saturation.
1684     } else {
1685       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1686     }
1687   }
1688   return dst;
1689 }
1690 
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1691 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1692                               const LogicVRegister& src) {
1693   dst.ClearForWrite(vform);
1694   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1695     // Test for signed saturation.
1696     int64_t sa = src.Int(vform, i);
1697     if (sa == MinIntFromFormat(vform)) {
1698       dst.SetSignedSat(i, true);
1699     }
1700     if (sa < 0) {
1701       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1702     } else {
1703       dst.SetInt(vform, i, sa);
1704     }
1705   }
1706   return dst;
1707 }
1708 
// Narrows each lane of |src| to half its width and writes the results to
// |dst|, recording per-lane saturation state along the way.
//
// |dstform| is the narrow destination format; the source format (with lanes
// twice as wide) is derived from it. The 16B/8H/4S destination formats select
// the "upper half" variant: results are written to the upper half of the
// destination lanes and |dst| is not cleared first, so its lower half is left
// as it was.
//
// |srcIsSigned| selects whether the source lanes are interpreted as signed or
// unsigned for the unsigned-saturation test and for the truncation;
// |dstIsSigned| selects whether the truncated value is stored with SetInt or
// SetUint. The stored value is always the plain truncation; callers in this
// file chain SignedSaturate()/UnsignedSaturate() onto the result when they
// want saturating behaviour.
LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
                                        LogicVRegister dst, bool dstIsSigned,
                                        const LogicVRegister& src,
                                        bool srcIsSigned) {
  bool upperhalf = false;
  VectorFormat srcform = kFormatUndefined;
  // At most 8 source lanes are read (the widest source format used is 8H).
  int64_t ssrc[8];
  uint64_t usrc[8];

  // Map the destination format to the source format and to whether the
  // results go to the upper half of the destination.
  switch (dstform) {
    case kFormat8B:
      upperhalf = false;
      srcform = kFormat8H;
      break;
    case kFormat16B:
      upperhalf = true;
      srcform = kFormat8H;
      break;
    case kFormat4H:
      upperhalf = false;
      srcform = kFormat4S;
      break;
    case kFormat8H:
      upperhalf = true;
      srcform = kFormat4S;
      break;
    case kFormat2S:
      upperhalf = false;
      srcform = kFormat2D;
      break;
    case kFormat4S:
      upperhalf = true;
      srcform = kFormat2D;
      break;
    case kFormatB:
      upperhalf = false;
      srcform = kFormatH;
      break;
    case kFormatH:
      upperhalf = false;
      srcform = kFormatS;
      break;
    case kFormatS:
      upperhalf = false;
      srcform = kFormatD;
      break;
    default:
      UNIMPLEMENTED();
  }

  // Snapshot the source lanes (both interpretations) before anything is
  // written to dst.
  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    ssrc[i] = src.Int(srcform, i);
    usrc[i] = src.Uint(srcform, i);
  }

  // Index of the first destination lane to write.
  int offset;
  if (upperhalf) {
    offset = LaneCountFromFormat(dstform) / 2;
  } else {
    offset = 0;
    dst.ClearForWrite(dstform);
  }

  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    // Test for signed saturation
    // The flag argument is true when the value is above the signed range of
    // the destination and false when it is below it.
    if (ssrc[i] > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, true);
    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, false);
    }

    // Test for unsigned saturation
    // Signed sources can exceed the unsigned range in either direction;
    // unsigned sources can only exceed it upwards.
    if (srcIsSigned) {
      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(offset + i, true);
      } else if (ssrc[i] < 0) {
        dst.SetUnsignedSat(offset + i, false);
      }
    } else {
      if (usrc[i] > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(offset + i, true);
      }
    }

    // Truncate to the destination lane width.
    int64_t result;
    if (srcIsSigned) {
      result = ssrc[i] & MaxUintFromFormat(dstform);
    } else {
      result = usrc[i] & MaxUintFromFormat(dstform);
    }

    if (dstIsSigned) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
    }
  }
  return dst;
}
1808 
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1810                               const LogicVRegister& src) {
1811   return ExtractNarrow(vform, dst, true, src, true);
1812 }
1813 
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1814 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1815                                 const LogicVRegister& src) {
1816   return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1817 }
1818 
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1819 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1820                                  const LogicVRegister& src) {
1821   return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1822 }
1823 
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1824 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1825                                 const LogicVRegister& src) {
1826   return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1827 }
1828 
AbsDiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)1829 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1830                                   const LogicVRegister& src1,
1831                                   const LogicVRegister& src2, bool issigned) {
1832   dst.ClearForWrite(vform);
1833   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1834     if (issigned) {
1835       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1836       sr = sr > 0 ? sr : -sr;
1837       dst.SetInt(vform, i, sr);
1838     } else {
1839       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1840       sr = sr > 0 ? sr : -sr;
1841       dst.SetUint(vform, i, sr);
1842     }
1843   }
1844   return dst;
1845 }
1846 
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1847 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1848                                const LogicVRegister& src1,
1849                                const LogicVRegister& src2) {
1850   SimVRegister temp;
1851   dst.ClearForWrite(vform);
1852   AbsDiff(vform, temp, src1, src2, true);
1853   add(vform, dst, dst, temp);
1854   return dst;
1855 }
1856 
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1857 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1858                                const LogicVRegister& src1,
1859                                const LogicVRegister& src2) {
1860   SimVRegister temp;
1861   dst.ClearForWrite(vform);
1862   AbsDiff(vform, temp, src1, src2, false);
1863   add(vform, dst, dst, temp);
1864   return dst;
1865 }
1866 
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1867 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1868                                const LogicVRegister& src) {
1869   dst.ClearForWrite(vform);
1870   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1871     dst.SetUint(vform, i, ~src.Uint(vform, i));
1872   }
1873   return dst;
1874 }
1875 
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1876 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1877                                const LogicVRegister& src) {
1878   uint64_t result[16];
1879   int laneCount = LaneCountFromFormat(vform);
1880   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1881   uint64_t reversed_value;
1882   uint64_t value;
1883   for (int i = 0; i < laneCount; i++) {
1884     value = src.Uint(vform, i);
1885     reversed_value = 0;
1886     for (int j = 0; j < laneSizeInBits; j++) {
1887       reversed_value = (reversed_value << 1) | (value & 1);
1888       value >>= 1;
1889     }
1890     result[i] = reversed_value;
1891   }
1892 
1893   dst.SetUintArray(vform, result);
1894   return dst;
1895 }
1896 
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)1897 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1898                               const LogicVRegister& src, int revSize) {
1899   uint64_t result[16];
1900   int laneCount = LaneCountFromFormat(vform);
1901   int laneSize = LaneSizeInBytesFromFormat(vform);
1902   int lanesPerLoop = revSize / laneSize;
1903   for (int i = 0; i < laneCount; i += lanesPerLoop) {
1904     for (int j = 0; j < lanesPerLoop; j++) {
1905       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1906     }
1907   }
1908   dst.SetUintArray(vform, result);
1909   return dst;
1910 }
1911 
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1912 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1913                                 const LogicVRegister& src) {
1914   return rev(vform, dst, src, 2);
1915 }
1916 
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1917 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1918                                 const LogicVRegister& src) {
1919   return rev(vform, dst, src, 4);
1920 }
1921 
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1922 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1923                                 const LogicVRegister& src) {
1924   return rev(vform, dst, src, 8);
1925 }
1926 
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)1927 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1928                                 const LogicVRegister& src, bool is_signed,
1929                                 bool do_accumulate) {
1930   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1931   DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1932   DCHECK_LE(LaneCountFromFormat(vform), 8);
1933 
1934   uint64_t result[8];
1935   int lane_count = LaneCountFromFormat(vform);
1936   for (int i = 0; i < lane_count; i++) {
1937     if (is_signed) {
1938       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1939                                         src.Int(vformsrc, 2 * i + 1));
1940     } else {
1941       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1942     }
1943   }
1944 
1945   dst.ClearForWrite(vform);
1946   for (int i = 0; i < lane_count; ++i) {
1947     if (do_accumulate) {
1948       result[i] += dst.Uint(vform, i);
1949     }
1950     dst.SetUint(vform, i, result[i]);
1951   }
1952 
1953   return dst;
1954 }
1955 
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1956 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1957                                  const LogicVRegister& src) {
1958   return addlp(vform, dst, src, true, false);
1959 }
1960 
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1961 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1962                                  const LogicVRegister& src) {
1963   return addlp(vform, dst, src, false, false);
1964 }
1965 
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1966 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1967                                  const LogicVRegister& src) {
1968   return addlp(vform, dst, src, true, true);
1969 }
1970 
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1971 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1972                                  const LogicVRegister& src) {
1973   return addlp(vform, dst, src, false, true);
1974 }
1975 
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1976 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1977                               const LogicVRegister& src1,
1978                               const LogicVRegister& src2, int index) {
1979   uint8_t result[16];
1980   int laneCount = LaneCountFromFormat(vform);
1981   for (int i = 0; i < laneCount - index; ++i) {
1982     result[i] = src1.Uint(vform, i + index);
1983   }
1984   for (int i = 0; i < index; ++i) {
1985     result[laneCount - index + i] = src2.Uint(vform, i);
1986   }
1987   dst.ClearForWrite(vform);
1988   for (int i = 0; i < laneCount; ++i) {
1989     dst.SetUint(vform, i, result[i]);
1990   }
1991   return dst;
1992 }
1993 
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)1994 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1995                                       const LogicVRegister& src,
1996                                       int src_index) {
1997   int laneCount = LaneCountFromFormat(vform);
1998   uint64_t value = src.Uint(vform, src_index);
1999   dst.ClearForWrite(vform);
2000   for (int i = 0; i < laneCount; ++i) {
2001     dst.SetUint(vform, i, value);
2002   }
2003   return dst;
2004 }
2005 
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2006 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2007                                         uint64_t imm) {
2008   int laneCount = LaneCountFromFormat(vform);
2009   uint64_t value = imm & MaxUintFromFormat(vform);
2010   dst.ClearForWrite(vform);
2011   for (int i = 0; i < laneCount; ++i) {
2012     dst.SetUint(vform, i, value);
2013   }
2014   return dst;
2015 }
2016 
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2017 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2018                                       int dst_index, const LogicVRegister& src,
2019                                       int src_index) {
2020   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2021   return dst;
2022 }
2023 
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2024 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2025                                         int dst_index, uint64_t imm) {
2026   uint64_t value = imm & MaxUintFromFormat(vform);
2027   dst.SetUint(vform, dst_index, value);
2028   return dst;
2029 }
2030 
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2031 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2032                                uint64_t imm) {
2033   int laneCount = LaneCountFromFormat(vform);
2034   dst.ClearForWrite(vform);
2035   for (int i = 0; i < laneCount; ++i) {
2036     dst.SetUint(vform, i, imm);
2037   }
2038   return dst;
2039 }
2040 
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2041 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2042                                uint64_t imm) {
2043   int laneCount = LaneCountFromFormat(vform);
2044   dst.ClearForWrite(vform);
2045   for (int i = 0; i < laneCount; ++i) {
2046     dst.SetUint(vform, i, ~imm);
2047   }
2048   return dst;
2049 }
2050 
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2051 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2052                               const LogicVRegister& src, uint64_t imm) {
2053   uint64_t result[16];
2054   int laneCount = LaneCountFromFormat(vform);
2055   for (int i = 0; i < laneCount; ++i) {
2056     result[i] = src.Uint(vform, i) | imm;
2057   }
2058   dst.SetUintArray(vform, result);
2059   return dst;
2060 }
2061 
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2062 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2063                                const LogicVRegister& src) {
2064   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2065 
2066   dst.ClearForWrite(vform);
2067   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2068     dst.SetUint(vform, i, src.Uint(vform_half, i));
2069   }
2070   return dst;
2071 }
2072 
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2073 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2074                                const LogicVRegister& src) {
2075   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2076 
2077   dst.ClearForWrite(vform);
2078   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2079     dst.SetInt(vform, i, src.Int(vform_half, i));
2080   }
2081   return dst;
2082 }
2083 
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2084 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2085                                 const LogicVRegister& src) {
2086   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2087   int lane_count = LaneCountFromFormat(vform);
2088 
2089   dst.ClearForWrite(vform);
2090   for (int i = 0; i < lane_count; i++) {
2091     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2092   }
2093   return dst;
2094 }
2095 
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2096 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2097                                 const LogicVRegister& src) {
2098   VectorFormat vform_half = VectorFormatHalfWidth(vform);
2099   int lane_count = LaneCountFromFormat(vform);
2100 
2101   dst.ClearForWrite(vform);
2102   for (int i = 0; i < lane_count; i++) {
2103     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2104   }
2105   return dst;
2106 }
2107 
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2108 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2109                                const LogicVRegister& src, int shift) {
2110   SimVRegister temp;
2111   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2112   VectorFormat vform_dst = vform;
2113   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2114   return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2115 }
2116 
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2117 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2118                                 const LogicVRegister& src, int shift) {
2119   SimVRegister temp;
2120   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2121   VectorFormat vformdst = vform;
2122   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2123   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2124 }
2125 
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2126 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2127                                 const LogicVRegister& src, int shift) {
2128   SimVRegister temp;
2129   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2130   VectorFormat vformdst = vform;
2131   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2132   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2133 }
2134 
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2135 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2136                                  const LogicVRegister& src, int shift) {
2137   SimVRegister temp;
2138   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2139   VectorFormat vformdst = vform;
2140   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2141   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2142 }
2143 
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2144 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2145                                 const LogicVRegister& ind,
2146                                 bool zero_out_of_bounds,
2147                                 const LogicVRegister* tab1,
2148                                 const LogicVRegister* tab2,
2149                                 const LogicVRegister* tab3,
2150                                 const LogicVRegister* tab4) {
2151   DCHECK_NOT_NULL(tab1);
2152   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2153   uint64_t result[kMaxLanesPerVector];
2154   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2155     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2156   }
2157   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2158     uint64_t j = ind.Uint(vform, i);
2159     int tab_idx = static_cast<int>(j >> 4);
2160     int j_idx = static_cast<int>(j & 15);
2161     if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2162       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2163     }
2164   }
2165   dst.SetUintArray(vform, result);
2166   return dst;
2167 }
2168 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2169 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2170                               const LogicVRegister& tab,
2171                               const LogicVRegister& ind) {
2172   return Table(vform, dst, ind, true, &tab);
2173 }
2174 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2175 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2176                               const LogicVRegister& tab,
2177                               const LogicVRegister& tab2,
2178                               const LogicVRegister& ind) {
2179   return Table(vform, dst, ind, true, &tab, &tab2);
2180 }
2181 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2183                               const LogicVRegister& tab,
2184                               const LogicVRegister& tab2,
2185                               const LogicVRegister& tab3,
2186                               const LogicVRegister& ind) {
2187   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2188 }
2189 
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2191                               const LogicVRegister& tab,
2192                               const LogicVRegister& tab2,
2193                               const LogicVRegister& tab3,
2194                               const LogicVRegister& tab4,
2195                               const LogicVRegister& ind) {
2196   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2197 }
2198 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2199 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2200                               const LogicVRegister& tab,
2201                               const LogicVRegister& ind) {
2202   return Table(vform, dst, ind, false, &tab);
2203 }
2204 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2205 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2206                               const LogicVRegister& tab,
2207                               const LogicVRegister& tab2,
2208                               const LogicVRegister& ind) {
2209   return Table(vform, dst, ind, false, &tab, &tab2);
2210 }
2211 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2213                               const LogicVRegister& tab,
2214                               const LogicVRegister& tab2,
2215                               const LogicVRegister& tab3,
2216                               const LogicVRegister& ind) {
2217   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2218 }
2219 
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2221                               const LogicVRegister& tab,
2222                               const LogicVRegister& tab2,
2223                               const LogicVRegister& tab3,
2224                               const LogicVRegister& tab4,
2225                               const LogicVRegister& ind) {
2226   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2227 }
2228 
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2229 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2230                                  const LogicVRegister& src, int shift) {
2231   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2232 }
2233 
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2234 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2235                                   const LogicVRegister& src, int shift) {
2236   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2237 }
2238 
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2239 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2240                                   const LogicVRegister& src, int shift) {
2241   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2242 }
2243 
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2244 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2245                                    const LogicVRegister& src, int shift) {
2246   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2247 }
2248 
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2249 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2250                                  const LogicVRegister& src, int shift) {
2251   SimVRegister temp;
2252   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2253   VectorFormat vformdst = vform;
2254   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2255   return sqxtn(vformdst, dst, shifted_src);
2256 }
2257 
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2258 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2259                                   const LogicVRegister& src, int shift) {
2260   SimVRegister temp;
2261   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2262   VectorFormat vformdst = vform;
2263   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2264   return sqxtn(vformdst, dst, shifted_src);
2265 }
2266 
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2267 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2268                                   const LogicVRegister& src, int shift) {
2269   SimVRegister temp;
2270   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2271   VectorFormat vformdst = vform;
2272   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2273   return sqxtn(vformdst, dst, shifted_src);
2274 }
2275 
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2276 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2277                                    const LogicVRegister& src, int shift) {
2278   SimVRegister temp;
2279   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2280   VectorFormat vformdst = vform;
2281   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2282   return sqxtn(vformdst, dst, shifted_src);
2283 }
2284 
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2285 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2286                                   const LogicVRegister& src, int shift) {
2287   SimVRegister temp;
2288   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2289   VectorFormat vformdst = vform;
2290   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2291   return sqxtun(vformdst, dst, shifted_src);
2292 }
2293 
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2294 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2295                                    const LogicVRegister& src, int shift) {
2296   SimVRegister temp;
2297   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2298   VectorFormat vformdst = vform;
2299   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2300   return sqxtun(vformdst, dst, shifted_src);
2301 }
2302 
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2303 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2304                                    const LogicVRegister& src, int shift) {
2305   SimVRegister temp;
2306   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2307   VectorFormat vformdst = vform;
2308   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2309   return sqxtun(vformdst, dst, shifted_src);
2310 }
2311 
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2312 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2313                                     const LogicVRegister& src, int shift) {
2314   SimVRegister temp;
2315   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2316   VectorFormat vformdst = vform;
2317   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2318   return sqxtun(vformdst, dst, shifted_src);
2319 }
2320 
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2321 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2322                                 const LogicVRegister& src1,
2323                                 const LogicVRegister& src2) {
2324   SimVRegister temp1, temp2;
2325   uxtl(vform, temp1, src1);
2326   uxtl(vform, temp2, src2);
2327   add(vform, dst, temp1, temp2);
2328   return dst;
2329 }
2330 
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2331 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2332                                  const LogicVRegister& src1,
2333                                  const LogicVRegister& src2) {
2334   SimVRegister temp1, temp2;
2335   uxtl2(vform, temp1, src1);
2336   uxtl2(vform, temp2, src2);
2337   add(vform, dst, temp1, temp2);
2338   return dst;
2339 }
2340 
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2341 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2342                                 const LogicVRegister& src1,
2343                                 const LogicVRegister& src2) {
2344   SimVRegister temp;
2345   uxtl(vform, temp, src2);
2346   add(vform, dst, src1, temp);
2347   return dst;
2348 }
2349 
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2350 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2351                                  const LogicVRegister& src1,
2352                                  const LogicVRegister& src2) {
2353   SimVRegister temp;
2354   uxtl2(vform, temp, src2);
2355   add(vform, dst, src1, temp);
2356   return dst;
2357 }
2358 
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2359 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2360                                 const LogicVRegister& src1,
2361                                 const LogicVRegister& src2) {
2362   SimVRegister temp1, temp2;
2363   sxtl(vform, temp1, src1);
2364   sxtl(vform, temp2, src2);
2365   add(vform, dst, temp1, temp2);
2366   return dst;
2367 }
2368 
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2369 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2370                                  const LogicVRegister& src1,
2371                                  const LogicVRegister& src2) {
2372   SimVRegister temp1, temp2;
2373   sxtl2(vform, temp1, src1);
2374   sxtl2(vform, temp2, src2);
2375   add(vform, dst, temp1, temp2);
2376   return dst;
2377 }
2378 
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2379 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2380                                 const LogicVRegister& src1,
2381                                 const LogicVRegister& src2) {
2382   SimVRegister temp;
2383   sxtl(vform, temp, src2);
2384   add(vform, dst, src1, temp);
2385   return dst;
2386 }
2387 
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2388 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2389                                  const LogicVRegister& src1,
2390                                  const LogicVRegister& src2) {
2391   SimVRegister temp;
2392   sxtl2(vform, temp, src2);
2393   add(vform, dst, src1, temp);
2394   return dst;
2395 }
2396 
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2397 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2398                                 const LogicVRegister& src1,
2399                                 const LogicVRegister& src2) {
2400   SimVRegister temp1, temp2;
2401   uxtl(vform, temp1, src1);
2402   uxtl(vform, temp2, src2);
2403   sub(vform, dst, temp1, temp2);
2404   return dst;
2405 }
2406 
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2407 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2408                                  const LogicVRegister& src1,
2409                                  const LogicVRegister& src2) {
2410   SimVRegister temp1, temp2;
2411   uxtl2(vform, temp1, src1);
2412   uxtl2(vform, temp2, src2);
2413   sub(vform, dst, temp1, temp2);
2414   return dst;
2415 }
2416 
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2417 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2418                                 const LogicVRegister& src1,
2419                                 const LogicVRegister& src2) {
2420   SimVRegister temp;
2421   uxtl(vform, temp, src2);
2422   sub(vform, dst, src1, temp);
2423   return dst;
2424 }
2425 
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2426 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2427                                  const LogicVRegister& src1,
2428                                  const LogicVRegister& src2) {
2429   SimVRegister temp;
2430   uxtl2(vform, temp, src2);
2431   sub(vform, dst, src1, temp);
2432   return dst;
2433 }
2434 
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2435 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2436                                 const LogicVRegister& src1,
2437                                 const LogicVRegister& src2) {
2438   SimVRegister temp1, temp2;
2439   sxtl(vform, temp1, src1);
2440   sxtl(vform, temp2, src2);
2441   sub(vform, dst, temp1, temp2);
2442   return dst;
2443 }
2444 
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2445 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2446                                  const LogicVRegister& src1,
2447                                  const LogicVRegister& src2) {
2448   SimVRegister temp1, temp2;
2449   sxtl2(vform, temp1, src1);
2450   sxtl2(vform, temp2, src2);
2451   sub(vform, dst, temp1, temp2);
2452   return dst;
2453 }
2454 
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2455 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2456                                 const LogicVRegister& src1,
2457                                 const LogicVRegister& src2) {
2458   SimVRegister temp;
2459   sxtl(vform, temp, src2);
2460   sub(vform, dst, src1, temp);
2461   return dst;
2462 }
2463 
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2464 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2465                                  const LogicVRegister& src1,
2466                                  const LogicVRegister& src2) {
2467   SimVRegister temp;
2468   sxtl2(vform, temp, src2);
2469   sub(vform, dst, src1, temp);
2470   return dst;
2471 }
2472 
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2473 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2474                                 const LogicVRegister& src1,
2475                                 const LogicVRegister& src2) {
2476   SimVRegister temp1, temp2;
2477   uxtl(vform, temp1, src1);
2478   uxtl(vform, temp2, src2);
2479   uaba(vform, dst, temp1, temp2);
2480   return dst;
2481 }
2482 
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2483 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2484                                  const LogicVRegister& src1,
2485                                  const LogicVRegister& src2) {
2486   SimVRegister temp1, temp2;
2487   uxtl2(vform, temp1, src1);
2488   uxtl2(vform, temp2, src2);
2489   uaba(vform, dst, temp1, temp2);
2490   return dst;
2491 }
2492 
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2493 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2494                                 const LogicVRegister& src1,
2495                                 const LogicVRegister& src2) {
2496   SimVRegister temp1, temp2;
2497   sxtl(vform, temp1, src1);
2498   sxtl(vform, temp2, src2);
2499   saba(vform, dst, temp1, temp2);
2500   return dst;
2501 }
2502 
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2503 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2504                                  const LogicVRegister& src1,
2505                                  const LogicVRegister& src2) {
2506   SimVRegister temp1, temp2;
2507   sxtl2(vform, temp1, src1);
2508   sxtl2(vform, temp2, src2);
2509   saba(vform, dst, temp1, temp2);
2510   return dst;
2511 }
2512 
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2513 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2514                                 const LogicVRegister& src1,
2515                                 const LogicVRegister& src2) {
2516   SimVRegister temp1, temp2;
2517   uxtl(vform, temp1, src1);
2518   uxtl(vform, temp2, src2);
2519   AbsDiff(vform, dst, temp1, temp2, false);
2520   return dst;
2521 }
2522 
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2523 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2524                                  const LogicVRegister& src1,
2525                                  const LogicVRegister& src2) {
2526   SimVRegister temp1, temp2;
2527   uxtl2(vform, temp1, src1);
2528   uxtl2(vform, temp2, src2);
2529   AbsDiff(vform, dst, temp1, temp2, false);
2530   return dst;
2531 }
2532 
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2533 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2534                                 const LogicVRegister& src1,
2535                                 const LogicVRegister& src2) {
2536   SimVRegister temp1, temp2;
2537   sxtl(vform, temp1, src1);
2538   sxtl(vform, temp2, src2);
2539   AbsDiff(vform, dst, temp1, temp2, true);
2540   return dst;
2541 }
2542 
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2543 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2544                                  const LogicVRegister& src1,
2545                                  const LogicVRegister& src2) {
2546   SimVRegister temp1, temp2;
2547   sxtl2(vform, temp1, src1);
2548   sxtl2(vform, temp2, src2);
2549   AbsDiff(vform, dst, temp1, temp2, true);
2550   return dst;
2551 }
2552 
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2553 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2554                                 const LogicVRegister& src1,
2555                                 const LogicVRegister& src2) {
2556   SimVRegister temp1, temp2;
2557   uxtl(vform, temp1, src1);
2558   uxtl(vform, temp2, src2);
2559   mul(vform, dst, temp1, temp2);
2560   return dst;
2561 }
2562 
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2563 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2564                                  const LogicVRegister& src1,
2565                                  const LogicVRegister& src2) {
2566   SimVRegister temp1, temp2;
2567   uxtl2(vform, temp1, src1);
2568   uxtl2(vform, temp2, src2);
2569   mul(vform, dst, temp1, temp2);
2570   return dst;
2571 }
2572 
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2573 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2574                                 const LogicVRegister& src1,
2575                                 const LogicVRegister& src2) {
2576   SimVRegister temp1, temp2;
2577   sxtl(vform, temp1, src1);
2578   sxtl(vform, temp2, src2);
2579   mul(vform, dst, temp1, temp2);
2580   return dst;
2581 }
2582 
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2583 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2584                                  const LogicVRegister& src1,
2585                                  const LogicVRegister& src2) {
2586   SimVRegister temp1, temp2;
2587   sxtl2(vform, temp1, src1);
2588   sxtl2(vform, temp2, src2);
2589   mul(vform, dst, temp1, temp2);
2590   return dst;
2591 }
2592 
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2593 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2594                                 const LogicVRegister& src1,
2595                                 const LogicVRegister& src2) {
2596   SimVRegister temp1, temp2;
2597   uxtl(vform, temp1, src1);
2598   uxtl(vform, temp2, src2);
2599   mls(vform, dst, temp1, temp2);
2600   return dst;
2601 }
2602 
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2603 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2604                                  const LogicVRegister& src1,
2605                                  const LogicVRegister& src2) {
2606   SimVRegister temp1, temp2;
2607   uxtl2(vform, temp1, src1);
2608   uxtl2(vform, temp2, src2);
2609   mls(vform, dst, temp1, temp2);
2610   return dst;
2611 }
2612 
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2613 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2614                                 const LogicVRegister& src1,
2615                                 const LogicVRegister& src2) {
2616   SimVRegister temp1, temp2;
2617   sxtl(vform, temp1, src1);
2618   sxtl(vform, temp2, src2);
2619   mls(vform, dst, temp1, temp2);
2620   return dst;
2621 }
2622 
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2623 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2624                                  const LogicVRegister& src1,
2625                                  const LogicVRegister& src2) {
2626   SimVRegister temp1, temp2;
2627   sxtl2(vform, temp1, src1);
2628   sxtl2(vform, temp2, src2);
2629   mls(vform, dst, temp1, temp2);
2630   return dst;
2631 }
2632 
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2633 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2634                                 const LogicVRegister& src1,
2635                                 const LogicVRegister& src2) {
2636   SimVRegister temp1, temp2;
2637   uxtl(vform, temp1, src1);
2638   uxtl(vform, temp2, src2);
2639   mla(vform, dst, temp1, temp2);
2640   return dst;
2641 }
2642 
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2643 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2644                                  const LogicVRegister& src1,
2645                                  const LogicVRegister& src2) {
2646   SimVRegister temp1, temp2;
2647   uxtl2(vform, temp1, src1);
2648   uxtl2(vform, temp2, src2);
2649   mla(vform, dst, temp1, temp2);
2650   return dst;
2651 }
2652 
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2653 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2654                                 const LogicVRegister& src1,
2655                                 const LogicVRegister& src2) {
2656   SimVRegister temp1, temp2;
2657   sxtl(vform, temp1, src1);
2658   sxtl(vform, temp2, src2);
2659   mla(vform, dst, temp1, temp2);
2660   return dst;
2661 }
2662 
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2663 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2664                                  const LogicVRegister& src1,
2665                                  const LogicVRegister& src2) {
2666   SimVRegister temp1, temp2;
2667   sxtl2(vform, temp1, src1);
2668   sxtl2(vform, temp2, src2);
2669   mla(vform, dst, temp1, temp2);
2670   return dst;
2671 }
2672 
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2673 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2674                                   const LogicVRegister& src1,
2675                                   const LogicVRegister& src2) {
2676   SimVRegister temp;
2677   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2678   return add(vform, dst, dst, product).SignedSaturate(vform);
2679 }
2680 
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2681 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2682                                    const LogicVRegister& src1,
2683                                    const LogicVRegister& src2) {
2684   SimVRegister temp;
2685   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2686   return add(vform, dst, dst, product).SignedSaturate(vform);
2687 }
2688 
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2689 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2690                                   const LogicVRegister& src1,
2691                                   const LogicVRegister& src2) {
2692   SimVRegister temp;
2693   LogicVRegister product = sqdmull(vform, temp, src1, src2);
2694   return sub(vform, dst, dst, product).SignedSaturate(vform);
2695 }
2696 
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2697 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2698                                    const LogicVRegister& src1,
2699                                    const LogicVRegister& src2) {
2700   SimVRegister temp;
2701   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2702   return sub(vform, dst, dst, product).SignedSaturate(vform);
2703 }
2704 
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2705 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2706                                   const LogicVRegister& src1,
2707                                   const LogicVRegister& src2) {
2708   SimVRegister temp;
2709   LogicVRegister product = smull(vform, temp, src1, src2);
2710   return add(vform, dst, product, product).SignedSaturate(vform);
2711 }
2712 
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2713 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2714                                    const LogicVRegister& src1,
2715                                    const LogicVRegister& src2) {
2716   SimVRegister temp;
2717   LogicVRegister product = smull2(vform, temp, src1, src2);
2718   return add(vform, dst, product, product).SignedSaturate(vform);
2719 }
2720 
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)2721 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2722                                    const LogicVRegister& src1,
2723                                    const LogicVRegister& src2, bool round) {
2724   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2725   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2726   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2727 
2728   int esize = LaneSizeInBitsFromFormat(vform);
2729   int round_const = round ? (1 << (esize - 2)) : 0;
2730   int64_t product;
2731 
2732   dst.ClearForWrite(vform);
2733   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2734     product = src1.Int(vform, i) * src2.Int(vform, i);
2735     product += round_const;
2736     product = product >> (esize - 1);
2737 
2738     if (product > MaxIntFromFormat(vform)) {
2739       product = MaxIntFromFormat(vform);
2740     } else if (product < MinIntFromFormat(vform)) {
2741       product = MinIntFromFormat(vform);
2742     }
2743     dst.SetInt(vform, i, product);
2744   }
2745   return dst;
2746 }
2747 
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2748 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2749                                   const LogicVRegister& src1,
2750                                   const LogicVRegister& src2) {
2751   return sqrdmulh(vform, dst, src1, src2, false);
2752 }
2753 
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2754 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2755                                 const LogicVRegister& src1,
2756                                 const LogicVRegister& src2) {
2757   SimVRegister temp;
2758   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2759   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2760   return dst;
2761 }
2762 
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2763 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2764                                  const LogicVRegister& src1,
2765                                  const LogicVRegister& src2) {
2766   SimVRegister temp;
2767   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2768   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2769   return dst;
2770 }
2771 
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2772 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2773                                  const LogicVRegister& src1,
2774                                  const LogicVRegister& src2) {
2775   SimVRegister temp;
2776   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2777   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2778   return dst;
2779 }
2780 
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2781 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2782                                   const LogicVRegister& src1,
2783                                   const LogicVRegister& src2) {
2784   SimVRegister temp;
2785   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2786   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2787   return dst;
2788 }
2789 
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2790 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2791                                 const LogicVRegister& src1,
2792                                 const LogicVRegister& src2) {
2793   SimVRegister temp;
2794   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2795   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2796   return dst;
2797 }
2798 
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2799 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2800                                  const LogicVRegister& src1,
2801                                  const LogicVRegister& src2) {
2802   SimVRegister temp;
2803   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2804   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2805   return dst;
2806 }
2807 
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2808 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2809                                  const LogicVRegister& src1,
2810                                  const LogicVRegister& src2) {
2811   SimVRegister temp;
2812   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2813   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2814   return dst;
2815 }
2816 
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2817 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2818                                   const LogicVRegister& src1,
2819                                   const LogicVRegister& src2) {
2820   SimVRegister temp;
2821   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2822   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2823   return dst;
2824 }
2825 
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2826 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2827                                const LogicVRegister& src1,
2828                                const LogicVRegister& src2) {
2829   uint64_t result[16];
2830   int laneCount = LaneCountFromFormat(vform);
2831   int pairs = laneCount / 2;
2832   for (int i = 0; i < pairs; ++i) {
2833     result[2 * i] = src1.Uint(vform, 2 * i);
2834     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2835   }
2836 
2837   dst.SetUintArray(vform, result);
2838   return dst;
2839 }
2840 
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2841 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2842                                const LogicVRegister& src1,
2843                                const LogicVRegister& src2) {
2844   uint64_t result[16];
2845   int laneCount = LaneCountFromFormat(vform);
2846   int pairs = laneCount / 2;
2847   for (int i = 0; i < pairs; ++i) {
2848     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2849     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2850   }
2851 
2852   dst.SetUintArray(vform, result);
2853   return dst;
2854 }
2855 
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2856 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2857                                const LogicVRegister& src1,
2858                                const LogicVRegister& src2) {
2859   uint64_t result[16];
2860   int laneCount = LaneCountFromFormat(vform);
2861   int pairs = laneCount / 2;
2862   for (int i = 0; i < pairs; ++i) {
2863     result[2 * i] = src1.Uint(vform, i);
2864     result[(2 * i) + 1] = src2.Uint(vform, i);
2865   }
2866 
2867   dst.SetUintArray(vform, result);
2868   return dst;
2869 }
2870 
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2871 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2872                                const LogicVRegister& src1,
2873                                const LogicVRegister& src2) {
2874   uint64_t result[16];
2875   int laneCount = LaneCountFromFormat(vform);
2876   int pairs = laneCount / 2;
2877   for (int i = 0; i < pairs; ++i) {
2878     result[2 * i] = src1.Uint(vform, pairs + i);
2879     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2880   }
2881 
2882   dst.SetUintArray(vform, result);
2883   return dst;
2884 }
2885 
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2886 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2887                                const LogicVRegister& src1,
2888                                const LogicVRegister& src2) {
2889   uint64_t result[32];
2890   int laneCount = LaneCountFromFormat(vform);
2891   for (int i = 0; i < laneCount; ++i) {
2892     result[i] = src1.Uint(vform, i);
2893     result[laneCount + i] = src2.Uint(vform, i);
2894   }
2895 
2896   dst.ClearForWrite(vform);
2897   for (int i = 0; i < laneCount; ++i) {
2898     dst.SetUint(vform, i, result[2 * i]);
2899   }
2900   return dst;
2901 }
2902 
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2903 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2904                                const LogicVRegister& src1,
2905                                const LogicVRegister& src2) {
2906   uint64_t result[32];
2907   int laneCount = LaneCountFromFormat(vform);
2908   for (int i = 0; i < laneCount; ++i) {
2909     result[i] = src1.Uint(vform, i);
2910     result[laneCount + i] = src2.Uint(vform, i);
2911   }
2912 
2913   dst.ClearForWrite(vform);
2914   for (int i = 0; i < laneCount; ++i) {
2915     dst.SetUint(vform, i, result[(2 * i) + 1]);
2916   }
2917   return dst;
2918 }
2919 
2920 template <typename T>
FPAdd(T op1,T op2)2921 T Simulator::FPAdd(T op1, T op2) {
2922   T result = FPProcessNaNs(op1, op2);
2923   if (std::isnan(result)) return result;
2924 
2925   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2926     // inf + -inf returns the default NaN.
2927     FPProcessException();
2928     return FPDefaultNaN<T>();
2929   } else {
2930     // Other cases should be handled by standard arithmetic.
2931     return op1 + op2;
2932   }
2933 }
2934 
2935 template <typename T>
FPSub(T op1,T op2)2936 T Simulator::FPSub(T op1, T op2) {
2937   // NaNs should be handled elsewhere.
2938   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2939 
2940   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2941     // inf - inf returns the default NaN.
2942     FPProcessException();
2943     return FPDefaultNaN<T>();
2944   } else {
2945     // Other cases should be handled by standard arithmetic.
2946     return op1 - op2;
2947   }
2948 }
2949 
2950 template <typename T>
FPMul(T op1,T op2)2951 T Simulator::FPMul(T op1, T op2) {
2952   // NaNs should be handled elsewhere.
2953   DCHECK(!std::isnan(op1) && !std::isnan(op2));
2954 
2955   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2956     // inf * 0.0 returns the default NaN.
2957     FPProcessException();
2958     return FPDefaultNaN<T>();
2959   } else {
2960     // Other cases should be handled by standard arithmetic.
2961     return op1 * op2;
2962   }
2963 }
2964 
2965 template <typename T>
FPMulx(T op1,T op2)2966 T Simulator::FPMulx(T op1, T op2) {
2967   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2968     // inf * 0.0 returns +/-2.0.
2969     T two = 2.0;
2970     return std::copysign(1.0, op1) * std::copysign(1.0, op2) * two;
2971   }
2972   return FPMul(op1, op2);
2973 }
2974 
2975 template <typename T>
FPMulAdd(T a,T op1,T op2)2976 T Simulator::FPMulAdd(T a, T op1, T op2) {
2977   T result = FPProcessNaNs3(a, op1, op2);
2978 
2979   T sign_a = std::copysign(1.0, a);
2980   T sign_prod = std::copysign(1.0, op1) * std::copysign(1.0, op2);
2981   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2982   bool operation_generates_nan =
2983       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
2984       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
2985       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
2986 
2987   if (std::isnan(result)) {
2988     // Generated NaNs override quiet NaNs propagated from a.
2989     if (operation_generates_nan && IsQuietNaN(a)) {
2990       FPProcessException();
2991       return FPDefaultNaN<T>();
2992     } else {
2993       return result;
2994     }
2995   }
2996 
2997   // If the operation would produce a NaN, return the default NaN.
2998   if (operation_generates_nan) {
2999     FPProcessException();
3000     return FPDefaultNaN<T>();
3001   }
3002 
3003   // Work around broken fma implementations for exact zero results: The sign of
3004   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3005   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3006     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3007   }
3008 
3009   result = FusedMultiplyAdd(op1, op2, a);
3010   DCHECK(!std::isnan(result));
3011 
3012   // Work around broken fma implementations for rounded zero results: If a is
3013   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3014   if ((a == 0.0) && (result == 0.0)) {
3015     return std::copysign(0.0, sign_prod);
3016   }
3017 
3018   return result;
3019 }
3020 
3021 template <typename T>
FPDiv(T op1,T op2)3022 T Simulator::FPDiv(T op1, T op2) {
3023   // NaNs should be handled elsewhere.
3024   DCHECK(!std::isnan(op1) && !std::isnan(op2));
3025 
3026   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3027     // inf / inf and 0.0 / 0.0 return the default NaN.
3028     FPProcessException();
3029     return FPDefaultNaN<T>();
3030   } else {
3031     if (op2 == 0.0) {
3032       FPProcessException();
3033       if (!std::isnan(op1)) {
3034         double op1_sign = std::copysign(1.0, op1);
3035         double op2_sign = std::copysign(1.0, op2);
3036         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3037       }
3038     }
3039 
3040     // Other cases should be handled by standard arithmetic.
3041     return op1 / op2;
3042   }
3043 }
3044 
3045 template <typename T>
FPSqrt(T op)3046 T Simulator::FPSqrt(T op) {
3047   if (std::isnan(op)) {
3048     return FPProcessNaN(op);
3049   } else if (op < 0.0) {
3050     FPProcessException();
3051     return FPDefaultNaN<T>();
3052   } else {
3053     return std::sqrt(op);
3054   }
3055 }
3056 
3057 template <typename T>
FPMax(T a,T b)3058 T Simulator::FPMax(T a, T b) {
3059   T result = FPProcessNaNs(a, b);
3060   if (std::isnan(result)) return result;
3061 
3062   if ((a == 0.0) && (b == 0.0) &&
3063       (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3064     // a and b are zero, and the sign differs: return +0.0.
3065     return 0.0;
3066   } else {
3067     return (a > b) ? a : b;
3068   }
3069 }
3070 
3071 template <typename T>
FPMaxNM(T a,T b)3072 T Simulator::FPMaxNM(T a, T b) {
3073   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074     a = kFP64NegativeInfinity;
3075   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076     b = kFP64NegativeInfinity;
3077   }
3078 
3079   T result = FPProcessNaNs(a, b);
3080   return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082 
3083 template <typename T>
FPMin(T a,T b)3084 T Simulator::FPMin(T a, T b) {
3085   T result = FPProcessNaNs(a, b);
3086   if (std::isnan(result)) return result;
3087 
3088   if ((a == 0.0) && (b == 0.0) &&
3089       (std::copysign(1.0, a) != std::copysign(1.0, b))) {
3090     // a and b are zero, and the sign differs: return -0.0.
3091     return -0.0;
3092   } else {
3093     return (a < b) ? a : b;
3094   }
3095 }
3096 
3097 template <typename T>
FPMinNM(T a,T b)3098 T Simulator::FPMinNM(T a, T b) {
3099   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3100     a = kFP64PositiveInfinity;
3101   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3102     b = kFP64PositiveInfinity;
3103   }
3104 
3105   T result = FPProcessNaNs(a, b);
3106   return std::isnan(result) ? result : FPMin(a, b);
3107 }
3108 
3109 template <typename T>
FPRecipStepFused(T op1,T op2)3110 T Simulator::FPRecipStepFused(T op1, T op2) {
3111   const T two = 2.0;
3112   if ((std::isinf(op1) && (op2 == 0.0)) ||
3113       ((op1 == 0.0) && (std::isinf(op2)))) {
3114     return two;
3115   } else if (std::isinf(op1) || std::isinf(op2)) {
3116     // Return +inf if signs match, otherwise -inf.
3117     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3118                                           : kFP64NegativeInfinity;
3119   } else {
3120     return FusedMultiplyAdd(op1, op2, two);
3121   }
3122 }
3123 
3124 template <typename T>
FPRSqrtStepFused(T op1,T op2)3125 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3126   const T one_point_five = 1.5;
3127   const T two = 2.0;
3128 
3129   if ((std::isinf(op1) && (op2 == 0.0)) ||
3130       ((op1 == 0.0) && (std::isinf(op2)))) {
3131     return one_point_five;
3132   } else if (std::isinf(op1) || std::isinf(op2)) {
3133     // Return +inf if signs match, otherwise -inf.
3134     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3135                                           : kFP64NegativeInfinity;
3136   } else {
3137     // The multiply-add-halve operation must be fully fused, so avoid interim
3138     // rounding by checking which operand can be losslessly divided by two
3139     // before doing the multiply-add.
3140     if (std::isnormal(op1 / two)) {
3141       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3142     } else if (std::isnormal(op2 / two)) {
3143       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3144     } else {
3145       // Neither operand is normal after halving: the result is dominated by
3146       // the addition term, so just return that.
3147       return one_point_five;
3148     }
3149   }
3150 }
3151 
FPRoundInt(double value,FPRounding round_mode)3152 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3153   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3154       (value == kFP64NegativeInfinity)) {
3155     return value;
3156   } else if (std::isnan(value)) {
3157     return FPProcessNaN(value);
3158   }
3159 
3160   double int_result = std::floor(value);
3161   double error = value - int_result;
3162   switch (round_mode) {
3163     case FPTieAway: {
3164       // Take care of correctly handling the range ]-0.5, -0.0], which must
3165       // yield -0.0.
3166       if ((-0.5 < value) && (value < 0.0)) {
3167         int_result = -0.0;
3168 
3169       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3170         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3171         // result is positive, round up.
3172         int_result++;
3173       }
3174       break;
3175     }
3176     case FPTieEven: {
3177       // Take care of correctly handling the range [-0.5, -0.0], which must
3178       // yield -0.0.
3179       if ((-0.5 <= value) && (value < 0.0)) {
3180         int_result = -0.0;
3181 
3182         // If the error is greater than 0.5, or is equal to 0.5 and the integer
3183         // result is odd, round up.
3184       } else if ((error > 0.5) ||
3185                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3186         int_result++;
3187       }
3188       break;
3189     }
3190     case FPZero: {
3191       // If value>0 then we take floor(value)
3192       // otherwise, ceil(value).
3193       if (value < 0) {
3194         int_result = ceil(value);
3195       }
3196       break;
3197     }
3198     case FPNegativeInfinity: {
3199       // We always use floor(value).
3200       break;
3201     }
3202     case FPPositiveInfinity: {
3203       // Take care of correctly handling the range ]-1.0, -0.0], which must
3204       // yield -0.0.
3205       if ((-1.0 < value) && (value < 0.0)) {
3206         int_result = -0.0;
3207 
3208         // If the error is non-zero, round up.
3209       } else if (error > 0.0) {
3210         int_result++;
3211       }
3212       break;
3213     }
3214     default:
3215       UNIMPLEMENTED();
3216   }
3217   return int_result;
3218 }
3219 
FPToInt32(double value,FPRounding rmode)3220 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3221   value = FPRoundInt(value, rmode);
3222   if (value >= kWMaxInt) {
3223     return kWMaxInt;
3224   } else if (value < kWMinInt) {
3225     return kWMinInt;
3226   }
3227   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3228 }
3229 
FPToInt64(double value,FPRounding rmode)3230 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3231   value = FPRoundInt(value, rmode);
3232   if (value >= kXMaxInt) {
3233     return kXMaxInt;
3234   } else if (value < kXMinInt) {
3235     return kXMinInt;
3236   }
3237   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3238 }
3239 
FPToUInt32(double value,FPRounding rmode)3240 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3241   value = FPRoundInt(value, rmode);
3242   if (value >= kWMaxUInt) {
3243     return kWMaxUInt;
3244   } else if (value < 0.0) {
3245     return 0;
3246   }
3247   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3248 }
3249 
FPToUInt64(double value,FPRounding rmode)3250 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3251   value = FPRoundInt(value, rmode);
3252   if (value >= kXMaxUInt) {
3253     return kXMaxUInt;
3254   } else if (value < 0.0) {
3255     return 0;
3256   }
3257   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3258 }
3259 
// Defines, for one (FN, OP, PROCNAN) triple, two Simulator members named FN:
//  - A template <typename T> version that reads lane i of src1 and src2 as T,
//    combines them and stores the result in lane i of dst. When PROCNAN is
//    true, FPProcessNaNs(op1, op2) is evaluated first and OP is applied only
//    if that result is not a NaN; when PROCNAN is false, OP is applied
//    directly.
//  - A non-template overload that calls FN<float> when the lane size of vform
//    equals kSRegSize and FN<double> otherwise (DCHECKed against kDRegSize).
// NEON_FP3SAME_LIST (defined elsewhere) applies the macro to each of its
// entries; the macro is undefined again immediately afterwards.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
      T op1 = src1.Float<T>(i);                                        \
      T op2 = src2.Float<T>(i);                                        \
      T result;                                                        \
      if (PROCNAN) {                                                   \
        result = FPProcessNaNs(op1, op2);                              \
        if (!std::isnan(result)) {                                     \
          result = OP(op1, op2);                                       \
        }                                                              \
      } else {                                                         \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(i, result);                                         \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
      FN<float>(vform, dst, src1, src2);                               \
    } else {                                                           \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
3296 
3297 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3298                                 const LogicVRegister& src1,
3299                                 const LogicVRegister& src2) {
3300   SimVRegister temp;
3301   LogicVRegister product = fmul(vform, temp, src1, src2);
3302   return fneg(vform, dst, product);
3303 }
3304 
3305 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3306 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3307                                  const LogicVRegister& src1,
3308                                  const LogicVRegister& src2) {
3309   dst.ClearForWrite(vform);
3310   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3311     T op1 = -src1.Float<T>(i);
3312     T op2 = src2.Float<T>(i);
3313     T result = FPProcessNaNs(op1, op2);
3314     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3315   }
3316   return dst;
3317 }
3318 
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3319 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3320                                  const LogicVRegister& src1,
3321                                  const LogicVRegister& src2) {
3322   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3323     frecps<float>(vform, dst, src1, src2);
3324   } else {
3325     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3326     frecps<double>(vform, dst, src1, src2);
3327   }
3328   return dst;
3329 }
3330 
3331 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3332 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3333                                   const LogicVRegister& src1,
3334                                   const LogicVRegister& src2) {
3335   dst.ClearForWrite(vform);
3336   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3337     T op1 = -src1.Float<T>(i);
3338     T op2 = src2.Float<T>(i);
3339     T result = FPProcessNaNs(op1, op2);
3340     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3341   }
3342   return dst;
3343 }
3344 
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3345 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3346                                   const LogicVRegister& src1,
3347                                   const LogicVRegister& src2) {
3348   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3349     frsqrts<float>(vform, dst, src1, src2);
3350   } else {
3351     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3352     frsqrts<double>(vform, dst, src1, src2);
3353   }
3354   return dst;
3355 }
3356 
3357 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3358 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3359                                const LogicVRegister& src1,
3360                                const LogicVRegister& src2, Condition cond) {
3361   dst.ClearForWrite(vform);
3362   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3363     bool result = false;
3364     T op1 = src1.Float<T>(i);
3365     T op2 = src2.Float<T>(i);
3366     T nan_result = FPProcessNaNs(op1, op2);
3367     if (!std::isnan(nan_result)) {
3368       switch (cond) {
3369         case eq:
3370           result = (op1 == op2);
3371           break;
3372         case ge:
3373           result = (op1 >= op2);
3374           break;
3375         case gt:
3376           result = (op1 > op2);
3377           break;
3378         case le:
3379           result = (op1 <= op2);
3380           break;
3381         case lt:
3382           result = (op1 < op2);
3383           break;
3384         default:
3385           UNREACHABLE();
3386       }
3387     }
3388     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3389   }
3390   return dst;
3391 }
3392 
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3393 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3394                                const LogicVRegister& src1,
3395                                const LogicVRegister& src2, Condition cond) {
3396   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3397     fcmp<float>(vform, dst, src1, src2, cond);
3398   } else {
3399     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3400     fcmp<double>(vform, dst, src1, src2, cond);
3401   }
3402   return dst;
3403 }
3404 
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)3405 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3406                                     const LogicVRegister& src, Condition cond) {
3407   SimVRegister temp;
3408   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3409     LogicVRegister zero_reg =
3410         dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3411     fcmp<float>(vform, dst, src, zero_reg, cond);
3412   } else {
3413     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3414     LogicVRegister zero_reg =
3415         dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3416     fcmp<double>(vform, dst, src, zero_reg, cond);
3417   }
3418   return dst;
3419 }
3420 
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)3421 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3422                                   const LogicVRegister& src1,
3423                                   const LogicVRegister& src2, Condition cond) {
3424   SimVRegister temp1, temp2;
3425   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3426     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3427     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3428     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3429   } else {
3430     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3431     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3432     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3433     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3434   }
3435   return dst;
3436 }
3437 
3438 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3439 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3440                                const LogicVRegister& src1,
3441                                const LogicVRegister& src2) {
3442   dst.ClearForWrite(vform);
3443   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3444     T op1 = src1.Float<T>(i);
3445     T op2 = src2.Float<T>(i);
3446     T acc = dst.Float<T>(i);
3447     T result = FPMulAdd(acc, op1, op2);
3448     dst.SetFloat(i, result);
3449   }
3450   return dst;
3451 }
3452 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3453 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3454                                const LogicVRegister& src1,
3455                                const LogicVRegister& src2) {
3456   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3457     fmla<float>(vform, dst, src1, src2);
3458   } else {
3459     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3460     fmla<double>(vform, dst, src1, src2);
3461   }
3462   return dst;
3463 }
3464 
3465 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3466 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3467                                const LogicVRegister& src1,
3468                                const LogicVRegister& src2) {
3469   dst.ClearForWrite(vform);
3470   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3471     T op1 = -src1.Float<T>(i);
3472     T op2 = src2.Float<T>(i);
3473     T acc = dst.Float<T>(i);
3474     T result = FPMulAdd(acc, op1, op2);
3475     dst.SetFloat(i, result);
3476   }
3477   return dst;
3478 }
3479 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3480 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3481                                const LogicVRegister& src1,
3482                                const LogicVRegister& src2) {
3483   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3484     fmls<float>(vform, dst, src1, src2);
3485   } else {
3486     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3487     fmls<double>(vform, dst, src1, src2);
3488   }
3489   return dst;
3490 }
3491 
3492 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3493 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3494                                const LogicVRegister& src) {
3495   dst.ClearForWrite(vform);
3496   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3497     T op = src.Float<T>(i);
3498     op = -op;
3499     dst.SetFloat(i, op);
3500   }
3501   return dst;
3502 }
3503 
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3504 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3505                                const LogicVRegister& src) {
3506   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3507     fneg<float>(vform, dst, src);
3508   } else {
3509     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3510     fneg<double>(vform, dst, src);
3511   }
3512   return dst;
3513 }
3514 
3515 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3516 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3517                                 const LogicVRegister& src) {
3518   dst.ClearForWrite(vform);
3519   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3520     T op = src.Float<T>(i);
3521     if (std::copysign(1.0, op) < 0.0) {
3522       op = -op;
3523     }
3524     dst.SetFloat(i, op);
3525   }
3526   return dst;
3527 }
3528 
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3529 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3530                                 const LogicVRegister& src) {
3531   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3532     fabs_<float>(vform, dst, src);
3533   } else {
3534     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3535     fabs_<double>(vform, dst, src);
3536   }
3537   return dst;
3538 }
3539 
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3540 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3541                                const LogicVRegister& src1,
3542                                const LogicVRegister& src2) {
3543   SimVRegister temp;
3544   fsub(vform, temp, src1, src2);
3545   fabs_(vform, dst, temp);
3546   return dst;
3547 }
3548 
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3549 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3550                                 const LogicVRegister& src) {
3551   dst.ClearForWrite(vform);
3552   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3553     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3554       float result = FPSqrt(src.Float<float>(i));
3555       dst.SetFloat(i, result);
3556     }
3557   } else {
3558     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3559     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3560       double result = FPSqrt(src.Float<double>(i));
3561       dst.SetFloat(i, result);
3562     }
3563   }
3564   return dst;
3565 }
3566 
// For each (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this defines two
// overloads of Simulator::FNP:
//  - the two-source form splits src1/src2 with uzp1 and uzp2 into two scratch
//    registers and applies the lane-wise operation FN to them;
//  - the one-source form applies the scalar operation OP to lanes 0 and 1 of
//    `src` (kFormatS or kFormatD) and stores the result in lane 0 of `dst`.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                 \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst,     \
                                const LogicVRegister& src1,                 \
                                const LogicVRegister& src2) {               \
    SimVRegister first_of_pair, second_of_pair;                             \
    uzp1(vform, first_of_pair, src1, src2);                                 \
    uzp2(vform, second_of_pair, src1, src2);                                \
    FN(vform, dst, first_of_pair, second_of_pair);                          \
    return dst;                                                             \
  }                                                                         \
                                                                            \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst,     \
                                const LogicVRegister& src) {                \
    if (vform != kFormatS) {                                                \
      DCHECK_EQ(vform, kFormatD);                                           \
      const double pair_result =                                            \
          OP(src.Float<double>(0), src.Float<double>(1));                   \
      dst.SetFloat(0, pair_result);                                         \
    } else {                                                                \
      const float pair_result =                                             \
          OP(src.Float<float>(0), src.Float<float>(1));                     \
      dst.SetFloat(0, pair_result);                                         \
    }                                                                       \
    dst.ClearForWrite(vform);                                               \
    return dst;                                                             \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
3593 
3594 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3595                                    const LogicVRegister& src, FPMinMaxOp Op) {
3596   DCHECK_EQ(vform, kFormat4S);
3597   USE(vform);
3598   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3599   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3600   float result = (this->*Op)(result1, result2);
3601   dst.ClearForWrite(kFormatS);
3602   dst.SetFloat<float>(0, result);
3603   return dst;
3604 }
3605 
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3606 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3607                                 const LogicVRegister& src) {
3608   return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3609 }
3610 
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3611 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3612                                 const LogicVRegister& src) {
3613   return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3614 }
3615 
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3616 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3617                                   const LogicVRegister& src) {
3618   return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3619 }
3620 
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3621 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3622                                   const LogicVRegister& src) {
3623   return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3624 }
3625 
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3626 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3627                                const LogicVRegister& src1,
3628                                const LogicVRegister& src2, int index) {
3629   dst.ClearForWrite(vform);
3630   SimVRegister temp;
3631   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3632     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3633     fmul<float>(vform, dst, src1, index_reg);
3634   } else {
3635     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3636     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3637     fmul<double>(vform, dst, src1, index_reg);
3638   }
3639   return dst;
3640 }
3641 
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3642 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3643                                const LogicVRegister& src1,
3644                                const LogicVRegister& src2, int index) {
3645   dst.ClearForWrite(vform);
3646   SimVRegister temp;
3647   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3648     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3649     fmla<float>(vform, dst, src1, index_reg);
3650   } else {
3651     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3652     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3653     fmla<double>(vform, dst, src1, index_reg);
3654   }
3655   return dst;
3656 }
3657 
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3658 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3659                                const LogicVRegister& src1,
3660                                const LogicVRegister& src2, int index) {
3661   dst.ClearForWrite(vform);
3662   SimVRegister temp;
3663   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3664     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3665     fmls<float>(vform, dst, src1, index_reg);
3666   } else {
3667     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3668     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3669     fmls<double>(vform, dst, src1, index_reg);
3670   }
3671   return dst;
3672 }
3673 
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)3674 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3675                                 const LogicVRegister& src1,
3676                                 const LogicVRegister& src2, int index) {
3677   dst.ClearForWrite(vform);
3678   SimVRegister temp;
3679   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3680     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3681     fmulx<float>(vform, dst, src1, index_reg);
3682 
3683   } else {
3684     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3685     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3686     fmulx<double>(vform, dst, src1, index_reg);
3687   }
3688   return dst;
3689 }
3690 
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)3691 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3692                                 const LogicVRegister& src,
3693                                 FPRounding rounding_mode,
3694                                 bool inexact_exception) {
3695   dst.ClearForWrite(vform);
3696   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3697     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3698       float input = src.Float<float>(i);
3699       float rounded = FPRoundInt(input, rounding_mode);
3700       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3701         FPProcessException();
3702       }
3703       dst.SetFloat<float>(i, rounded);
3704     }
3705   } else {
3706     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3707     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3708       double input = src.Float<double>(i);
3709       double rounded = FPRoundInt(input, rounding_mode);
3710       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3711         FPProcessException();
3712       }
3713       dst.SetFloat<double>(i, rounded);
3714     }
3715   }
3716   return dst;
3717 }
3718 
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3719 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3720                                 const LogicVRegister& src,
3721                                 FPRounding rounding_mode, int fbits) {
3722   dst.ClearForWrite(vform);
3723   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3724     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3725       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3726       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3727     }
3728   } else {
3729     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3730     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3731       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3732       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3733     }
3734   }
3735   return dst;
3736 }
3737 
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)3738 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3739                                 const LogicVRegister& src,
3740                                 FPRounding rounding_mode, int fbits) {
3741   dst.ClearForWrite(vform);
3742   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3743     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3744       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3745       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3746     }
3747   } else {
3748     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3749     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3750       double op = src.Float<double>(i) * std::pow(2.0, fbits);
3751       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3752     }
3753   }
3754   return dst;
3755 }
3756 
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3757 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3758                                 const LogicVRegister& src) {
3759   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3760     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3761       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3762     }
3763   } else {
3764     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3765     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3766       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3767     }
3768   }
3769   return dst;
3770 }
3771 
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3772 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3773                                  const LogicVRegister& src) {
3774   int lane_count = LaneCountFromFormat(vform);
3775   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3776     for (int i = 0; i < lane_count; i++) {
3777       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3778     }
3779   } else {
3780     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3781     for (int i = 0; i < lane_count; i++) {
3782       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3783     }
3784   }
3785   return dst;
3786 }
3787 
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3788 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3789                                 const LogicVRegister& src) {
3790   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3791     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3792       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3793     }
3794   } else {
3795     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3796     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3797       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3798     }
3799   }
3800   return dst;
3801 }
3802 
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3803 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3804                                  const LogicVRegister& src) {
3805   int lane_count = LaneCountFromFormat(vform) / 2;
3806   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3807     for (int i = lane_count - 1; i >= 0; i--) {
3808       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3809     }
3810   } else {
3811     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3812     for (int i = lane_count - 1; i >= 0; i--) {
3813       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3814     }
3815   }
3816   return dst;
3817 }
3818 
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3819 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3820                                  const LogicVRegister& src) {
3821   dst.ClearForWrite(vform);
3822   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3823   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3824     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3825   }
3826   return dst;
3827 }
3828 
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3829 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3830                                   const LogicVRegister& src) {
3831   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3832   int lane_count = LaneCountFromFormat(vform) / 2;
3833   for (int i = lane_count - 1; i >= 0; i--) {
3834     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3835   }
3836   return dst;
3837 }
3838 
3839 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)3840 double Simulator::recip_sqrt_estimate(double a) {
3841   int q0, q1, s;
3842   double r;
3843   if (a < 0.5) {
3844     q0 = static_cast<int>(a * 512.0);
3845     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3846   } else {
3847     q1 = static_cast<int>(a * 256.0);
3848     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3849   }
3850   s = static_cast<int>(256.0 * r + 0.5);
3851   return static_cast<double>(s) / 256.0;
3852 }
3853 
3854 namespace {
3855 
Bits(uint64_t val,int start_bit,int end_bit)3856 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3857   return unsigned_bitextract_64(start_bit, end_bit, val);
3858 }
3859 
3860 }  // anonymous namespace
3861 
3862 template <typename T>
FPRecipSqrtEstimate(T op)3863 T Simulator::FPRecipSqrtEstimate(T op) {
3864   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3865                 "T must be a float or double");
3866 
3867   if (std::isnan(op)) {
3868     return FPProcessNaN(op);
3869   } else if (op == 0.0) {
3870     if (std::copysign(1.0, op) < 0.0) {
3871       return kFP64NegativeInfinity;
3872     } else {
3873       return kFP64PositiveInfinity;
3874     }
3875   } else if (std::copysign(1.0, op) < 0.0) {
3876     FPProcessException();
3877     return FPDefaultNaN<T>();
3878   } else if (std::isinf(op)) {
3879     return 0.0;
3880   } else {
3881     uint64_t fraction;
3882     int32_t exp, result_exp;
3883 
3884     if (sizeof(T) == sizeof(float)) {
3885       exp = static_cast<int32_t>(float_exp(op));
3886       fraction = float_mantissa(op);
3887       fraction <<= 29;
3888     } else {
3889       exp = static_cast<int32_t>(double_exp(op));
3890       fraction = double_mantissa(op);
3891     }
3892 
3893     if (exp == 0) {
3894       while (Bits(fraction, 51, 51) == 0) {
3895         fraction = Bits(fraction, 50, 0) << 1;
3896         exp -= 1;
3897       }
3898       fraction = Bits(fraction, 50, 0) << 1;
3899     }
3900 
3901     double scaled;
3902     if (Bits(exp, 0, 0) == 0) {
3903       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3904     } else {
3905       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3906     }
3907 
3908     if (sizeof(T) == sizeof(float)) {
3909       result_exp = (380 - exp) / 2;
3910     } else {
3911       result_exp = (3068 - exp) / 2;
3912     }
3913 
3914     uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3915 
3916     if (sizeof(T) == sizeof(float)) {
3917       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3918       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3919       return float_pack(0, exp_bits, est_bits);
3920     } else {
3921       return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3922     }
3923   }
3924 }
3925 
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3926 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3927                                   const LogicVRegister& src) {
3928   dst.ClearForWrite(vform);
3929   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3930     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3931       float input = src.Float<float>(i);
3932       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3933     }
3934   } else {
3935     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3936     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3937       double input = src.Float<double>(i);
3938       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3939     }
3940   }
3941   return dst;
3942 }
3943 
3944 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)3945 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3946   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3947                 "T must be a float or double");
3948   uint32_t sign;
3949 
3950   if (sizeof(T) == sizeof(float)) {
3951     sign = float_sign(op);
3952   } else {
3953     sign = double_sign(op);
3954   }
3955 
3956   if (std::isnan(op)) {
3957     return FPProcessNaN(op);
3958   } else if (std::isinf(op)) {
3959     return (sign == 1) ? -0.0 : 0.0;
3960   } else if (op == 0.0) {
3961     FPProcessException();  // FPExc_DivideByZero exception.
3962     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3963   } else if (((sizeof(T) == sizeof(float)) &&
3964               (std::fabs(op) < std::pow(2.0, -128.0))) ||
3965              ((sizeof(T) == sizeof(double)) &&
3966               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3967     bool overflow_to_inf = false;
3968     switch (rounding) {
3969       case FPTieEven:
3970         overflow_to_inf = true;
3971         break;
3972       case FPPositiveInfinity:
3973         overflow_to_inf = (sign == 0);
3974         break;
3975       case FPNegativeInfinity:
3976         overflow_to_inf = (sign == 1);
3977         break;
3978       case FPZero:
3979         overflow_to_inf = false;
3980         break;
3981       default:
3982         break;
3983     }
3984     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
3985     if (overflow_to_inf) {
3986       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3987     } else {
3988       // Return FPMaxNormal(sign).
3989       if (sizeof(T) == sizeof(float)) {
3990         return float_pack(sign, 0xFE, 0x07FFFFF);
3991       } else {
3992         return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
3993       }
3994     }
3995   } else {
3996     uint64_t fraction;
3997     int32_t exp, result_exp;
3998     uint32_t sign;
3999 
4000     if (sizeof(T) == sizeof(float)) {
4001       sign = float_sign(op);
4002       exp = static_cast<int32_t>(float_exp(op));
4003       fraction = float_mantissa(op);
4004       fraction <<= 29;
4005     } else {
4006       sign = double_sign(op);
4007       exp = static_cast<int32_t>(double_exp(op));
4008       fraction = double_mantissa(op);
4009     }
4010 
4011     if (exp == 0) {
4012       if (Bits(fraction, 51, 51) == 0) {
4013         exp -= 1;
4014         fraction = Bits(fraction, 49, 0) << 2;
4015       } else {
4016         fraction = Bits(fraction, 50, 0) << 1;
4017       }
4018     }
4019 
4020     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4021 
4022     if (sizeof(T) == sizeof(float)) {
4023       result_exp = 253 - exp;
4024     } else {
4025       result_exp = 2045 - exp;
4026     }
4027 
4028     double estimate = recip_estimate(scaled);
4029 
4030     fraction = double_mantissa(estimate);
4031     if (result_exp == 0) {
4032       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4033     } else if (result_exp == -1) {
4034       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4035       result_exp = 0;
4036     }
4037     if (sizeof(T) == sizeof(float)) {
4038       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4039       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4040       return float_pack(sign, exp_bits, frac_bits);
4041     } else {
4042       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4043     }
4044   }
4045 }
4046 
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)4047 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4048                                  const LogicVRegister& src, FPRounding round) {
4049   dst.ClearForWrite(vform);
4050   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4051     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4052       float input = src.Float<float>(i);
4053       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4054     }
4055   } else {
4056     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4057     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4058       double input = src.Float<double>(i);
4059       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4060     }
4061   }
4062   return dst;
4063 }
4064 
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4065 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4066                                   const LogicVRegister& src) {
4067   dst.ClearForWrite(vform);
4068   uint64_t operand;
4069   uint32_t result;
4070   double dp_operand, dp_result;
4071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4072     operand = src.Uint(vform, i);
4073     if (operand <= 0x3FFFFFFF) {
4074       result = 0xFFFFFFFF;
4075     } else {
4076       dp_operand = operand * std::pow(2.0, -32);
4077       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4078       result = static_cast<uint32_t>(dp_result);
4079     }
4080     dst.SetUint(vform, i, result);
4081   }
4082   return dst;
4083 }
4084 
4085 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)4086 double Simulator::recip_estimate(double a) {
4087   int q, s;
4088   double r;
4089   q = static_cast<int>(a * 512.0);
4090   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4091   s = static_cast<int>(256.0 * r + 0.5);
4092   return static_cast<double>(s) / 256.0;
4093 }
4094 
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4095 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4096                                  const LogicVRegister& src) {
4097   dst.ClearForWrite(vform);
4098   uint64_t operand;
4099   uint32_t result;
4100   double dp_operand, dp_result;
4101   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4102     operand = src.Uint(vform, i);
4103     if (operand <= 0x7FFFFFFF) {
4104       result = 0xFFFFFFFF;
4105     } else {
4106       dp_operand = operand * std::pow(2.0, -32);
4107       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4108       result = static_cast<uint32_t>(dp_result);
4109     }
4110     dst.SetUint(vform, i, result);
4111   }
4112   return dst;
4113 }
4114 
4115 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4116 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4117                                  const LogicVRegister& src) {
4118   dst.ClearForWrite(vform);
4119   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4120     T op = src.Float<T>(i);
4121     T result;
4122     if (std::isnan(op)) {
4123       result = FPProcessNaN(op);
4124     } else {
4125       int exp;
4126       uint32_t sign;
4127       if (sizeof(T) == sizeof(float)) {
4128         sign = float_sign(op);
4129         exp = static_cast<int>(float_exp(op));
4130         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4131         result = float_pack(sign, exp, 0);
4132       } else {
4133         sign = double_sign(op);
4134         exp = static_cast<int>(double_exp(op));
4135         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4136         result = double_pack(sign, exp, 0);
4137       }
4138     }
4139     dst.SetFloat(i, result);
4140   }
4141   return dst;
4142 }
4143 
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4144 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4145                                  const LogicVRegister& src) {
4146   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4147     frecpx<float>(vform, dst, src);
4148   } else {
4149     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4150     frecpx<double>(vform, dst, src);
4151   }
4152   return dst;
4153 }
4154 
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4155 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4156                                 const LogicVRegister& src, int fbits,
4157                                 FPRounding round) {
4158   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4159     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4160       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4161       dst.SetFloat<float>(i, result);
4162     } else {
4163       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4164       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4165       dst.SetFloat<double>(i, result);
4166     }
4167   }
4168   return dst;
4169 }
4170 
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)4171 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4172                                 const LogicVRegister& src, int fbits,
4173                                 FPRounding round) {
4174   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4175     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4176       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4177       dst.SetFloat<float>(i, result);
4178     } else {
4179       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4180       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4181       dst.SetFloat<double>(i, result);
4182     }
4183   }
4184   return dst;
4185 }
4186 
4187 }  // namespace internal
4188 }  // namespace v8
4189 
4190 #endif  // USE_SIMULATOR
4191