1 /* This file is part of the dynarmic project.
2  * Copyright (c) 2018 MerryMage
3  * SPDX-License-Identifier: 0BSD
4  */
5 
6 #include "common/fp/rounding_mode.h"
7 #include "frontend/A64/translate/impl/impl.h"
8 
9 namespace Dynarmic::A64 {
10 namespace {
// Selects which comparison against zero the compare helpers in this file emit.
enum class ComparisonType {
    EQ,  // element == 0
    GE,  // element >= 0
    GT,  // element > 0
    LE,  // element <= 0
    LT,  // element < 0
};
18 
CompareAgainstZero(TranslatorVisitor & v,bool Q,Imm<2> size,Vec Vn,Vec Vd,ComparisonType type)19 bool CompareAgainstZero(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, ComparisonType type) {
20     if (size == 0b11 && !Q) {
21         return v.ReservedValue();
22     }
23 
24     const size_t esize = 8 << size.ZeroExtend();
25     const size_t datasize = Q ? 128 : 64;
26 
27     const IR::U128 operand = v.V(datasize, Vn);
28     const IR::U128 zero = v.ir.ZeroVector();
29     IR::U128 result = [&] {
30         switch (type) {
31         case ComparisonType::EQ:
32             return v.ir.VectorEqual(esize, operand, zero);
33         case ComparisonType::GE:
34             return v.ir.VectorGreaterEqualSigned(esize, operand, zero);
35         case ComparisonType::GT:
36             return v.ir.VectorGreaterSigned(esize, operand, zero);
37         case ComparisonType::LE:
38             return v.ir.VectorLessEqualSigned(esize, operand, zero);
39         case ComparisonType::LT:
40         default:
41             return v.ir.VectorLessSigned(esize, operand, zero);
42         }
43     }();
44 
45     if (datasize == 64) {
46         result = v.ir.VectorZeroUpper(result);
47     }
48 
49     v.V(datasize, Vd, result);
50     return true;
51 }
52 
FPCompareAgainstZero(TranslatorVisitor & v,bool Q,bool sz,Vec Vn,Vec Vd,ComparisonType type)53 bool FPCompareAgainstZero(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, ComparisonType type) {
54     if (sz && !Q) {
55         return v.ReservedValue();
56     }
57 
58     const size_t esize = sz ? 64 : 32;
59     const size_t datasize = Q ? 128 : 64;
60 
61     const IR::U128 operand = v.V(datasize, Vn);
62     const IR::U128 zero = v.ir.ZeroVector();
63     const IR::U128 result = [&] {
64         switch (type) {
65         case ComparisonType::EQ:
66             return v.ir.FPVectorEqual(esize, operand, zero);
67         case ComparisonType::GE:
68             return v.ir.FPVectorGreaterEqual(esize, operand, zero);
69         case ComparisonType::GT:
70             return v.ir.FPVectorGreater(esize, operand, zero);
71         case ComparisonType::LE:
72             return v.ir.FPVectorGreaterEqual(esize, zero, operand);
73         case ComparisonType::LT:
74             return v.ir.FPVectorGreater(esize, zero, operand);
75         }
76 
77         UNREACHABLE();
78     }();
79 
80     v.V(datasize, Vd, result);
81     return true;
82 }
83 
// Chooses between the signed and the unsigned variant of an operation.
enum class Signedness {
    Signed,
    Unsigned
};
88 
IntegerConvertToFloat(TranslatorVisitor & v,bool Q,bool sz,Vec Vn,Vec Vd,Signedness signedness)89 bool IntegerConvertToFloat(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, Signedness signedness) {
90     if (sz && !Q) {
91         return v.ReservedValue();
92     }
93 
94     const size_t datasize = Q ? 128 : 64;
95     const size_t esize = sz ? 64 : 32;
96     const FP::RoundingMode rounding_mode = v.ir.current_location->FPCR().RMode();
97 
98     const IR::U128 operand = v.V(datasize, Vn);
99     const IR::U128 result = signedness == Signedness::Signed
100                           ? v.ir.FPVectorFromSignedFixed(esize, operand, 0, rounding_mode)
101                           : v.ir.FPVectorFromUnsignedFixed(esize, operand, 0, rounding_mode);
102 
103     v.V(datasize, Vd, result);
104     return true;
105 }
106 
FloatConvertToInteger(TranslatorVisitor & v,bool Q,bool sz,Vec Vn,Vec Vd,Signedness signedness,FP::RoundingMode rounding_mode)107 bool FloatConvertToInteger(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, Signedness signedness, FP::RoundingMode rounding_mode) {
108     if (sz && !Q) {
109         return v.ReservedValue();
110     }
111 
112     const size_t datasize = Q ? 128 : 64;
113     const size_t esize = sz ? 64 : 32;
114 
115     const IR::U128 operand = v.V(datasize, Vn);
116     const IR::U128 result = signedness == Signedness::Signed
117                           ? v.ir.FPVectorToSignedFixed(esize, operand, 0, rounding_mode)
118                           : v.ir.FPVectorToUnsignedFixed(esize, operand, 0, rounding_mode);
119 
120     v.V(datasize, Vd, result);
121     return true;
122 }
123 
FloatRoundToIntegral(TranslatorVisitor & v,bool Q,bool sz,Vec Vn,Vec Vd,FP::RoundingMode rounding_mode,bool exact)124 bool FloatRoundToIntegral(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, FP::RoundingMode rounding_mode, bool exact) {
125     if (sz && !Q) {
126         return v.ReservedValue();
127     }
128 
129     const size_t datasize = Q ? 128 : 64;
130     const size_t esize = sz ? 64 : 32;
131 
132     const IR::U128 operand = v.V(datasize, Vn);
133     const IR::U128 result = v.ir.FPVectorRoundInt(esize, operand, rounding_mode, exact);
134 
135     v.V(datasize, Vd, result);
136     return true;
137 }
138 
FloatRoundToIntegralHalfPrecision(TranslatorVisitor & v,bool Q,Vec Vn,Vec Vd,FP::RoundingMode rounding_mode,bool exact)139 bool FloatRoundToIntegralHalfPrecision(TranslatorVisitor& v, bool Q, Vec Vn, Vec Vd, FP::RoundingMode rounding_mode, bool exact) {
140     const size_t datasize = Q ? 128 : 64;
141     const size_t esize = 16;
142 
143     const IR::U128 operand = v.V(datasize, Vn);
144     const IR::U128 result = v.ir.FPVectorRoundInt(esize, operand, rounding_mode, exact);
145 
146     v.V(datasize, Vd, result);
147     return true;
148 }
149 
SaturatedNarrow(TranslatorVisitor & v,bool Q,Imm<2> size,Vec Vn,Vec Vd,IR::U128 (IR::IREmitter::* fn)(size_t,const IR::U128 &))150 bool SaturatedNarrow(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, IR::U128 (IR::IREmitter::*fn)(size_t, const IR::U128&)) {
151     if (size == 0b11) {
152         return v.ReservedValue();
153     }
154 
155     const size_t esize = 8 << size.ZeroExtend<size_t>();
156     const size_t datasize = 64;
157     const size_t part = Q ? 1 : 0;
158 
159     const IR::U128 operand = v.V(2 * datasize, Vn);
160     const IR::U128 result = (v.ir.*fn)(2 * esize, operand);
161 
162     v.Vpart(datasize, Vd, part, result);
163     return true;
164 }
165 
// Optional extra step PairedAddLong applies after the pairwise widening add.
enum class PairedAddLongExtraBehavior {
    None,        // Use the pairwise sums as they are.
    Accumulate,  // Add the pairwise sums to the current contents of Vd.
};
170 
PairedAddLong(TranslatorVisitor & v,bool Q,Imm<2> size,Vec Vn,Vec Vd,Signedness sign,PairedAddLongExtraBehavior behavior)171 bool PairedAddLong(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, Signedness sign,
172                    PairedAddLongExtraBehavior behavior) {
173     if (size == 0b11) {
174         return v.ReservedValue();
175     }
176 
177     const size_t esize = 8 << size.ZeroExtend();
178     const size_t datasize = Q ? 128 : 64;
179 
180     const IR::U128 operand = v.V(datasize, Vn);
181     IR::U128 result = [&] {
182         if (sign == Signedness::Signed) {
183             return v.ir.VectorPairedAddSignedWiden(esize, operand);
184         }
185 
186         return v.ir.VectorPairedAddUnsignedWiden(esize, operand);
187     }();
188 
189     if (behavior == PairedAddLongExtraBehavior::Accumulate) {
190         result = v.ir.VectorAdd(esize * 2, v.V(datasize, Vd), result);
191     }
192 
193     if (datasize == 64) {
194         result = v.ir.VectorZeroUpper(result);
195     }
196 
197     v.V(datasize, Vd, result);
198     return true;
199 }
200 
201 } // Anonymous namespace
202 
CLS_asimd(bool Q,Imm<2> size,Vec Vn,Vec Vd)203 bool TranslatorVisitor::CLS_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
204     if (size == 0b11) {
205         return ReservedValue();
206     }
207 
208     const size_t esize = 8 << size.ZeroExtend();
209     const size_t datasize = Q ? 128 : 64;
210 
211     const IR::U128 operand = V(datasize, Vn);
212     const IR::U128 shifted = ir.VectorArithmeticShiftRight(esize, operand, static_cast<u8>(esize));
213     const IR::U128 xored = ir.VectorEor(operand, shifted);
214     const IR::U128 clz = ir.VectorCountLeadingZeros(esize, xored);
215     IR::U128 result = ir.VectorSub(esize, clz, ir.VectorBroadcast(esize, I(esize, 1)));
216 
217     if (datasize == 64) {
218         result = ir.VectorZeroUpper(result);
219     }
220 
221     V(datasize, Vd, result);
222     return true;
223 }
224 
CLZ_asimd(bool Q,Imm<2> size,Vec Vn,Vec Vd)225 bool TranslatorVisitor::CLZ_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
226     if (size == 0b11) {
227         return ReservedValue();
228     }
229 
230     const size_t esize = 8 << size.ZeroExtend();
231     const size_t datasize = Q ? 128 : 64;
232 
233     const IR::U128 operand = V(datasize, Vn);
234     IR::U128 result = ir.VectorCountLeadingZeros(esize, operand);
235 
236     if (datasize == 64) {
237         result = ir.VectorZeroUpper(result);
238     }
239 
240     V(datasize, Vd, result);
241     return true;
242 }
243 
CNT(bool Q,Imm<2> size,Vec Vn,Vec Vd)244 bool TranslatorVisitor::CNT(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
245     if (size != 0b00) {
246         return ReservedValue();
247     }
248     const size_t datasize = Q ? 128 : 64;
249 
250     const IR::U128 operand = V(datasize, Vn);
251     const IR::U128 result = ir.VectorPopulationCount(operand);
252 
253     V(datasize, Vd, result);
254     return true;
255 }
256 
CMGE_zero_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)257 bool TranslatorVisitor::CMGE_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
258     return CompareAgainstZero(*this, Q, size, Vn, Vd, ComparisonType::GE);
259 }
260 
CMGT_zero_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)261 bool TranslatorVisitor::CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
262     return CompareAgainstZero(*this, Q, size, Vn, Vd, ComparisonType::GT);
263 }
264 
CMEQ_zero_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)265 bool TranslatorVisitor::CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
266     return CompareAgainstZero(*this, Q, size, Vn, Vd, ComparisonType::EQ);
267 }
268 
CMLE_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)269 bool TranslatorVisitor::CMLE_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
270     return CompareAgainstZero(*this, Q, size, Vn, Vd, ComparisonType::LE);
271 }
272 
CMLT_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)273 bool TranslatorVisitor::CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
274     return CompareAgainstZero(*this, Q, size, Vn, Vd, ComparisonType::LT);
275 }
276 
ABS_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)277 bool TranslatorVisitor::ABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
278     if (!Q && size == 0b11) {
279         return ReservedValue();
280     }
281 
282     const size_t datasize = Q ? 128 : 64;
283     const size_t esize = 8 << size.ZeroExtend();
284 
285     const IR::U128 data = V(datasize, Vn);
286     const IR::U128 result = ir.VectorAbs(esize, data);
287 
288     V(datasize, Vd, result);
289     return true;
290 }
291 
XTN(bool Q,Imm<2> size,Vec Vn,Vec Vd)292 bool TranslatorVisitor::XTN(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
293     if (size == 0b11) {
294         return ReservedValue();
295     }
296     const size_t esize = 8 << size.ZeroExtend<size_t>();
297     const size_t datasize = 64;
298     const size_t part = Q ? 1 : 0;
299 
300     const IR::U128 operand = V(2 * datasize, Vn);
301     const IR::U128 result = ir.VectorNarrow(2 * esize, operand);
302 
303     Vpart(datasize, Vd, part, result);
304     return true;
305 }
306 
FABS_1(bool Q,Vec Vn,Vec Vd)307 bool TranslatorVisitor::FABS_1(bool Q, Vec Vn, Vec Vd) {
308     const size_t datasize = Q ? 128 : 64;
309     const size_t esize = 16;
310 
311     const IR::U128 operand = V(datasize, Vn);
312     const IR::U128 result = ir.FPVectorAbs(esize, operand);
313 
314     V(datasize, Vd, result);
315     return true;
316 }
317 
FABS_2(bool Q,bool sz,Vec Vn,Vec Vd)318 bool TranslatorVisitor::FABS_2(bool Q, bool sz, Vec Vn, Vec Vd) {
319     if (sz && !Q) {
320         return ReservedValue();
321     }
322 
323     const size_t datasize = Q ? 128 : 64;
324     const size_t esize = sz ? 64 : 32;
325 
326     const IR::U128 operand = V(datasize, Vn);
327     const IR::U128 result = ir.FPVectorAbs(esize, operand);
328 
329     V(datasize, Vd, result);
330     return true;
331 }
332 
FCMEQ_zero_3(bool Q,Vec Vn,Vec Vd)333 bool TranslatorVisitor::FCMEQ_zero_3(bool Q, Vec Vn, Vec Vd) {
334     const size_t datasize = Q ? 128 : 64;
335 
336     const IR::U128 operand = V(datasize, Vn);
337     const IR::U128 zero = ir.ZeroVector();
338     const IR::U128 result = ir.FPVectorEqual(16, operand, zero);
339 
340     V(datasize, Vd, result);
341     return true;
342 }
343 
FCMEQ_zero_4(bool Q,bool sz,Vec Vn,Vec Vd)344 bool TranslatorVisitor::FCMEQ_zero_4(bool Q, bool sz, Vec Vn, Vec Vd) {
345     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::EQ);
346 }
347 
FCMGE_zero_4(bool Q,bool sz,Vec Vn,Vec Vd)348 bool TranslatorVisitor::FCMGE_zero_4(bool Q, bool sz, Vec Vn, Vec Vd) {
349     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::GE);
350 }
351 
FCMGT_zero_4(bool Q,bool sz,Vec Vn,Vec Vd)352 bool TranslatorVisitor::FCMGT_zero_4(bool Q, bool sz, Vec Vn, Vec Vd) {
353     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::GT);
354 }
355 
FCMLE_4(bool Q,bool sz,Vec Vn,Vec Vd)356 bool TranslatorVisitor::FCMLE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
357     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::LE);
358 }
359 
FCMLT_4(bool Q,bool sz,Vec Vn,Vec Vd)360 bool TranslatorVisitor::FCMLT_4(bool Q, bool sz, Vec Vn, Vec Vd) {
361     return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::LT);
362 }
363 
FCVTL(bool Q,bool sz,Vec Vn,Vec Vd)364 bool TranslatorVisitor::FCVTL(bool Q, bool sz, Vec Vn, Vec Vd) {
365     const size_t esize = sz ? 32 : 16;
366     const size_t datasize = 64;
367     const size_t num_elements = datasize / esize;
368 
369     const IR::U128 part = Vpart(64, Vn, Q);
370     const auto rounding_mode = ir.current_location->FPCR().RMode();
371     IR::U128 result = ir.ZeroVector();
372 
373     for (size_t i = 0; i < num_elements; i++) {
374         IR::U16U32U64 element = ir.VectorGetElement(esize, part, i);
375 
376         if (esize == 16) {
377             element = ir.FPHalfToSingle(element, rounding_mode);
378         } else if (esize == 32) {
379             element = ir.FPSingleToDouble(element, rounding_mode);
380         }
381 
382         result = ir.VectorSetElement(2 * esize, result, i, element);
383     }
384 
385     V(128, Vd, result);
386     return true;
387 }
388 
FCVTN(bool Q,bool sz,Vec Vn,Vec Vd)389 bool TranslatorVisitor::FCVTN(bool Q, bool sz, Vec Vn, Vec Vd) {
390     const size_t datasize = 64;
391     const size_t esize = sz ? 32 : 16;
392     const size_t num_elements = datasize / esize;
393 
394     const IR::U128 operand = V(128, Vn);
395     const auto rounding_mode = ir.current_location->FPCR().RMode();
396     IR::U128 result = ir.ZeroVector();
397 
398     for (size_t i = 0; i < num_elements; i++) {
399         IR::U16U32U64 element = ir.VectorGetElement(2 * esize, operand, i);
400 
401         if (esize == 16) {
402             element = ir.FPSingleToHalf(element, rounding_mode);
403         } else if (esize == 32) {
404             element = ir.FPDoubleToSingle(element, rounding_mode);
405         }
406 
407         result = ir.VectorSetElement(esize, result, i, element);
408     }
409 
410     Vpart(datasize, Vd, Q, result);
411     return true;
412 }
413 
FCVTNS_4(bool Q,bool sz,Vec Vn,Vec Vd)414 bool TranslatorVisitor::FCVTNS_4(bool Q, bool sz, Vec Vn, Vec Vd) {
415     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Signed, FP::RoundingMode::ToNearest_TieEven);
416 }
417 
FCVTMS_4(bool Q,bool sz,Vec Vn,Vec Vd)418 bool TranslatorVisitor::FCVTMS_4(bool Q, bool sz, Vec Vn, Vec Vd) {
419     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Signed, FP::RoundingMode::TowardsMinusInfinity);
420 }
421 
FCVTAS_4(bool Q,bool sz,Vec Vn,Vec Vd)422 bool TranslatorVisitor::FCVTAS_4(bool Q, bool sz, Vec Vn, Vec Vd) {
423     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Signed, FP::RoundingMode::ToNearest_TieAwayFromZero);
424 }
425 
FCVTPS_4(bool Q,bool sz,Vec Vn,Vec Vd)426 bool TranslatorVisitor::FCVTPS_4(bool Q, bool sz, Vec Vn, Vec Vd) {
427     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Signed, FP::RoundingMode::TowardsPlusInfinity);
428 }
429 
FCVTXN_2(bool Q,bool sz,Vec Vn,Vec Vd)430 bool TranslatorVisitor::FCVTXN_2(bool Q, bool sz, Vec Vn, Vec Vd) {
431     if (!sz) {
432         return UnallocatedEncoding();
433     }
434 
435     const size_t part = Q ? 1 : 0;
436     const auto operand = ir.GetQ(Vn);
437     auto result = ir.ZeroVector();
438 
439     for (size_t e = 0; e < 2; ++e) {
440         const IR::U64 element = ir.VectorGetElement(64, operand, e);
441         const IR::U32 converted = ir.FPDoubleToSingle(element, FP::RoundingMode::ToOdd);
442 
443         result = ir.VectorSetElement(32, result, e, converted);
444     }
445 
446     Vpart(64, Vd, part, result);
447     return true;
448 }
449 
FCVTZS_int_4(bool Q,bool sz,Vec Vn,Vec Vd)450 bool TranslatorVisitor::FCVTZS_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
451     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Signed, FP::RoundingMode::TowardsZero);
452 }
453 
FCVTNU_4(bool Q,bool sz,Vec Vn,Vec Vd)454 bool TranslatorVisitor::FCVTNU_4(bool Q, bool sz, Vec Vn, Vec Vd) {
455     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Unsigned, FP::RoundingMode::ToNearest_TieEven);
456 }
457 
FCVTMU_4(bool Q,bool sz,Vec Vn,Vec Vd)458 bool TranslatorVisitor::FCVTMU_4(bool Q, bool sz, Vec Vn, Vec Vd) {
459     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Unsigned, FP::RoundingMode::TowardsMinusInfinity);
460 }
461 
FCVTAU_4(bool Q,bool sz,Vec Vn,Vec Vd)462 bool TranslatorVisitor::FCVTAU_4(bool Q, bool sz, Vec Vn, Vec Vd) {
463     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Unsigned, FP::RoundingMode::ToNearest_TieAwayFromZero);
464 }
465 
FCVTPU_4(bool Q,bool sz,Vec Vn,Vec Vd)466 bool TranslatorVisitor::FCVTPU_4(bool Q, bool sz, Vec Vn, Vec Vd) {
467     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Unsigned, FP::RoundingMode::TowardsPlusInfinity);
468 }
469 
FCVTZU_int_4(bool Q,bool sz,Vec Vn,Vec Vd)470 bool TranslatorVisitor::FCVTZU_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
471     return FloatConvertToInteger(*this, Q, sz, Vn, Vd, Signedness::Unsigned, FP::RoundingMode::TowardsZero);
472 }
473 
FRINTN_1(bool Q,Vec Vn,Vec Vd)474 bool TranslatorVisitor::FRINTN_1(bool Q, Vec Vn, Vec Vd) {
475     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, FP::RoundingMode::ToNearest_TieEven, false);
476 }
477 
FRINTN_2(bool Q,bool sz,Vec Vn,Vec Vd)478 bool TranslatorVisitor::FRINTN_2(bool Q, bool sz, Vec Vn, Vec Vd) {
479     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, FP::RoundingMode::ToNearest_TieEven, false);
480 }
481 
FRINTM_1(bool Q,Vec Vn,Vec Vd)482 bool TranslatorVisitor::FRINTM_1(bool Q, Vec Vn, Vec Vd) {
483     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, FP::RoundingMode::TowardsMinusInfinity, false);
484 }
485 
FRINTM_2(bool Q,bool sz,Vec Vn,Vec Vd)486 bool TranslatorVisitor::FRINTM_2(bool Q, bool sz, Vec Vn, Vec Vd) {
487     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, FP::RoundingMode::TowardsMinusInfinity, false);
488 }
489 
FRINTP_1(bool Q,Vec Vn,Vec Vd)490 bool TranslatorVisitor::FRINTP_1(bool Q, Vec Vn, Vec Vd) {
491     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, FP::RoundingMode::TowardsPlusInfinity, false);
492 }
493 
FRINTP_2(bool Q,bool sz,Vec Vn,Vec Vd)494 bool TranslatorVisitor::FRINTP_2(bool Q, bool sz, Vec Vn, Vec Vd) {
495     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, FP::RoundingMode::TowardsPlusInfinity, false);
496 }
497 
FRINTZ_1(bool Q,Vec Vn,Vec Vd)498 bool TranslatorVisitor::FRINTZ_1(bool Q, Vec Vn, Vec Vd) {
499     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, FP::RoundingMode::TowardsZero, false);
500 }
501 
FRINTZ_2(bool Q,bool sz,Vec Vn,Vec Vd)502 bool TranslatorVisitor::FRINTZ_2(bool Q, bool sz, Vec Vn, Vec Vd) {
503     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, FP::RoundingMode::TowardsZero, false);
504 }
505 
FRINTA_1(bool Q,Vec Vn,Vec Vd)506 bool TranslatorVisitor::FRINTA_1(bool Q, Vec Vn, Vec Vd) {
507     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, FP::RoundingMode::ToNearest_TieAwayFromZero, false);
508 }
509 
FRINTA_2(bool Q,bool sz,Vec Vn,Vec Vd)510 bool TranslatorVisitor::FRINTA_2(bool Q, bool sz, Vec Vn, Vec Vd) {
511     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, FP::RoundingMode::ToNearest_TieAwayFromZero, false);
512 }
513 
FRINTX_1(bool Q,Vec Vn,Vec Vd)514 bool TranslatorVisitor::FRINTX_1(bool Q, Vec Vn, Vec Vd) {
515     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, ir.current_location->FPCR().RMode(), true);
516 }
517 
FRINTX_2(bool Q,bool sz,Vec Vn,Vec Vd)518 bool TranslatorVisitor::FRINTX_2(bool Q, bool sz, Vec Vn, Vec Vd) {
519     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd, ir.current_location->FPCR().RMode(), true);
520 }
521 
FRINTI_1(bool Q,Vec Vn,Vec Vd)522 bool TranslatorVisitor::FRINTI_1(bool Q, Vec Vn, Vec Vd) {
523     return FloatRoundToIntegralHalfPrecision(*this, Q, Vn, Vd, ir.current_location->FPCR().RMode(), false);
524 }
525 
FRINTI_2(bool Q,bool sz,Vec Vn,Vec Vd)526 bool TranslatorVisitor::FRINTI_2(bool Q, bool sz, Vec Vn, Vec Vd) {
527     return FloatRoundToIntegral(*this, Q, sz, Vn, Vd,ir.current_location->FPCR().RMode(), false);
528 }
529 
FRECPE_3(bool Q,Vec Vn,Vec Vd)530 bool TranslatorVisitor::FRECPE_3(bool Q, Vec Vn, Vec Vd) {
531     const size_t datasize = Q ? 128 : 64;
532     const size_t esize = 16;
533 
534     const IR::U128 operand = V(datasize, Vn);
535     const IR::U128 result = ir.FPVectorRecipEstimate(esize, operand);
536 
537     V(datasize, Vd, result);
538     return true;
539 }
540 
FRECPE_4(bool Q,bool sz,Vec Vn,Vec Vd)541 bool TranslatorVisitor::FRECPE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
542     if (sz && !Q) {
543         return ReservedValue();
544     }
545 
546     const size_t datasize = Q ? 128 : 64;
547     const size_t esize = sz ? 64 : 32;
548 
549     const IR::U128 operand = V(datasize, Vn);
550     const IR::U128 result = ir.FPVectorRecipEstimate(esize, operand);
551 
552     V(datasize, Vd, result);
553     return true;
554 }
555 
FSQRT_2(bool Q,bool sz,Vec Vn,Vec Vd)556 bool TranslatorVisitor::FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd) {
557     if (sz && !Q) {
558         return ReservedValue();
559     }
560 
561     const size_t datasize = Q ? 128 : 64;
562     const size_t esize = sz ? 64 : 32;
563 
564     const IR::U128 operand = V(datasize, Vn);
565     const IR::U128 result = ir.FPVectorSqrt(esize, operand);
566 
567     V(datasize, Vd, result);
568     return true;
569 }
570 
FRSQRTE_3(bool Q,Vec Vn,Vec Vd)571 bool TranslatorVisitor::FRSQRTE_3(bool Q, Vec Vn, Vec Vd) {
572     const size_t datasize = Q ? 128 : 64;
573     const size_t esize = 16;
574 
575     const IR::U128 operand = V(datasize, Vn);
576     const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand);
577 
578     V(datasize, Vd, result);
579     return true;
580 }
581 
FRSQRTE_4(bool Q,bool sz,Vec Vn,Vec Vd)582 bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) {
583     if (sz && !Q) {
584         return ReservedValue();
585     }
586 
587     const size_t datasize = Q ? 128 : 64;
588     const size_t esize = sz ? 64 : 32;
589 
590     const IR::U128 operand = V(datasize, Vn);
591     const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand);
592 
593     V(datasize, Vd, result);
594     return true;
595 }
596 
FNEG_1(bool Q,Vec Vn,Vec Vd)597 bool TranslatorVisitor::FNEG_1(bool Q, Vec Vn, Vec Vd) {
598     const size_t datasize = Q ? 128 : 64;
599 
600     const IR::U128 operand = V(datasize, Vn);
601     const IR::U128 mask = ir.VectorBroadcast(64, I(64, 0x8000800080008000));
602     const IR::U128 result = ir.VectorEor(operand, mask);
603 
604     V(datasize, Vd, result);
605     return true;
606 }
607 
FNEG_2(bool Q,bool sz,Vec Vn,Vec Vd)608 bool TranslatorVisitor::FNEG_2(bool Q, bool sz, Vec Vn, Vec Vd) {
609     if (sz && !Q) {
610         return ReservedValue();
611     }
612 
613     const size_t datasize = Q ? 128 : 64;
614     const size_t esize = sz ? 64 : 32;
615     const size_t mask_value = esize == 64 ? 0x8000000000000000 : 0x8000000080000000;
616 
617     const IR::U128 operand = V(datasize, Vn);
618     const IR::U128 mask = Q ? ir.VectorBroadcast(esize, I(esize, mask_value)) : ir.VectorBroadcastLower(esize, I(esize, mask_value));
619     const IR::U128 result = ir.VectorEor(operand, mask);
620 
621     V(datasize, Vd, result);
622     return true;
623 }
624 
NEG_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)625 bool TranslatorVisitor::NEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
626     if (size == 0b11 && !Q) {
627         return ReservedValue();
628     }
629     const size_t esize = 8 << size.ZeroExtend<size_t>();
630     const size_t datasize = Q ? 128 : 64;
631 
632     const IR::U128 operand = V(datasize, Vn);
633     const IR::U128 zero = ir.ZeroVector();
634     const IR::U128 result = ir.VectorSub(esize, zero, operand);
635 
636     V(datasize, Vd, result);
637     return true;
638 }
639 
SQXTUN_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)640 bool TranslatorVisitor::SQXTUN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
641     return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToUnsigned);
642 }
643 
SQXTN_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)644 bool TranslatorVisitor::SQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
645     return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorSignedSaturatedNarrowToSigned);
646 }
647 
UQXTN_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)648 bool TranslatorVisitor::UQXTN_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
649     return SaturatedNarrow(*this, Q, size, Vn, Vd, &IR::IREmitter::VectorUnsignedSaturatedNarrow);
650 }
651 
NOT(bool Q,Vec Vn,Vec Vd)652 bool TranslatorVisitor::NOT(bool Q, Vec Vn, Vec Vd) {
653     const size_t datasize = Q ? 128 : 64;
654 
655     const IR::U128 operand = V(datasize, Vn);
656     IR::U128 result = ir.VectorNot(operand);
657 
658     if (datasize == 64) {
659         result = ir.VectorZeroUpper(result);
660     }
661 
662     V(datasize, Vd, result);
663     return true;
664 }
665 
RBIT_asimd(bool Q,Vec Vn,Vec Vd)666 bool TranslatorVisitor::RBIT_asimd(bool Q, Vec Vn, Vec Vd) {
667     const size_t datasize = Q ? 128 : 64;
668 
669     const IR::U128 data = V(datasize, Vn);
670     const IR::U128 result = ir.VectorReverseBits(data);
671 
672     V(datasize, Vd, result);
673     return true;
674 }
675 
REV16_asimd(bool Q,Imm<2> size,Vec Vn,Vec Vd)676 bool TranslatorVisitor::REV16_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
677     if (size != 0) {
678         return UnallocatedEncoding();
679     }
680 
681     const size_t datasize = Q ? 128 : 64;
682     constexpr size_t esize = 16;
683 
684     const IR::U128 data = V(datasize, Vn);
685     const IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, 8),
686                                         ir.VectorLogicalShiftLeft(esize, data, 8));
687 
688     V(datasize, Vd, result);
689     return true;
690 }
691 
REV32_asimd(bool Q,Imm<2> size,Vec Vn,Vec Vd)692 bool TranslatorVisitor::REV32_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
693     const u32 zext_size = size.ZeroExtend();
694 
695     if (zext_size > 1) {
696         return UnallocatedEncoding();
697     }
698 
699     const size_t datasize = Q ? 128 : 64;
700     const size_t esize = 16 << zext_size;
701     const u8 shift = static_cast<u8>(8 << zext_size);
702 
703     const IR::U128 data = V(datasize, Vn);
704 
705     // TODO: Consider factoring byte swapping code out into its own opcode.
706     //       Technically the rest of the following code can be a PSHUFB
707     //       in the presence of SSSE3.
708     IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
709                                   ir.VectorLogicalShiftLeft(esize, data, shift));
710 
711     // If dealing with 8-bit elements we'll need to shuffle the bytes in each halfword
712     // e.g. Assume the following numbers point out bytes in a 32-bit word, we're essentially
713     //      changing [3, 2, 1, 0] to [2, 3, 0, 1]
714     if (zext_size == 0) {
715         result = ir.VectorShuffleLowHalfwords(result, 0b10110001);
716         result = ir.VectorShuffleHighHalfwords(result, 0b10110001);
717     }
718 
719     V(datasize, Vd, result);
720     return true;
721 }
722 
REV64_asimd(bool Q,Imm<2> size,Vec Vn,Vec Vd)723 bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
724     const u32 zext_size = size.ZeroExtend();
725 
726     if (zext_size >= 3) {
727         return UnallocatedEncoding();
728     }
729 
730     const size_t datasize = Q ? 128 : 64;
731     const size_t esize = 16 << zext_size;
732     const u8 shift = static_cast<u8>(8 << zext_size);
733 
734     const IR::U128 data = V(datasize, Vn);
735 
736     // TODO: Consider factoring byte swapping code out into its own opcode.
737     //       Technically the rest of the following code can be a PSHUFB
738     //       in the presence of SSSE3.
739     IR::U128 result = ir.VectorOr(ir.VectorLogicalShiftRight(esize, data, shift),
740                                   ir.VectorLogicalShiftLeft(esize, data, shift));
741 
742     switch (zext_size) {
743         case 0: // 8-bit elements
744             result = ir.VectorShuffleLowHalfwords(result, 0b00011011);
745             result = ir.VectorShuffleHighHalfwords(result, 0b00011011);
746             break;
747         case 1: // 16-bit elements
748             result = ir.VectorShuffleLowHalfwords(result, 0b01001110);
749             result = ir.VectorShuffleHighHalfwords(result, 0b01001110);
750             break;
751     }
752 
753     V(datasize, Vd, result);
754     return true;
755 }
756 
SQABS_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)757 bool TranslatorVisitor::SQABS_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
758     if (size == 0b11 && !Q) {
759         return ReservedValue();
760     }
761 
762     const size_t esize = 8 << size.ZeroExtend();
763     const size_t datasize = Q ? 128 : 64;
764 
765     const IR::U128 operand = V(datasize, Vn);
766     const IR::U128 result = ir.VectorSignedSaturatedAbs(esize, operand);
767 
768     V(datasize, Vd, result);
769     return true;
770 }
771 
SQNEG_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)772 bool TranslatorVisitor::SQNEG_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
773     if (size == 0b11 && !Q) {
774         return ReservedValue();
775     }
776 
777     const size_t esize = 8 << size.ZeroExtend();
778     const size_t datasize = Q ? 128 : 64;
779 
780     const IR::U128 operand = V(datasize, Vn);
781     const IR::U128 result = ir.VectorSignedSaturatedNeg(esize, operand);
782 
783     V(datasize, Vd, result);
784     return true;
785 }
786 
SUQADD_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)787 bool TranslatorVisitor::SUQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
788     if (size == 0b11 && !Q) {
789         return ReservedValue();
790     }
791 
792     const size_t esize = 8 << size.ZeroExtend();
793     const size_t datasize = Q ? 128 : 64;
794 
795     const IR::U128 operand1 = V(datasize, Vn);
796     const IR::U128 operand2 = V(datasize, Vd);
797     const IR::U128 result = ir.VectorSignedSaturatedAccumulateUnsigned(esize, operand1, operand2);
798 
799     V(datasize, Vd, result);
800     return true;
801 }
802 
USQADD_2(bool Q,Imm<2> size,Vec Vn,Vec Vd)803 bool TranslatorVisitor::USQADD_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
804     if (size == 0b11 && !Q) {
805         return ReservedValue();
806     }
807 
808     const size_t esize = 8 << size.ZeroExtend();
809     const size_t datasize = Q ? 128 : 64;
810 
811     const IR::U128 operand1 = V(datasize, Vn);
812     const IR::U128 operand2 = V(datasize, Vd);
813     const IR::U128 result = ir.VectorUnsignedSaturatedAccumulateSigned(esize, operand1, operand2);
814 
815     V(datasize, Vd, result);
816     return true;
817 }
818 
SADALP(bool Q,Imm<2> size,Vec Vn,Vec Vd)819 bool TranslatorVisitor::SADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
820     return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::Accumulate);
821 }
822 
SADDLP(bool Q,Imm<2> size,Vec Vn,Vec Vd)823 bool TranslatorVisitor::SADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
824     return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Signed, PairedAddLongExtraBehavior::None);
825 }
826 
UADALP(bool Q,Imm<2> size,Vec Vn,Vec Vd)827 bool TranslatorVisitor::UADALP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
828     return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::Accumulate);
829 }
830 
UADDLP(bool Q,Imm<2> size,Vec Vn,Vec Vd)831 bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
832     return PairedAddLong(*this, Q, size, Vn, Vd, Signedness::Unsigned, PairedAddLongExtraBehavior::None);
833 }
834 
URECPE(bool Q,bool sz,Vec Vn,Vec Vd)835 bool TranslatorVisitor::URECPE(bool Q, bool sz, Vec Vn, Vec Vd) {
836     if (sz) {
837         return ReservedValue();
838     }
839 
840     const size_t datasize = Q ? 128 : 64;
841 
842     const IR::U128 operand = V(datasize, Vn);
843     const IR::U128 result = ir.VectorUnsignedRecipEstimate(operand);
844 
845     V(datasize, Vd, result);
846     return true;
847 }
848 
URSQRTE(bool Q,bool sz,Vec Vn,Vec Vd)849 bool TranslatorVisitor::URSQRTE(bool Q, bool sz, Vec Vn, Vec Vd) {
850     if (sz) {
851         return ReservedValue();
852     }
853 
854     const size_t datasize = Q ? 128 : 64;
855 
856     const IR::U128 operand = V(datasize, Vn);
857     const IR::U128 result = ir.VectorUnsignedRecipSqrtEstimate(operand);
858 
859     V(datasize, Vd, result);
860     return true;
861 }
862 
SCVTF_int_4(bool Q,bool sz,Vec Vn,Vec Vd)863 bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
864     return IntegerConvertToFloat(*this, Q, sz, Vn, Vd, Signedness::Signed);
865 }
866 
UCVTF_int_4(bool Q,bool sz,Vec Vn,Vec Vd)867 bool TranslatorVisitor::UCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
868     return IntegerConvertToFloat(*this, Q, sz, Vn, Vd, Signedness::Unsigned);
869 }
870 
SHLL(bool Q,Imm<2> size,Vec Vn,Vec Vd)871 bool TranslatorVisitor::SHLL(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
872     if (size == 0b11) {
873         return ReservedValue();
874     }
875 
876     const size_t esize = 8 << size.ZeroExtend();
877 
878     const IR::U128 operand = ir.VectorZeroExtend(esize, Vpart(64, Vn, Q));
879     const IR::U128 result = ir.VectorLogicalShiftLeft(esize * 2, operand, static_cast<u8>(esize));
880 
881     V(128, Vd, result);
882     return true;
883 }
884 
885 } // namespace Dynarmic::A64
886