1 /* This file is part of the dynarmic project.
2  * Copyright (c) 2018 MerryMage
3  * SPDX-License-Identifier: 0BSD
4  */
5 
6 #include "frontend/A64/translate/impl/impl.h"
7 
8 namespace Dynarmic::A64 {
9 namespace {
10 
11 using ExtensionFunction = IR::U32 (IREmitter::*)(const IR::UAny&);
12 
DotProduct(TranslatorVisitor & v,bool Q,Imm<2> size,Vec Vm,Vec Vn,Vec Vd,ExtensionFunction extension)13 bool DotProduct(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd,
14                 ExtensionFunction extension) {
15     if (size != 0b10) {
16         return v.ReservedValue();
17     }
18 
19     const size_t esize = 8 << size.ZeroExtend();
20     const size_t datasize = Q ? 128 : 64;
21     const size_t elements = datasize / esize;
22 
23     const IR::U128 operand1 = v.V(datasize, Vn);
24     const IR::U128 operand2 = v.V(datasize, Vm);
25     IR::U128 result = v.V(datasize, Vd);
26 
27     for (size_t i = 0; i < elements; i++) {
28         IR::U32 res_element = v.ir.Imm32(0);
29 
30         for (size_t j = 0; j < 4; j++) {
31             const IR::U32 elem1 = (v.ir.*extension)(v.ir.VectorGetElement(8, operand1, 4 * i + j));
32             const IR::U32 elem2 = (v.ir.*extension)(v.ir.VectorGetElement(8, operand2, 4 * i + j));
33 
34             res_element = v.ir.Add(res_element, v.ir.Mul(elem1, elem2));
35         }
36 
37         res_element = v.ir.Add(v.ir.VectorGetElement(32, result, i), res_element);
38         result = v.ir.VectorSetElement(32, result, i, res_element);
39     }
40 
41     v.V(datasize, Vd, result);
42     return true;
43 }
44 
45 } // Anonymous namespace
46 
SDOT_vec(bool Q,Imm<2> size,Vec Vm,Vec Vn,Vec Vd)47 bool TranslatorVisitor::SDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
48     return DotProduct(*this, Q, size, Vm, Vn, Vd, &IREmitter::SignExtendToWord);
49 }
50 
UDOT_vec(bool Q,Imm<2> size,Vec Vm,Vec Vn,Vec Vd)51 bool TranslatorVisitor::UDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
52     return DotProduct(*this, Q, size, Vm, Vn, Vd, &IREmitter::ZeroExtendToWord);
53 }
54 
FCMLA_vec(bool Q,Imm<2> size,Vec Vm,Imm<2> rot,Vec Vn,Vec Vd)55 bool TranslatorVisitor::FCMLA_vec(bool Q, Imm<2> size, Vec Vm, Imm<2> rot, Vec Vn, Vec Vd) {
56     if (size == 0) {
57         return ReservedValue();
58     }
59 
60     if (!Q && size == 0b11) {
61         return ReservedValue();
62     }
63 
64     const size_t esize = 8U << size.ZeroExtend();
65 
66     // TODO: Currently we don't support half-precision floating point
67     if (esize == 16) {
68         return InterpretThisInstruction();
69     }
70 
71     const size_t datasize = Q ? 128 : 64;
72     const size_t num_elements = datasize / esize;
73     const size_t num_iterations = num_elements / 2;
74 
75     const IR::U128 operand1 = V(datasize, Vn);
76     const IR::U128 operand2 = V(datasize, Vm);
77     const IR::U128 operand3 = V(datasize, Vd);
78     IR::U128 result = ir.ZeroVector();
79 
80     IR::U32U64 element1;
81     IR::U32U64 element2;
82     IR::U32U64 element3;
83     IR::U32U64 element4;
84     for (size_t e = 0; e < num_iterations; ++e) {
85         const size_t first = e * 2;
86         const size_t second = first + 1;
87 
88         switch (rot.ZeroExtend()) {
89         case 0b00: // 0 degrees
90             element1 = ir.VectorGetElement(esize, operand2, first);
91             element2 = ir.VectorGetElement(esize, operand1, first);
92             element3 = ir.VectorGetElement(esize, operand2, second);
93             element4 = ir.VectorGetElement(esize, operand1, first);
94             break;
95         case 0b01: // 90 degrees
96             element1 = ir.FPNeg(ir.VectorGetElement(esize, operand2, second));
97             element2 = ir.VectorGetElement(esize, operand1, second);
98             element3 = ir.VectorGetElement(esize, operand2, first);
99             element4 = ir.VectorGetElement(esize, operand1, second);
100             break;
101         case 0b10: // 180 degrees
102             element1 = ir.FPNeg(ir.VectorGetElement(esize, operand2, first));
103             element2 = ir.VectorGetElement(esize, operand1, first);
104             element3 = ir.FPNeg(ir.VectorGetElement(esize, operand2, second));
105             element4 = ir.VectorGetElement(esize, operand1, first);
106             break;
107         case 0b11: // 270 degrees
108             element1 = ir.VectorGetElement(esize, operand2, second);
109             element2 = ir.VectorGetElement(esize, operand1, second);
110             element3 = ir.FPNeg(ir.VectorGetElement(esize, operand2, first));
111             element4 = ir.VectorGetElement(esize, operand1, second);
112             break;
113         }
114 
115         const IR::U32U64 operand3_elem1 = ir.VectorGetElement(esize, operand3, first);
116         const IR::U32U64 operand3_elem2 = ir.VectorGetElement(esize, operand3, second);
117 
118         result = ir.VectorSetElement(esize, result, first, ir.FPMulAdd(operand3_elem1, element2, element1));
119         result = ir.VectorSetElement(esize, result, second, ir.FPMulAdd(operand3_elem2, element4, element3));
120     }
121 
122     ir.SetQ(Vd, result);
123     return true;
124 }
125 
FCADD_vec(bool Q,Imm<2> size,Vec Vm,Imm<1> rot,Vec Vn,Vec Vd)126 bool TranslatorVisitor::FCADD_vec(bool Q, Imm<2> size, Vec Vm, Imm<1> rot, Vec Vn, Vec Vd) {
127     if (size == 0) {
128         return ReservedValue();
129     }
130 
131     if (!Q && size == 0b11) {
132         return ReservedValue();
133     }
134 
135     const size_t esize = 8U << size.ZeroExtend();
136 
137     // TODO: Currently we don't support half-precision floating point
138     if (esize == 16) {
139         return InterpretThisInstruction();
140     }
141 
142     const size_t datasize = Q ? 128 : 64;
143     const size_t num_elements = datasize / esize;
144     const size_t num_iterations = num_elements / 2;
145 
146     const IR::U128 operand1 = V(datasize, Vn);
147     const IR::U128 operand2 = V(datasize, Vm);
148     IR::U128 result = ir.ZeroVector();
149 
150     IR::U32U64 element1;
151     IR::U32U64 element3;
152     for (size_t e = 0; e < num_iterations; ++e) {
153         const size_t first = e * 2;
154         const size_t second = first + 1;
155 
156         if (rot == 0) {
157             element1 = ir.FPNeg(ir.VectorGetElement(esize, operand2, second));
158             element3 = ir.VectorGetElement(esize, operand2, first);
159         } else if (rot == 1) {
160             element1 = ir.VectorGetElement(esize, operand2, second);
161             element3 = ir.FPNeg(ir.VectorGetElement(esize, operand2, first));
162         }
163 
164         const IR::U32U64 operand1_elem1 = ir.VectorGetElement(esize, operand1, first);
165         const IR::U32U64 operand1_elem3 = ir.VectorGetElement(esize, operand1, second);
166 
167         result = ir.VectorSetElement(esize, result, first, ir.FPAdd(operand1_elem1, element1));
168         result = ir.VectorSetElement(esize, result, second, ir.FPAdd(operand1_elem3, element3));
169     }
170 
171     ir.SetQ(Vd, result);
172     return true;
173 }
174 
175 } // namespace Dynarmic::A64
176