1 #include "Jitter_CodeGen_AArch32.h"
2
3 using namespace Jitter;
4
LoadMemory128AddressInRegister(CAArch32Assembler::REGISTER dstReg,CSymbol * symbol,uint32 offset)5 void CCodeGen_AArch32::LoadMemory128AddressInRegister(CAArch32Assembler::REGISTER dstReg, CSymbol* symbol, uint32 offset)
6 {
7 switch(symbol->m_type)
8 {
9 case SYM_RELATIVE128:
10 LoadRelative128AddressInRegister(dstReg, symbol, offset);
11 break;
12 case SYM_TEMPORARY128:
13 LoadTemporary128AddressInRegister(dstReg, symbol, offset);
14 break;
15 default:
16 assert(0);
17 break;
18 }
19 }
20
LoadRelative128AddressInRegister(CAArch32Assembler::REGISTER dstReg,CSymbol * symbol,uint32 offset)21 void CCodeGen_AArch32::LoadRelative128AddressInRegister(CAArch32Assembler::REGISTER dstReg, CSymbol* symbol, uint32 offset)
22 {
23 assert(symbol->m_type == SYM_RELATIVE128);
24
25 uint8 immediate = 0;
26 uint8 shiftAmount = 0;
27 if(!TryGetAluImmediateParams(symbol->m_valueLow + offset, immediate, shiftAmount))
28 {
29 throw std::runtime_error("Failed to build immediate for symbol.");
30 }
31 m_assembler.Add(dstReg, g_baseRegister, CAArch32Assembler::MakeImmediateAluOperand(immediate, shiftAmount));
32 }
33
LoadTemporary128AddressInRegister(CAArch32Assembler::REGISTER dstReg,CSymbol * symbol,uint32 offset)34 void CCodeGen_AArch32::LoadTemporary128AddressInRegister(CAArch32Assembler::REGISTER dstReg, CSymbol* symbol, uint32 offset)
35 {
36 assert(symbol->m_type == SYM_TEMPORARY128);
37
38 uint8 immediate = 0;
39 uint8 shiftAmount = 0;
40 if(!TryGetAluImmediateParams(symbol->m_stackLocation + m_stackLevel + offset, immediate, shiftAmount))
41 {
42 throw std::runtime_error("Failed to build immediate for symbol.");
43 }
44 m_assembler.Add(dstReg, CAArch32Assembler::rSP, CAArch32Assembler::MakeImmediateAluOperand(immediate, shiftAmount));
45 }
46
LoadTemporary256ElementAddressInRegister(CAArch32Assembler::REGISTER dstReg,CSymbol * symbol,uint32 offset)47 void CCodeGen_AArch32::LoadTemporary256ElementAddressInRegister(CAArch32Assembler::REGISTER dstReg, CSymbol* symbol, uint32 offset)
48 {
49 assert(symbol->m_type == SYM_TEMPORARY256);
50
51 uint8 immediate = 0;
52 uint8 shiftAmount = 0;
53 if(!TryGetAluImmediateParams(symbol->m_stackLocation + m_stackLevel + offset, immediate, shiftAmount))
54 {
55 throw std::runtime_error("Failed to build immediate for symbol.");
56 }
57 m_assembler.Add(dstReg, CAArch32Assembler::rSP, CAArch32Assembler::MakeImmediateAluOperand(immediate, shiftAmount));
58 }
59
60 template <typename MDOP>
Emit_Md_MemMem(const STATEMENT & statement)61 void CCodeGen_AArch32::Emit_Md_MemMem(const STATEMENT& statement)
62 {
63 auto dst = statement.dst->GetSymbol().get();
64 auto src1 = statement.src1->GetSymbol().get();
65
66 auto dstAddrReg = CAArch32Assembler::r0;
67 auto src1AddrReg = CAArch32Assembler::r1;
68 auto dstReg = CAArch32Assembler::q0;
69 auto src1Reg = CAArch32Assembler::q1;
70
71 LoadMemory128AddressInRegister(dstAddrReg, dst);
72 LoadMemory128AddressInRegister(src1AddrReg, src1);
73
74 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
75 ((m_assembler).*(MDOP::OpReg()))(dstReg, src1Reg);
76 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
77 }
78
79 template <typename MDOP>
Emit_Md_MemMemMem(const STATEMENT & statement)80 void CCodeGen_AArch32::Emit_Md_MemMemMem(const STATEMENT& statement)
81 {
82 auto dst = statement.dst->GetSymbol().get();
83 auto src1 = statement.src1->GetSymbol().get();
84 auto src2 = statement.src2->GetSymbol().get();
85
86 auto dstAddrReg = CAArch32Assembler::r0;
87 auto src1AddrReg = CAArch32Assembler::r1;
88 auto src2AddrReg = CAArch32Assembler::r2;
89 auto dstReg = CAArch32Assembler::q0;
90 auto src1Reg = CAArch32Assembler::q1;
91 auto src2Reg = CAArch32Assembler::q2;
92
93 LoadMemory128AddressInRegister(dstAddrReg, dst);
94 LoadMemory128AddressInRegister(src1AddrReg, src1);
95 LoadMemory128AddressInRegister(src2AddrReg, src2);
96
97 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
98 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
99 ((m_assembler).*(MDOP::OpReg()))(dstReg, src1Reg, src2Reg);
100 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
101 }
102
103 template <typename MDSHIFTOP>
Emit_Md_Shift_MemMemCst(const STATEMENT & statement)104 void CCodeGen_AArch32::Emit_Md_Shift_MemMemCst(const STATEMENT& statement)
105 {
106 auto dst = statement.dst->GetSymbol().get();
107 auto src1 = statement.src1->GetSymbol().get();
108 auto src2 = statement.src2->GetSymbol().get();
109
110 auto dstAddrReg = CAArch32Assembler::r0;
111 auto src1AddrReg = CAArch32Assembler::r1;
112 auto dstReg = CAArch32Assembler::q0;
113 auto src1Reg = CAArch32Assembler::q1;
114
115 LoadMemory128AddressInRegister(dstAddrReg, dst);
116 LoadMemory128AddressInRegister(src1AddrReg, src1);
117
118 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
119 ((m_assembler).*(MDSHIFTOP::OpReg()))(dstReg, src1Reg, src2->m_valueLow);
120 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
121 }
122
123 template <uint32 condition>
Emit_Md_Test_VarMem(const STATEMENT & statement)124 void CCodeGen_AArch32::Emit_Md_Test_VarMem(const STATEMENT& statement)
125 {
126 auto dst = statement.dst->GetSymbol().get();
127 auto src1 = statement.src1->GetSymbol().get();
128
129 auto src1AddrReg = CAArch32Assembler::r0;
130 auto src1Reg = CAArch32Assembler::q0;
131
132 LoadMemory128AddressInRegister(src1AddrReg, src1);
133 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
134
135 auto dstReg = PrepareSymbolRegisterDef(dst, CAArch32Assembler::r0);
136
137 static CAArch32Assembler::REGISTER regs[4] =
138 {
139 CAArch32Assembler::r2,
140 CAArch32Assembler::r3
141 };
142
143 m_assembler.Eor(dstReg, dstReg, dstReg);
144 for(unsigned int i = 0; i < 4; i++)
145 {
146 m_assembler.Vmov(regs[i & 1], static_cast<CAArch32Assembler::DOUBLE_REGISTER>(src1Reg + (i / 2)), i & 1);
147 m_assembler.Tst(regs[i & 1], regs[i & 1]);
148 uint8 immediate = 0, shiftAmount = 0;
149 if(!TryGetAluImmediateParams(1 << (3 - i), immediate, shiftAmount))
150 {
151 assert(false);
152 }
153 m_assembler.Or(static_cast<CAArch32Assembler::CONDITION>(condition), dstReg, dstReg,
154 CAArch32Assembler::MakeImmediateAluOperand(immediate, shiftAmount));
155 }
156
157 CommitSymbolRegister(dst, dstReg);
158 }
159
Emit_Md_Mov_MemMem(const STATEMENT & statement)160 void CCodeGen_AArch32::Emit_Md_Mov_MemMem(const STATEMENT& statement)
161 {
162 auto dst = statement.dst->GetSymbol().get();
163 auto src1 = statement.src1->GetSymbol().get();
164
165 auto dstAddrReg = CAArch32Assembler::r0;
166 auto src1AddrReg = CAArch32Assembler::r1;
167 auto tmpReg = CAArch32Assembler::q0;
168 LoadMemory128AddressInRegister(dstAddrReg, dst);
169 LoadMemory128AddressInRegister(src1AddrReg, src1);
170 m_assembler.Vld1_32x4(tmpReg, src1AddrReg);
171 m_assembler.Vst1_32x4(tmpReg, dstAddrReg);
172 }
173
Emit_Md_Not_MemMem(const STATEMENT & statement)174 void CCodeGen_AArch32::Emit_Md_Not_MemMem(const STATEMENT& statement)
175 {
176 auto dst = statement.dst->GetSymbol().get();
177 auto src1 = statement.src1->GetSymbol().get();
178
179 auto dstAddrReg = CAArch32Assembler::r0;
180 auto src1AddrReg = CAArch32Assembler::r1;
181 auto zeroReg = CAArch32Assembler::q0;
182 auto tmpReg = CAArch32Assembler::q1;
183
184 LoadMemory128AddressInRegister(dstAddrReg, dst);
185 LoadMemory128AddressInRegister(src1AddrReg, src1);
186
187 m_assembler.Vld1_32x4(tmpReg, src1AddrReg);
188 m_assembler.Veor(zeroReg, zeroReg, zeroReg);
189 m_assembler.Vorn(tmpReg, zeroReg, tmpReg);
190 m_assembler.Vst1_32x4(tmpReg, dstAddrReg);
191 }
192
Emit_Md_DivS_MemMemMem(const STATEMENT & statement)193 void CCodeGen_AArch32::Emit_Md_DivS_MemMemMem(const STATEMENT& statement)
194 {
195 auto dst = statement.dst->GetSymbol().get();
196 auto src1 = statement.src1->GetSymbol().get();
197 auto src2 = statement.src2->GetSymbol().get();
198
199 auto dstAddrReg = CAArch32Assembler::r0;
200 auto src1AddrReg = CAArch32Assembler::r1;
201 auto src2AddrReg = CAArch32Assembler::r2;
202 auto dstReg = CAArch32Assembler::q0;
203 auto src1Reg = CAArch32Assembler::q1;
204 auto src2Reg = CAArch32Assembler::q2;
205
206 LoadMemory128AddressInRegister(dstAddrReg, dst);
207 LoadMemory128AddressInRegister(src1AddrReg, src1);
208 LoadMemory128AddressInRegister(src2AddrReg, src2);
209
210 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
211 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
212
213 //No vector floating point divide on NEON, gotta do it 4x
214 for(unsigned int i = 0; i < 4; i++)
215 {
216 auto subDstReg = static_cast<CAArch32Assembler::SINGLE_REGISTER>(dstReg * 2 + i);
217 auto subSrc1Reg = static_cast<CAArch32Assembler::SINGLE_REGISTER>(src1Reg * 2 + i);
218 auto subSrc2Reg = static_cast<CAArch32Assembler::SINGLE_REGISTER>(src2Reg * 2 + i);
219 m_assembler.Vdiv_F32(subDstReg, subSrc1Reg, subSrc2Reg);
220 }
221
222 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
223 }
224
Emit_Md_Srl256_MemMemCst(const STATEMENT & statement)225 void CCodeGen_AArch32::Emit_Md_Srl256_MemMemCst(const STATEMENT& statement)
226 {
227 auto dst = statement.dst->GetSymbol().get();
228 auto src1 = statement.src1->GetSymbol().get();
229 auto src2 = statement.src2->GetSymbol().get();
230
231 assert(src1->m_type == SYM_TEMPORARY256);
232 assert(src2->m_type == SYM_CONSTANT);
233
234 auto dstAddrReg = CAArch32Assembler::r0;
235 auto src1AddrReg = CAArch32Assembler::r1;
236 auto dstReg = CAArch32Assembler::q0;
237
238 uint32 offset = (src2->m_valueLow & 0x7F) / 8;
239
240 LoadMemory128AddressInRegister(dstAddrReg, dst);
241 LoadTemporary256ElementAddressInRegister(src1AddrReg, src1, offset);
242
243 m_assembler.Vld1_32x4_u(dstReg, src1AddrReg);
244 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
245 }
246
Emit_Md_Srl256_MemMemVar(const STATEMENT & statement)247 void CCodeGen_AArch32::Emit_Md_Srl256_MemMemVar(const STATEMENT& statement)
248 {
249 auto dst = statement.dst->GetSymbol().get();
250 auto src1 = statement.src1->GetSymbol().get();
251 auto src2 = statement.src2->GetSymbol().get();
252
253 assert(src1->m_type == SYM_TEMPORARY256);
254
255 auto offsetRegister = CAArch32Assembler::r0;
256 auto dstAddrReg = CAArch32Assembler::r1;
257 auto src1AddrReg = CAArch32Assembler::r2;
258 auto src2Register = PrepareSymbolRegisterUse(src2, CAArch32Assembler::r3);
259
260 auto dstReg = CAArch32Assembler::q0;
261
262 auto offsetShift = CAArch32Assembler::MakeConstantShift(CAArch32Assembler::SHIFT_LSR, 3);
263
264 LoadMemory128AddressInRegister(dstAddrReg, dst);
265 LoadTemporary256ElementAddressInRegister(src1AddrReg, src1, 0);
266
267 //Compute offset and modify address
268 m_assembler.And(offsetRegister, src2Register, CAArch32Assembler::MakeImmediateAluOperand(0x7F, 0));
269 m_assembler.Mov(offsetRegister, CAArch32Assembler::MakeRegisterAluOperand(offsetRegister, offsetShift));
270 m_assembler.Add(src1AddrReg, src1AddrReg, offsetRegister);
271
272 m_assembler.Vld1_32x4_u(dstReg, src1AddrReg);
273 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
274 }
275
Emit_Md_LoadFromRef_MemMem(const STATEMENT & statement)276 void CCodeGen_AArch32::Emit_Md_LoadFromRef_MemMem(const STATEMENT& statement)
277 {
278 auto dst = statement.dst->GetSymbol().get();
279 auto src1 = statement.src1->GetSymbol().get();
280
281 auto src1AddrReg = CAArch32Assembler::r0;
282 auto dstAddrReg = CAArch32Assembler::r1;
283
284 auto dstReg = CAArch32Assembler::q0;
285
286 LoadMemory128AddressInRegister(dstAddrReg, dst);
287 LoadMemoryReferenceInRegister(src1AddrReg, src1);
288
289 m_assembler.Vld1_32x4(dstReg, src1AddrReg);
290 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
291 }
292
Emit_Md_StoreAtRef_MemMem(const STATEMENT & statement)293 void CCodeGen_AArch32::Emit_Md_StoreAtRef_MemMem(const STATEMENT& statement)
294 {
295 auto src1 = statement.src1->GetSymbol().get();
296 auto src2 = statement.src2->GetSymbol().get();
297
298 auto src1AddrReg = CAArch32Assembler::r0;
299 auto src2AddrReg = CAArch32Assembler::r1;
300
301 auto src2Reg = CAArch32Assembler::q0;
302
303 LoadMemoryReferenceInRegister(src1AddrReg, src1);
304 LoadMemory128AddressInRegister(src2AddrReg, src2);
305
306 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
307 m_assembler.Vst1_32x4(src2Reg, src1AddrReg);
308 }
309
Emit_Md_MovMasked_MemMemMem(const STATEMENT & statement)310 void CCodeGen_AArch32::Emit_Md_MovMasked_MemMemMem(const STATEMENT& statement)
311 {
312 auto dst = statement.dst->GetSymbol().get();
313 auto src1 = statement.src1->GetSymbol().get();
314 auto src2 = statement.src2->GetSymbol().get();
315
316 assert(dst->Equals(src1));
317
318 auto mask = static_cast<uint8>(statement.jmpCondition);
319
320 auto dstAddrReg = CAArch32Assembler::r0;
321 auto src2AddrReg = CAArch32Assembler::r2;
322 auto tmpReg = CAArch32Assembler::r3;
323 auto dstReg = CAArch32Assembler::q0;
324 auto src2Reg = CAArch32Assembler::q2;
325 auto dstRegLo = static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 0);
326 auto dstRegHi = static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 1);
327 auto src2RegLo = static_cast<CAArch32Assembler::DOUBLE_REGISTER>(src2Reg + 0);
328 auto src2RegHi = static_cast<CAArch32Assembler::DOUBLE_REGISTER>(src2Reg + 1);
329
330 LoadMemory128AddressInRegister(dstAddrReg, dst);
331 LoadMemory128AddressInRegister(src2AddrReg, src2);
332
333 m_assembler.Vld1_32x4(dstReg, dstAddrReg);
334 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
335 for(unsigned int i = 0; i < 4; i++)
336 {
337 if(mask & (1 << i))
338 {
339 m_assembler.Vmov(tmpReg, (i & 2) ? src2RegHi : src2RegLo, (i & 1));
340 m_assembler.Vmov((i & 2) ? dstRegHi : dstRegLo, tmpReg, (i & 1));
341 }
342 }
343
344 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
345 }
346
Emit_Md_Expand_MemReg(const STATEMENT & statement)347 void CCodeGen_AArch32::Emit_Md_Expand_MemReg(const STATEMENT& statement)
348 {
349 auto dst = statement.dst->GetSymbol().get();
350 auto src1 = statement.src1->GetSymbol().get();
351
352 auto dstAddrReg = CAArch32Assembler::r0;
353 auto tmpReg = CAArch32Assembler::q0;
354
355 LoadMemory128AddressInRegister(dstAddrReg, dst);
356
357 m_assembler.Vdup(tmpReg, g_registers[src1->m_valueLow]);
358 m_assembler.Vst1_32x4(tmpReg, dstAddrReg);
359 }
360
Emit_Md_Expand_MemMem(const STATEMENT & statement)361 void CCodeGen_AArch32::Emit_Md_Expand_MemMem(const STATEMENT& statement)
362 {
363 auto dst = statement.dst->GetSymbol().get();
364 auto src1 = statement.src1->GetSymbol().get();
365
366 auto dstAddrReg = CAArch32Assembler::r0;
367 auto src1Reg = CAArch32Assembler::r1;
368 auto tmpReg = CAArch32Assembler::q0;
369
370 LoadMemoryInRegister(src1Reg, src1);
371 LoadMemory128AddressInRegister(dstAddrReg, dst);
372
373 m_assembler.Vdup(tmpReg, src1Reg);
374 m_assembler.Vst1_32x4(tmpReg, dstAddrReg);
375 }
376
Emit_Md_Expand_MemCst(const STATEMENT & statement)377 void CCodeGen_AArch32::Emit_Md_Expand_MemCst(const STATEMENT& statement)
378 {
379 auto dst = statement.dst->GetSymbol().get();
380 auto src1 = statement.src1->GetSymbol().get();
381
382 auto dstAddrReg = CAArch32Assembler::r0;
383 auto src1Reg = CAArch32Assembler::r1;
384 auto tmpReg = CAArch32Assembler::q0;
385
386 LoadConstantInRegister(src1Reg, src1->m_valueLow);
387 LoadMemory128AddressInRegister(dstAddrReg, dst);
388
389 m_assembler.Vdup(tmpReg, src1Reg);
390 m_assembler.Vst1_32x4(tmpReg, dstAddrReg);
391 }
392
Emit_Md_PackHB_MemMemMem(const STATEMENT & statement)393 void CCodeGen_AArch32::Emit_Md_PackHB_MemMemMem(const STATEMENT& statement)
394 {
395 auto dst = statement.dst->GetSymbol().get();
396 auto src1 = statement.src1->GetSymbol().get();
397 auto src2 = statement.src2->GetSymbol().get();
398
399 auto dstAddrReg = CAArch32Assembler::r0;
400 auto src1AddrReg = CAArch32Assembler::r1;
401 auto src2AddrReg = CAArch32Assembler::r2;
402 auto dstReg = CAArch32Assembler::q0;
403 auto src1Reg = CAArch32Assembler::q1;
404 auto src2Reg = CAArch32Assembler::q2;
405
406 LoadMemory128AddressInRegister(dstAddrReg, dst);
407 LoadMemory128AddressInRegister(src1AddrReg, src1);
408 LoadMemory128AddressInRegister(src2AddrReg, src2);
409
410 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
411 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
412 m_assembler.Vmovn_I16(static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 1), src1Reg);
413 m_assembler.Vmovn_I16(static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 0), src2Reg);
414 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
415 }
416
Emit_Md_PackWH_MemMemMem(const STATEMENT & statement)417 void CCodeGen_AArch32::Emit_Md_PackWH_MemMemMem(const STATEMENT& statement)
418 {
419 auto dst = statement.dst->GetSymbol().get();
420 auto src1 = statement.src1->GetSymbol().get();
421 auto src2 = statement.src2->GetSymbol().get();
422
423 auto dstAddrReg = CAArch32Assembler::r0;
424 auto src1AddrReg = CAArch32Assembler::r1;
425 auto src2AddrReg = CAArch32Assembler::r2;
426 auto dstReg = CAArch32Assembler::q0;
427 auto src1Reg = CAArch32Assembler::q1;
428 auto src2Reg = CAArch32Assembler::q2;
429
430 LoadMemory128AddressInRegister(dstAddrReg, dst);
431 LoadMemory128AddressInRegister(src1AddrReg, src1);
432 LoadMemory128AddressInRegister(src2AddrReg, src2);
433
434 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
435 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
436 m_assembler.Vmovn_I32(static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 1), src1Reg);
437 m_assembler.Vmovn_I32(static_cast<CAArch32Assembler::DOUBLE_REGISTER>(dstReg + 0), src2Reg);
438 m_assembler.Vst1_32x4(dstReg, dstAddrReg);
439 }
440
441 template <uint32 offset>
Emit_Md_UnpackBH_MemMemMem(const STATEMENT & statement)442 void CCodeGen_AArch32::Emit_Md_UnpackBH_MemMemMem(const STATEMENT& statement)
443 {
444 auto dst = statement.dst->GetSymbol().get();
445 auto src1 = statement.src1->GetSymbol().get();
446 auto src2 = statement.src2->GetSymbol().get();
447
448 auto dstAddrReg = CAArch32Assembler::r0;
449 auto src1AddrReg = CAArch32Assembler::r1;
450 auto src2AddrReg = CAArch32Assembler::r2;
451 auto src1Reg = CAArch32Assembler::d0;
452 auto src2Reg = CAArch32Assembler::d1;
453
454 LoadMemory128AddressInRegister(dstAddrReg, dst);
455 LoadMemory128AddressInRegister(src1AddrReg, src1, offset);
456 LoadMemory128AddressInRegister(src2AddrReg, src2, offset);
457
458 //Warning: VZIP modifies both registers
459 m_assembler.Vld1_32x2(src1Reg, src2AddrReg);
460 m_assembler.Vld1_32x2(src2Reg, src1AddrReg);
461 m_assembler.Vzip_I8(src1Reg, src2Reg);
462 m_assembler.Vst1_32x4(static_cast<CAArch32Assembler::QUAD_REGISTER>(src1Reg), dstAddrReg);
463 }
464
465 template <uint32 offset>
Emit_Md_UnpackHW_MemMemMem(const STATEMENT & statement)466 void CCodeGen_AArch32::Emit_Md_UnpackHW_MemMemMem(const STATEMENT& statement)
467 {
468 auto dst = statement.dst->GetSymbol().get();
469 auto src1 = statement.src1->GetSymbol().get();
470 auto src2 = statement.src2->GetSymbol().get();
471
472 auto dstAddrReg = CAArch32Assembler::r0;
473 auto src1AddrReg = CAArch32Assembler::r1;
474 auto src2AddrReg = CAArch32Assembler::r2;
475 auto src1Reg = CAArch32Assembler::d0;
476 auto src2Reg = CAArch32Assembler::d1;
477
478 LoadMemory128AddressInRegister(dstAddrReg, dst);
479 LoadMemory128AddressInRegister(src1AddrReg, src1, offset);
480 LoadMemory128AddressInRegister(src2AddrReg, src2, offset);
481
482 //Warning: VZIP modifies both registers
483 m_assembler.Vld1_32x2(src1Reg, src2AddrReg);
484 m_assembler.Vld1_32x2(src2Reg, src1AddrReg);
485 m_assembler.Vzip_I16(src1Reg, src2Reg);
486 m_assembler.Vst1_32x4(static_cast<CAArch32Assembler::QUAD_REGISTER>(src1Reg), dstAddrReg);
487 }
488
489 template <uint32 offset>
Emit_Md_UnpackWD_MemMemMem(const STATEMENT & statement)490 void CCodeGen_AArch32::Emit_Md_UnpackWD_MemMemMem(const STATEMENT& statement)
491 {
492 auto dst = statement.dst->GetSymbol().get();
493 auto src1 = statement.src1->GetSymbol().get();
494 auto src2 = statement.src2->GetSymbol().get();
495
496 auto dstAddrReg = CAArch32Assembler::r0;
497 auto src1AddrReg = CAArch32Assembler::r1;
498 auto src2AddrReg = CAArch32Assembler::r2;
499 auto src1Reg = CAArch32Assembler::d0;
500 auto src2Reg = CAArch32Assembler::d2;
501
502 LoadMemory128AddressInRegister(dstAddrReg, dst);
503 LoadMemory128AddressInRegister(src1AddrReg, src1, offset);
504 LoadMemory128AddressInRegister(src2AddrReg, src2, offset);
505
506 //Warning: VZIP modifies both registers
507 m_assembler.Vld1_32x2(src1Reg, src2AddrReg);
508 m_assembler.Vld1_32x2(src2Reg, src1AddrReg);
509 m_assembler.Vzip_I32(static_cast<CAArch32Assembler::QUAD_REGISTER>(src1Reg), static_cast<CAArch32Assembler::QUAD_REGISTER>(src2Reg));
510 m_assembler.Vst1_32x4(static_cast<CAArch32Assembler::QUAD_REGISTER>(src1Reg), dstAddrReg);
511 }
512
Emit_MergeTo256_MemMemMem(const STATEMENT & statement)513 void CCodeGen_AArch32::Emit_MergeTo256_MemMemMem(const STATEMENT& statement)
514 {
515 auto dst = statement.dst->GetSymbol().get();
516 auto src1 = statement.src1->GetSymbol().get();
517 auto src2 = statement.src2->GetSymbol().get();
518
519 assert(dst->m_type == SYM_TEMPORARY256);
520
521 auto dstLoAddrReg = CAArch32Assembler::r0;
522 auto dstHiAddrReg = CAArch32Assembler::r1;
523 auto src1AddrReg = CAArch32Assembler::r2;
524 auto src2AddrReg = CAArch32Assembler::r3;
525 auto src1Reg = CAArch32Assembler::q0;
526 auto src2Reg = CAArch32Assembler::q1;
527
528 LoadTemporary256ElementAddressInRegister(dstLoAddrReg, dst, 0x00);
529 LoadTemporary256ElementAddressInRegister(dstHiAddrReg, dst, 0x10);
530 LoadMemory128AddressInRegister(src1AddrReg, src1);
531 LoadMemory128AddressInRegister(src2AddrReg, src2);
532
533 m_assembler.Vld1_32x4(src1Reg, src1AddrReg);
534 m_assembler.Vld1_32x4(src2Reg, src2AddrReg);
535 m_assembler.Vst1_32x4(src1Reg, dstLoAddrReg);
536 m_assembler.Vst1_32x4(src2Reg, dstHiAddrReg);
537 }
538
539 CCodeGen_AArch32::CONSTMATCHER CCodeGen_AArch32::g_mdConstMatchers[] =
540 {
541 { OP_MD_ADD_B, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDB> },
542 { OP_MD_ADD_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDH> },
543 { OP_MD_ADD_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDW> },
544
545 { OP_MD_SUB_B, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBB> },
546 { OP_MD_SUB_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBH> },
547 { OP_MD_SUB_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBW> },
548
549 { OP_MD_ADDUS_B, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDBUS> },
550 { OP_MD_ADDUS_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDHUS> },
551 { OP_MD_ADDUS_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDWUS> },
552
553 { OP_MD_ADDSS_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDHSS> },
554 { OP_MD_ADDSS_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDWSS> },
555
556 { OP_MD_SUBUS_B, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBBUS> },
557 { OP_MD_SUBUS_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBHUS> },
558
559 { OP_MD_SUBSS_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBHSS> },
560 { OP_MD_SUBSS_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBWSS> },
561
562 { OP_MD_CMPEQ_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_CMPEQW> },
563
564 { OP_MD_CMPGT_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_CMPGTH> },
565
566 { OP_MD_MIN_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_MINH> },
567 { OP_MD_MIN_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_MINW> },
568
569 { OP_MD_MAX_H, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_MAXH> },
570 { OP_MD_MAX_W, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_MAXW> },
571
572 { OP_MD_ADD_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_ADDS> },
573 { OP_MD_SUB_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_SUBS> },
574 { OP_MD_MUL_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_MULS> },
575 { OP_MD_DIV_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_DivS_MemMemMem },
576
577 { OP_MD_ABS_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_MemMem<MDOP_ABSS> },
578 { OP_MD_MIN_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<FPUMDOP_MIN> },
579 { OP_MD_MAX_S, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<FPUMDOP_MAX> },
580
581 { OP_MD_AND, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_AND> },
582 { OP_MD_OR, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_OR> },
583 { OP_MD_XOR, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MemMemMem<MDOP_XOR> },
584
585 { OP_MD_NOT, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Not_MemMem },
586
587 { OP_MD_SLLH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SLLH> },
588 { OP_MD_SLLW, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SLLW> },
589
590 { OP_MD_SRLH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SRLH> },
591 { OP_MD_SRLW, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SRLW> },
592
593 { OP_MD_SRAH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SRAH> },
594 { OP_MD_SRAW, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Shift_MemMemCst<MDOP_SRAW> },
595
596 { OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_VARIABLE, &CCodeGen_AArch32::Emit_Md_Srl256_MemMemVar },
597 { OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_CONSTANT, &CCodeGen_AArch32::Emit_Md_Srl256_MemMemCst },
598
599 { OP_MD_ISNEGATIVE, MATCH_VARIABLE, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Test_VarMem<CAArch32Assembler::CONDITION_MI> },
600 { OP_MD_ISZERO, MATCH_VARIABLE, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Test_VarMem<CAArch32Assembler::CONDITION_EQ> },
601
602 { OP_MD_TOSINGLE, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_MemMem<MDOP_TOSINGLE> },
603 { OP_MD_TOWORD_TRUNCATE, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_MemMem<MDOP_TOWORD> },
604
605 { OP_MOV, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Mov_MemMem },
606
607 { OP_LOADFROMREF, MATCH_MEMORY128, MATCH_MEM_REF, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_LoadFromRef_MemMem },
608 { OP_STOREATREF, MATCH_NIL, MATCH_MEM_REF, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_StoreAtRef_MemMem },
609
610 { OP_MD_MOV_MASKED, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_MovMasked_MemMemMem },
611
612 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_REGISTER, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Expand_MemReg },
613 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_MEMORY, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Expand_MemMem },
614 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_AArch32::Emit_Md_Expand_MemCst },
615
616 { OP_MD_PACK_HB, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_PackHB_MemMemMem },
617 { OP_MD_PACK_WH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_PackWH_MemMemMem },
618
619 { OP_MD_UNPACK_LOWER_BH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackBH_MemMemMem<0> },
620 { OP_MD_UNPACK_LOWER_HW, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackHW_MemMemMem<0> },
621 { OP_MD_UNPACK_LOWER_WD, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackWD_MemMemMem<0> },
622
623 { OP_MD_UNPACK_UPPER_BH, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackBH_MemMemMem<8> },
624 { OP_MD_UNPACK_UPPER_HW, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackHW_MemMemMem<8> },
625 { OP_MD_UNPACK_UPPER_WD, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_MEMORY128, &CCodeGen_AArch32::Emit_Md_UnpackWD_MemMemMem<8> },
626
627 { OP_MERGETO256, MATCH_MEMORY256, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_AArch32::Emit_MergeTo256_MemMemMem },
628
629 { OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, NULL },
630 };
631