1 #include "Jitter_CodeGen_x86.h"
2
3 using namespace Jitter;
4
MakeRelative128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)5 CX86Assembler::CAddress CCodeGen_x86::MakeRelative128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
6 {
7 assert(symbol->m_type == SYM_RELATIVE128);
8 assert((symbol->m_valueLow & 0xF) == 0);
9 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, symbol->m_valueLow + (elementIdx * 4));
10 }
11
MakeTemporary128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)12 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
13 {
14 assert(symbol->m_type == SYM_TEMPORARY128);
15 // assert(((symbol->m_stackLocation + m_stackLevel) & 0xF) == 0);
16 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + (elementIdx * 4));
17 }
18
MakeTemporary256SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)19 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary256SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
20 {
21 assert(symbol->m_type == SYM_TEMPORARY256);
22 assert(((symbol->m_stackLocation + m_stackLevel) & 0x1F) == 0);
23 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + elementIdx);
24 }
25
MakeVariable128SymbolAddress(CSymbol * symbol)26 CX86Assembler::CAddress CCodeGen_x86::MakeVariable128SymbolAddress(CSymbol* symbol)
27 {
28 switch(symbol->m_type)
29 {
30 case SYM_REGISTER128:
31 return CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[symbol->m_valueLow]);
32 break;
33 case SYM_RELATIVE128:
34 return MakeRelative128SymbolElementAddress(symbol, 0);
35 break;
36 case SYM_TEMPORARY128:
37 return MakeTemporary128SymbolElementAddress(symbol, 0);
38 break;
39 default:
40 throw std::exception();
41 break;
42 }
43 }
44
MakeMemory128SymbolAddress(CSymbol * symbol)45 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolAddress(CSymbol* symbol)
46 {
47 switch(symbol->m_type)
48 {
49 case SYM_RELATIVE128:
50 return MakeRelative128SymbolElementAddress(symbol, 0);
51 break;
52 case SYM_TEMPORARY128:
53 return MakeTemporary128SymbolElementAddress(symbol, 0);
54 break;
55 default:
56 throw std::exception();
57 break;
58 }
59 }
60
MakeMemory128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)61 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
62 {
63 switch(symbol->m_type)
64 {
65 case SYM_RELATIVE128:
66 return MakeRelative128SymbolElementAddress(symbol, elementIdx);
67 break;
68 case SYM_TEMPORARY128:
69 return MakeTemporary128SymbolElementAddress(symbol, elementIdx);
70 break;
71 default:
72 throw std::exception();
73 break;
74 }
75 }
76
77 template <typename MDOP>
Emit_Md_RegVar(const STATEMENT & statement)78 void CCodeGen_x86::Emit_Md_RegVar(const STATEMENT& statement)
79 {
80 auto dst = statement.dst->GetSymbol().get();
81 auto src1 = statement.src1->GetSymbol().get();
82
83 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
84 }
85
86 template <typename MDOP>
Emit_Md_MemVar(const STATEMENT & statement)87 void CCodeGen_x86::Emit_Md_MemVar(const STATEMENT& statement)
88 {
89 auto dst = statement.dst->GetSymbol().get();
90 auto src1 = statement.src1->GetSymbol().get();
91
92 auto dstRegister = CX86Assembler::xMM0;
93
94 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
95 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
96 }
97
98 template <typename MDOP>
Emit_Md_RegRegReg(const STATEMENT & statement)99 void CCodeGen_x86::Emit_Md_RegRegReg(const STATEMENT& statement)
100 {
101 auto dst = statement.dst->GetSymbol().get();
102 auto src1 = statement.src1->GetSymbol().get();
103 auto src2 = statement.src2->GetSymbol().get();
104
105 if(dst->Equals(src1))
106 {
107 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow],
108 CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
109 }
110 else
111 {
112 auto src2Register = m_mdRegisters[src2->m_valueLow];
113
114 if(dst->Equals(src2))
115 {
116 m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
117 src2Register = CX86Assembler::xMM0;
118 }
119
120 m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src1->m_valueLow]));
121 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(src2Register));
122 }
123 }
124
125 template <typename MDOP>
Emit_Md_RegMemReg(const STATEMENT & statement)126 void CCodeGen_x86::Emit_Md_RegMemReg(const STATEMENT& statement)
127 {
128 auto dst = statement.dst->GetSymbol().get();
129 auto src1 = statement.src1->GetSymbol().get();
130 auto src2 = statement.src2->GetSymbol().get();
131
132 auto dstRegister = m_mdRegisters[dst->m_valueLow];
133 auto src2Register = m_mdRegisters[src2->m_valueLow];
134
135 if(dst->Equals(src2))
136 {
137 m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(src2Register));
138 src2Register = CX86Assembler::xMM0;
139 }
140
141 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
142 ((m_assembler).*(MDOP::OpVo()))(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src2Register));
143 }
144
145 template <typename MDOP>
Emit_Md_RegVarVar(const STATEMENT & statement)146 void CCodeGen_x86::Emit_Md_RegVarVar(const STATEMENT& statement)
147 {
148 auto dst = statement.dst->GetSymbol().get();
149 auto src1 = statement.src1->GetSymbol().get();
150 auto src2 = statement.src2->GetSymbol().get();
151
152 //If we get in here, it must absolutely mean that the second source isn't a register
153 //Otherwise, some of the assumuptions done below will be wrong (dst mustn't be equal to src2)
154 assert(src2->m_type != SYM_REGISTER128);
155
156 auto dstRegister = m_mdRegisters[dst->m_valueLow];
157
158 if(!dst->Equals(src1))
159 {
160 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
161 }
162
163 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
164 }
165
166 template <typename MDOP>
Emit_Md_MemVarVar(const STATEMENT & statement)167 void CCodeGen_x86::Emit_Md_MemVarVar(const STATEMENT& statement)
168 {
169 auto dst = statement.dst->GetSymbol().get();
170 auto src1 = statement.src1->GetSymbol().get();
171 auto src2 = statement.src2->GetSymbol().get();
172
173 auto dstRegister = CX86Assembler::xMM0;
174
175 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
176 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
177 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
178 }
179
180 template <typename MDOP>
Emit_Md_VarVarVarRev(const STATEMENT & statement)181 void CCodeGen_x86::Emit_Md_VarVarVarRev(const STATEMENT& statement)
182 {
183 //TODO: This could be improved further, but we might want
184 //to reverse the operands somewhere else as to not
185 //copy paste the code from the "non-reversed" path
186
187 auto dst = statement.dst->GetSymbol().get();
188 auto src1 = statement.src1->GetSymbol().get();
189 auto src2 = statement.src2->GetSymbol().get();
190
191 auto dstRegister = CX86Assembler::xMM0;
192
193 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src2));
194 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
195 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), dstRegister);
196 }
197
198 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_RegVarCst(const STATEMENT & statement)199 void CCodeGen_x86::Emit_Md_Shift_RegVarCst(const STATEMENT& statement)
200 {
201 auto dst = statement.dst->GetSymbol().get();
202 auto src1 = statement.src1->GetSymbol().get();
203 auto src2 = statement.src2->GetSymbol().get();
204
205 auto dstRegister = m_mdRegisters[dst->m_valueLow];
206
207 if(!dst->Equals(src1))
208 {
209 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
210 }
211
212 ((m_assembler).*(MDOPSHIFT::OpVo()))(dstRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
213 }
214
215 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_MemVarCst(const STATEMENT & statement)216 void CCodeGen_x86::Emit_Md_Shift_MemVarCst(const STATEMENT& statement)
217 {
218 auto dst = statement.dst->GetSymbol().get();
219 auto src1 = statement.src1->GetSymbol().get();
220 auto src2 = statement.src2->GetSymbol().get();
221
222 auto tmpRegister = CX86Assembler::xMM0;
223
224 m_assembler.MovapsVo(tmpRegister, MakeVariable128SymbolAddress(src1));
225 ((m_assembler).*(MDOPSHIFT::OpVo()))(tmpRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
226 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), tmpRegister);
227 }
228
229 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_RegVar(const STATEMENT & statement)230 void CCodeGen_x86::Emit_Md_SingleOp_RegVar(const STATEMENT& statement)
231 {
232 auto dst = statement.dst->GetSymbol().get();
233 auto src1 = statement.src1->GetSymbol().get();
234
235 auto resultRegister = m_mdRegisters[dst->m_valueLow];
236
237 if(!dst->Equals(src1))
238 {
239 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
240 }
241
242 ((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
243 }
244
245 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_MemVar(const STATEMENT & statement)246 void CCodeGen_x86::Emit_Md_SingleOp_MemVar(const STATEMENT& statement)
247 {
248 auto dst = statement.dst->GetSymbol().get();
249 auto src1 = statement.src1->GetSymbol().get();
250
251 auto resultRegister = CX86Assembler::xMM0;
252
253 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
254 ((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
255 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
256 }
257
258 template <typename MDOPFLAG>
Emit_Md_GetFlag_RegVar(const STATEMENT & statement)259 void CCodeGen_x86::Emit_Md_GetFlag_RegVar(const STATEMENT& statement)
260 {
261 auto dst = statement.dst->GetSymbol().get();
262 auto src1 = statement.src1->GetSymbol().get();
263
264 ((*this).*(MDOPFLAG::OpEd()))(m_registers[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
265 }
266
267 template <typename MDOPFLAG>
Emit_Md_GetFlag_MemVar(const STATEMENT & statement)268 void CCodeGen_x86::Emit_Md_GetFlag_MemVar(const STATEMENT& statement)
269 {
270 auto dst = statement.dst->GetSymbol().get();
271 auto src1 = statement.src1->GetSymbol().get();
272
273 auto tmpRegister = CX86Assembler::rAX;
274 ((*this).*(MDOPFLAG::OpEd()))(tmpRegister, MakeVariable128SymbolAddress(src1));
275 m_assembler.MovGd(MakeMemorySymbolAddress(dst), tmpRegister);
276 }
277
Emit_Md_AddSSW_VarVarVar(const STATEMENT & statement)278 void CCodeGen_x86::Emit_Md_AddSSW_VarVarVar(const STATEMENT& statement)
279 {
280 auto dst = statement.dst->GetSymbol().get();
281 auto src1 = statement.src1->GetSymbol().get();
282 auto src2 = statement.src2->GetSymbol().get();
283
284 auto uxRegister = CX86Assembler::xMM0;
285 auto uyRegister = CX86Assembler::xMM1;
286 auto resRegister = CX86Assembler::xMM2;
287 auto cstRegister = CX86Assembler::xMM3;
288
289 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
290 // s32b sat_adds32b(s32b x, s32b y)
291 // {
292 // u32b ux = x;
293 // u32b uy = y;
294 // u32b res = ux + uy;
295 //
296 // /* Calculate overflowed result. (Don't change the sign bit of ux) */
297 // ux = (ux >> 31) + INT_MAX;
298 //
299 // s32b sign = (s32b) ((ux ^ uy) | ~(uy ^ res))
300 // sign >>= 31; /* Arithmetic shift, either 0 or ~0*/
301 // res = (res & sign) | (ux & ~sign);
302 //
303 // return res;
304 // }
305
306 //ux = src1
307 //uy = src2
308 m_assembler.MovapsVo(uxRegister, MakeVariable128SymbolAddress(src1));
309 m_assembler.MovapsVo(uyRegister, MakeVariable128SymbolAddress(src2));
310
311 //res = ux + uy
312 m_assembler.MovapsVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
313 m_assembler.PadddVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
314
315 //cst = 0x7FFFFFFF
316 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
317 m_assembler.PsrldVo(cstRegister, 1);
318
319 //ux = (ux >> 31)
320 m_assembler.PsrldVo(uxRegister, 31);
321
322 //ux += 0x7FFFFFFF
323 m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
324
325 //uy = ~(uy ^ res)
326 //------
327 //uy ^ res
328 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
329
330 //~(uy ^ res)
331 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
332 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
333
334 //cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
335 m_assembler.MovapsVo(cstRegister ,CX86Assembler::MakeXmmRegisterAddress(uxRegister));
336 m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
337
338 //uy = ((ux ^ uy) | ~(uy ^ res)) >> 31; (signed operation)
339 m_assembler.PorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
340 m_assembler.PsradVo(uyRegister, 31);
341
342 //res = (res & uy) (uy is the sign value)
343 m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
344
345 //ux = (ux & ~uy)
346 //------
347 //~uy
348 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
349 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
350
351 //ux & ~uy
352 m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
353
354 //res = (res & uy) | (ux & ~uy)
355 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
356
357 //Copy final result
358 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resRegister);
359 }
360
Emit_Md_SubSSW_VarVarVar(const STATEMENT & statement)361 void CCodeGen_x86::Emit_Md_SubSSW_VarVarVar(const STATEMENT& statement)
362 {
363 auto dst = statement.dst->GetSymbol().get();
364 auto src1 = statement.src1->GetSymbol().get();
365 auto src2 = statement.src2->GetSymbol().get();
366
367 auto uxRegister = CX86Assembler::xMM0;
368 auto uyRegister = CX86Assembler::xMM1;
369 auto resRegister = CX86Assembler::xMM2;
370 auto cstRegister = CX86Assembler::xMM3;
371
372 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
373 // s32b sat_subs32b(s32b x, s32b y)
374 // {
375 // u32b ux = x;
376 // u32b uy = y;
377 // u32b res = ux - uy;
378 //
379 // ux = (ux >> 31) + INT_MAX;
380 //
381 // s32b sign = (s32b) ((ux ^ uy) & (ux ^ res))
382 // sign >>= 31; /* Arithmetic shift, either 0 or ~0*/
383 // res = (res & ~sign) | (ux & sign);
384 //
385 // return res;
386 // }
387
388 //ux = src1
389 //uy = src2
390 m_assembler.MovdqaVo(uxRegister, MakeVariable128SymbolAddress(src1));
391 m_assembler.MovdqaVo(uyRegister, MakeVariable128SymbolAddress(src2));
392
393 //res = ux - uy
394 m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
395 m_assembler.PsubdVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
396
397 //cst = 0x7FFFFFFF
398 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
399 m_assembler.PsrldVo(cstRegister, 1);
400
401 //ux = (ux >> 31)
402 m_assembler.PsrldVo(uxRegister, 31);
403
404 //ux += 0x7FFFFFFF
405 m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
406
407 //uy = (ux ^ res)
408 //------
409 //ux ^ res
410 m_assembler.MovdqaVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
411 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
412
413 //cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
414 m_assembler.MovdqaVo(cstRegister ,CX86Assembler::MakeXmmRegisterAddress(uxRegister));
415 m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
416
417 //uy = ((ux ^ uy) & (ux ^ res)) >> 31; (signed operation)
418 m_assembler.PandVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
419 m_assembler.PsradVo(uyRegister, 31);
420
421 //ux = (ux & uy) (uy is the sign value)
422 m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
423
424 //res = (res & ~uy)
425 //------
426 //~uy
427 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
428 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
429
430 //res & ~uy
431 m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
432
433 //res = (res & ~uy) | (ux & uy)
434 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
435
436 //Copy final result
437 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
438 }
439
Emit_Md_AddUSW_VarVarVar(const STATEMENT & statement)440 void CCodeGen_x86::Emit_Md_AddUSW_VarVarVar(const STATEMENT& statement)
441 {
442 auto dst = statement.dst->GetSymbol().get();
443 auto src1 = statement.src1->GetSymbol().get();
444 auto src2 = statement.src2->GetSymbol().get();
445
446 auto xRegister = CX86Assembler::xMM0;
447 auto resRegister = CX86Assembler::xMM1;
448 auto tmpRegister = CX86Assembler::xMM2;
449 auto tmp2Register = CX86Assembler::xMM3;
450
451 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/
452 // u32b sat_addu32b(u32b x, u32b y)
453 // {
454 // u32b res = x + y;
455 // res |= -(res < x);
456 //
457 // return res;
458 // }
459
460 m_assembler.MovdqaVo(xRegister, MakeVariable128SymbolAddress(src1));
461 m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(xRegister));
462 m_assembler.PadddVo(resRegister, MakeVariable128SymbolAddress(src2));
463
464 //-(res < x)
465 m_assembler.PcmpeqdVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
466 m_assembler.PslldVo(tmpRegister, 31);
467 m_assembler.PadddVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
468
469 m_assembler.PcmpeqdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
470 m_assembler.PslldVo(tmp2Register, 31);
471 m_assembler.PadddVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(xRegister));
472
473 m_assembler.PcmpgtdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
474
475 //res |= -(res < x)
476 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
477
478 //Store result
479 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
480 }
481
Emit_Md_MinW_VarVarVar(const STATEMENT & statement)482 void CCodeGen_x86::Emit_Md_MinW_VarVarVar(const STATEMENT& statement)
483 {
484 auto dst = statement.dst->GetSymbol().get();
485 auto src1 = statement.src1->GetSymbol().get();
486 auto src2 = statement.src2->GetSymbol().get();
487
488 auto src1Register = CX86Assembler::xMM0;
489 auto src2Register = CX86Assembler::xMM1;
490 auto mask1Register = CX86Assembler::xMM2;
491 auto mask2Register = CX86Assembler::xMM3;
492
493 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
494 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
495
496 m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
497 m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
498 m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
499
500 m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
501 m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
502 m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
503
504 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
505 }
506
Emit_Md_MaxW_VarVarVar(const STATEMENT & statement)507 void CCodeGen_x86::Emit_Md_MaxW_VarVarVar(const STATEMENT& statement)
508 {
509 auto dst = statement.dst->GetSymbol().get();
510 auto src1 = statement.src1->GetSymbol().get();
511 auto src2 = statement.src2->GetSymbol().get();
512
513 auto src1Register = CX86Assembler::xMM0;
514 auto src2Register = CX86Assembler::xMM1;
515 auto mask1Register = CX86Assembler::xMM2;
516 auto mask2Register = CX86Assembler::xMM3;
517
518 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
519 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
520
521 m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
522 m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
523 m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
524
525 m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
526 m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
527 m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
528
529 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
530 }
531
Emit_Md_PackHB_VarVarVar(const STATEMENT & statement)532 void CCodeGen_x86::Emit_Md_PackHB_VarVarVar(const STATEMENT& statement)
533 {
534 auto dst = statement.dst->GetSymbol().get();
535 auto src1 = statement.src1->GetSymbol().get();
536 auto src2 = statement.src2->GetSymbol().get();
537
538 auto resultRegister = CX86Assembler::xMM0;
539 auto tempRegister = CX86Assembler::xMM1;
540 auto maskRegister = CX86Assembler::xMM2;
541
542 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
543 m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
544
545 //Generate mask (0x00FF x8)
546 m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
547 m_assembler.PsrlwVo(maskRegister, 0x08);
548
549 //Mask both operands
550 m_assembler.PandVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
551 m_assembler.PandVo(tempRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
552
553 //Pack
554 m_assembler.PackuswbVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
555
556 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
557 }
558
Emit_Md_PackWH_VarVarVar(const STATEMENT & statement)559 void CCodeGen_x86::Emit_Md_PackWH_VarVarVar(const STATEMENT& statement)
560 {
561 auto dst = statement.dst->GetSymbol().get();
562 auto src1 = statement.src1->GetSymbol().get();
563 auto src2 = statement.src2->GetSymbol().get();
564
565 auto resultRegister = CX86Assembler::xMM0;
566 auto tempRegister = CX86Assembler::xMM1;
567
568 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
569 m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
570
571 //Sign extend the lower half word of our registers
572 m_assembler.PslldVo(resultRegister, 0x10);
573 m_assembler.PsradVo(resultRegister, 0x10);
574
575 m_assembler.PslldVo(tempRegister, 0x10);
576 m_assembler.PsradVo(tempRegister, 0x10);
577
578 //Pack
579 m_assembler.PackssdwVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
580
581 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
582 }
583
Emit_Md_MovMasked_VarVarVar(const STATEMENT & statement)584 void CCodeGen_x86::Emit_Md_MovMasked_VarVarVar(const STATEMENT& statement)
585 {
586 auto dst = statement.dst->GetSymbol().get();
587 auto src1 = statement.src1->GetSymbol().get();
588 auto src2 = statement.src2->GetSymbol().get();
589
590 uint8 mask = static_cast<uint8>(statement.jmpCondition);
591 auto mask0Register = CX86Assembler::xMM0;
592 auto mask1Register = CX86Assembler::xMM1;
593
594 m_assembler.MovId(CX86Assembler::rAX, ~0);
595 m_assembler.MovdVo(mask0Register, CX86Assembler::MakeRegisterAddress(CX86Assembler::rAX));
596
597 //Generate shuffle selector
598 //0x00 -> gives us 0x00000000
599 //0x02 -> gives us 0xFFFFFFFF
600 uint8 shuffleSelector = 0;
601 for(unsigned int i = 0; i < 4; i++)
602 {
603 if(mask & (1 << i))
604 {
605 shuffleSelector |= (0x02) << (i * 2);
606 }
607 }
608
609 //mask0 -> proper mask
610 m_assembler.PshufdVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register), shuffleSelector);
611
612 //mask1 -> mask inverse
613 m_assembler.PcmpeqdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
614 m_assembler.PxorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register));
615
616 //Generate result
617 m_assembler.PandVo(mask0Register, MakeVariable128SymbolAddress(src1));
618 m_assembler.PandVo(mask1Register, MakeVariable128SymbolAddress(src2));
619 m_assembler.PorVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
620
621 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask0Register);
622 }
623
Emit_Md_Mov_RegVar(const STATEMENT & statement)624 void CCodeGen_x86::Emit_Md_Mov_RegVar(const STATEMENT& statement)
625 {
626 auto dst = statement.dst->GetSymbol().get();
627 auto src1 = statement.src1->GetSymbol().get();
628
629 m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
630 }
631
Emit_Md_Mov_MemReg(const STATEMENT & statement)632 void CCodeGen_x86::Emit_Md_Mov_MemReg(const STATEMENT& statement)
633 {
634 auto dst = statement.dst->GetSymbol().get();
635 auto src1 = statement.src1->GetSymbol().get();
636
637 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), m_mdRegisters[src1->m_valueLow]);
638 }
639
Emit_Md_Mov_MemMem(const STATEMENT & statement)640 void CCodeGen_x86::Emit_Md_Mov_MemMem(const STATEMENT& statement)
641 {
642 CSymbol* dst = statement.dst->GetSymbol().get();
643 CSymbol* src1 = statement.src1->GetSymbol().get();
644
645 CX86Assembler::XMMREGISTER resultRegister = CX86Assembler::xMM0;
646
647 m_assembler.MovapsVo(resultRegister, MakeMemory128SymbolAddress(src1));
648 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
649 }
650
Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)651 void CCodeGen_x86::Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)
652 {
653 auto maskRegister = CX86Assembler::xMM1;
654
655 assert(dstRegister != maskRegister);
656
657 m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
658 m_assembler.PsrldVo(maskRegister, 1);
659 m_assembler.PandVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
660 }
661
Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)662 void CCodeGen_x86::Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)
663 {
664 auto cstRegister = CX86Assembler::xMM1;
665
666 assert(dstRegister != cstRegister);
667
668 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
669 m_assembler.PxorVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
670 }
671
Emit_Md_IsNegative(CX86Assembler::REGISTER dstRegister,const CX86Assembler::CAddress & srcAddress)672 void CCodeGen_x86::Emit_Md_IsNegative(CX86Assembler::REGISTER dstRegister, const CX86Assembler::CAddress& srcAddress)
673 {
674 auto valueRegister = CX86Assembler::xMM0;
675 auto zeroRegister = CX86Assembler::xMM1;
676 auto tmpRegister = CX86Assembler::xMM2;
677 auto shuffleSelectRegister = CX86Assembler::xMM3;
678 auto tmpFlagRegister = CX86Assembler::rDX;
679
680 assert(dstRegister != tmpFlagRegister);
681
682 //valueRegister = [srcAddress]
683 m_assembler.MovdqaVo(valueRegister, srcAddress);
684
685 //----- Generate isZero
686
687 //tmpRegister = 0
688 m_assembler.PandnVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
689
690 //zeroRegister = 0xFFFFFFFF
691 m_assembler.PcmpeqdVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
692
693 //zeroRegister = 0x7FFFFFFF
694 m_assembler.PsrldVo(zeroRegister, 1);
695
696 //zeroRegister &= valueRegister
697 m_assembler.PandVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
698
699 //zeroRegister = (zeroRegister == tmpRegister)
700 m_assembler.PcmpeqdVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
701
702 //----- Generate isNegative
703 //valueRegister >>= 31 (s-extended)
704 m_assembler.PsradVo(valueRegister, 31);
705
706 //----- Generate result
707 //zeroRegister = (not zeroRegister) & valueRegister
708 m_assembler.PandnVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
709
710 //Extract bits
711 m_assembler.MovId(tmpFlagRegister, 0x03070B0F);
712 m_assembler.MovdVo(shuffleSelectRegister, CX86Assembler::MakeRegisterAddress(tmpFlagRegister));
713 m_assembler.PshufbVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(shuffleSelectRegister));
714 m_assembler.PmovmskbVo(dstRegister, zeroRegister);
715 m_assembler.AndId(CX86Assembler::MakeRegisterAddress(dstRegister), 0x0F);
716 }
717
Emit_Md_IsZero(CX86Assembler::REGISTER dstRegister,const CX86Assembler::CAddress & srcAddress)718 void CCodeGen_x86::Emit_Md_IsZero(CX86Assembler::REGISTER dstRegister, const CX86Assembler::CAddress& srcAddress)
719 {
720 auto valueRegister = CX86Assembler::xMM0;
721 auto zeroRegister = CX86Assembler::xMM1;
722 auto shuffleSelectRegister = CX86Assembler::xMM2;
723 auto tmpFlagRegister = CX86Assembler::rDX;
724
725 assert(dstRegister != tmpFlagRegister);
726
727 //Get value - And with 0x7FFFFFFF to remove sign bit
728 m_assembler.PcmpeqdVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
729 m_assembler.PsrldVo(valueRegister, 1);
730 m_assembler.PandVo(valueRegister, srcAddress);
731
732 //Generate zero and compare
733 m_assembler.PandnVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
734 m_assembler.PcmpeqdVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
735
736 //Extract bits
737 m_assembler.MovId(tmpFlagRegister, 0x03070B0F);
738 m_assembler.MovdVo(shuffleSelectRegister, CX86Assembler::MakeRegisterAddress(tmpFlagRegister));
739 m_assembler.PshufbVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(shuffleSelectRegister));
740 m_assembler.PmovmskbVo(dstRegister, valueRegister);
741 m_assembler.AndId(CX86Assembler::MakeRegisterAddress(dstRegister), 0x0F);
742 }
743
Emit_Md_Expand_RegReg(const STATEMENT & statement)744 void CCodeGen_x86::Emit_Md_Expand_RegReg(const STATEMENT& statement)
745 {
746 auto dst = statement.dst->GetSymbol().get();
747 auto src1 = statement.src1->GetSymbol().get();
748
749 auto resultRegister = m_mdRegisters[dst->m_valueLow];
750
751 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
752 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
753 }
754
Emit_Md_Expand_RegMem(const STATEMENT & statement)755 void CCodeGen_x86::Emit_Md_Expand_RegMem(const STATEMENT& statement)
756 {
757 auto dst = statement.dst->GetSymbol().get();
758 auto src1 = statement.src1->GetSymbol().get();
759
760 auto resultRegister = m_mdRegisters[dst->m_valueLow];
761
762 m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
763 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
764 }
765
Emit_Md_Expand_RegCst(const STATEMENT & statement)766 void CCodeGen_x86::Emit_Md_Expand_RegCst(const STATEMENT& statement)
767 {
768 auto dst = statement.dst->GetSymbol().get();
769 auto src1 = statement.src1->GetSymbol().get();
770
771 auto cstRegister = CX86Assembler::rAX;
772 auto resultRegister = m_mdRegisters[dst->m_valueLow];
773
774 m_assembler.MovId(cstRegister, src1->m_valueLow);
775 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
776 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
777 }
778
Emit_Md_Expand_MemReg(const STATEMENT & statement)779 void CCodeGen_x86::Emit_Md_Expand_MemReg(const STATEMENT& statement)
780 {
781 auto dst = statement.dst->GetSymbol().get();
782 auto src1 = statement.src1->GetSymbol().get();
783
784 auto resultRegister = CX86Assembler::xMM0;
785
786 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
787 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
788 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
789 }
790
Emit_Md_Expand_MemMem(const STATEMENT & statement)791 void CCodeGen_x86::Emit_Md_Expand_MemMem(const STATEMENT& statement)
792 {
793 auto dst = statement.dst->GetSymbol().get();
794 auto src1 = statement.src1->GetSymbol().get();
795
796 auto resultRegister = CX86Assembler::xMM0;
797
798 m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
799 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
800 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
801 }
802
Emit_Md_Expand_MemCst(const STATEMENT & statement)803 void CCodeGen_x86::Emit_Md_Expand_MemCst(const STATEMENT& statement)
804 {
805 auto dst = statement.dst->GetSymbol().get();
806 auto src1 = statement.src1->GetSymbol().get();
807
808 auto cstRegister = CX86Assembler::rAX;
809 auto resultRegister = CX86Assembler::xMM0;
810
811 m_assembler.MovId(cstRegister, src1->m_valueLow);
812 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
813 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
814 m_assembler.MovdqaVo(MakeMemory128SymbolAddress(dst), resultRegister);
815 }
816
Emit_Md_Srl256_VarMem(CSymbol * dst,CSymbol * src1,const CX86Assembler::CAddress & offsetAddress)817 void CCodeGen_x86::Emit_Md_Srl256_VarMem(CSymbol* dst, CSymbol* src1, const CX86Assembler::CAddress& offsetAddress)
818 {
819 auto offsetRegister = CX86Assembler::rAX;
820 auto resultRegister = CX86Assembler::xMM0;
821
822 assert(src1->m_type == SYM_TEMPORARY256);
823
824 m_assembler.MovEd(offsetRegister, offsetAddress);
825 m_assembler.AndId(CX86Assembler::MakeRegisterAddress(offsetRegister), 0x7F);
826 m_assembler.ShrEd(CX86Assembler::MakeRegisterAddress(offsetRegister), 3);
827 m_assembler.AddId(CX86Assembler::MakeRegisterAddress(offsetRegister), src1->m_stackLocation + m_stackLevel);
828
829 m_assembler.MovdquVo(resultRegister, CX86Assembler::MakeBaseIndexScaleAddress(CX86Assembler::rSP, offsetRegister, 1));
830 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
831 }
832
Emit_Md_Srl256_VarMemVar(const STATEMENT & statement)833 void CCodeGen_x86::Emit_Md_Srl256_VarMemVar(const STATEMENT& statement)
834 {
835 auto dst = statement.dst->GetSymbol().get();
836 auto src1 = statement.src1->GetSymbol().get();
837 auto src2 = statement.src2->GetSymbol().get();
838
839 Emit_Md_Srl256_VarMem(dst, src1, MakeVariableSymbolAddress(src2));
840 }
841
Emit_Md_Srl256_VarMemCst(const STATEMENT & statement)842 void CCodeGen_x86::Emit_Md_Srl256_VarMemCst(const STATEMENT& statement)
843 {
844 auto dst = statement.dst->GetSymbol().get();
845 auto src1 = statement.src1->GetSymbol().get();
846 auto src2 = statement.src2->GetSymbol().get();
847
848 auto resultRegister = CX86Assembler::xMM0;
849
850 assert(src1->m_type == SYM_TEMPORARY256);
851 assert(src2->m_type == SYM_CONSTANT);
852
853 uint32 offset = (src2->m_valueLow & 0x7F) / 8;
854
855 m_assembler.MovdquVo(resultRegister, MakeTemporary256SymbolElementAddress(src1, offset));
856 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
857 }
858
Emit_MergeTo256_MemVarVar(const STATEMENT & statement)859 void CCodeGen_x86::Emit_MergeTo256_MemVarVar(const STATEMENT& statement)
860 {
861 auto dst = statement.dst->GetSymbol().get();
862 auto src1 = statement.src1->GetSymbol().get();
863 auto src2 = statement.src2->GetSymbol().get();
864
865 assert(dst->m_type == SYM_TEMPORARY256);
866
867 auto src1Register = CX86Assembler::xMM0;
868 auto src2Register = CX86Assembler::xMM1;
869
870 //TODO: Improve this to write out registers directly to temporary's memory space
871 //instead of passing by temporary registers
872
873 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
874 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
875
876 m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x00), src1Register);
877 m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x10), src2Register);
878 }
879
//Expands to the two matcher rows of a shift-by-constant operation:
//destination in a 128-bit register or in 128-bit memory, source any 128-bit
//variable, shift amount a constant.
//  MDOP_CST : opcode stored in the row
//  MDOP     : operation type passed as template argument to the emitter
//  SAMASK   : second template argument of the emitter (presumably the mask
//             applied to the shift amount - confirm in Emit_Md_Shift_*)
//Each row ends with a comma, so invocations are written without separators.
#define MD_CONST_MATCHERS_SHIFT(MDOP_CST, MDOP, SAMASK) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_CONSTANT, &CCodeGen_x86::Emit_Md_Shift_RegVarCst<MDOP, SAMASK> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_CONSTANT, &CCodeGen_x86::Emit_Md_Shift_MemVarCst<MDOP, SAMASK> },
883
//Expands to the two matcher rows of an operation with a destination and a
//single 128-bit variable source (no second source): one row for a register
//destination, one for a memory destination.
//  MDOP_CST : opcode stored in the row
//  MDOP     : operation type passed as template argument to the emitter
//Each row ends with a comma, so invocations are written without separators.
#define MD_CONST_MATCHERS_2OPS(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MemVar<MDOP> },
887
//Expands to the four matcher rows of an operation with a destination and two
//128-bit sources. The two specific register-destination rows are listed
//before the generic variable/variable rows; keep that order (presumably rows
//are tried in table order so the specialised emitters win - confirm in the
//matcher lookup code).
//  MDOP_CST : opcode stored in the row
//  MDOP     : operation type passed as template argument to the emitter
//Each row ends with a comma, so invocations are written without separators.
#define MD_CONST_MATCHERS_3OPS(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_REGISTER128, MATCH_REGISTER128, &CCodeGen_x86::Emit_Md_RegRegReg<MDOP> }, \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_MEMORY128, MATCH_REGISTER128, &CCodeGen_x86::Emit_Md_RegMemReg<MDOP> }, \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_RegVarVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_MemVarVar<MDOP> },
893
//Expands to the single matcher row of a two-source operation handled by
//Emit_Md_VarVarVarRev (going by the name, the emitter treats the sources in
//reversed order - confirm in its definition). All three operands are any
//128-bit variable.
//  MDOP_CST : opcode stored in the row
//  MDOP     : operation type passed as template argument to the emitter
//The row ends with a comma, so invocations are written without separators.
#define MD_CONST_MATCHERS_3OPS_REV(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_VarVarVarRev<MDOP> },
896
//Expands to the two matcher rows of an operation with a destination and a
//single 128-bit variable source, dispatched to the Emit_Md_SingleOp_*
//emitters: one row for a register destination, one for a memory destination.
//  MDOP_CST : opcode stored in the row
//  MDOP     : operation type passed as template argument to the emitter
//Each row ends with a comma, so invocations are written without separators.
#define MD_CONST_MATCHERS_SINGLEOP(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_RegVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_MemVar<MDOP> },
900
901 CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdConstMatchers[] =
902 {
903 MD_CONST_MATCHERS_3OPS(OP_MD_ADD_B, MDOP_ADDB)
904 MD_CONST_MATCHERS_3OPS(OP_MD_ADD_H, MDOP_ADDH)
905 MD_CONST_MATCHERS_3OPS(OP_MD_ADD_W, MDOP_ADDW)
906
907 MD_CONST_MATCHERS_3OPS(OP_MD_ADDSS_H, MDOP_ADDSSH)
908 { OP_MD_ADDSS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_AddSSW_VarVarVar },
909
910 MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_B, MDOP_ADDUSB)
911 MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_H, MDOP_ADDUSH)
912 { OP_MD_ADDUS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_AddUSW_VarVarVar },
913
914 MD_CONST_MATCHERS_3OPS(OP_MD_SUB_B, MDOP_SUBB)
915 MD_CONST_MATCHERS_3OPS(OP_MD_SUB_H, MDOP_SUBH)
916 MD_CONST_MATCHERS_3OPS(OP_MD_SUB_W, MDOP_SUBW)
917
918 MD_CONST_MATCHERS_3OPS(OP_MD_SUBSS_H, MDOP_SUBSSH)
919 { OP_MD_SUBSS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_SubSSW_VarVarVar },
920
921 MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_B, MDOP_SUBUSB)
922 MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_H, MDOP_SUBUSH)
923
924 MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_B, MDOP_CMPEQB)
925 MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_H, MDOP_CMPEQH)
926 MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_W, MDOP_CMPEQW)
927 MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_B, MDOP_CMPGTB)
928 MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_H, MDOP_CMPGTH)
929 MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_W, MDOP_CMPGTW)
930
931 MD_CONST_MATCHERS_3OPS(OP_MD_MIN_H, MDOP_MINH)
932
933 MD_CONST_MATCHERS_3OPS(OP_MD_MAX_H, MDOP_MAXH)
934
935 MD_CONST_MATCHERS_3OPS(OP_MD_AND, MDOP_AND)
936 MD_CONST_MATCHERS_3OPS(OP_MD_OR, MDOP_OR)
937 MD_CONST_MATCHERS_3OPS(OP_MD_XOR, MDOP_XOR)
938
939 MD_CONST_MATCHERS_SHIFT(OP_MD_SRLH, MDOP_SRLH, 0x0F)
940 MD_CONST_MATCHERS_SHIFT(OP_MD_SRAH, MDOP_SRAH, 0x0F)
941 MD_CONST_MATCHERS_SHIFT(OP_MD_SLLH, MDOP_SLLH, 0x0F)
942
943 MD_CONST_MATCHERS_SHIFT(OP_MD_SRLW, MDOP_SRLW, 0x1F)
944 MD_CONST_MATCHERS_SHIFT(OP_MD_SRAW, MDOP_SRAW, 0x1F)
945 MD_CONST_MATCHERS_SHIFT(OP_MD_SLLW, MDOP_SLLW, 0x1F)
946
947 { OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_VARIABLE, &CCodeGen_x86::Emit_Md_Srl256_VarMemVar },
948 { OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_CONSTANT, &CCodeGen_x86::Emit_Md_Srl256_VarMemCst },
949
950 { OP_MD_EXPAND, MATCH_REGISTER128, MATCH_REGISTER, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegReg },
951 { OP_MD_EXPAND, MATCH_REGISTER128, MATCH_MEMORY, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegMem },
952 { OP_MD_EXPAND, MATCH_REGISTER128, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegCst },
953 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_REGISTER, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemReg },
954 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_MEMORY, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemMem },
955 { OP_MD_EXPAND, MATCH_MEMORY128, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemCst },
956
957 { OP_MD_PACK_HB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_PackHB_VarVarVar, },
958 { OP_MD_PACK_WH, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_PackWH_VarVarVar, },
959
960 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_BH, MDOP_UNPACK_LOWER_BH)
961 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_HW, MDOP_UNPACK_LOWER_HW)
962 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_WD, MDOP_UNPACK_LOWER_WD)
963
964 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_BH, MDOP_UNPACK_UPPER_BH)
965 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_HW, MDOP_UNPACK_UPPER_HW)
966 MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_WD, MDOP_UNPACK_UPPER_WD)
967
968 MD_CONST_MATCHERS_3OPS(OP_MD_ADD_S, MDOP_ADDS)
969 MD_CONST_MATCHERS_3OPS(OP_MD_SUB_S, MDOP_SUBS)
970 MD_CONST_MATCHERS_3OPS(OP_MD_MUL_S, MDOP_MULS)
971 MD_CONST_MATCHERS_3OPS(OP_MD_DIV_S, MDOP_DIVS)
972
973 MD_CONST_MATCHERS_3OPS(OP_MD_MIN_S, MDOP_MINS)
974 MD_CONST_MATCHERS_3OPS(OP_MD_MAX_S, MDOP_MAXS)
975
976 MD_CONST_MATCHERS_SINGLEOP(OP_MD_ABS_S, MDOP_ABS)
977 MD_CONST_MATCHERS_SINGLEOP(OP_MD_NOT, MDOP_NOT)
978
979 { OP_MD_ISNEGATIVE, MATCH_REGISTER, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_GetFlag_RegVar<MDOP_ISNEGATIVE> },
980 { OP_MD_ISNEGATIVE, MATCH_MEMORY, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_GetFlag_MemVar<MDOP_ISNEGATIVE> },
981
982 { OP_MD_ISZERO, MATCH_REGISTER, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_GetFlag_RegVar<MDOP_ISZERO> },
983 { OP_MD_ISZERO, MATCH_MEMORY, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_GetFlag_MemVar<MDOP_ISZERO> },
984
985 MD_CONST_MATCHERS_2OPS(OP_MD_TOWORD_TRUNCATE, MDOP_TOWORD_TRUNCATE)
986 MD_CONST_MATCHERS_2OPS(OP_MD_TOSINGLE, MDOP_TOSINGLE)
987
988 { OP_MOV, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_RegVar, },
989 { OP_MOV, MATCH_MEMORY128, MATCH_REGISTER128, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_MemReg },
990 { OP_MOV, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_MemMem },
991 { OP_MD_MOV_MASKED, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_MovMasked_VarVarVar },
992
993 { OP_MERGETO256, MATCH_MEMORY256, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_MergeTo256_MemVarVar },
994
995 { OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, NULL },
996 };
997
//Matcher table for OP_MD_MIN_W / OP_MD_MAX_W using the dedicated
//Emit_Md_MinW_VarVarVar / Emit_Md_MaxW_VarVarVar emitters.
//Presumably the variant registered when the SSE4.1 table below is not used -
//confirm at the site where the tables are registered.
//The table ends with a sentinel row whose emitter is null.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWConstMatchers[] =
{
	{ OP_MD_MIN_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_MinW_VarVarVar },
	{ OP_MD_MAX_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_MaxW_VarVarVar },

	//Sentinel
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1005
//Matcher table for OP_MD_MIN_W / OP_MD_MAX_W built from the generic
//three-operand rows (MDOP_MINW / MDOP_MAXW).
//Going by the table name, this variant is meant for CPUs with SSE4.1 -
//confirm at the site where the tables are registered.
//The table ends with a sentinel row whose emitter is null.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWSse41ConstMatchers[] =
{
	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_W, MDOP_MINW)
	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_W, MDOP_MAXW)

	//Sentinel
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1013