1 #include "Jitter_CodeGen_x86.h"
2
3 using namespace Jitter;
4
5 const LITERAL128 CCodeGen_x86::g_makeSzShufflePattern = { 0x00020406080A0C0E, 0x8080808080808080 };
6
MakeRelative128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)7 CX86Assembler::CAddress CCodeGen_x86::MakeRelative128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
8 {
9 assert(symbol->m_type == SYM_RELATIVE128);
10 assert((symbol->m_valueLow & 0xF) == 0);
11 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, symbol->m_valueLow + (elementIdx * 4));
12 }
13
MakeTemporary128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)14 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
15 {
16 assert(symbol->m_type == SYM_TEMPORARY128);
17 // assert(((symbol->m_stackLocation + m_stackLevel) & 0xF) == 0);
18 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + (elementIdx * 4));
19 }
20
MakeTemporary256SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)21 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary256SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
22 {
23 assert(symbol->m_type == SYM_TEMPORARY256);
24 assert(((symbol->m_stackLocation + m_stackLevel) & 0x1F) == 0);
25 return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + elementIdx);
26 }
27
MakeVariable128SymbolAddress(CSymbol * symbol)28 CX86Assembler::CAddress CCodeGen_x86::MakeVariable128SymbolAddress(CSymbol* symbol)
29 {
30 switch(symbol->m_type)
31 {
32 case SYM_REGISTER128:
33 return CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[symbol->m_valueLow]);
34 break;
35 case SYM_RELATIVE128:
36 return MakeRelative128SymbolElementAddress(symbol, 0);
37 break;
38 case SYM_TEMPORARY128:
39 return MakeTemporary128SymbolElementAddress(symbol, 0);
40 break;
41 default:
42 throw std::exception();
43 break;
44 }
45 }
46
MakeMemory128SymbolAddress(CSymbol * symbol)47 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolAddress(CSymbol* symbol)
48 {
49 switch(symbol->m_type)
50 {
51 case SYM_RELATIVE128:
52 return MakeRelative128SymbolElementAddress(symbol, 0);
53 break;
54 case SYM_TEMPORARY128:
55 return MakeTemporary128SymbolElementAddress(symbol, 0);
56 break;
57 default:
58 throw std::exception();
59 break;
60 }
61 }
62
MakeMemory128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)63 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
64 {
65 switch(symbol->m_type)
66 {
67 case SYM_RELATIVE128:
68 return MakeRelative128SymbolElementAddress(symbol, elementIdx);
69 break;
70 case SYM_TEMPORARY128:
71 return MakeTemporary128SymbolElementAddress(symbol, elementIdx);
72 break;
73 default:
74 throw std::exception();
75 break;
76 }
77 }
78
79 template <typename MDOP>
Emit_Md_RegVar(const STATEMENT & statement)80 void CCodeGen_x86::Emit_Md_RegVar(const STATEMENT& statement)
81 {
82 auto dst = statement.dst->GetSymbol().get();
83 auto src1 = statement.src1->GetSymbol().get();
84
85 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
86 }
87
88 template <typename MDOP>
Emit_Md_MemVar(const STATEMENT & statement)89 void CCodeGen_x86::Emit_Md_MemVar(const STATEMENT& statement)
90 {
91 auto dst = statement.dst->GetSymbol().get();
92 auto src1 = statement.src1->GetSymbol().get();
93
94 auto dstRegister = CX86Assembler::xMM0;
95
96 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
97 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
98 }
99
100 template <typename MDOP>
Emit_Md_RegRegReg(const STATEMENT & statement)101 void CCodeGen_x86::Emit_Md_RegRegReg(const STATEMENT& statement)
102 {
103 auto dst = statement.dst->GetSymbol().get();
104 auto src1 = statement.src1->GetSymbol().get();
105 auto src2 = statement.src2->GetSymbol().get();
106
107 if(dst->Equals(src1))
108 {
109 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow],
110 CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
111 }
112 else
113 {
114 auto src2Register = m_mdRegisters[src2->m_valueLow];
115
116 if(dst->Equals(src2))
117 {
118 m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
119 src2Register = CX86Assembler::xMM0;
120 }
121
122 m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src1->m_valueLow]));
123 ((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(src2Register));
124 }
125 }
126
127 template <typename MDOP>
Emit_Md_RegMemReg(const STATEMENT & statement)128 void CCodeGen_x86::Emit_Md_RegMemReg(const STATEMENT& statement)
129 {
130 auto dst = statement.dst->GetSymbol().get();
131 auto src1 = statement.src1->GetSymbol().get();
132 auto src2 = statement.src2->GetSymbol().get();
133
134 auto dstRegister = m_mdRegisters[dst->m_valueLow];
135 auto src2Register = m_mdRegisters[src2->m_valueLow];
136
137 if(dst->Equals(src2))
138 {
139 m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(src2Register));
140 src2Register = CX86Assembler::xMM0;
141 }
142
143 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
144 ((m_assembler).*(MDOP::OpVo()))(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src2Register));
145 }
146
147 template <typename MDOP>
Emit_Md_RegVarVar(const STATEMENT & statement)148 void CCodeGen_x86::Emit_Md_RegVarVar(const STATEMENT& statement)
149 {
150 auto dst = statement.dst->GetSymbol().get();
151 auto src1 = statement.src1->GetSymbol().get();
152 auto src2 = statement.src2->GetSymbol().get();
153
154 //If we get in here, it must absolutely mean that the second source isn't a register
155 //Otherwise, some of the assumuptions done below will be wrong (dst mustn't be equal to src2)
156 assert(src2->m_type != SYM_REGISTER128);
157
158 auto dstRegister = m_mdRegisters[dst->m_valueLow];
159
160 if(!dst->Equals(src1))
161 {
162 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
163 }
164
165 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
166 }
167
168 template <typename MDOP>
Emit_Md_MemVarVar(const STATEMENT & statement)169 void CCodeGen_x86::Emit_Md_MemVarVar(const STATEMENT& statement)
170 {
171 auto dst = statement.dst->GetSymbol().get();
172 auto src1 = statement.src1->GetSymbol().get();
173 auto src2 = statement.src2->GetSymbol().get();
174
175 auto dstRegister = CX86Assembler::xMM0;
176
177 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
178 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
179 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
180 }
181
182 template <typename MDOP>
Emit_Md_VarVarVarRev(const STATEMENT & statement)183 void CCodeGen_x86::Emit_Md_VarVarVarRev(const STATEMENT& statement)
184 {
185 //TODO: This could be improved further, but we might want
186 //to reverse the operands somewhere else as to not
187 //copy paste the code from the "non-reversed" path
188
189 auto dst = statement.dst->GetSymbol().get();
190 auto src1 = statement.src1->GetSymbol().get();
191 auto src2 = statement.src2->GetSymbol().get();
192
193 auto dstRegister = CX86Assembler::xMM0;
194
195 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src2));
196 ((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
197 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), dstRegister);
198 }
199
200 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_RegVarCst(const STATEMENT & statement)201 void CCodeGen_x86::Emit_Md_Shift_RegVarCst(const STATEMENT& statement)
202 {
203 auto dst = statement.dst->GetSymbol().get();
204 auto src1 = statement.src1->GetSymbol().get();
205 auto src2 = statement.src2->GetSymbol().get();
206
207 auto dstRegister = m_mdRegisters[dst->m_valueLow];
208
209 if(!dst->Equals(src1))
210 {
211 m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
212 }
213
214 ((m_assembler).*(MDOPSHIFT::OpVo()))(dstRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
215 }
216
217 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_MemVarCst(const STATEMENT & statement)218 void CCodeGen_x86::Emit_Md_Shift_MemVarCst(const STATEMENT& statement)
219 {
220 auto dst = statement.dst->GetSymbol().get();
221 auto src1 = statement.src1->GetSymbol().get();
222 auto src2 = statement.src2->GetSymbol().get();
223
224 auto tmpRegister = CX86Assembler::xMM0;
225
226 m_assembler.MovapsVo(tmpRegister, MakeVariable128SymbolAddress(src1));
227 ((m_assembler).*(MDOPSHIFT::OpVo()))(tmpRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
228 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), tmpRegister);
229 }
230
231 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_RegVar(const STATEMENT & statement)232 void CCodeGen_x86::Emit_Md_SingleOp_RegVar(const STATEMENT& statement)
233 {
234 auto dst = statement.dst->GetSymbol().get();
235 auto src1 = statement.src1->GetSymbol().get();
236
237 auto resultRegister = m_mdRegisters[dst->m_valueLow];
238
239 if(!dst->Equals(src1))
240 {
241 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
242 }
243
244 ((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
245 }
246
247 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_MemVar(const STATEMENT & statement)248 void CCodeGen_x86::Emit_Md_SingleOp_MemVar(const STATEMENT& statement)
249 {
250 auto dst = statement.dst->GetSymbol().get();
251 auto src1 = statement.src1->GetSymbol().get();
252
253 auto resultRegister = CX86Assembler::xMM0;
254
255 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
256 ((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
257 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
258 }
259
Emit_Md_AddSSW_VarVarVar(const STATEMENT & statement)260 void CCodeGen_x86::Emit_Md_AddSSW_VarVarVar(const STATEMENT& statement)
261 {
262 auto dst = statement.dst->GetSymbol().get();
263 auto src1 = statement.src1->GetSymbol().get();
264 auto src2 = statement.src2->GetSymbol().get();
265
266 auto uxRegister = CX86Assembler::xMM0;
267 auto uyRegister = CX86Assembler::xMM1;
268 auto resRegister = CX86Assembler::xMM2;
269 auto cstRegister = CX86Assembler::xMM3;
270
271 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
272 // s32b sat_adds32b(s32b x, s32b y)
273 // {
274 // u32b ux = x;
275 // u32b uy = y;
276 // u32b res = ux + uy;
277 //
278 // /* Calculate overflowed result. (Don't change the sign bit of ux) */
279 // ux = (ux >> 31) + INT_MAX;
280 //
281 // s32b sign = (s32b) ((ux ^ uy) | ~(uy ^ res))
282 // sign >>= 31; /* Arithmetic shift, either 0 or ~0*/
283 // res = (res & sign) | (ux & ~sign);
284 //
285 // return res;
286 // }
287
288 //ux = src1
289 //uy = src2
290 m_assembler.MovapsVo(uxRegister, MakeVariable128SymbolAddress(src1));
291 m_assembler.MovapsVo(uyRegister, MakeVariable128SymbolAddress(src2));
292
293 //res = ux + uy
294 m_assembler.MovapsVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
295 m_assembler.PadddVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
296
297 //cst = 0x7FFFFFFF
298 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
299 m_assembler.PsrldVo(cstRegister, 1);
300
301 //ux = (ux >> 31)
302 m_assembler.PsrldVo(uxRegister, 31);
303
304 //ux += 0x7FFFFFFF
305 m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
306
307 //uy = ~(uy ^ res)
308 //------
309 //uy ^ res
310 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
311
312 //~(uy ^ res)
313 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
314 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
315
316 //cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
317 m_assembler.MovapsVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
318 m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
319
320 //uy = ((ux ^ uy) | ~(uy ^ res)) >> 31; (signed operation)
321 m_assembler.PorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
322 m_assembler.PsradVo(uyRegister, 31);
323
324 //res = (res & uy) (uy is the sign value)
325 m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
326
327 //ux = (ux & ~uy)
328 //------
329 //~uy
330 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
331 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
332
333 //ux & ~uy
334 m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
335
336 //res = (res & uy) | (ux & ~uy)
337 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
338
339 //Copy final result
340 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resRegister);
341 }
342
Emit_Md_SubSSW_VarVarVar(const STATEMENT & statement)343 void CCodeGen_x86::Emit_Md_SubSSW_VarVarVar(const STATEMENT& statement)
344 {
345 auto dst = statement.dst->GetSymbol().get();
346 auto src1 = statement.src1->GetSymbol().get();
347 auto src2 = statement.src2->GetSymbol().get();
348
349 auto uxRegister = CX86Assembler::xMM0;
350 auto uyRegister = CX86Assembler::xMM1;
351 auto resRegister = CX86Assembler::xMM2;
352 auto cstRegister = CX86Assembler::xMM3;
353
354 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
355 // s32b sat_subs32b(s32b x, s32b y)
356 // {
357 // u32b ux = x;
358 // u32b uy = y;
359 // u32b res = ux - uy;
360 //
361 // ux = (ux >> 31) + INT_MAX;
362 //
363 // s32b sign = (s32b) ((ux ^ uy) & (ux ^ res))
364 // sign >>= 31; /* Arithmetic shift, either 0 or ~0*/
365 // res = (res & ~sign) | (ux & sign);
366 //
367 // return res;
368 // }
369
370 //ux = src1
371 //uy = src2
372 m_assembler.MovdqaVo(uxRegister, MakeVariable128SymbolAddress(src1));
373 m_assembler.MovdqaVo(uyRegister, MakeVariable128SymbolAddress(src2));
374
375 //res = ux - uy
376 m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
377 m_assembler.PsubdVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
378
379 //cst = 0x7FFFFFFF
380 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
381 m_assembler.PsrldVo(cstRegister, 1);
382
383 //ux = (ux >> 31)
384 m_assembler.PsrldVo(uxRegister, 31);
385
386 //ux += 0x7FFFFFFF
387 m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
388
389 //uy = (ux ^ res)
390 //------
391 //ux ^ res
392 m_assembler.MovdqaVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
393 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
394
395 //cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
396 m_assembler.MovdqaVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
397 m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
398
399 //uy = ((ux ^ uy) & (ux ^ res)) >> 31; (signed operation)
400 m_assembler.PandVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
401 m_assembler.PsradVo(uyRegister, 31);
402
403 //ux = (ux & uy) (uy is the sign value)
404 m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
405
406 //res = (res & ~uy)
407 //------
408 //~uy
409 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
410 m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
411
412 //res & ~uy
413 m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
414
415 //res = (res & ~uy) | (ux & uy)
416 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
417
418 //Copy final result
419 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
420 }
421
Emit_Md_AddUSW_VarVarVar(const STATEMENT & statement)422 void CCodeGen_x86::Emit_Md_AddUSW_VarVarVar(const STATEMENT& statement)
423 {
424 auto dst = statement.dst->GetSymbol().get();
425 auto src1 = statement.src1->GetSymbol().get();
426 auto src2 = statement.src2->GetSymbol().get();
427
428 auto xRegister = CX86Assembler::xMM0;
429 auto resRegister = CX86Assembler::xMM1;
430 auto tmpRegister = CX86Assembler::xMM2;
431 auto tmp2Register = CX86Assembler::xMM3;
432
433 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/
434 // u32b sat_addu32b(u32b x, u32b y)
435 // {
436 // u32b res = x + y;
437 // res |= -(res < x);
438 //
439 // return res;
440 // }
441
442 m_assembler.MovdqaVo(xRegister, MakeVariable128SymbolAddress(src1));
443 m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(xRegister));
444 m_assembler.PadddVo(resRegister, MakeVariable128SymbolAddress(src2));
445
446 //-(res < x)
447 //PCMPGT will compare two signed integers, but we want unsigned comparison
448 //Thus, we add 0x80000000 to both values to "convert" them to signed
449 m_assembler.PcmpeqdVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
450 m_assembler.PslldVo(tmpRegister, 31);
451 m_assembler.PadddVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
452
453 m_assembler.PcmpeqdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
454 m_assembler.PslldVo(tmp2Register, 31);
455 m_assembler.PadddVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(xRegister));
456
457 m_assembler.PcmpgtdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
458
459 //res |= -(res < x)
460 m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
461
462 //Store result
463 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
464 }
465
Emit_Md_SubUSW_VarVarVar(const STATEMENT & statement)466 void CCodeGen_x86::Emit_Md_SubUSW_VarVarVar(const STATEMENT& statement)
467 {
468 auto dst = statement.dst->GetSymbol().get();
469 auto src1 = statement.src1->GetSymbol().get();
470 auto src2 = statement.src2->GetSymbol().get();
471
472 auto xRegister = CX86Assembler::xMM0;
473 auto resRegister = CX86Assembler::xMM1;
474 auto tmpRegister = CX86Assembler::xMM2;
475 auto tmp2Register = CX86Assembler::xMM3;
476
477 // This is based on code from http://locklessinc.com/articles/sat_arithmetic/
478 // u32b sat_subu32b(u32b x, u32b y)
479 // {
480 // u32b res = x - y;
481 // res &= -(res <= x);
482 //
483 // return res;
484 // }
485
486 m_assembler.MovdqaVo(xRegister, MakeVariable128SymbolAddress(src1));
487 m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(xRegister));
488 m_assembler.PsubdVo(resRegister, MakeVariable128SymbolAddress(src2));
489
490 //-(res <= x)
491 //PCMPGT will compare two signed integers, but we want unsigned comparison
492 //Thus, we add 0x80000000 to both values to "convert" them to signed
493 m_assembler.PcmpeqdVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
494 m_assembler.PslldVo(tmpRegister, 31);
495 m_assembler.PadddVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
496
497 m_assembler.PcmpeqdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
498 m_assembler.PslldVo(tmp2Register, 31);
499 m_assembler.PadddVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(xRegister));
500
501 m_assembler.MovdqaVo(xRegister, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
502
503 m_assembler.PcmpeqdVo(xRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
504 m_assembler.PcmpgtdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
505 m_assembler.PorVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(xRegister));
506
507 //res &= -(res <= x);
508 m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
509
510 //Store result
511 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
512 }
513
Emit_Md_MinW_VarVarVar(const STATEMENT & statement)514 void CCodeGen_x86::Emit_Md_MinW_VarVarVar(const STATEMENT& statement)
515 {
516 auto dst = statement.dst->GetSymbol().get();
517 auto src1 = statement.src1->GetSymbol().get();
518 auto src2 = statement.src2->GetSymbol().get();
519
520 auto src1Register = CX86Assembler::xMM0;
521 auto src2Register = CX86Assembler::xMM1;
522 auto mask1Register = CX86Assembler::xMM2;
523 auto mask2Register = CX86Assembler::xMM3;
524
525 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
526 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
527
528 m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
529 m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
530 m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
531
532 m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
533 m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
534 m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
535
536 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
537 }
538
Emit_Md_MaxW_VarVarVar(const STATEMENT & statement)539 void CCodeGen_x86::Emit_Md_MaxW_VarVarVar(const STATEMENT& statement)
540 {
541 auto dst = statement.dst->GetSymbol().get();
542 auto src1 = statement.src1->GetSymbol().get();
543 auto src2 = statement.src2->GetSymbol().get();
544
545 auto src1Register = CX86Assembler::xMM0;
546 auto src2Register = CX86Assembler::xMM1;
547 auto mask1Register = CX86Assembler::xMM2;
548 auto mask2Register = CX86Assembler::xMM3;
549
550 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
551 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
552
553 m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
554 m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
555 m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
556
557 m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
558 m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
559 m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
560
561 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
562 }
563
Emit_Md_PackHB_VarVarVar(const STATEMENT & statement)564 void CCodeGen_x86::Emit_Md_PackHB_VarVarVar(const STATEMENT& statement)
565 {
566 auto dst = statement.dst->GetSymbol().get();
567 auto src1 = statement.src1->GetSymbol().get();
568 auto src2 = statement.src2->GetSymbol().get();
569
570 auto resultRegister = CX86Assembler::xMM0;
571 auto tempRegister = CX86Assembler::xMM1;
572 auto maskRegister = CX86Assembler::xMM2;
573
574 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
575 m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
576
577 //Generate mask (0x00FF x8)
578 m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
579 m_assembler.PsrlwVo(maskRegister, 0x08);
580
581 //Mask both operands
582 m_assembler.PandVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
583 m_assembler.PandVo(tempRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
584
585 //Pack
586 m_assembler.PackuswbVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
587
588 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
589 }
590
Emit_Md_PackWH_VarVarVar(const STATEMENT & statement)591 void CCodeGen_x86::Emit_Md_PackWH_VarVarVar(const STATEMENT& statement)
592 {
593 auto dst = statement.dst->GetSymbol().get();
594 auto src1 = statement.src1->GetSymbol().get();
595 auto src2 = statement.src2->GetSymbol().get();
596
597 auto resultRegister = CX86Assembler::xMM0;
598 auto tempRegister = CX86Assembler::xMM1;
599
600 m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
601 m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
602
603 //Sign extend the lower half word of our registers
604 m_assembler.PslldVo(resultRegister, 0x10);
605 m_assembler.PsradVo(resultRegister, 0x10);
606
607 m_assembler.PslldVo(tempRegister, 0x10);
608 m_assembler.PsradVo(tempRegister, 0x10);
609
610 //Pack
611 m_assembler.PackssdwVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
612
613 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
614 }
615
Emit_Md_MovMasked_VarVarVar(const STATEMENT & statement)616 void CCodeGen_x86::Emit_Md_MovMasked_VarVarVar(const STATEMENT& statement)
617 {
618 auto dst = statement.dst->GetSymbol().get();
619 auto src1 = statement.src1->GetSymbol().get();
620 auto src2 = statement.src2->GetSymbol().get();
621 uint8 mask = static_cast<uint8>(statement.jmpCondition);
622
623 auto mask0Register = CX86Assembler::xMM0;
624 auto mask1Register = CX86Assembler::xMM1;
625
626 m_assembler.MovId(CX86Assembler::rAX, ~0);
627 m_assembler.MovdVo(mask0Register, CX86Assembler::MakeRegisterAddress(CX86Assembler::rAX));
628
629 //Generate shuffle selector
630 //0x00 -> gives us 0x00000000
631 //0x02 -> gives us 0xFFFFFFFF
632 uint8 shuffleSelector = 0;
633 for(unsigned int i = 0; i < 4; i++)
634 {
635 if(mask & (1 << i))
636 {
637 shuffleSelector |= (0x02) << (i * 2);
638 }
639 }
640
641 //mask0 -> proper mask
642 m_assembler.PshufdVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register), shuffleSelector);
643
644 //mask1 -> mask inverse
645 m_assembler.PcmpeqdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
646 m_assembler.PxorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register));
647
648 //Generate result
649 m_assembler.PandVo(mask0Register, MakeVariable128SymbolAddress(src1));
650 m_assembler.PandVo(mask1Register, MakeVariable128SymbolAddress(src2));
651 m_assembler.PorVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
652
653 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask0Register);
654 }
655
Emit_Md_MovMasked_Sse41_VarVarVar(const STATEMENT & statement)656 void CCodeGen_x86::Emit_Md_MovMasked_Sse41_VarVarVar(const STATEMENT& statement)
657 {
658 auto dst = statement.dst->GetSymbol().get();
659 auto src1 = statement.src1->GetSymbol().get();
660 auto src2 = statement.src2->GetSymbol().get();
661 uint8 mask = static_cast<uint8>(statement.jmpCondition);
662
663 //This could be improved if src1 and src2 are different
664 assert(dst->Equals(src1));
665
666 if(dst->IsRegister() && dst->Equals(src1))
667 {
668 m_assembler.BlendpsVo(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src2), mask);
669 }
670 else
671 {
672 auto tempRegister = CX86Assembler::xMM0;
673 m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
674 m_assembler.BlendpsVo(tempRegister, MakeVariable128SymbolAddress(src2), mask);
675 m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), tempRegister);
676 }
677 }
678
Emit_Md_Mov_RegVar(const STATEMENT & statement)679 void CCodeGen_x86::Emit_Md_Mov_RegVar(const STATEMENT& statement)
680 {
681 auto dst = statement.dst->GetSymbol().get();
682 auto src1 = statement.src1->GetSymbol().get();
683
684 m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
685 }
686
Emit_Md_Mov_MemReg(const STATEMENT & statement)687 void CCodeGen_x86::Emit_Md_Mov_MemReg(const STATEMENT& statement)
688 {
689 auto dst = statement.dst->GetSymbol().get();
690 auto src1 = statement.src1->GetSymbol().get();
691
692 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), m_mdRegisters[src1->m_valueLow]);
693 }
694
Emit_Md_Mov_MemMem(const STATEMENT & statement)695 void CCodeGen_x86::Emit_Md_Mov_MemMem(const STATEMENT& statement)
696 {
697 CSymbol* dst = statement.dst->GetSymbol().get();
698 CSymbol* src1 = statement.src1->GetSymbol().get();
699
700 CX86Assembler::XMMREGISTER resultRegister = CX86Assembler::xMM0;
701
702 m_assembler.MovapsVo(resultRegister, MakeMemory128SymbolAddress(src1));
703 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
704 }
705
Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)706 void CCodeGen_x86::Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)
707 {
708 auto maskRegister = CX86Assembler::xMM1;
709
710 assert(dstRegister != maskRegister);
711
712 m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
713 m_assembler.PsrldVo(maskRegister, 1);
714 m_assembler.PandVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
715 }
716
Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)717 void CCodeGen_x86::Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)
718 {
719 auto cstRegister = CX86Assembler::xMM1;
720
721 assert(dstRegister != cstRegister);
722
723 m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
724 m_assembler.PxorVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
725 }
726
Emit_Md_MakeSz(CX86Assembler::XMMREGISTER dstRegister,const CX86Assembler::CAddress & srcAddress)727 void CCodeGen_x86::Emit_Md_MakeSz(CX86Assembler::XMMREGISTER dstRegister, const CX86Assembler::CAddress& srcAddress)
728 {
729 auto zeroRegister = CX86Assembler::xMM1;
730 assert(dstRegister != zeroRegister);
731
732 m_assembler.MovdqaVo(dstRegister, srcAddress);
733 m_assembler.PsradVo(dstRegister, 31);
734
735 m_assembler.PxorVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
736 m_assembler.CmppsVo(zeroRegister, srcAddress, CX86Assembler::SSE_CMP_EQ);
737
738 m_assembler.PackssdwVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
739 }
740
Emit_Md_MakeSz_VarVar(const STATEMENT & statement)741 void CCodeGen_x86::Emit_Md_MakeSz_VarVar(const STATEMENT& statement)
742 {
743 auto dst = statement.dst->GetSymbol().get();
744 auto src1 = statement.src1->GetSymbol().get();
745
746 auto szRegister = CX86Assembler::xMM0;
747 auto tmpFlagRegister = CX86Assembler::rAX;
748 auto dstRegister = PrepareSymbolRegisterDef(dst, CX86Assembler::rDX);
749
750 Emit_Md_MakeSz(szRegister, MakeVariable128SymbolAddress(src1));
751
752 //Extract bits
753 m_assembler.PmovmskbVo(tmpFlagRegister, szRegister);
754
755 //Generate bit field
756 m_assembler.XorEd(dstRegister, CX86Assembler::MakeRegisterAddress(dstRegister));
757 for(unsigned int i = 0; i < 8; i++)
758 {
759 m_assembler.ShrEd(CX86Assembler::MakeRegisterAddress(tmpFlagRegister), 2);
760 m_assembler.RclEd(CX86Assembler::MakeRegisterAddress(dstRegister), 1);
761 }
762
763 CommitSymbolRegister(dst, dstRegister);
764 }
765
Emit_Md_MakeSz_Ssse3_VarVar(const STATEMENT & statement)766 void CCodeGen_x86::Emit_Md_MakeSz_Ssse3_VarVar(const STATEMENT& statement)
767 {
768 auto dst = statement.dst->GetSymbol().get();
769 auto src1 = statement.src1->GetSymbol().get();
770
771 auto szRegister = CX86Assembler::xMM0;
772 auto dstRegister = PrepareSymbolRegisterDef(dst, CX86Assembler::rDX);
773
774 Emit_Md_MakeSz(szRegister, MakeVariable128SymbolAddress(src1));
775
776 //Extract bits
777 m_assembler.PshufbVo(szRegister, MakeConstant128Address(g_makeSzShufflePattern));
778 m_assembler.PmovmskbVo(dstRegister, szRegister);
779
780 CommitSymbolRegister(dst, dstRegister);
781 }
782
Emit_Md_Expand_RegReg(const STATEMENT & statement)783 void CCodeGen_x86::Emit_Md_Expand_RegReg(const STATEMENT& statement)
784 {
785 auto dst = statement.dst->GetSymbol().get();
786 auto src1 = statement.src1->GetSymbol().get();
787
788 auto resultRegister = m_mdRegisters[dst->m_valueLow];
789
790 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
791 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
792 }
793
Emit_Md_Expand_RegMem(const STATEMENT & statement)794 void CCodeGen_x86::Emit_Md_Expand_RegMem(const STATEMENT& statement)
795 {
796 auto dst = statement.dst->GetSymbol().get();
797 auto src1 = statement.src1->GetSymbol().get();
798
799 auto resultRegister = m_mdRegisters[dst->m_valueLow];
800
801 m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
802 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
803 }
804
Emit_Md_Expand_RegCst(const STATEMENT & statement)805 void CCodeGen_x86::Emit_Md_Expand_RegCst(const STATEMENT& statement)
806 {
807 auto dst = statement.dst->GetSymbol().get();
808 auto src1 = statement.src1->GetSymbol().get();
809
810 auto cstRegister = CX86Assembler::rAX;
811 auto resultRegister = m_mdRegisters[dst->m_valueLow];
812
813 m_assembler.MovId(cstRegister, src1->m_valueLow);
814 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
815 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
816 }
817
Emit_Md_Expand_MemReg(const STATEMENT & statement)818 void CCodeGen_x86::Emit_Md_Expand_MemReg(const STATEMENT& statement)
819 {
820 auto dst = statement.dst->GetSymbol().get();
821 auto src1 = statement.src1->GetSymbol().get();
822
823 auto resultRegister = CX86Assembler::xMM0;
824
825 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
826 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
827 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
828 }
829
Emit_Md_Expand_MemMem(const STATEMENT & statement)830 void CCodeGen_x86::Emit_Md_Expand_MemMem(const STATEMENT& statement)
831 {
832 auto dst = statement.dst->GetSymbol().get();
833 auto src1 = statement.src1->GetSymbol().get();
834
835 auto resultRegister = CX86Assembler::xMM0;
836
837 m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
838 m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
839 m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
840 }
841
Emit_Md_Expand_MemCst(const STATEMENT & statement)842 void CCodeGen_x86::Emit_Md_Expand_MemCst(const STATEMENT& statement)
843 {
844 auto dst = statement.dst->GetSymbol().get();
845 auto src1 = statement.src1->GetSymbol().get();
846
847 auto cstRegister = CX86Assembler::rAX;
848 auto resultRegister = CX86Assembler::xMM0;
849
850 m_assembler.MovId(cstRegister, src1->m_valueLow);
851 m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
852 m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
853 m_assembler.MovdqaVo(MakeMemory128SymbolAddress(dst), resultRegister);
854 }
855
Emit_Md_Srl256_VarMem(CSymbol * dst,CSymbol * src1,const CX86Assembler::CAddress & offsetAddress)856 void CCodeGen_x86::Emit_Md_Srl256_VarMem(CSymbol* dst, CSymbol* src1, const CX86Assembler::CAddress& offsetAddress)
857 {
858 auto offsetRegister = CX86Assembler::rAX;
859 auto resultRegister = CX86Assembler::xMM0;
860
861 assert(src1->m_type == SYM_TEMPORARY256);
862
863 m_assembler.MovEd(offsetRegister, offsetAddress);
864 m_assembler.AndId(CX86Assembler::MakeRegisterAddress(offsetRegister), 0x7F);
865 m_assembler.ShrEd(CX86Assembler::MakeRegisterAddress(offsetRegister), 3);
866 m_assembler.AddId(CX86Assembler::MakeRegisterAddress(offsetRegister), src1->m_stackLocation + m_stackLevel);
867
868 m_assembler.MovdquVo(resultRegister, CX86Assembler::MakeBaseIndexScaleAddress(CX86Assembler::rSP, offsetRegister, 1));
869 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
870 }
871
Emit_Md_Srl256_VarMemVar(const STATEMENT & statement)872 void CCodeGen_x86::Emit_Md_Srl256_VarMemVar(const STATEMENT& statement)
873 {
874 auto dst = statement.dst->GetSymbol().get();
875 auto src1 = statement.src1->GetSymbol().get();
876 auto src2 = statement.src2->GetSymbol().get();
877
878 Emit_Md_Srl256_VarMem(dst, src1, MakeVariableSymbolAddress(src2));
879 }
880
Emit_Md_Srl256_VarMemCst(const STATEMENT & statement)881 void CCodeGen_x86::Emit_Md_Srl256_VarMemCst(const STATEMENT& statement)
882 {
883 auto dst = statement.dst->GetSymbol().get();
884 auto src1 = statement.src1->GetSymbol().get();
885 auto src2 = statement.src2->GetSymbol().get();
886
887 auto resultRegister = CX86Assembler::xMM0;
888
889 assert(src1->m_type == SYM_TEMPORARY256);
890 assert(src2->m_type == SYM_CONSTANT);
891
892 uint32 offset = (src2->m_valueLow & 0x7F) / 8;
893
894 m_assembler.MovdquVo(resultRegister, MakeTemporary256SymbolElementAddress(src1, offset));
895 m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
896 }
897
Emit_MergeTo256_MemVarVar(const STATEMENT & statement)898 void CCodeGen_x86::Emit_MergeTo256_MemVarVar(const STATEMENT& statement)
899 {
900 auto dst = statement.dst->GetSymbol().get();
901 auto src1 = statement.src1->GetSymbol().get();
902 auto src2 = statement.src2->GetSymbol().get();
903
904 assert(dst->m_type == SYM_TEMPORARY256);
905
906 auto src1Register = CX86Assembler::xMM0;
907 auto src2Register = CX86Assembler::xMM1;
908
909 //TODO: Improve this to write out registers directly to temporary's memory space
910 //instead of passing by temporary registers
911
912 m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
913 m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
914
915 m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x00), src1Register);
916 m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x10), src2Register);
917 }
918
//Expands to the two matcher entries of a shift by constant: one for a
//REGISTER128 destination and one for a MEMORY128 destination, both with a
//VARIABLE128 source and a CONSTANT amount. MDOP and SAMASK are forwarded as
//template arguments to the Emit_Md_Shift_* emitters.
//NOTE(review): SAMASK is presumably the mask applied to the shift amount - confirm
//against Emit_Md_Shift_RegVarCst / Emit_Md_Shift_MemVarCst.
//The expansion ends with a comma, so invocations are not followed by one.
#define MD_CONST_MATCHERS_SHIFT(MDOP_CST, MDOP, SAMASK) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_x86::Emit_Md_Shift_RegVarCst<MDOP, SAMASK> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_x86::Emit_Md_Shift_MemVarCst<MDOP, SAMASK> },
922
//Expands to the two matcher entries of an operation with one VARIABLE128
//source: one for a REGISTER128 destination (Emit_Md_RegVar<MDOP>) and one for
//a MEMORY128 destination (Emit_Md_MemVar<MDOP>).
//The expansion ends with a comma, so invocations are not followed by one.
#define MD_CONST_MATCHERS_2OPS(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_MemVar<MDOP> },
926
//Expands to the four matcher entries of an operation with two 128-bit sources.
//The first two entries cover specific register/memory operand combinations
//and the last two are the VARIABLE128 catch-alls for register and memory
//destinations.
//NOTE(review): the specific entries are listed before the generic ones, which
//suggests that entry order decides which emitter is picked - confirm against
//the code that consumes the table.
//The expansion ends with a comma, so invocations are not followed by one.
#define MD_CONST_MATCHERS_3OPS(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_REGISTER128, MATCH_REGISTER128, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegRegReg<MDOP> }, \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_MEMORY128, MATCH_REGISTER128, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegMemReg<MDOP> }, \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegVarVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MemVarVar<MDOP> },
932
//Expands to a single matcher entry that accepts any VARIABLE128 destination
//and sources and dispatches to Emit_Md_VarVarVarRev<MDOP>.
//NOTE(review): "Rev" presumably means the emitter swaps the source operands -
//confirm against Emit_Md_VarVarVarRev.
//The expansion ends with a comma, so invocations are not followed by one.
#define MD_CONST_MATCHERS_3OPS_REV(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_VarVarVarRev<MDOP> },
935
//Expands to the two matcher entries of an operation with one VARIABLE128
//source handled by the Emit_Md_SingleOp_* emitters: one for a REGISTER128
//destination and one for a MEMORY128 destination.
//The expansion ends with a comma, so invocations are not followed by one.
#define MD_CONST_MATCHERS_SINGLEOP(MDOP_CST, MDOP) \
	{ MDOP_CST, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_RegVar<MDOP> }, \
	{ MDOP_CST, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_MemVar<MDOP> },
939
//Main matcher table for the 128-bit (MD) operations. Each entry pairs an
//opcode and the match kinds of its operands with the member function that
//emits code for that combination.
//The MD_CONST_MATCHERS_* macros expand to one or more complete entries that
//already end with a comma, which is why their invocations have no separator.
//OP_MD_MIN_W, OP_MD_MAX_W, OP_MD_MOV_MASKED and OP_MD_MAKESZ are not listed
//here; they have their own tables further down in this file.
//NOTE(review): the last entry (MATCH_NIL operands, nullptr emitter) looks like
//an end-of-table sentinel - confirm against the code that walks this array.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdConstMatchers[] =
{
	//ADD
	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_B, MDOP_ADDB)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_H, MDOP_ADDH)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_W, MDOP_ADDW)

	//ADDSS (the _W form has a dedicated emitter instead of a MDOP template)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADDSS_B, MDOP_ADDSSB)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADDSS_H, MDOP_ADDSSH)
	{ OP_MD_ADDSS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_AddSSW_VarVarVar },

	//ADDUS (the _W form has a dedicated emitter instead of a MDOP template)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_B, MDOP_ADDUSB)
	MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_H, MDOP_ADDUSH)
	{ OP_MD_ADDUS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_AddUSW_VarVarVar },

	//SUB
	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_B, MDOP_SUBB)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_H, MDOP_SUBH)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_W, MDOP_SUBW)

	//SUBSS (no _B entry in this table; the _W form has a dedicated emitter)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUBSS_H, MDOP_SUBSSH)
	{ OP_MD_SUBSS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SubSSW_VarVarVar },

	//SUBUS (the _W form has a dedicated emitter instead of a MDOP template)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_B, MDOP_SUBUSB)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_H, MDOP_SUBUSH)
	{ OP_MD_SUBUS_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SubUSW_VarVarVar },

	//CMPEQ / CMPGT
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_B, MDOP_CMPEQB)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_H, MDOP_CMPEQH)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_W, MDOP_CMPEQW)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_B, MDOP_CMPGTB)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_H, MDOP_CMPGTH)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_W, MDOP_CMPGTW)

	//MIN_H / MAX_H
	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_H, MDOP_MINH)

	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_H, MDOP_MAXH)

	//AND / OR / XOR
	MD_CONST_MATCHERS_3OPS(OP_MD_AND, MDOP_AND)
	MD_CONST_MATCHERS_3OPS(OP_MD_OR, MDOP_OR)
	MD_CONST_MATCHERS_3OPS(OP_MD_XOR, MDOP_XOR)

	//Shifts by constant; the third macro argument is forwarded as SAMASK
	MD_CONST_MATCHERS_SHIFT(OP_MD_SRLH, MDOP_SRLH, 0x0F)
	MD_CONST_MATCHERS_SHIFT(OP_MD_SRAH, MDOP_SRAH, 0x0F)
	MD_CONST_MATCHERS_SHIFT(OP_MD_SLLH, MDOP_SLLH, 0x0F)

	MD_CONST_MATCHERS_SHIFT(OP_MD_SRLW, MDOP_SRLW, 0x1F)
	MD_CONST_MATCHERS_SHIFT(OP_MD_SRAW, MDOP_SRAW, 0x1F)
	MD_CONST_MATCHERS_SHIFT(OP_MD_SLLW, MDOP_SLLW, 0x1F)

	//SRL256: source is a 256-bit memory operand, amount is a variable or a constant
	{ OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Md_Srl256_VarMemVar },
	{ OP_MD_SRL256, MATCH_VARIABLE128, MATCH_MEMORY256, MATCH_CONSTANT, MATCH_NIL, &CCodeGen_x86::Emit_Md_Srl256_VarMemCst },

	//EXPAND: every register/memory destination x register/memory/constant source pair
	{ OP_MD_EXPAND, MATCH_REGISTER128, MATCH_REGISTER, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegReg },
	{ OP_MD_EXPAND, MATCH_REGISTER128, MATCH_MEMORY, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegMem },
	{ OP_MD_EXPAND, MATCH_REGISTER128, MATCH_CONSTANT, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_RegCst },
	{ OP_MD_EXPAND, MATCH_MEMORY128, MATCH_REGISTER, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemReg },
	{ OP_MD_EXPAND, MATCH_MEMORY128, MATCH_MEMORY, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemMem },
	{ OP_MD_EXPAND, MATCH_MEMORY128, MATCH_CONSTANT, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Expand_MemCst },

	//PACK
	{ OP_MD_PACK_HB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_PackHB_VarVarVar },
	{ OP_MD_PACK_WH, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_PackWH_VarVarVar },

	//UNPACK_LOWER / UNPACK_UPPER, all through the "Rev" emitter
	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_BH, MDOP_UNPACK_LOWER_BH)
	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_HW, MDOP_UNPACK_LOWER_HW)
	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_WD, MDOP_UNPACK_LOWER_WD)

	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_BH, MDOP_UNPACK_UPPER_BH)
	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_HW, MDOP_UNPACK_UPPER_HW)
	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_WD, MDOP_UNPACK_UPPER_WD)

	//_S suffixed arithmetic and compares
	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_S, MDOP_ADDS)
	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_S, MDOP_SUBS)
	MD_CONST_MATCHERS_3OPS(OP_MD_MUL_S, MDOP_MULS)
	MD_CONST_MATCHERS_3OPS(OP_MD_DIV_S, MDOP_DIVS)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPLT_S, MDOP_CMPLTS)
	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_S, MDOP_CMPGTS)

	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_S, MDOP_MINS)
	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_S, MDOP_MAXS)

	//Operations handled by the SingleOp emitters
	MD_CONST_MATCHERS_SINGLEOP(OP_MD_ABS_S, MDOP_ABS)
	MD_CONST_MATCHERS_SINGLEOP(OP_MD_NOT, MDOP_NOT)

	//Conversions
	MD_CONST_MATCHERS_2OPS(OP_MD_TOWORD_TRUNCATE, MDOP_TOWORD_TRUNCATE)
	MD_CONST_MATCHERS_2OPS(OP_MD_TOSINGLE, MDOP_TOSINGLE)

	//128-bit OP_MOV
	{ OP_MOV, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_RegVar },
	{ OP_MOV, MATCH_MEMORY128, MATCH_REGISTER128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_MemReg },
	{ OP_MOV, MATCH_MEMORY128, MATCH_MEMORY128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Mov_MemMem },

	//MERGETO256: destination is a 256-bit memory operand
	{ OP_MERGETO256, MATCH_MEMORY256, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_MergeTo256_MemVarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1033
//Matcher table for OP_MD_MIN_W / OP_MD_MAX_W that routes both to dedicated
//emitters (Emit_Md_MinW_VarVarVar / Emit_Md_MaxW_VarVarVar).
//NOTE(review): presumably the table used when SSE4.1 is not available, given
//the g_mdMinMaxWSse41ConstMatchers counterpart - the selection is made outside
//this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWConstMatchers[] =
{
	{ OP_MD_MIN_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MinW_VarVarVar },
	{ OP_MD_MAX_W, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MaxW_VarVarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1041
//Matcher table for OP_MD_MIN_W / OP_MD_MAX_W that goes through the generic
//three-operand macro with MDOP_MINW / MDOP_MAXW.
//NOTE(review): presumably installed only when SSE4.1 is available, as the
//name suggests - the selection is made outside this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWSse41ConstMatchers[] =
{
	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_W, MDOP_MINW)
	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_W, MDOP_MAXW)

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1049
//Matcher table for OP_MD_MOV_MASKED that routes to Emit_Md_MovMasked_VarVarVar.
//NOTE(review): presumably the table used when SSE4.1 is not available, given
//the g_mdMovMaskedSse41ConstMatchers counterpart - the selection is made
//outside this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMovMaskedConstMatchers[] =
{
	{ OP_MD_MOV_MASKED, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MovMasked_VarVarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1056
//Matcher table for OP_MD_MOV_MASKED that routes to Emit_Md_MovMasked_Sse41_VarVarVar.
//NOTE(review): presumably installed only when SSE4.1 is available, as the
//name suggests - the selection is made outside this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMovMaskedSse41ConstMatchers[] =
{
	{ OP_MD_MOV_MASKED, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MovMasked_Sse41_VarVarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1063
//Matcher table for OP_MD_MAKESZ that routes to Emit_Md_MakeSz_VarVar, the
//emitter that builds the flag field with a PMOVMSKB + SHR/RCL loop.
//NOTE(review): presumably the table used when SSSE3 is not available, given
//the g_mdFpFlagSsse3ConstMatchers counterpart - the selection is made outside
//this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdFpFlagConstMatchers[] =
{
	{ OP_MD_MAKESZ, MATCH_VARIABLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_MakeSz_VarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1070
//Matcher table for OP_MD_MAKESZ that routes to Emit_Md_MakeSz_Ssse3_VarVar,
//the emitter that builds the flag field with PSHUFB + PMOVMSKB.
//NOTE(review): presumably installed only when SSSE3 is available, as the
//name suggests - the selection is made outside this part of the file.
CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdFpFlagSsse3ConstMatchers[] =
{
	{ OP_MD_MAKESZ, MATCH_VARIABLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_MakeSz_Ssse3_VarVar },

	//Closing entry (nullptr emitter)
	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
};
1077