1 #include "Jitter_CodeGen_x86.h"
2 
3 using namespace Jitter;
4 
MakeRelative128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)5 CX86Assembler::CAddress CCodeGen_x86::MakeRelative128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
6 {
7 	assert(symbol->m_type == SYM_RELATIVE128);
8 	assert((symbol->m_valueLow & 0xF) == 0);
9 	return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, symbol->m_valueLow + (elementIdx * 4));
10 }
11 
MakeTemporary128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)12 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
13 {
14 	assert(symbol->m_type == SYM_TEMPORARY128);
15 //	assert(((symbol->m_stackLocation + m_stackLevel) & 0xF) == 0);
16 	return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + (elementIdx * 4));
17 }
18 
MakeTemporary256SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)19 CX86Assembler::CAddress CCodeGen_x86::MakeTemporary256SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
20 {
21 	assert(symbol->m_type == SYM_TEMPORARY256);
22 	assert(((symbol->m_stackLocation + m_stackLevel) & 0x1F) == 0);
23 	return CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rSP, symbol->m_stackLocation + m_stackLevel + elementIdx);
24 }
25 
MakeVariable128SymbolAddress(CSymbol * symbol)26 CX86Assembler::CAddress CCodeGen_x86::MakeVariable128SymbolAddress(CSymbol* symbol)
27 {
28 	switch(symbol->m_type)
29 	{
30 	case SYM_REGISTER128:
31 		return CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[symbol->m_valueLow]);
32 		break;
33 	case SYM_RELATIVE128:
34 		return MakeRelative128SymbolElementAddress(symbol, 0);
35 		break;
36 	case SYM_TEMPORARY128:
37 		return MakeTemporary128SymbolElementAddress(symbol, 0);
38 		break;
39 	default:
40 		throw std::exception();
41 		break;
42 	}
43 }
44 
MakeMemory128SymbolAddress(CSymbol * symbol)45 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolAddress(CSymbol* symbol)
46 {
47 	switch(symbol->m_type)
48 	{
49 	case SYM_RELATIVE128:
50 		return MakeRelative128SymbolElementAddress(symbol, 0);
51 		break;
52 	case SYM_TEMPORARY128:
53 		return MakeTemporary128SymbolElementAddress(symbol, 0);
54 		break;
55 	default:
56 		throw std::exception();
57 		break;
58 	}
59 }
60 
MakeMemory128SymbolElementAddress(CSymbol * symbol,unsigned int elementIdx)61 CX86Assembler::CAddress CCodeGen_x86::MakeMemory128SymbolElementAddress(CSymbol* symbol, unsigned int elementIdx)
62 {
63 	switch(symbol->m_type)
64 	{
65 	case SYM_RELATIVE128:
66 		return MakeRelative128SymbolElementAddress(symbol, elementIdx);
67 		break;
68 	case SYM_TEMPORARY128:
69 		return MakeTemporary128SymbolElementAddress(symbol, elementIdx);
70 		break;
71 	default:
72 		throw std::exception();
73 		break;
74 	}
75 }
76 
77 template <typename MDOP>
Emit_Md_RegVar(const STATEMENT & statement)78 void CCodeGen_x86::Emit_Md_RegVar(const STATEMENT& statement)
79 {
80 	auto dst = statement.dst->GetSymbol().get();
81 	auto src1 = statement.src1->GetSymbol().get();
82 
83 	((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
84 }
85 
86 template <typename MDOP>
Emit_Md_MemVar(const STATEMENT & statement)87 void CCodeGen_x86::Emit_Md_MemVar(const STATEMENT& statement)
88 {
89 	auto dst = statement.dst->GetSymbol().get();
90 	auto src1 = statement.src1->GetSymbol().get();
91 
92 	auto dstRegister = CX86Assembler::xMM0;
93 
94 	((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
95 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
96 }
97 
98 template <typename MDOP>
Emit_Md_RegRegReg(const STATEMENT & statement)99 void CCodeGen_x86::Emit_Md_RegRegReg(const STATEMENT& statement)
100 {
101 	auto dst = statement.dst->GetSymbol().get();
102 	auto src1 = statement.src1->GetSymbol().get();
103 	auto src2 = statement.src2->GetSymbol().get();
104 
105 	if(dst->Equals(src1))
106 	{
107 		((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow],
108 			CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
109 	}
110 	else
111 	{
112 		auto src2Register = m_mdRegisters[src2->m_valueLow];
113 
114 		if(dst->Equals(src2))
115 		{
116 			m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src2->m_valueLow]));
117 			src2Register = CX86Assembler::xMM0;
118 		}
119 
120 		m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(m_mdRegisters[src1->m_valueLow]));
121 		((m_assembler).*(MDOP::OpVo()))(m_mdRegisters[dst->m_valueLow], CX86Assembler::MakeXmmRegisterAddress(src2Register));
122 	}
123 }
124 
125 template <typename MDOP>
Emit_Md_RegMemReg(const STATEMENT & statement)126 void CCodeGen_x86::Emit_Md_RegMemReg(const STATEMENT& statement)
127 {
128 	auto dst = statement.dst->GetSymbol().get();
129 	auto src1 = statement.src1->GetSymbol().get();
130 	auto src2 = statement.src2->GetSymbol().get();
131 
132 	auto dstRegister = m_mdRegisters[dst->m_valueLow];
133 	auto src2Register = m_mdRegisters[src2->m_valueLow];
134 
135 	if(dst->Equals(src2))
136 	{
137 		m_assembler.MovapsVo(CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(src2Register));
138 		src2Register = CX86Assembler::xMM0;
139 	}
140 
141 	m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
142 	((m_assembler).*(MDOP::OpVo()))(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src2Register));
143 }
144 
145 template <typename MDOP>
Emit_Md_RegVarVar(const STATEMENT & statement)146 void CCodeGen_x86::Emit_Md_RegVarVar(const STATEMENT& statement)
147 {
148 	auto dst = statement.dst->GetSymbol().get();
149 	auto src1 = statement.src1->GetSymbol().get();
150 	auto src2 = statement.src2->GetSymbol().get();
151 
152 	//If we get in here, it must absolutely mean that the second source isn't a register
153 	//Otherwise, some of the assumuptions done below will be wrong (dst mustn't be equal to src2)
154 	assert(src2->m_type != SYM_REGISTER128);
155 
156 	auto dstRegister = m_mdRegisters[dst->m_valueLow];
157 
158 	if(!dst->Equals(src1))
159 	{
160 		m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
161 	}
162 
163 	((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
164 }
165 
166 template <typename MDOP>
Emit_Md_MemVarVar(const STATEMENT & statement)167 void CCodeGen_x86::Emit_Md_MemVarVar(const STATEMENT& statement)
168 {
169 	auto dst = statement.dst->GetSymbol().get();
170 	auto src1 = statement.src1->GetSymbol().get();
171 	auto src2 = statement.src2->GetSymbol().get();
172 
173 	auto dstRegister = CX86Assembler::xMM0;
174 
175 	m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
176 	((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src2));
177 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), dstRegister);
178 }
179 
180 template <typename MDOP>
Emit_Md_VarVarVarRev(const STATEMENT & statement)181 void CCodeGen_x86::Emit_Md_VarVarVarRev(const STATEMENT& statement)
182 {
183 	//TODO: This could be improved further, but we might want
184 	//to reverse the operands somewhere else as to not
185 	//copy paste the code from the "non-reversed" path
186 
187 	auto dst = statement.dst->GetSymbol().get();
188 	auto src1 = statement.src1->GetSymbol().get();
189 	auto src2 = statement.src2->GetSymbol().get();
190 
191 	auto dstRegister = CX86Assembler::xMM0;
192 
193 	m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src2));
194 	((m_assembler).*(MDOP::OpVo()))(dstRegister, MakeVariable128SymbolAddress(src1));
195 	m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), dstRegister);
196 }
197 
198 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_RegVarCst(const STATEMENT & statement)199 void CCodeGen_x86::Emit_Md_Shift_RegVarCst(const STATEMENT& statement)
200 {
201 	auto dst = statement.dst->GetSymbol().get();
202 	auto src1 = statement.src1->GetSymbol().get();
203 	auto src2 = statement.src2->GetSymbol().get();
204 
205 	auto dstRegister = m_mdRegisters[dst->m_valueLow];
206 
207 	if(!dst->Equals(src1))
208 	{
209 		m_assembler.MovapsVo(dstRegister, MakeVariable128SymbolAddress(src1));
210 	}
211 
212 	((m_assembler).*(MDOPSHIFT::OpVo()))(dstRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
213 }
214 
215 template <typename MDOPSHIFT, uint8 SAMASK>
Emit_Md_Shift_MemVarCst(const STATEMENT & statement)216 void CCodeGen_x86::Emit_Md_Shift_MemVarCst(const STATEMENT& statement)
217 {
218 	auto dst = statement.dst->GetSymbol().get();
219 	auto src1 = statement.src1->GetSymbol().get();
220 	auto src2 = statement.src2->GetSymbol().get();
221 
222 	auto tmpRegister = CX86Assembler::xMM0;
223 
224 	m_assembler.MovapsVo(tmpRegister, MakeVariable128SymbolAddress(src1));
225 	((m_assembler).*(MDOPSHIFT::OpVo()))(tmpRegister, static_cast<uint8>(src2->m_valueLow & SAMASK));
226 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), tmpRegister);
227 }
228 
229 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_RegVar(const STATEMENT & statement)230 void CCodeGen_x86::Emit_Md_SingleOp_RegVar(const STATEMENT& statement)
231 {
232 	auto dst = statement.dst->GetSymbol().get();
233 	auto src1 = statement.src1->GetSymbol().get();
234 
235 	auto resultRegister = m_mdRegisters[dst->m_valueLow];
236 
237 	if(!dst->Equals(src1))
238 	{
239 		m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
240 	}
241 
242 	((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
243 }
244 
245 template <typename MDOPSINGLEOP>
Emit_Md_SingleOp_MemVar(const STATEMENT & statement)246 void CCodeGen_x86::Emit_Md_SingleOp_MemVar(const STATEMENT& statement)
247 {
248 	auto dst = statement.dst->GetSymbol().get();
249 	auto src1 = statement.src1->GetSymbol().get();
250 
251 	auto resultRegister = CX86Assembler::xMM0;
252 
253 	m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src1));
254 	((*this).*(MDOPSINGLEOP::OpVr()))(resultRegister);
255 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
256 }
257 
258 template <typename MDOPFLAG>
Emit_Md_GetFlag_RegVar(const STATEMENT & statement)259 void CCodeGen_x86::Emit_Md_GetFlag_RegVar(const STATEMENT& statement)
260 {
261 	auto dst = statement.dst->GetSymbol().get();
262 	auto src1 = statement.src1->GetSymbol().get();
263 
264 	((*this).*(MDOPFLAG::OpEd()))(m_registers[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
265 }
266 
267 template <typename MDOPFLAG>
Emit_Md_GetFlag_MemVar(const STATEMENT & statement)268 void CCodeGen_x86::Emit_Md_GetFlag_MemVar(const STATEMENT& statement)
269 {
270 	auto dst = statement.dst->GetSymbol().get();
271 	auto src1 = statement.src1->GetSymbol().get();
272 
273 	auto tmpRegister = CX86Assembler::rAX;
274 	((*this).*(MDOPFLAG::OpEd()))(tmpRegister, MakeVariable128SymbolAddress(src1));
275 	m_assembler.MovGd(MakeMemorySymbolAddress(dst), tmpRegister);
276 }
277 
Emit_Md_AddSSW_VarVarVar(const STATEMENT & statement)278 void CCodeGen_x86::Emit_Md_AddSSW_VarVarVar(const STATEMENT& statement)
279 {
280 	auto dst = statement.dst->GetSymbol().get();
281 	auto src1 = statement.src1->GetSymbol().get();
282 	auto src2 = statement.src2->GetSymbol().get();
283 
284 	auto uxRegister = CX86Assembler::xMM0;
285 	auto uyRegister = CX86Assembler::xMM1;
286 	auto resRegister = CX86Assembler::xMM2;
287 	auto cstRegister = CX86Assembler::xMM3;
288 
289 //	This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
290 //	s32b sat_adds32b(s32b x, s32b y)
291 //	{
292 //		u32b ux = x;
293 //		u32b uy = y;
294 //		u32b res = ux + uy;
295 //
296 //		/* Calculate overflowed result. (Don't change the sign bit of ux) */
297 //		ux = (ux >> 31) + INT_MAX;
298 //
299 //		s32b sign = (s32b) ((ux ^ uy) | ~(uy ^ res))
300 //		sign >>= 31;		/* Arithmetic shift, either 0 or ~0*/
301 //		res = (res & sign) | (ux & ~sign);
302 //
303 //		return res;
304 //	}
305 
306 	//ux = src1
307 	//uy = src2
308 	m_assembler.MovapsVo(uxRegister, MakeVariable128SymbolAddress(src1));
309 	m_assembler.MovapsVo(uyRegister, MakeVariable128SymbolAddress(src2));
310 
311 	//res = ux + uy
312 	m_assembler.MovapsVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
313 	m_assembler.PadddVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
314 
315 	//cst = 0x7FFFFFFF
316 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
317 	m_assembler.PsrldVo(cstRegister, 1);
318 
319 	//ux = (ux >> 31)
320 	m_assembler.PsrldVo(uxRegister, 31);
321 
322 	//ux += 0x7FFFFFFF
323 	m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
324 
325 	//uy = ~(uy ^ res)
326 	//------
327 	//uy ^ res
328 	m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
329 
330 	//~(uy ^ res)
331 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
332 	m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
333 
334 	//cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
335 	m_assembler.MovapsVo(cstRegister ,CX86Assembler::MakeXmmRegisterAddress(uxRegister));
336 	m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
337 
338 	//uy = ((ux ^ uy) | ~(uy ^ res)) >> 31; (signed operation)
339 	m_assembler.PorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
340 	m_assembler.PsradVo(uyRegister, 31);
341 
342 	//res = (res & uy)	(uy is the sign value)
343 	m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
344 
345 	//ux = (ux & ~uy)
346 	//------
347 	//~uy
348 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
349 	m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
350 
351 	//ux & ~uy
352 	m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
353 
354 	//res = (res & uy) | (ux & ~uy)
355 	m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
356 
357 	//Copy final result
358 	m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resRegister);
359 }
360 
Emit_Md_SubSSW_VarVarVar(const STATEMENT & statement)361 void CCodeGen_x86::Emit_Md_SubSSW_VarVarVar(const STATEMENT& statement)
362 {
363 	auto dst = statement.dst->GetSymbol().get();
364 	auto src1 = statement.src1->GetSymbol().get();
365 	auto src2 = statement.src2->GetSymbol().get();
366 
367 	auto uxRegister = CX86Assembler::xMM0;
368 	auto uyRegister = CX86Assembler::xMM1;
369 	auto resRegister = CX86Assembler::xMM2;
370 	auto cstRegister = CX86Assembler::xMM3;
371 
372 //	This is based on code from http://locklessinc.com/articles/sat_arithmetic/ modified to work without cmovns
373 //	s32b sat_subs32b(s32b x, s32b y)
374 //	{
375 //		u32b ux = x;
376 //		u32b uy = y;
377 //		u32b res = ux - uy;
378 //
379 //		ux = (ux >> 31) + INT_MAX;
380 //
381 //		s32b sign = (s32b) ((ux ^ uy) & (ux ^ res))
382 //		sign >>= 31;		/* Arithmetic shift, either 0 or ~0*/
383 //		res = (res & ~sign) | (ux & sign);
384 //
385 //		return res;
386 //	}
387 
388 	//ux = src1
389 	//uy = src2
390 	m_assembler.MovdqaVo(uxRegister, MakeVariable128SymbolAddress(src1));
391 	m_assembler.MovdqaVo(uyRegister, MakeVariable128SymbolAddress(src2));
392 
393 	//res = ux - uy
394 	m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
395 	m_assembler.PsubdVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
396 
397 	//cst = 0x7FFFFFFF
398 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
399 	m_assembler.PsrldVo(cstRegister, 1);
400 
401 	//ux = (ux >> 31)
402 	m_assembler.PsrldVo(uxRegister, 31);
403 
404 	//ux += 0x7FFFFFFF
405 	m_assembler.PadddVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
406 
407 	//uy = (ux ^ res)
408 	//------
409 	//ux ^ res
410 	m_assembler.MovdqaVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
411 	m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
412 
413 	//cst = ux ^ uy (reloading uy from src2 because we don't have any registers available)
414 	m_assembler.MovdqaVo(cstRegister ,CX86Assembler::MakeXmmRegisterAddress(uxRegister));
415 	m_assembler.PxorVo(cstRegister, MakeVariable128SymbolAddress(src2));
416 
417 	//uy = ((ux ^ uy) & (ux ^ res)) >> 31; (signed operation)
418 	m_assembler.PandVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
419 	m_assembler.PsradVo(uyRegister, 31);
420 
421 	//ux = (ux & uy)	(uy is the sign value)
422 	m_assembler.PandVo(uxRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
423 
424 	//res = (res & ~uy)
425 	//------
426 	//~uy
427 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
428 	m_assembler.PxorVo(uyRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
429 
430 	//res & ~uy
431 	m_assembler.PandVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uyRegister));
432 
433 	//res = (res & ~uy) | (ux & uy)
434 	m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(uxRegister));
435 
436 	//Copy final result
437 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
438 }
439 
Emit_Md_AddUSW_VarVarVar(const STATEMENT & statement)440 void CCodeGen_x86::Emit_Md_AddUSW_VarVarVar(const STATEMENT& statement)
441 {
442 	auto dst = statement.dst->GetSymbol().get();
443 	auto src1 = statement.src1->GetSymbol().get();
444 	auto src2 = statement.src2->GetSymbol().get();
445 
446 	auto xRegister = CX86Assembler::xMM0;
447 	auto resRegister = CX86Assembler::xMM1;
448 	auto tmpRegister = CX86Assembler::xMM2;
449 	auto tmp2Register = CX86Assembler::xMM3;
450 
451 //	This is based on code from http://locklessinc.com/articles/sat_arithmetic/
452 //	u32b sat_addu32b(u32b x, u32b y)
453 //	{
454 //		u32b res = x + y;
455 //		res |= -(res < x);
456 //
457 //		return res;
458 //	}
459 
460 	m_assembler.MovdqaVo(xRegister, MakeVariable128SymbolAddress(src1));
461 	m_assembler.MovdqaVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(xRegister));
462 	m_assembler.PadddVo(resRegister, MakeVariable128SymbolAddress(src2));
463 
464 	//-(res < x)
465 	m_assembler.PcmpeqdVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
466 	m_assembler.PslldVo(tmpRegister, 31);
467 	m_assembler.PadddVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(resRegister));
468 
469 	m_assembler.PcmpeqdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
470 	m_assembler.PslldVo(tmp2Register, 31);
471 	m_assembler.PadddVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(xRegister));
472 
473 	m_assembler.PcmpgtdVo(tmp2Register, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
474 
475 	//res |= -(res < x)
476 	m_assembler.PorVo(resRegister, CX86Assembler::MakeXmmRegisterAddress(tmp2Register));
477 
478 	//Store result
479 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resRegister);
480 }
481 
Emit_Md_MinW_VarVarVar(const STATEMENT & statement)482 void CCodeGen_x86::Emit_Md_MinW_VarVarVar(const STATEMENT& statement)
483 {
484 	auto dst = statement.dst->GetSymbol().get();
485 	auto src1 = statement.src1->GetSymbol().get();
486 	auto src2 = statement.src2->GetSymbol().get();
487 
488 	auto src1Register = CX86Assembler::xMM0;
489 	auto src2Register = CX86Assembler::xMM1;
490 	auto mask1Register = CX86Assembler::xMM2;
491 	auto mask2Register = CX86Assembler::xMM3;
492 
493 	m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
494 	m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
495 
496 	m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
497 	m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
498 	m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
499 
500 	m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
501 	m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
502 	m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
503 
504 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
505 }
506 
Emit_Md_MaxW_VarVarVar(const STATEMENT & statement)507 void CCodeGen_x86::Emit_Md_MaxW_VarVarVar(const STATEMENT& statement)
508 {
509 	auto dst = statement.dst->GetSymbol().get();
510 	auto src1 = statement.src1->GetSymbol().get();
511 	auto src2 = statement.src2->GetSymbol().get();
512 
513 	auto src1Register = CX86Assembler::xMM0;
514 	auto src2Register = CX86Assembler::xMM1;
515 	auto mask1Register = CX86Assembler::xMM2;
516 	auto mask2Register = CX86Assembler::xMM3;
517 
518 	m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
519 	m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
520 
521 	m_assembler.MovdqaVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
522 	m_assembler.PcmpgtdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
523 	m_assembler.MovdqaVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
524 
525 	m_assembler.PandVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(src1Register));
526 	m_assembler.PandnVo(mask2Register, CX86Assembler::MakeXmmRegisterAddress(src2Register));
527 	m_assembler.PorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask2Register));
528 
529 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask1Register);
530 }
531 
Emit_Md_PackHB_VarVarVar(const STATEMENT & statement)532 void CCodeGen_x86::Emit_Md_PackHB_VarVarVar(const STATEMENT& statement)
533 {
534 	auto dst = statement.dst->GetSymbol().get();
535 	auto src1 = statement.src1->GetSymbol().get();
536 	auto src2 = statement.src2->GetSymbol().get();
537 
538 	auto resultRegister = CX86Assembler::xMM0;
539 	auto tempRegister = CX86Assembler::xMM1;
540 	auto maskRegister = CX86Assembler::xMM2;
541 
542 	m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
543 	m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
544 
545 	//Generate mask (0x00FF x8)
546 	m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
547 	m_assembler.PsrlwVo(maskRegister, 0x08);
548 
549 	//Mask both operands
550 	m_assembler.PandVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
551 	m_assembler.PandVo(tempRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
552 
553 	//Pack
554 	m_assembler.PackuswbVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
555 
556 	m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
557 }
558 
Emit_Md_PackWH_VarVarVar(const STATEMENT & statement)559 void CCodeGen_x86::Emit_Md_PackWH_VarVarVar(const STATEMENT& statement)
560 {
561 	auto dst = statement.dst->GetSymbol().get();
562 	auto src1 = statement.src1->GetSymbol().get();
563 	auto src2 = statement.src2->GetSymbol().get();
564 
565 	auto resultRegister = CX86Assembler::xMM0;
566 	auto tempRegister = CX86Assembler::xMM1;
567 
568 	m_assembler.MovapsVo(resultRegister, MakeVariable128SymbolAddress(src2));
569 	m_assembler.MovapsVo(tempRegister, MakeVariable128SymbolAddress(src1));
570 
571 	//Sign extend the lower half word of our registers
572 	m_assembler.PslldVo(resultRegister, 0x10);
573 	m_assembler.PsradVo(resultRegister, 0x10);
574 
575 	m_assembler.PslldVo(tempRegister, 0x10);
576 	m_assembler.PsradVo(tempRegister, 0x10);
577 
578 	//Pack
579 	m_assembler.PackssdwVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(tempRegister));
580 
581 	m_assembler.MovapsVo(MakeVariable128SymbolAddress(dst), resultRegister);
582 }
583 
Emit_Md_MovMasked_VarVarVar(const STATEMENT & statement)584 void CCodeGen_x86::Emit_Md_MovMasked_VarVarVar(const STATEMENT& statement)
585 {
586 	auto dst = statement.dst->GetSymbol().get();
587 	auto src1 = statement.src1->GetSymbol().get();
588 	auto src2 = statement.src2->GetSymbol().get();
589 
590 	uint8 mask = static_cast<uint8>(statement.jmpCondition);
591 	auto mask0Register = CX86Assembler::xMM0;
592 	auto mask1Register = CX86Assembler::xMM1;
593 
594 	m_assembler.MovId(CX86Assembler::rAX, ~0);
595 	m_assembler.MovdVo(mask0Register, CX86Assembler::MakeRegisterAddress(CX86Assembler::rAX));
596 
597 	//Generate shuffle selector
598 	//0x00 -> gives us 0x00000000
599 	//0x02 -> gives us 0xFFFFFFFF
600 	uint8 shuffleSelector = 0;
601 	for(unsigned int i = 0; i < 4; i++)
602 	{
603 		if(mask & (1 << i))
604 		{
605 			shuffleSelector |= (0x02) << (i * 2);
606 		}
607 	}
608 
609 	//mask0 -> proper mask
610 	m_assembler.PshufdVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register), shuffleSelector);
611 
612 	//mask1 -> mask inverse
613 	m_assembler.PcmpeqdVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
614 	m_assembler.PxorVo(mask1Register, CX86Assembler::MakeXmmRegisterAddress(mask0Register));
615 
616 	//Generate result
617 	m_assembler.PandVo(mask0Register, MakeVariable128SymbolAddress(src1));
618 	m_assembler.PandVo(mask1Register, MakeVariable128SymbolAddress(src2));
619 	m_assembler.PorVo(mask0Register, CX86Assembler::MakeXmmRegisterAddress(mask1Register));
620 
621 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), mask0Register);
622 }
623 
Emit_Md_Mov_RegVar(const STATEMENT & statement)624 void CCodeGen_x86::Emit_Md_Mov_RegVar(const STATEMENT& statement)
625 {
626 	auto dst = statement.dst->GetSymbol().get();
627 	auto src1 = statement.src1->GetSymbol().get();
628 
629 	m_assembler.MovapsVo(m_mdRegisters[dst->m_valueLow], MakeVariable128SymbolAddress(src1));
630 }
631 
Emit_Md_Mov_MemReg(const STATEMENT & statement)632 void CCodeGen_x86::Emit_Md_Mov_MemReg(const STATEMENT& statement)
633 {
634 	auto dst = statement.dst->GetSymbol().get();
635 	auto src1 = statement.src1->GetSymbol().get();
636 
637 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), m_mdRegisters[src1->m_valueLow]);
638 }
639 
Emit_Md_Mov_MemMem(const STATEMENT & statement)640 void CCodeGen_x86::Emit_Md_Mov_MemMem(const STATEMENT& statement)
641 {
642 	CSymbol* dst = statement.dst->GetSymbol().get();
643 	CSymbol* src1 = statement.src1->GetSymbol().get();
644 
645 	CX86Assembler::XMMREGISTER resultRegister = CX86Assembler::xMM0;
646 
647 	m_assembler.MovapsVo(resultRegister, MakeMemory128SymbolAddress(src1));
648 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
649 }
650 
Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)651 void CCodeGen_x86::Emit_Md_Abs(CX86Assembler::XMMREGISTER dstRegister)
652 {
653 	auto maskRegister = CX86Assembler::xMM1;
654 
655 	assert(dstRegister != maskRegister);
656 
657 	m_assembler.PcmpeqdVo(maskRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
658 	m_assembler.PsrldVo(maskRegister, 1);
659 	m_assembler.PandVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(maskRegister));
660 }
661 
Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)662 void CCodeGen_x86::Emit_Md_Not(CX86Assembler::XMMREGISTER dstRegister)
663 {
664 	auto cstRegister = CX86Assembler::xMM1;
665 
666 	assert(dstRegister != cstRegister);
667 
668 	m_assembler.PcmpeqdVo(cstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
669 	m_assembler.PxorVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(cstRegister));
670 }
671 
Emit_Md_IsNegative(CX86Assembler::REGISTER dstRegister,const CX86Assembler::CAddress & srcAddress)672 void CCodeGen_x86::Emit_Md_IsNegative(CX86Assembler::REGISTER dstRegister, const CX86Assembler::CAddress& srcAddress)
673 {
674 	auto valueRegister = CX86Assembler::xMM0;
675 	auto zeroRegister = CX86Assembler::xMM1;
676 	auto tmpRegister = CX86Assembler::xMM2;
677 	auto shuffleSelectRegister = CX86Assembler::xMM3;
678 	auto tmpFlagRegister = CX86Assembler::rDX;
679 
680 	assert(dstRegister != tmpFlagRegister);
681 
682 	//valueRegister = [srcAddress]
683 	m_assembler.MovdqaVo(valueRegister, srcAddress);
684 
685 	//----- Generate isZero
686 
687 	//tmpRegister = 0
688 	m_assembler.PandnVo(tmpRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
689 
690 	//zeroRegister = 0xFFFFFFFF
691 	m_assembler.PcmpeqdVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
692 
693 	//zeroRegister = 0x7FFFFFFF
694 	m_assembler.PsrldVo(zeroRegister, 1);
695 
696 	//zeroRegister &= valueRegister
697 	m_assembler.PandVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
698 
699 	//zeroRegister = (zeroRegister == tmpRegister)
700 	m_assembler.PcmpeqdVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(tmpRegister));
701 
702 	//----- Generate isNegative
703 	//valueRegister >>= 31 (s-extended)
704 	m_assembler.PsradVo(valueRegister, 31);
705 
706 	//----- Generate result
707 	//zeroRegister = (not zeroRegister) & valueRegister
708 	m_assembler.PandnVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
709 
710 	//Extract bits
711 	m_assembler.MovId(tmpFlagRegister, 0x03070B0F);
712 	m_assembler.MovdVo(shuffleSelectRegister, CX86Assembler::MakeRegisterAddress(tmpFlagRegister));
713 	m_assembler.PshufbVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(shuffleSelectRegister));
714 	m_assembler.PmovmskbVo(dstRegister, zeroRegister);
715 	m_assembler.AndId(CX86Assembler::MakeRegisterAddress(dstRegister), 0x0F);
716 }
717 
Emit_Md_IsZero(CX86Assembler::REGISTER dstRegister,const CX86Assembler::CAddress & srcAddress)718 void CCodeGen_x86::Emit_Md_IsZero(CX86Assembler::REGISTER dstRegister, const CX86Assembler::CAddress& srcAddress)
719 {
720 	auto valueRegister = CX86Assembler::xMM0;
721 	auto zeroRegister = CX86Assembler::xMM1;
722 	auto shuffleSelectRegister = CX86Assembler::xMM2;
723 	auto tmpFlagRegister = CX86Assembler::rDX;
724 
725 	assert(dstRegister != tmpFlagRegister);
726 
727 	//Get value - And with 0x7FFFFFFF to remove sign bit
728 	m_assembler.PcmpeqdVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(valueRegister));
729 	m_assembler.PsrldVo(valueRegister, 1);
730 	m_assembler.PandVo(valueRegister, srcAddress);
731 
732 	//Generate zero and compare
733 	m_assembler.PandnVo(zeroRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
734 	m_assembler.PcmpeqdVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(zeroRegister));
735 
736 	//Extract bits
737 	m_assembler.MovId(tmpFlagRegister, 0x03070B0F);
738 	m_assembler.MovdVo(shuffleSelectRegister, CX86Assembler::MakeRegisterAddress(tmpFlagRegister));
739 	m_assembler.PshufbVo(valueRegister, CX86Assembler::MakeXmmRegisterAddress(shuffleSelectRegister));
740 	m_assembler.PmovmskbVo(dstRegister, valueRegister);
741 	m_assembler.AndId(CX86Assembler::MakeRegisterAddress(dstRegister), 0x0F);
742 }
743 
Emit_Md_Expand_RegReg(const STATEMENT & statement)744 void CCodeGen_x86::Emit_Md_Expand_RegReg(const STATEMENT& statement)
745 {
746 	auto dst = statement.dst->GetSymbol().get();
747 	auto src1 = statement.src1->GetSymbol().get();
748 
749 	auto resultRegister = m_mdRegisters[dst->m_valueLow];
750 
751 	m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
752 	m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
753 }
754 
Emit_Md_Expand_RegMem(const STATEMENT & statement)755 void CCodeGen_x86::Emit_Md_Expand_RegMem(const STATEMENT& statement)
756 {
757 	auto dst = statement.dst->GetSymbol().get();
758 	auto src1 = statement.src1->GetSymbol().get();
759 
760 	auto resultRegister = m_mdRegisters[dst->m_valueLow];
761 
762 	m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
763 	m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
764 }
765 
Emit_Md_Expand_RegCst(const STATEMENT & statement)766 void CCodeGen_x86::Emit_Md_Expand_RegCst(const STATEMENT& statement)
767 {
768 	auto dst = statement.dst->GetSymbol().get();
769 	auto src1 = statement.src1->GetSymbol().get();
770 
771 	auto cstRegister = CX86Assembler::rAX;
772 	auto resultRegister = m_mdRegisters[dst->m_valueLow];
773 
774 	m_assembler.MovId(cstRegister, src1->m_valueLow);
775 	m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
776 	m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
777 }
778 
Emit_Md_Expand_MemReg(const STATEMENT & statement)779 void CCodeGen_x86::Emit_Md_Expand_MemReg(const STATEMENT& statement)
780 {
781 	auto dst = statement.dst->GetSymbol().get();
782 	auto src1 = statement.src1->GetSymbol().get();
783 
784 	auto resultRegister = CX86Assembler::xMM0;
785 
786 	m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(m_registers[src1->m_valueLow]));
787 	m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
788 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
789 }
790 
Emit_Md_Expand_MemMem(const STATEMENT & statement)791 void CCodeGen_x86::Emit_Md_Expand_MemMem(const STATEMENT& statement)
792 {
793 	auto dst = statement.dst->GetSymbol().get();
794 	auto src1 = statement.src1->GetSymbol().get();
795 
796 	auto resultRegister = CX86Assembler::xMM0;
797 
798 	m_assembler.MovssEd(resultRegister, MakeMemorySymbolAddress(src1));
799 	m_assembler.ShufpsVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
800 	m_assembler.MovapsVo(MakeMemory128SymbolAddress(dst), resultRegister);
801 }
802 
Emit_Md_Expand_MemCst(const STATEMENT & statement)803 void CCodeGen_x86::Emit_Md_Expand_MemCst(const STATEMENT& statement)
804 {
805 	auto dst = statement.dst->GetSymbol().get();
806 	auto src1 = statement.src1->GetSymbol().get();
807 
808 	auto cstRegister = CX86Assembler::rAX;
809 	auto resultRegister = CX86Assembler::xMM0;
810 
811 	m_assembler.MovId(cstRegister, src1->m_valueLow);
812 	m_assembler.MovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(cstRegister));
813 	m_assembler.PshufdVo(resultRegister, CX86Assembler::MakeXmmRegisterAddress(resultRegister), 0x00);
814 	m_assembler.MovdqaVo(MakeMemory128SymbolAddress(dst), resultRegister);
815 }
816 
Emit_Md_Srl256_VarMem(CSymbol * dst,CSymbol * src1,const CX86Assembler::CAddress & offsetAddress)817 void CCodeGen_x86::Emit_Md_Srl256_VarMem(CSymbol* dst, CSymbol* src1, const CX86Assembler::CAddress& offsetAddress)
818 {
819 	auto offsetRegister = CX86Assembler::rAX;
820 	auto resultRegister = CX86Assembler::xMM0;
821 
822 	assert(src1->m_type == SYM_TEMPORARY256);
823 
824 	m_assembler.MovEd(offsetRegister, offsetAddress);
825 	m_assembler.AndId(CX86Assembler::MakeRegisterAddress(offsetRegister), 0x7F);
826 	m_assembler.ShrEd(CX86Assembler::MakeRegisterAddress(offsetRegister), 3);
827 	m_assembler.AddId(CX86Assembler::MakeRegisterAddress(offsetRegister), src1->m_stackLocation + m_stackLevel);
828 
829 	m_assembler.MovdquVo(resultRegister, CX86Assembler::MakeBaseIndexScaleAddress(CX86Assembler::rSP, offsetRegister, 1));
830 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
831 }
832 
Emit_Md_Srl256_VarMemVar(const STATEMENT & statement)833 void CCodeGen_x86::Emit_Md_Srl256_VarMemVar(const STATEMENT& statement)
834 {
835 	auto dst = statement.dst->GetSymbol().get();
836 	auto src1 = statement.src1->GetSymbol().get();
837 	auto src2 = statement.src2->GetSymbol().get();
838 
839 	Emit_Md_Srl256_VarMem(dst, src1, MakeVariableSymbolAddress(src2));
840 }
841 
Emit_Md_Srl256_VarMemCst(const STATEMENT & statement)842 void CCodeGen_x86::Emit_Md_Srl256_VarMemCst(const STATEMENT& statement)
843 {
844 	auto dst = statement.dst->GetSymbol().get();
845 	auto src1 = statement.src1->GetSymbol().get();
846 	auto src2 = statement.src2->GetSymbol().get();
847 
848 	auto resultRegister = CX86Assembler::xMM0;
849 
850 	assert(src1->m_type == SYM_TEMPORARY256);
851 	assert(src2->m_type == SYM_CONSTANT);
852 
853 	uint32 offset = (src2->m_valueLow & 0x7F) / 8;
854 
855 	m_assembler.MovdquVo(resultRegister, MakeTemporary256SymbolElementAddress(src1, offset));
856 	m_assembler.MovdqaVo(MakeVariable128SymbolAddress(dst), resultRegister);
857 }
858 
Emit_MergeTo256_MemVarVar(const STATEMENT & statement)859 void CCodeGen_x86::Emit_MergeTo256_MemVarVar(const STATEMENT& statement)
860 {
861 	auto dst = statement.dst->GetSymbol().get();
862 	auto src1 = statement.src1->GetSymbol().get();
863 	auto src2 = statement.src2->GetSymbol().get();
864 
865 	assert(dst->m_type == SYM_TEMPORARY256);
866 
867 	auto src1Register = CX86Assembler::xMM0;
868 	auto src2Register = CX86Assembler::xMM1;
869 
870 	//TODO: Improve this to write out registers directly to temporary's memory space
871 	//instead of passing by temporary registers
872 
873 	m_assembler.MovdqaVo(src1Register, MakeVariable128SymbolAddress(src1));
874 	m_assembler.MovdqaVo(src2Register, MakeVariable128SymbolAddress(src2));
875 
876 	m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x00), src1Register);
877 	m_assembler.MovdqaVo(MakeTemporary256SymbolElementAddress(dst, 0x10), src2Register);
878 }
879 
//Expands to the two matcher rows of a vector shift by a constant amount:
//one for a 128-bit register first operand and one for a 128-bit memory first operand,
//both with a 128-bit variable second operand and a constant third operand.
//SA_MASK is forwarded as the second template argument of the emitter
//(presumably the mask applied to the shift amount - confirm in Emit_Md_Shift_*).
#define MD_CONST_MATCHERS_SHIFT(IR_OP, X86_MDOP, SA_MASK) \
	{ IR_OP, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_CONSTANT, &CCodeGen_x86::Emit_Md_Shift_RegVarCst<X86_MDOP, SA_MASK> }, \
	{ IR_OP, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_CONSTANT, &CCodeGen_x86::Emit_Md_Shift_MemVarCst<X86_MDOP, SA_MASK> },
883 
//Expands to the two matcher rows of an operation with a single 128-bit variable source:
//one for a 128-bit register first operand and one for a 128-bit memory first operand.
//Rows dispatch to Emit_Md_RegVar / Emit_Md_MemVar instantiated with X86_MDOP.
#define MD_CONST_MATCHERS_2OPS(IR_OP, X86_MDOP) \
	{ IR_OP, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_RegVar<X86_MDOP> }, \
	{ IR_OP, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_MemVar<X86_MDOP> },
887 
//Expands to the four matcher rows of an operation with two 128-bit sources.
//The all-register and register/memory/register rows come first, followed by
//the generic variable/variable rows for register and memory first operands.
//Row order is kept as is (presumably the table is scanned first-match - confirm in the matcher lookup).
#define MD_CONST_MATCHERS_3OPS(IR_OP, X86_MDOP) \
	{ IR_OP, MATCH_REGISTER128, MATCH_REGISTER128, MATCH_REGISTER128, &CCodeGen_x86::Emit_Md_RegRegReg<X86_MDOP> }, \
	{ IR_OP, MATCH_REGISTER128, MATCH_MEMORY128, MATCH_REGISTER128, &CCodeGen_x86::Emit_Md_RegMemReg<X86_MDOP> }, \
	{ IR_OP, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_RegVarVar<X86_MDOP> }, \
	{ IR_OP, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_MemVarVar<X86_MDOP> },
893 
//Expands to a single matcher row accepting 128-bit variables for all three operands
//and dispatching to Emit_Md_VarVarVarRev instantiated with X86_MDOP
//(presumably the sources are consumed in reverse order - confirm in Emit_Md_VarVarVarRev).
#define MD_CONST_MATCHERS_3OPS_REV(IR_OP, X86_MDOP) \
	{ IR_OP, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_VarVarVarRev<X86_MDOP> },
896 
//Expands to the two matcher rows of a single-source operation handled by the
//Emit_Md_SingleOp_* emitters: one for a 128-bit register first operand and
//one for a 128-bit memory first operand.
#define MD_CONST_MATCHERS_SINGLEOP(IR_OP, X86_MDOP) \
	{ IR_OP, MATCH_REGISTER128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_RegVar<X86_MDOP> }, \
	{ IR_OP, MATCH_MEMORY128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Md_SingleOp_MemVar<X86_MDOP> },
900 
901 CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdConstMatchers[] =
902 {
903 	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_B,		MDOP_ADDB)
904 	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_H,		MDOP_ADDH)
905 	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_W,		MDOP_ADDW)
906 
907 	MD_CONST_MATCHERS_3OPS(OP_MD_ADDSS_H,	MDOP_ADDSSH)
908 	{ OP_MD_ADDSS_W,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_AddSSW_VarVarVar						},
909 
910 	MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_B,	MDOP_ADDUSB)
911 	MD_CONST_MATCHERS_3OPS(OP_MD_ADDUS_H,	MDOP_ADDUSH)
912 	{ OP_MD_ADDUS_W,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_AddUSW_VarVarVar						},
913 
914 	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_B,		MDOP_SUBB)
915 	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_H,		MDOP_SUBH)
916 	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_W,		MDOP_SUBW)
917 
918 	MD_CONST_MATCHERS_3OPS(OP_MD_SUBSS_H,	MDOP_SUBSSH)
919 	{ OP_MD_SUBSS_W,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_SubSSW_VarVarVar						},
920 
921 	MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_B,	MDOP_SUBUSB)
922 	MD_CONST_MATCHERS_3OPS(OP_MD_SUBUS_H,	MDOP_SUBUSH)
923 
924 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_B,	MDOP_CMPEQB)
925 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_H,	MDOP_CMPEQH)
926 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPEQ_W,	MDOP_CMPEQW)
927 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_B,	MDOP_CMPGTB)
928 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_H,	MDOP_CMPGTH)
929 	MD_CONST_MATCHERS_3OPS(OP_MD_CMPGT_W,	MDOP_CMPGTW)
930 
931 	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_H,		MDOP_MINH)
932 
933 	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_H,		MDOP_MAXH)
934 
935 	MD_CONST_MATCHERS_3OPS(OP_MD_AND,		MDOP_AND)
936 	MD_CONST_MATCHERS_3OPS(OP_MD_OR,		MDOP_OR)
937 	MD_CONST_MATCHERS_3OPS(OP_MD_XOR,		MDOP_XOR)
938 
939 	MD_CONST_MATCHERS_SHIFT(OP_MD_SRLH,		MDOP_SRLH, 0x0F)
940 	MD_CONST_MATCHERS_SHIFT(OP_MD_SRAH,		MDOP_SRAH, 0x0F)
941 	MD_CONST_MATCHERS_SHIFT(OP_MD_SLLH,		MDOP_SLLH, 0x0F)
942 
943 	MD_CONST_MATCHERS_SHIFT(OP_MD_SRLW,		MDOP_SRLW, 0x1F)
944 	MD_CONST_MATCHERS_SHIFT(OP_MD_SRAW,		MDOP_SRAW, 0x1F)
945 	MD_CONST_MATCHERS_SHIFT(OP_MD_SLLW,		MDOP_SLLW, 0x1F)
946 
947 	{ OP_MD_SRL256,				MATCH_VARIABLE128,			MATCH_MEMORY256,			MATCH_VARIABLE,			&CCodeGen_x86::Emit_Md_Srl256_VarMemVar						},
948 	{ OP_MD_SRL256,				MATCH_VARIABLE128,			MATCH_MEMORY256,			MATCH_CONSTANT,			&CCodeGen_x86::Emit_Md_Srl256_VarMemCst						},
949 
950 	{ OP_MD_EXPAND,				MATCH_REGISTER128,			MATCH_REGISTER,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_RegReg						},
951 	{ OP_MD_EXPAND,				MATCH_REGISTER128,			MATCH_MEMORY,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_RegMem						},
952 	{ OP_MD_EXPAND,				MATCH_REGISTER128,			MATCH_CONSTANT,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_RegCst						},
953 	{ OP_MD_EXPAND,				MATCH_MEMORY128,			MATCH_REGISTER,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_MemReg						},
954 	{ OP_MD_EXPAND,				MATCH_MEMORY128,			MATCH_MEMORY,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_MemMem						},
955 	{ OP_MD_EXPAND,				MATCH_MEMORY128,			MATCH_CONSTANT,				MATCH_NIL,				&CCodeGen_x86::Emit_Md_Expand_MemCst						},
956 
957 	{ OP_MD_PACK_HB,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_PackHB_VarVarVar,					},
958 	{ OP_MD_PACK_WH,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_PackWH_VarVarVar,					},
959 
960 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_BH, MDOP_UNPACK_LOWER_BH)
961 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_HW, MDOP_UNPACK_LOWER_HW)
962 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_LOWER_WD, MDOP_UNPACK_LOWER_WD)
963 
964 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_BH, MDOP_UNPACK_UPPER_BH)
965 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_HW, MDOP_UNPACK_UPPER_HW)
966 	MD_CONST_MATCHERS_3OPS_REV(OP_MD_UNPACK_UPPER_WD, MDOP_UNPACK_UPPER_WD)
967 
968 	MD_CONST_MATCHERS_3OPS(OP_MD_ADD_S, MDOP_ADDS)
969 	MD_CONST_MATCHERS_3OPS(OP_MD_SUB_S, MDOP_SUBS)
970 	MD_CONST_MATCHERS_3OPS(OP_MD_MUL_S, MDOP_MULS)
971 	MD_CONST_MATCHERS_3OPS(OP_MD_DIV_S, MDOP_DIVS)
972 
973 	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_S, MDOP_MINS)
974 	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_S, MDOP_MAXS)
975 
976 	MD_CONST_MATCHERS_SINGLEOP(OP_MD_ABS_S,	MDOP_ABS)
977 	MD_CONST_MATCHERS_SINGLEOP(OP_MD_NOT,	MDOP_NOT)
978 
979 	{ OP_MD_ISNEGATIVE,			MATCH_REGISTER,				MATCH_VARIABLE128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_GetFlag_RegVar<MDOP_ISNEGATIVE>		},
980 	{ OP_MD_ISNEGATIVE,			MATCH_MEMORY,				MATCH_VARIABLE128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_GetFlag_MemVar<MDOP_ISNEGATIVE>		},
981 
982 	{ OP_MD_ISZERO,				MATCH_REGISTER,				MATCH_VARIABLE128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_GetFlag_RegVar<MDOP_ISZERO>			},
983 	{ OP_MD_ISZERO,				MATCH_MEMORY,				MATCH_VARIABLE128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_GetFlag_MemVar<MDOP_ISZERO>			},
984 
985 	MD_CONST_MATCHERS_2OPS(OP_MD_TOWORD_TRUNCATE,	MDOP_TOWORD_TRUNCATE)
986 	MD_CONST_MATCHERS_2OPS(OP_MD_TOSINGLE,			MDOP_TOSINGLE)
987 
988 	{ OP_MOV,					MATCH_REGISTER128,			MATCH_VARIABLE128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_Mov_RegVar,							},
989 	{ OP_MOV,					MATCH_MEMORY128,			MATCH_REGISTER128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_Mov_MemReg							},
990 	{ OP_MOV,					MATCH_MEMORY128,			MATCH_MEMORY128,			MATCH_NIL,				&CCodeGen_x86::Emit_Md_Mov_MemMem							},
991 	{ OP_MD_MOV_MASKED,			MATCH_VARIABLE128,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_Md_MovMasked_VarVarVar					},
992 
993 	{ OP_MERGETO256,			MATCH_MEMORY256,			MATCH_VARIABLE128,			MATCH_VARIABLE128,		&CCodeGen_x86::Emit_MergeTo256_MemVarVar					},
994 
995 	{ OP_MOV,					MATCH_NIL,					MATCH_NIL,					MATCH_NIL,				NULL														},
996 };
997 
998 CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWConstMatchers[] =
999 {
1000 	{ OP_MD_MIN_W,	MATCH_VARIABLE128,	MATCH_VARIABLE128,	MATCH_VARIABLE128,	&CCodeGen_x86::Emit_Md_MinW_VarVarVar	},
1001 	{ OP_MD_MAX_W,	MATCH_VARIABLE128,	MATCH_VARIABLE128,	MATCH_VARIABLE128,	&CCodeGen_x86::Emit_Md_MaxW_VarVarVar	},
1002 
1003 	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
1004 };
1005 
1006 CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdMinMaxWSse41ConstMatchers[] =
1007 {
1008 	MD_CONST_MATCHERS_3OPS(OP_MD_MIN_W,		MDOP_MINW)
1009 	MD_CONST_MATCHERS_3OPS(OP_MD_MAX_W,		MDOP_MAXW)
1010 
1011 	{ OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr },
1012 };
1013