1 //
2 // arch/x86_64/rsp/vcmp.h
3 //
4 // This file is subject to the terms and conditions defined in
5 // 'LICENSE', which is part of this source code package.
6 //
7 
// Lane-wise "equal" compare-and-select over eight 16-bit lanes.
//
// Writes the selection mask (vs == vt) & ~eq to *le, then returns a
// vector that takes vs where the mask is set and vt everywhere else.
//
// zero and sign are accepted but not read by this function.
//
// NOTE(review): the SSE4.1 blend looks only at the top bit of each mask
// byte while the fallback uses every bit, so the two paths agree only if
// eq is a whole-lane (all-ones / all-zeros) mask -- confirm with callers.
static inline __m128i rsp_veq(__m128i vs, __m128i vt, __m128i zero, __m128i *le, __m128i eq, __m128i sign)
{
	const __m128i same = _mm_cmpeq_epi16(vs, vt);
	const __m128i mask = _mm_andnot_si128(eq, same);

	*le = mask;

#ifdef __SSE4_1__
	return _mm_blendv_epi8(vt, vs, mask);
#else
	// (mask & vs) | (~mask & vt)
	return _mm_or_si128(_mm_and_si128(mask, vs), _mm_andnot_si128(mask, vt));
#endif
}
22 
// Lane-wise signed "greater than or equal" compare-and-select over eight
// 16-bit lanes.
//
// Writes the selection mask
//     (vs > vt) | ((vs == vt) & ~(eq & sign))
// to *le, then returns a vector that takes vs where the mask is set and
// vt everywhere else. The comparison is signed.
//
// zero is accepted but not read by this function.
//
// NOTE(review): the SSE4.1 blend looks only at the top bit of each mask
// byte while the fallback uses every bit, so the two paths agree only if
// eq and sign are whole-lane masks -- confirm with callers.
static inline __m128i rsp_vge(__m128i vs, __m128i vt, __m128i zero, __m128i *le, __m128i eq, __m128i sign)
{
	const __m128i greater = _mm_cmpgt_epi16(vs, vt);
	const __m128i same = _mm_cmpeq_epi16(vs, vt);

	// Equal lanes count as a match unless both eq and sign are set.
	const __m128i veto = _mm_and_si128(eq, sign);
	const __m128i same_allowed = _mm_andnot_si128(veto, same);
	const __m128i mask = _mm_or_si128(greater, same_allowed);

	*le = mask;

#ifdef __SSE4_1__
	return _mm_blendv_epi8(vt, vs, mask);
#else
	// (mask & vs) | (~mask & vt)
	return _mm_or_si128(_mm_and_si128(mask, vs), _mm_andnot_si128(mask, vt));
#endif
}
41 
// Lane-wise signed "less than" compare-and-select over eight 16-bit
// lanes.
//
// Writes the selection mask
//     (vs < vt) | ((vs == vt) & eq & sign)
// to *le, then returns a vector that takes vs where the mask is set and
// vt everywhere else. The comparison is signed.
//
// zero is accepted but not read by this function.
//
// NOTE(review): the SSE4.1 blend looks only at the top bit of each mask
// byte while the fallback uses every bit, so the two paths agree only if
// eq and sign are whole-lane masks -- confirm with callers.
static inline __m128i rsp_vlt(__m128i vs, __m128i vt, __m128i zero, __m128i *le, __m128i eq, __m128i sign)
{
	const __m128i less = _mm_cmplt_epi16(vs, vt);
	const __m128i same = _mm_cmpeq_epi16(vs, vt);

	// Equal lanes count as a match only when both eq and sign are set.
	const __m128i same_forced = _mm_and_si128(sign, _mm_and_si128(eq, same));
	const __m128i mask = _mm_or_si128(less, same_forced);

	*le = mask;

#ifdef __SSE4_1__
	return _mm_blendv_epi8(vt, vs, mask);
#else
	// (mask & vs) | (~mask & vt)
	return _mm_or_si128(_mm_and_si128(mask, vs), _mm_andnot_si128(mask, vt));
#endif
}
59 
// Lane-wise "not equal" compare-and-select over eight 16-bit lanes.
//
// Writes the selection mask
//     (vs != vt) | ((vs == vt) & eq)
// to *le, then returns a vector that takes vs where the mask is set and
// vt everywhere else.
//
// zero must be an all-zero vector: it is compared against the equality
// mask to derive the "not equal" mask. sign is accepted but not read.
//
// When INTENSE_DEBUG is defined, the eight signed lanes of vs and vt are
// dumped to stderr before the select.
//
// NOTE(review): the SSE4.1 blend looks only at the top bit of each mask
// byte while the fallback uses every bit, so the two paths agree only if
// eq is a whole-lane mask -- confirm with callers.
static inline __m128i rsp_vne(__m128i vs, __m128i vt, __m128i zero, __m128i *le, __m128i eq, __m128i sign)
{
	__m128i equal = _mm_cmpeq_epi16(vs, vt);
	// Lanes of `equal` are all-ones or all-zeros, so comparing against
	// zero inverts the mask.
	__m128i nequal = _mm_cmpeq_epi16(equal, zero);

	*le = _mm_and_si128(eq, equal);
	*le = _mm_or_si128(*le, nequal);

#ifdef INTENSE_DEBUG
	{
		// Spill the vectors to plain arrays instead of casting their
		// addresses: valid C (no reinterpret_cast) and no aliasing
		// or alignment concerns.
		int16_t vs_lanes[8];
		int16_t vt_lanes[8];

		_mm_storeu_si128((__m128i *)vs_lanes, vs);
		_mm_storeu_si128((__m128i *)vt_lanes, vt);

		for (unsigned i = 0; i < 8; i++) {
			fprintf(stderr, "VS[%u] = %d\n", i, vs_lanes[i]);
		}
		for (unsigned i = 0; i < 8; i++) {
			fprintf(stderr, "VT[%u] = %d\n", i, vt_lanes[i]);
		}
	}
#endif

#ifdef __SSE4_1__
	return _mm_blendv_epi8(vt, vs, *le);
#else
	vs = _mm_and_si128(*le, vs);
	vt = _mm_andnot_si128(*le, vt);
	return _mm_or_si128(vs, vt);
#endif
}
83