1 #include <nmmintrin.h>
2 #include <string.h>
3
/* Bit values OR-ed together in the result of cmp_flags.  These are
   private to this file's emulation; each is a distinct single bit.  */
#define CFLAG 0x01  /* Set by cmp_flags when RES2 is non-zero.  */
#define ZFLAG 0x02  /* Set by cmp_flags from the B operand / LB.  */
#define SFLAG 0x04  /* Set by cmp_flags from the A operand / LA.  */
#define OFLAG 0x08  /* Set by cmp_flags when bit 0 of RES2 is set.  */
#define AFLAG 0x10  /* Never set by cmp_flags.  */
#define PFLAG 0x20  /* Never set by cmp_flags.  */
10
/* Fill RES with the pairwise equality of the elements of X and Y.

   X and Y are arrays of 1-byte or 2-byte elements covering 16 bytes,
   so the element count is 16 / sizeof (element): 16 for bytes, 8 for
   words.  On exit RES[j][i] is 1 when X[i] == Y[j] and 0 otherwise,
   for every i and j below the element count.

   The body is wrapped in do { } while (0) so that the macro behaves
   as a single statement, and the locals carry a pcmp_ prefix with a
   trailing underscore to stay clear of caller names and of the
   identifiers reserved for the implementation.  */
#define PCMPSTR_EQ(X, Y, RES) \
  do \
    { \
      const int pcmp_n_ = (int) (16 / sizeof (*(X))); \
      int pcmp_i_, pcmp_j_; \
      for (pcmp_i_ = 0; pcmp_i_ < pcmp_n_; pcmp_i_++) \
        for (pcmp_j_ = 0; pcmp_j_ < pcmp_n_; pcmp_j_++) \
          (RES)[pcmp_j_][pcmp_i_] = ((X)[pcmp_i_] == (Y)[pcmp_j_]); \
    } \
  while (0)

/* Fill RES with the per-bound range comparisons of Y against X.

   X is read as consecutive (lower, upper) pairs.  For every element
   Y[j] and every pair starting at even index i:
     RES[j][i]     = (Y[j] >= X[i])      lower-bound test
     RES[j][i + 1] = (Y[j] <= X[i + 1])  upper-bound test
   The element count is derived from the element size exactly as in
   PCMPSTR_EQ.  The comparison uses the declared element type of X and
   Y, so signedness follows the arrays passed in.  */
#define PCMPSTR_RNG(X, Y, RES) \
  do \
    { \
      const int pcmp_n_ = (int) (16 / sizeof (*(X))); \
      int pcmp_i_, pcmp_j_; \
      for (pcmp_j_ = 0; pcmp_j_ < pcmp_n_; pcmp_j_++) \
        for (pcmp_i_ = 0; pcmp_i_ < pcmp_n_ - 1; pcmp_i_ += 2) \
          { \
            (RES)[pcmp_j_][pcmp_i_] = ((Y)[pcmp_j_] >= (X)[pcmp_i_]); \
            (RES)[pcmp_j_][pcmp_i_ + 1] \
              = ((Y)[pcmp_j_] <= (X)[pcmp_i_ + 1]); \
          } \
    } \
  while (0)
31
/* Overwrite the cells of RES that involve an out-of-range element.

   RES[j][i] pairs column index i with row index j.  A column is
   treated as out of range when i >= LA, a row when j >= LB.  Only the
   top-left DIM x DIM corner of RES is visited.

   - column in range, row out of range: the cell is forced to 0;
   - column out of range: the value depends on the aggregation field
     (MODE & 0x0C):
       _SIDD_CMP_EQUAL_ANY, _SIDD_CMP_RANGES: forced to 0;
       _SIDD_CMP_EQUAL_EACH: 1 if the row is out of range too, else 0;
       _SIDD_CMP_EQUAL_ORDERED: forced to 1.
   Cells with both indices in range are left untouched.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  const int cmp = mode & 0x0C;
  int row, col;

  for (row = 0; row < dim; row++)
    {
      const int row_invalid = row >= lb;

      for (col = 0; col < dim; col++)
        {
          if (col < la)
            {
              /* Valid column: only an invalid row changes the cell.  */
              if (row_invalid)
                res[row][col] = 0;
              continue;
            }

          /* Invalid column: pick the override for this aggregation.  */
          if (cmp == _SIDD_CMP_EQUAL_ORDERED)
            res[row][col] = 1;
          else if (cmp == _SIDD_CMP_EQUAL_EACH)
            res[row][col] = row_invalid ? 1 : 0;
          else if (cmp == _SIDD_CMP_EQUAL_ANY || cmp == _SIDD_CMP_RANGES)
            res[row][col] = 0;
        }
    }
}
57
/* Build the element-by-element comparison matrix for A and B in RES.

   Bits 0-1 of MODE select how the 16 bytes of each operand are viewed
   (unsigned/signed bytes or words); bits 2-3 select the comparison:
   _SIDD_CMP_RANGES uses PCMPSTR_RNG, every other setting uses
   PCMPSTR_EQ.  A is passed as the macro's X operand and B as its Y
   operand.  Afterwards override_invalid rewrites the cells that lie
   beyond LA / LB, over 16 elements for byte modes and 8 for word
   modes.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } lhs, rhs;
  const int use_ranges = (mode & 0x0C) == _SIDD_CMP_RANGES;
  const int format = mode & 3;

  lhs.x = a;
  rhs.x = b;

  /* The macro invocations are kept inside their own braces so that
     each one is a complete statement of the if/else.  */
  if (format == _SIDD_UBYTE_OPS)
    {
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.uc, rhs.uc, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.uc, rhs.uc, res);
        }
    }
  else if (format == _SIDD_UWORD_OPS)
    {
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.us, rhs.us, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.us, rhs.us, res);
        }
    }
  else if (format == _SIDD_SBYTE_OPS)
    {
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.sc, rhs.sc, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.sc, rhs.sc, res);
        }
    }
  else if (format == _SIDD_SWORD_OPS)
    {
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.ss, rhs.ss, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.ss, rhs.ss, res);
        }
    }

  override_invalid (res, la, lb, mode, (mode & 1) ? 8 : 16);
}
120
/* Compute the final bit-mask result of comparing A with B under MODE.

   LA and LB are the operand lengths; their magnitude is used and
   saturated to the element count DIM (16 for byte modes, 8 for word
   modes, selected by bit 0 of MODE).

   Steps:
     1. calc_matrix fills a DIM x DIM matrix indexed [B index][A index].
     2. The matrix is reduced to one bit per element according to the
        aggregation field (MODE & 0x0C).
     3. The polarity field (MODE & 0x30) optionally inverts the bits.

   Returns the result restricted to the low DIM bits.  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, k, res = 0;
  const int dim = (mode & 1) == 0 ? 16 : 8;

  memset (mtx, 0, sizeof (mtx));

  /* Saturating absolute value.  Out-of-range magnitudes are clamped
     first so that the negation below can never be applied to INT_MIN,
     which would overflow.  */
  if (la < -dim || la > dim)
    la = dim;
  else if (la < 0)
    la = -la;

  if (lb < -dim || lb > dim)
    lb = dim;
  else if (lb < 0)
    lb = -lb;

  calc_matrix (a, la, b, lb, mode, mtx);

  switch ((mode & 0x0C))
    {
    case _SIDD_CMP_EQUAL_ANY:
      /* Bit i is set when row i has any non-zero cell.  */
      for (i = 0; i < dim; i++)
        for (j = 0; j < dim; j++)
          if (mtx[i][j])
            res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      /* Bit j is set when row j satisfies both bounds of some
         (lower, upper) column pair.  */
      for (i = 0; i < dim; i += 2)
        for (j = 0; j < dim; j++)
          if (mtx[j][i] && mtx[j][i + 1])
            res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      /* Bit i mirrors the diagonal cell.  */
      for (i = 0; i < dim; i++)
        if (mtx[i][i])
          res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      /* Bit i is set when every cell on the diagonal that starts at
         row i, column 0 is non-zero.  */
      for (i = 0; i < dim; i++)
        {
          unsigned char val = 1;

          for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
            val &= mtx[k][j];

          /* RES starts at 0 and bit i is only touched here, so there
             is nothing to clear when VAL is 0.  */
          if (val)
            res |= (1 << i);
        }
      break;
    }

  switch ((mode & 0x30))
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      /* Invert everything; the final mask trims the high bits.  */
      res ^= -1;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Invert only the bits below LB.  */
      for (i = 0; i < lb; i++)
        res ^= (1 << i);
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}
204
205 static int
cmp_flags(__m128i a,int la,__m128i b,int lb,int mode,int res2,int is_implicit)206 cmp_flags (__m128i a, int la, __m128i b, int lb,
207 int mode, int res2, int is_implicit)
208 {
209 int i;
210 int flags = 0;
211 int is_bytes_mode = (mode & 1) == 0;
212 union
213 {
214 __m128i x;
215 unsigned char uc[16];
216 unsigned short us[8];
217 } d, s;
218
219 d.x = a;
220 s.x = b;
221
222 /* CF: reset if (RES2 == 0), set otherwise. */
223 if (res2 != 0)
224 flags |= CFLAG;
225
226 if (is_implicit)
227 {
228 /* ZF: set if any byte/word of src xmm operand is null, reset
229 otherwise.
230 SF: set if any byte/word of dst xmm operand is null, reset
231 otherwise. */
232
233 if (is_bytes_mode)
234 {
235 for (i = 0; i < 16; i++)
236 {
237 if (s.uc[i] == 0)
238 flags |= ZFLAG;
239 if (d.uc[i] == 0)
240 flags |= SFLAG;
241 }
242 }
243 else
244 {
245 for (i = 0; i < 8; i++)
246 {
247 if (s.us[i] == 0)
248 flags |= ZFLAG;
249 if (d.us[i] == 0)
250 flags |= SFLAG;
251 }
252 }
253 }
254 else
255 {
256 /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
257 SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */
258 int max_ind = is_bytes_mode ? 16 : 8;
259
260 if (la < 0)
261 la = -la;
262 if (lb < 0)
263 lb = -lb;
264
265 if (lb < max_ind)
266 flags |= ZFLAG;
267 if (la < max_ind)
268 flags |= SFLAG;
269 }
270
271 /* OF: equal to RES2[0]. */
272 if ((res2 & 0x1))
273 flags |= OFLAG;
274
275 /* AF: Reset.
276 PF: Reset. */
277 return flags;
278 }
279
/* Compare A with B and return the index of a set bit of the result.

   The bit mask is obtained from calc_res and also stored in *RES2.
   When bit 0x40 of MODE is set the highest set bit is reported,
   otherwise the lowest.  If no bit is set the element count is
   returned: 16 for byte modes, 8 for word modes (bit 0 of MODE).  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  const int count = (mode & 1) ? 8 : 16;
  const int bits = calc_res (a, la, b, lb, mode);
  int found = count;
  int pos;

  if (mode & 0x40)
    {
      /* Scan downwards so the first hit is the most significant.  */
      for (pos = count; pos-- > 0;)
        if (bits & (1 << pos))
          {
            found = pos;
            break;
          }
    }
  else
    {
      /* Scan upwards so the first hit is the least significant.  */
      pos = 0;
      while (pos < count && !(bits & (1 << pos)))
        pos++;
      found = pos;
    }

  *res2 = bits;
  return found;
}
313
/* Compare A with B and return the result as a 128-bit mask.

   The bit mask is obtained from calc_res and also stored in *RES2.
   When bit 0x40 of MODE is set, each result bit is widened to a whole
   element (all ones or all zeros): 8 words in word mode, 16 bytes in
   byte mode.  Otherwise the result bits are copied into the start of
   an otherwise zero vector: the first short of the result's storage
   in byte mode, the first byte in word mode.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } out;
  union
    {
      int i;
      char c[4];
      short s[2];
    } bits;
  const int word_mode = (mode & 1) != 0;
  int n;

  bits.i = calc_res (a, la, b, lb, mode);

  memset (&out, 0, sizeof (out));

  if (!(mode & 0x40))
    {
      /* Bit-mask form: 8 result bits in word mode, 16 in byte mode.  */
      if (word_mode)
        out.c[0] = bits.c[0];
      else
        out.s[0] = bits.s[0];
    }
  else if (word_mode)
    {
      for (n = 0; n < 8; n++)
        out.s[n] = (bits.i & (1 << n)) ? -1 : 0;
    }
  else
    {
      for (n = 0; n < 16; n++)
        out.c[n] = (bits.i & (1 << n)) ? -1 : 0;
    }

  *res2 = bits.i;

  return out.x;
}
357
/* Return the index of the first zero element of A.

   Bit 0 of MODE selects the element width: set means eight 16-bit
   elements, clear means sixteen 8-bit elements.  If no element is
   zero the element count (8 or 16) is returned.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } src;
  int len = 0;

  src.x = a;

  if (mode & 1)
    {
      while (len < 8 && src.s[len] != 0)
        len++;
    }
  else
    {
      while (len < 16 && src.c[len] != 0)
        len++;
    }

  return len;
}
387
/* Explicit-length, index-returning comparison.

   Runs cmp_indexed on *A and *B with the caller-supplied lengths LA
   and LB and returns its index.  When FLAGS is not NULL, *FLAGS
   receives cmp_flags computed in explicit-length mode from the same
   result bits.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int bits;
  const int pos = cmp_indexed (*a, la, *b, lb, mode, &bits);

  if (flags)
    *flags = cmp_flags (*a, la, *b, lb, mode, bits, 0);

  return pos;
}
400
/* Implicit-length, index-returning comparison.

   The lengths of *A and *B are taken from calc_str_len, then
   cmp_indexed is run with them and its index is returned.  When
   FLAGS is not NULL, *FLAGS receives cmp_flags computed in
   implicit-length mode from the same result bits.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  const int len_a = calc_str_len (*a, mode);
  const int len_b = calc_str_len (*b, mode);
  int bits;
  const int pos = cmp_indexed (*a, len_a, *b, len_b, mode, &bits);

  if (flags)
    *flags = cmp_flags (*a, len_a, *b, len_b, mode, bits, 1);

  return pos;
}
418
419 static inline __m128i
cmp_em(__m128i * a,int la,__m128i * b,int lb,const int mode,int * flags)420 cmp_em (__m128i *a, int la, __m128i *b, int lb,
421 const int mode, int *flags )
422 {
423 int res2;
424 __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
425
426 if (flags != NULL)
427 *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
428
429 return mask;
430 }
431
432 static inline __m128i
cmp_im(__m128i * a,__m128i * b,const int mode,int * flags)433 cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
434 {
435 int la, lb;
436 int res2;
437 __m128i mask;
438
439 la = calc_str_len (*a, mode);
440 lb = calc_str_len (*b, mode);
441
442 mask = cmp_masked (*a, la, *b, lb, mode, &res2);
443 if (flags != NULL)
444 *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
445
446 return mask;
447 }
448