1 #include <nmmintrin.h>
2 #include <string.h>
3 
/* Bits of the flag word built by the emulation helpers.  Each macro
   names the status flag it models.  The bit positions are private to
   this file.  */
#define CFLAG 0x00000001	/* Carry flag.  */
#define ZFLAG 0x00000002	/* Zero flag.  */
#define SFLAG 0x00000004	/* Sign flag.  */
#define OFLAG 0x00000008	/* Overflow flag.  */
#define AFLAG 0x00000010	/* Auxiliary-carry flag.  */
#define PFLAG 0x00000020	/* Parity flag.  */
10 
/* Fill RES with the all-pairs equality matrix of X and Y:

     RES[j][i] = (X[i] == Y[j])

   X and Y are arrays of 1-byte or 2-byte elements.  The element count
   is derived from the element size (1 -> 16, 2 -> 8); entries of RES
   outside that square are left untouched.  Expands to one statement;
   the caller supplies the trailing semicolon.  */
#define PCMPSTR_EQ(X, Y, RES) \
  do								\
    {								\
      /* sizeof ^ 3 maps 1 -> 2 and 2 -> 1, so this is 16 or 8.  */ \
      int pcmpstr_n_ = (sizeof (*(X)) ^ 3) * 8;			\
      int pcmpstr_i_, pcmpstr_j_;				\
      for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_; pcmpstr_i_++)	\
	for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)	\
	  (RES)[pcmpstr_j_][pcmpstr_i_]				\
	    = ((X)[pcmpstr_i_] == (Y)[pcmpstr_j_]);		\
    }								\
  while (0)
19 
/* Fill RES with the range-comparison matrix of X and Y.  X is read as
   consecutive (low, high) pairs; for every element Y[j] and every
   even index i:

     RES[j][i]     = (Y[j] >= X[i])
     RES[j][i + 1] = (Y[j] <= X[i + 1])

   The comparisons are signed or unsigned according to the element
   type of X and Y.  The element count is derived from the element
   size (1 -> 16, 2 -> 8); entries of RES outside that square are left
   untouched.  Expands to one statement; the caller supplies the
   trailing semicolon.  */
#define PCMPSTR_RNG(X, Y, RES) \
  do								\
    {								\
      /* sizeof ^ 3 maps 1 -> 2 and 2 -> 1, so this is 16 or 8.  */ \
      int pcmpstr_n_ = (sizeof (*(X)) ^ 3) * 8;			\
      int pcmpstr_i_, pcmpstr_j_;				\
      for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)	\
	for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_ - 1;	\
	     pcmpstr_i_ += 2)					\
	  {							\
	    (RES)[pcmpstr_j_][pcmpstr_i_]			\
	      = ((Y)[pcmpstr_j_] >= (X)[pcmpstr_i_]);		\
	    (RES)[pcmpstr_j_][pcmpstr_i_ + 1]			\
	      = ((Y)[pcmpstr_j_] <= (X)[pcmpstr_i_ + 1]);	\
	  }							\
    }								\
  while (0)
31 
32 static void
override_invalid(unsigned char res[16][16],int la,int lb,const int mode,int dim)33 override_invalid (unsigned char res[16][16], int la, int lb,
34 		  const int mode, int dim)
35 {
36   int i, j;
37 
38   for (j = 0; j < dim; j++)
39     for (i = 0; i < dim; i++)
40       if (i < la && j >= lb)
41 	res[j][i] = 0;
42       else if (i >= la)
43 	switch ((mode & 0x0C))
44 	  {
45 	  case _SIDD_CMP_EQUAL_ANY:
46 	  case _SIDD_CMP_RANGES:
47 	    res[j][i] = 0;
48 	    break;
49 	  case _SIDD_CMP_EQUAL_EACH:
50 	    res[j][i] = (j >= lb) ? 1: 0;
51 	    break;
52 	  case _SIDD_CMP_EQUAL_ORDERED:
53 	    res[j][i] = 1;
54 	    break;
55           }
56 }
57 
58 static void
calc_matrix(__m128i a,int la,__m128i b,int lb,const int mode,unsigned char res[16][16])59 calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
60 	     unsigned char res[16][16])
61 {
62   union
63     {
64       __m128i x;
65       signed char sc[16];
66       unsigned char uc[16];
67       signed short ss[8];
68       unsigned short us[8];
69     } d, s;
70 
71   d.x = a;
72   s.x = b;
73 
74   switch ((mode & 3))
75     {
76     case _SIDD_UBYTE_OPS:
77       if ((mode & 0x0C) == _SIDD_CMP_RANGES)
78 	{
79 	  PCMPSTR_RNG (d.uc, s.uc, res);
80 	}
81       else
82 	{
83 	  PCMPSTR_EQ (d.uc, s.uc, res);
84 	}
85       break;
86     case _SIDD_UWORD_OPS:
87       if ((mode & 0x0C) == _SIDD_CMP_RANGES)
88 	{
89 	  PCMPSTR_RNG (d.us, s.us, res);
90 	}
91       else
92 	{
93 	  PCMPSTR_EQ (d.us, s.us, res);
94 	}
95       break;
96     case _SIDD_SBYTE_OPS:
97       if ((mode & 0x0C) == _SIDD_CMP_RANGES)
98 	{
99 	  PCMPSTR_RNG (d.sc, s.sc, res);
100 	}
101       else
102 	{
103 	  PCMPSTR_EQ (d.sc, s.sc, res);
104 	}
105       break;
106     case _SIDD_SWORD_OPS:
107       if ((mode & 0x0C) == _SIDD_CMP_RANGES)
108 	{
109 	  PCMPSTR_RNG (d.ss, s.ss, res);
110 	}
111       else
112 	{
113 	  PCMPSTR_EQ (d.ss, s.ss, res);
114 	}
115       break;
116     }
117 
118   override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
119 }
120 
121 static int
calc_res(__m128i a,int la,__m128i b,int lb,const int mode)122 calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
123 {
124   unsigned char mtx[16][16];
125   int i, j, k, dim, res = 0;
126 
127   memset (mtx, 0, sizeof (mtx));
128 
129   dim = (mode & 1) == 0 ? 16 : 8;
130 
131   if (la < 0)
132     la = -la;
133 
134   if (lb < 0)
135     lb = -lb;
136 
137   if (la > dim)
138     la = dim;
139 
140   if (lb > dim)
141     lb = dim;
142 
143   calc_matrix (a, la, b, lb, mode, mtx);
144 
145   switch ((mode & 0x0C))
146     {
147     case _SIDD_CMP_EQUAL_ANY:
148       for (i = 0; i < dim; i++)
149 	for (j = 0; j < dim; j++)
150 	  if (mtx[i][j])
151 	    res |= (1 << i);
152       break;
153 
154      case _SIDD_CMP_RANGES:
155       for (i = 0; i < dim; i += 2)
156 	for(j = 0; j < dim; j++)
157 	  if (mtx[j][i] && mtx[j][i+1])
158 	    res |= (1 << j);
159       break;
160 
161      case _SIDD_CMP_EQUAL_EACH:
162       for(i = 0; i < dim; i++)
163 	if (mtx[i][i])
164 	  res |= (1 << i);
165       break;
166 
167      case _SIDD_CMP_EQUAL_ORDERED:
168       for(i = 0; i < dim; i++)
169 	{
170 	  unsigned char val = 1;
171 
172 	  for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
173 	    val &= mtx[k][j];
174 
175 	  if (val)
176 	    res |= (1 << i);
177 	  else
178 	    res &= ~(1 << i);
179 	}
180       break;
181     }
182 
183   switch ((mode & 0x30))
184     {
185     case _SIDD_POSITIVE_POLARITY:
186     case _SIDD_MASKED_POSITIVE_POLARITY:
187       break;
188 
189     case _SIDD_NEGATIVE_POLARITY:
190       res ^= -1;
191       break;
192 
193     case _SIDD_MASKED_NEGATIVE_POLARITY:
194       for (i = 0; i < lb; i++)
195 	if (res & (1 << i))
196 	  res &= ~(1 << i);
197 	else
198 	  res |= (1 << i);
199       break;
200     }
201 
202   return res & ((dim == 8) ? 0xFF : 0xFFFF);
203 }
204 
205 static int
cmp_flags(__m128i a,int la,__m128i b,int lb,int mode,int res2,int is_implicit)206 cmp_flags (__m128i a, int la, __m128i b, int lb,
207 	   int mode, int res2, int is_implicit)
208 {
209   int i;
210   int flags = 0;
211   int is_bytes_mode = (mode & 1) == 0;
212   union
213     {
214       __m128i x;
215       unsigned char uc[16];
216       unsigned short us[8];
217     } d, s;
218 
219   d.x = a;
220   s.x = b;
221 
222   /* CF: reset if (RES2 == 0), set otherwise.  */
223   if (res2 != 0)
224     flags |= CFLAG;
225 
226   if (is_implicit)
227     {
228       /* ZF: set if any byte/word of src xmm operand is null, reset
229 	 otherwise.
230 	 SF: set if any byte/word of dst xmm operand is null, reset
231 	 otherwise.  */
232 
233       if (is_bytes_mode)
234 	{
235 	  for (i = 0; i < 16; i++)
236 	    {
237 	      if (s.uc[i] == 0)
238 		flags |= ZFLAG;
239 	      if (d.uc[i] == 0)
240 		flags |= SFLAG;
241             }
242 	}
243       else
244 	{
245 	  for (i = 0; i < 8; i++)
246 	    {
247 	      if (s.us[i] == 0)
248 		flags |= ZFLAG;
249 	      if (d.us[i] == 0)
250 		flags |= SFLAG;
251             }
252         }
253     }
254   else
255     {
256       /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
257 	 SF: set if abs value of EAX/RAX < 16 (8), reset otherwise.  */
258       int max_ind = is_bytes_mode ? 16 : 8;
259 
260       if (la < 0)
261 	la = -la;
262       if (lb < 0)
263 	lb = -lb;
264 
265       if (lb < max_ind)
266 	flags |= ZFLAG;
267       if (la < max_ind)
268 	flags |= SFLAG;
269     }
270 
271   /* OF: equal to RES2[0].  */
272   if ((res2 & 0x1))
273     flags |= OFLAG;
274 
275   /* AF: Reset.
276      PF: Reset.  */
277   return flags;
278 }
279 
280 static int
cmp_indexed(__m128i a,int la,__m128i b,int lb,const int mode,int * res2)281 cmp_indexed (__m128i a, int la, __m128i b, int lb,
282 	     const int mode, int *res2)
283 {
284   int i, ndx;
285   int dim = (mode & 1) == 0 ? 16 : 8;
286   int r2;
287 
288   r2 = calc_res (a, la, b, lb, mode);
289 
290   ndx = dim;
291   if ((mode & 0x40))
292     {
293       for (i = dim - 1; i >= 0; i--)
294 	if (r2 & (1 << i))
295 	  {
296 	    ndx = i;
297 	    break;
298 	  }
299     }
300   else
301     {
302       for (i = 0; i < dim; i++)
303 	if ((r2 & (1 << i)))
304 	  {
305 	    ndx = i;
306 	    break;
307 	  }
308     }
309 
310    *res2 = r2;
311    return ndx;
312 }
313 
314 static __m128i
cmp_masked(__m128i a,int la,__m128i b,int lb,const int mode,int * res2)315 cmp_masked (__m128i a, int la, __m128i b, int lb,
316 	    const int mode, int *res2)
317 {
318   union
319     {
320       __m128i x;
321       char c[16];
322       short s[8];
323     } ret;
324   int i;
325   int dim = (mode & 1) == 0 ? 16 : 8;
326   union
327     {
328       int i;
329       char c[4];
330       short s[2];
331     } r2;
332 
333   r2.i = calc_res (a, la, b, lb, mode);
334 
335   memset (&ret, 0, sizeof (ret));
336 
337   if (mode & 0x40)
338     {
339       for (i = 0; i < dim; i++)
340 	if (dim == 8)
341 	  ret.s [i] = (r2.i & (1 << i)) ? -1 : 0;
342 	else
343 	  ret.c [i] = (r2.i & (1 << i)) ? -1 : 0;
344     }
345   else
346     {
347       if (dim == 16)
348 	ret.s[0] = r2.s[0];
349       else
350 	ret.c[0] = r2.c[0];
351     }
352 
353    *res2 = r2.i;
354 
355    return ret.x;
356 }
357 
358 static int
calc_str_len(__m128i a,const int mode)359 calc_str_len (__m128i a, const int mode)
360 {
361   union
362     {
363       __m128i x;
364       char c[16];
365       short s[8];
366     } s;
367   int i;
368   int dim  = (mode & 1) == 0 ? 16 : 8;
369 
370   s.x = a;
371 
372   if ((mode & 1))
373     {
374       for (i = 0; i < dim; i++)
375 	if (s.s[i] == 0)
376 	  break;
377     }
378   else
379     {
380       for (i = 0; i < dim; i++)
381        if (s.c[i] == 0)
382 	 break;
383     }
384 
385   return i;
386 }
387 
388 static inline int
cmp_ei(__m128i * a,int la,__m128i * b,int lb,const int mode,int * flags)389 cmp_ei (__m128i *a, int la, __m128i *b, int lb,
390 	const int mode, int *flags)
391 {
392   int res2;
393   int index = cmp_indexed (*a, la, *b, lb, mode, &res2);
394 
395   if (flags != NULL)
396     *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
397 
398   return index;
399 }
400 
401 static inline int
cmp_ii(__m128i * a,__m128i * b,const int mode,int * flags)402 cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
403 {
404   int la, lb;
405   int res2;
406   int index;
407 
408   la = calc_str_len (*a, mode);
409   lb = calc_str_len (*b, mode);
410 
411   index = cmp_indexed (*a, la, *b, lb, mode, &res2);
412 
413   if (flags != NULL)
414     *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
415 
416   return index;
417 }
418 
419 static inline __m128i
cmp_em(__m128i * a,int la,__m128i * b,int lb,const int mode,int * flags)420 cmp_em (__m128i *a, int la, __m128i *b, int lb,
421 	const int mode, int *flags )
422 {
423   int res2;
424   __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);
425 
426   if (flags != NULL)
427     *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
428 
429   return mask;
430 }
431 
432 static inline __m128i
cmp_im(__m128i * a,__m128i * b,const int mode,int * flags)433 cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
434 {
435   int la, lb;
436   int res2;
437   __m128i mask;
438 
439   la = calc_str_len (*a, mode);
440   lb = calc_str_len (*b, mode);
441 
442   mask = cmp_masked (*a, la, *b, lb, mode, &res2);
443   if (flags != NULL)
444     *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
445 
446   return mask;
447 }
448