1 /* { dg-do run } */
2 /* { dg-require-effective-target sse4 } */
3 /* { dg-options "-O2 -msse4.1" } */
4 
5 #ifndef CHECK_H
6 #define CHECK_H "sse4_1-check.h"
7 #endif
8 
9 #ifndef TEST
10 #define TEST sse4_1_test
11 #endif
12 
13 #include CHECK_H
14 
15 #include <smmintrin.h>
16 #include <string.h>
17 
/* Immediate operands passed to _mm_mpsadbw_epu8 by the test.  The
   scalar reference compute_mpsadbw reads only bits 2:0 of each value
   (bits 1:0 and bit 2 separately); the remaining bits do not affect
   its result.  */
18 #define msk0 0xC0
19 #define msk1 0x01
20 #define msk2 0xF2
21 #define msk3 0x03
22 #define msk4 0x84
23 #define msk5 0x05
24 #define msk6 0xE6
25 #define msk7 0x67
26 
/* Scalar reference model used to validate _mm_mpsadbw_epu8.

   V1 and V2 each point to the 16 bytes of one source operand; neither
   is modified.  Only the low three bits of MASK are interpreted:
     - bits 1:0 select which 4-byte group of V2 is the reference block
       (byte offset 0, 4, 8 or 12);
     - bit 2 selects whether the sliding window over V1 starts at byte
       0 or byte 4.

   Word J (J = 0..7) of the returned vector is the sum of the absolute
   differences between the four reference bytes and the four bytes of
   V1 starting at the window offset plus J.  */

static __m128i
compute_mpsadbw (const unsigned char *v1, const unsigned char *v2, int mask)
{
  union
    {
      __m128i x;
      unsigned short s[8];
    } ret;
  unsigned char ref[4];
  int i, j;
  int offs1, offs2;

  /* Latch the reference block chosen by bits 1:0.  */
  offs2 = 4 * (mask & 3);
  for (i = 0; i < 4; i++)
    ref[i] = v2[offs2 + i];

  /* Bit 2 moves the start of the V1 window from byte 0 to byte 4.  The
     highest byte touched is offs1 + 7 + 3 <= 14, so the accesses stay
     inside the 16-byte operand.  */
  offs1 = 4 * ((mask & 4) >> 2);
  for (j = 0; j < 8; j++)
    {
      /* At most 4 * 255, which fits in an unsigned short.  */
      unsigned int sad = 0;

      for (i = 0; i < 4; i++)
        sad += abs (v1[offs1 + j + i] - ref[i]);
      ret.s[j] = sad;
    }

  return ret.x;
}
53 
54 static void
TEST(void)55 TEST (void)
56 {
57   union
58     {
59       __m128i x;
60       unsigned int i[4];
61       unsigned char c[16];
62     } val1, val2, val3 [8];
63   __m128i res[8], tmp;
64   unsigned char masks[8];
65   int i;
66 
67   val1.i[0] = 0x35251505;
68   val1.i[1] = 0x75655545;
69   val1.i[2] = 0xB5A59585;
70   val1.i[3] = 0xF5E5D5C5;
71 
72   val2.i[0] = 0x31211101;
73   val2.i[1] = 0x71615141;
74   val2.i[2] = 0xB1A19181;
75   val2.i[3] = 0xF1E1D1C1;
76 
77   for (i=0; i < 8; i++)
78     switch (i % 3)
79       {
80       case 1:
81 	val3[i].i[0] = 0xF1E1D1C1;
82 	val3[i].i[1] = 0xB1A19181;
83 	val3[i].i[2] = 0x71615141;
84 	val3[i].i[3] = 0x31211101;
85 	break;
86       default:
87 	val3[i].x = val2.x;
88 	break;
89       }
90 
91   /* Check mpsadbw imm8, xmm, xmm.  */
92   res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0);
93   res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1);
94   res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2);
95   res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3);
96   res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4);
97   res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5);
98   res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6);
99   res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7);
100 
101   masks[0] = msk0;
102   masks[1] = msk1;
103   masks[2] = msk2;
104   masks[3] = msk3;
105   masks[4] = msk4;
106   masks[5] = msk5;
107   masks[6] = msk6;
108   masks[7] = msk7;
109 
110   for (i=0; i < 8; i++)
111     {
112       tmp = compute_mpsadbw (val1.c, val2.c, masks[i]);
113       if (memcmp (&tmp, &res[i], sizeof (tmp)))
114 	abort ();
115     }
116 
117   /* Check mpsadbw imm8, m128, xmm.  */
118   for (i=0; i < 8; i++)
119     {
120       res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
121       masks[i] = msk4;
122     }
123 
124   for (i=0; i < 8; i++)
125     {
126       tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]);
127       if (memcmp (&tmp, &res[i], sizeof (tmp)))
128 	abort ();
129     }
130 }
131