1 /* { dg-do run } */
2 /* { dg-require-effective-target sse2 } */
3 /* { dg-options "-O3 -msse2" } */
4 
5 #ifndef CHECK_H
6 #define CHECK_H "sse2-check.h"
7 #endif
8 
9 #ifndef TEST
10 #define TEST sse2_test
11 #endif
12 
13 #include CHECK_H
14 
15 #include <stdlib.h>
16 
/* The MinGW runtime doesn't provide random ().  */
18 #ifdef __MINGW32__
19 #define random rand
20 #endif
21 
/* Element count shared by every array below.  */
#define N 512

/* One family of arrays per element type.  In the functions of this file
   the "1" array of a family is only ever stored to, while the "2" and
   "3" arrays are the multiplication operands.  */

/* short  */
static short a1[N];
static short a2[N], a3[N];

/* unsigned short  */
static unsigned short b1[N];
static unsigned short b2[N], b3[N];

/* int  */
static int c1[N];
static int c2[N], c3[N];

/* unsigned int  */
static unsigned int d1[N];
static unsigned int d2[N], d3[N];

/* long long  */
static long long e1[N];
static long long e2[N], e3[N];

/* unsigned long long  */
static unsigned long long g1[N];
static unsigned long long g2[N], g3[N];
29 
30 __attribute__((noinline, noclone)) void
f1(void)31 f1 (void)
32 {
33   int i;
34   for (i = 0; i < N; ++i)
35     a1[i] = a2[i] * a3[i];
36 }
37 
38 __attribute__((noinline, noclone)) void
f2(void)39 f2 (void)
40 {
41   int i;
42   for (i = 0; i < N; ++i)
43     b1[i] = b2[i] * b3[i];
44 }
45 
46 __attribute__((noinline, noclone)) void
f3(void)47 f3 (void)
48 {
49   int i;
50   for (i = 0; i < N; ++i)
51     c1[i] = c2[i] * c3[i];
52 }
53 
54 __attribute__((noinline, noclone)) void
f4(void)55 f4 (void)
56 {
57   int i;
58   for (i = 0; i < N; ++i)
59     d1[i] = d2[i] * d3[i];
60 }
61 
62 __attribute__((noinline, noclone)) void
f5(void)63 f5 (void)
64 {
65   int i;
66   for (i = 0; i < N; ++i)
67     e1[i] = e2[i] * e3[i];
68 }
69 
70 __attribute__((noinline, noclone)) void
f6(void)71 f6 (void)
72 {
73   int i;
74   for (i = 0; i < N; ++i)
75     g1[i] = g2[i] * g3[i];
76 }
77 
78 __attribute__((noinline, noclone)) void
f7(void)79 f7 (void)
80 {
81   int i;
82   for (i = 0; i < N; ++i)
83     c1[i] = a2[i] * a3[i];
84 }
85 
86 __attribute__((noinline, noclone)) void
f8(void)87 f8 (void)
88 {
89   int i;
90   for (i = 0; i < N; ++i)
91     d1[i] = (unsigned int) b2[i] * b3[i];
92 }
93 
94 __attribute__((noinline, noclone)) void
f9(void)95 f9 (void)
96 {
97   int i;
98   for (i = 0; i < N; ++i)
99     e1[i] = (long long) c2[i] * (long long) c3[i];
100 }
101 
102 __attribute__((noinline, noclone)) void
f10(void)103 f10 (void)
104 {
105   int i;
106   for (i = 0; i < N; ++i)
107     g1[i] = (unsigned long long) d2[i] * (unsigned long long) d3[i];
108 }
109 
110 __attribute__((noinline, noclone)) int
f11(void)111 f11 (void)
112 {
113   int i, r = 0;
114   for (i = 0; i < N; ++i)
115     r += a2[i] * a3[i];
116   return r;
117 }
118 
119 __attribute__((noinline, noclone)) unsigned int
f12(void)120 f12 (void)
121 {
122   int i;
123   unsigned r = 0;
124   for (i = 0; i < N; ++i)
125     r += (unsigned int) b2[i] * b3[i];
126   return r;
127 }
128 
129 __attribute__((noinline, noclone)) long long
f13(void)130 f13 (void)
131 {
132   int i;
133   long long r = 0;
134   for (i = 0; i < N; ++i)
135     r += (long long) c2[i] * (long long) c3[i];
136   return r;
137 }
138 
139 __attribute__((noinline, noclone)) unsigned long long
f14(void)140 f14 (void)
141 {
142   int i;
143   unsigned long long r = 0;
144   for (i = 0; i < N; ++i)
145     r += (unsigned long long) d2[i] * (unsigned long long) d3[i];
146   return r;
147 }
148 
149 static void
TEST(void)150 TEST (void)
151 {
152   int i;
153   int s1 = 0;
154   unsigned int s2 = 0;
155   long long s3 = 0;
156   unsigned long long s4 = 0;
157   for (i = 0; i < N; ++i)
158     {
159       asm volatile ("" : : "r" (&s1) : "memory");
160       asm volatile ("" : : "r" (&s2) : "memory");
161       asm volatile ("" : : "r" (&s3) : "memory");
162       asm volatile ("" : : "r" (&s4) : "memory");
163       b2[i] = (int) random ();
164       b3[i] = (int) random ();
165       a2[i] = b2[i];
166       a3[i] = b3[i];
167       d2[i] = (((int) random ()) << 16) | b2[i];
168       d3[i] = (((int) random ()) << 16) | b3[i];
169       c2[i] = d2[i];
170       c3[i] = d3[i];
171       s1 += a2[i] * a3[i];
172       s2 += (unsigned int) b2[i] * b3[i];
173       s3 += (long long) c2[i] * (long long) c3[i];
174       s4 += (unsigned long long) d2[i] * (unsigned long long) d3[i];
175     }
176   f1 ();
177   f2 ();
178   f3 ();
179   f4 ();
180   f5 ();
181   f6 ();
182   for (i = 0; i < N; ++i)
183     {
184       if (a1[i] != (short) (a2[i] * a3[i]))
185 	abort ();
186       if (b1[i] != (unsigned short) (b2[i] * b3[i]))
187 	abort ();
188       if (c1[i] != c2[i] * c3[i])
189 	abort ();
190       if (d1[i] != d2[i] * d3[i])
191 	abort ();
192       if (e1[i] != e2[i] * e3[i])
193 	abort ();
194       if (g1[i] != g2[i] * g3[i])
195 	abort ();
196     }
197   f7 ();
198   f8 ();
199   f9 ();
200   f10 ();
201   for (i = 0; i < N; ++i)
202     {
203       if (c1[i] != a2[i] * a3[i])
204 	abort ();
205       if (d1[i] != b2[i] * b3[i])
206 	abort ();
207       if (e1[i] != (long long) c2[i] * (long long) c3[i])
208 	abort ();
209       if (g1[i] != (unsigned long long) d2[i] * (unsigned long long) d3[i])
210 	abort ();
211     }
212   if (f11 () != s1 || f12 () != s2 || f13 () != s3 || f14 () != s4)
213     abort ();
214 }
215