// { dg-require-effective-target size32plus }
// { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
// { dg-additional-options "-msse2" { target sse2_runtime } }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } }

// abort is declared directly; no header is included in the visible source.
extern "C" void abort ();

// Shared test state:
//   a - input values, filled in by main
//   b - per-iteration scan output, checked by main after every call
//   r - accumulator that main passes by reference to foo and baz
//   q - accumulator that bar and qux reset and then scan through
int r, a[1024], b[1024], q;

// Exclusive prefix sum over 1024 elements, written as an orphaned
// `omp for simd` worksharing loop (no parallel region of its own; main
// wraps the call in `#pragma omp parallel`).
//
//   a - input sequence, indexed as a[i]
//   b - output sequence; b[i] receives the value of r from before a[i]
//       has been added
//   r - running total; main instantiates U as `int &`, so the total is
//       visible to the caller afterwards
//
// The parameters shadow the file-level a, b and r.
template <typename T, typename U>
__attribute__((noipa)) void
foo (T a, T b, U r)
{
  // NOTE(review): `if (0)` on the combined construct can only bind to the
  // simd part; presumably it is here to exercise the scan lowering with
  // simd execution disabled -- confirm against the OpenMP spec.
  #pragma omp for simd if (0) reduction (inscan, +:r)
  for (int i = 0; i < 1024; i++)
    {
      // Input phase of the exclusive scan: read the total so far.
      b[i] = r;
      #pragma omp scan exclusive(r)
      // Scan phase: fold this iteration's element into the total.
      r += a[i];
    }
}

23 template <typename T>
24 __attribute__((noipa)) T
bar()25 bar ()
26 {
27 T &s = q;
28 q = 0;
29 #pragma omp parallel
30 #pragma omp for simd reduction (inscan, +:s) simdlen(1)
31 for (int i = 0; i < 1024; i++)
32 {
33 b[i] = s;
34 #pragma omp scan exclusive(s)
35 s += 2 * a[i];
36 }
37 return s;
38 }
39
// Exclusive prefix sum over 1024 elements using the combined
// `parallel for simd` construct with a pointer/reference interface.
//
//   a - input array, read as a[i]
//   b - output array; b[i] receives the value of r from before a[i] has
//       been added
//   r - running total, updated in place for the caller
//
// The loop index has type T as well; main instantiates T as int.
// The parameters shadow the file-level a, b and r.
template <typename T>
__attribute__((noipa)) void
baz (T *a, T *b, T &r)
{
  #pragma omp parallel for simd reduction (inscan, +:r)
  for (T i = 0; i < 1024; i++)
    {
      // Input phase: read the total so far.
      b[i] = r;
      #pragma omp scan exclusive(r)
      // Scan phase: fold in the current element.
      r += a[i];
    }
}

53 template <typename T>
54 __attribute__((noipa)) int
qux()55 qux ()
56 {
57 T s = q;
58 q = 0;
59 #pragma omp parallel for simd reduction (inscan, +:s)
60 for (int i = 0; i < 1024; i++)
61 {
62 b[i] = s;
63 #pragma omp scan exclusive(s)
64 s += 2 * a[i];
65 }
66 return s;
67 }
68
69 int
main()70 main ()
71 {
72 int s = 0;
73 for (int i = 0; i < 1024; ++i)
74 {
75 a[i] = i;
76 b[i] = -1;
77 asm ("" : "+g" (i));
78 }
79 #pragma omp parallel
80 foo<int *, int &> (a, b, r);
81 if (r != 1024 * 1023 / 2)
82 abort ();
83 for (int i = 0; i < 1024; ++i)
84 {
85 if (b[i] != s)
86 abort ();
87 else
88 b[i] = 25;
89 s += i;
90 }
91 if (bar<int> () != 1024 * 1023)
92 abort ();
93 s = 0;
94 for (int i = 0; i < 1024; ++i)
95 {
96 if (b[i] != s)
97 abort ();
98 else
99 b[i] = -1;
100 s += 2 * i;
101 }
102 r = 0;
103 baz<int> (a, b, r);
104 if (r != 1024 * 1023 / 2)
105 abort ();
106 s = 0;
107 for (int i = 0; i < 1024; ++i)
108 {
109 if (b[i] != s)
110 abort ();
111 else
112 b[i] = -25;
113 s += i;
114 }
115 if (qux<int &> () != 1024 * 1023)
116 abort ();
117 s = 0;
118 for (int i = 0; i < 1024; ++i)
119 {
120 if (b[i] != s)
121 abort ();
122 s += 2 * i;
123 }
124 }
125