/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */

#include <stdio.h>
#include <sys/time.h>

/* Source and destination buffers for the copy kernel in main: 32 rows of
   1000 elements each.  Both are requested to be 16-byte aligned; main
   verifies that alignment at run time before touching the data.  */
long long int p[32 * 1000] __attribute__ ((aligned (16)));
long long int p2[32 * 1000] __attribute__ ((aligned (16)));

11 int
main(void)12 main (void)
13 {
14 #pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
15 {
16 if (((unsigned long int)p & (0xfULL)) != 0)
17 __builtin_abort ();
18 if (((unsigned long int)p2 & (0xfULL)) != 0)
19 __builtin_abort ();
20
21 for (unsigned int k = 0; k < 10000; k += 1)
22 {
23 #pragma acc loop vector
24 for (unsigned long long int j = 0; j < 32; j += 1)
25 {
26 unsigned long long a, b;
27 unsigned long long *p3, *p4;
28 p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
29 p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));
30
31 for (unsigned int i = 0; i < 1000; i += 2)
32 {
33 a = p3[j * 1000 + i];
34 b = p3[j * 1000 + i + 1];
35
36 p4[j * 1000 + i] = a;
37 p4[j * 1000 + i + 1] = b;
38 }
39 }
40 }
41 }
42
43 return 0;
44 }
45
/* TODO: make a scan-tree-dump variant that scans vec.o instead.  */
/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */
