/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
/* { dg-additional-options "-std=c99 -ftree-slp-vectorize -foffload=-ftree-slp-vectorize -foffload=-fdump-tree-slp1 -foffload=-save-temps -save-temps" } */

/* Check that pairs of adjacent 64-bit loads and stores inside an OpenACC
   vector loop are SLP-vectorized.  SLP vectorization and the slp1 tree
   dump are requested for the offload compiler through the -foffload=
   options above, and the dg-final directives at the end of the file scan
   that dump for a two-element "long long unsigned int" vector type.

   The two dg-skip-if directives skip the test at -O0 and whenever
   -DACC_MEM_SHARED=0 is not among the options.  */

#include <stdio.h>
#include <sys/time.h>

/* Source (p) and destination (p2) buffers, each 16-byte aligned.  The
   kernel below treats each one as 32 rows of 1000 elements.  */
long long int p[32 *1000] __attribute__((aligned(16)));
long long int p2[32 *1000] __attribute__((aligned(16)));

/* Copy P to P2 inside an OpenACC parallel region, two elements at a time.
   Aborts if either buffer is not 16-byte aligned; returns 0 otherwise.  */
int
main (void)
{
  /* One gang, one worker, and a vector length of 32.  */
#pragma acc parallel num_gangs(1) num_workers(1) vector_length(32)
  {
    /* Abort unless both buffers really are 16-byte aligned.  */
    if (((unsigned long int)p & (0xfULL)) != 0)
      __builtin_abort ();
    if (((unsigned long int)p2 & (0xfULL)) != 0)
      __builtin_abort ();

    /* The same copy is performed 10000 times; every pass writes the same
       values.  NOTE(review): presumably the repetition only lengthens the
       kernel's run time -- confirm.  */
    for (unsigned int k = 0; k < 10000; k += 1)
      {
	/* Each of the 32 vector-loop iterations handles one 1000-element
	   row.  */
#pragma acc loop vector
	for (unsigned long long int j = 0; j < 32; j += 1)
	  {
	    unsigned long long a, b;
	    unsigned long long *p3, *p4;

	    /* Clear the low four address bits.  Given the alignment checks
	       above this leaves both addresses unchanged.  NOTE(review):
	       presumably this makes the 16-byte alignment visible to the
	       vectorizer -- confirm.  */
	    p3 = (unsigned long long *)((unsigned long long int)p & (~0xfULL));
	    p4 = (unsigned long long *)((unsigned long long int)p2 & (~0xfULL));

	    /* Each iteration moves two consecutive 64-bit elements: the
	       load pair and store pair that the dump scan expects to see
	       turned into two-element vector accesses.  */
	    for (unsigned int i = 0; i < 1000; i += 2)
	      {
		a = p3[j * 1000 + i];
		b = p3[j * 1000 + i + 1];

		p4[j * 1000 + i] = a;
		p4[j * 1000 + i + 1] = b;
	      }
	  }
      }
  }

  return 0;
}

/* Todo: make a scan-tree-dump variant that scans vec.o instead.  */
/* { dg-final { file copy -force [glob vec.o.*] [regsub \.o\. [glob vec.o.*] \.c\.] } } */
/* { dg-final { scan-tree-dump "vector\\(2\\) long long unsigned int" "slp1" } } */