1; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
2; RUN: -disable-output < %s | \
3; RUN: FileCheck -check-prefix=CODE %s
4
5; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
6; RUN: FileCheck %s -check-prefix=IR
7
8; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
9; RUN: -disable-output < %s | \
10; RUN: FileCheck %s -check-prefix=KERNEL-IR
11
12; REQUIRES: pollyacc
13
14; Approximate C source:
15; void kernel_dynprog(int c[50]) {
16;     int iter = 0;
17;     int outl = 0;
18;
19;      while(1) {
20;         for(int indvar = 1 ; indvar <= 49; indvar++) {
21;             c[indvar] = undef;
22;         }
23;         int add78 = c[49] + outl;
24;         int inc80 = iter + 1;
25;
26;         if (true) break;
27;
28;         outl = add78;
29;         iter = inc80;
30;      }
31;}
; Module-level target description for the input IR below: the triple names
; x86_64 Linux. The KERNEL-IR check lines above expect a separate `ptx_kernel`
; function, so this presumably describes only the host side -- TODO confirm.
32target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
33target triple = "x86_64-unknown-linux-gnu"
34
35; CODE:       cudaCheckReturn(cudaMalloc((void **) &dev_MemRef_c, (50) * sizeof(i32)));
36
37; CODE:       {
38; CODE-NEXT:    dim3 k0_dimBlock(32);
39; CODE-NEXT:    dim3 k0_dimGrid(2);
40; CODE-NEXT:    kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_c);
41; CODE-NEXT:    cudaCheckKernel();
42; CODE-NEXT:  }
43
44; CODE:       cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
45; CODE-NEXT:  cudaCheckReturn(cudaFree(dev_MemRef_c));
46
47; CODE: # kernel0
48; CODE-NEXT: if (32 * b0 + t0 <= 48)
49; CODE-NEXT:     Stmt_for_body17(0, 32 * b0 + t0);
50
51; IR-LABEL: call void @polly_freeKernel
52; IR:       [[REGC:%.+]] =   bitcast i32* %{{[0-9]+}} to i8*
53; IR-NEXT:  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
54
55; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32 %0) #0 {
56; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %10
57; KERNEL-IR-NEXT: store i32 %0, i32 addrspace(1)* %polly.access.MemRef_c, align 4
58
; Test input function. Takes a pointer to a [50 x i32] array, stores `undef`
; into elements 1..49 in an inner loop, then loads element 49 and combines it
; with two loop-carried PHI values. Returns void.
;
; NOTE: the check lines above mention names taken from this function
; (`Stmt_for_body17`, `MemRef_c`, `FUNC_kernel_dynprog_...`), so renaming the
; function, its argument, or the `for.body17` block presumably requires
; updating those expectations as well.
59define void @kernel_dynprog([50 x i32]* %c) {
60entry:
  ; Address of c[49]; computed once here and loaded from after the inner loop.
61  %arrayidx77 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 49
62  br label %for.cond1.preheader
63
; Outer-loop header. Both PHIs start at 0 when coming from %entry and would
; receive %add78 / %inc80 along the back edge from the crit_edge block.
64for.cond1.preheader:                              ; preds = %for.cond15.for.cond12.loopexit_crit_edge, %entry
65  %out_l.055 = phi i32 [ 0, %entry ], [ %add78, %for.cond15.for.cond12.loopexit_crit_edge ]
66  %iter.054 = phi i32 [ 0, %entry ], [ %inc80, %for.cond15.for.cond12.loopexit_crit_edge ]
67  br label %for.body17
68
; Reached once the inner loop (%for.body17, defined further down) has finished.
; Reads c[49] and computes the values that feed the outer-loop PHIs.
69for.cond15.for.cond12.loopexit_crit_edge:         ; preds = %for.body17
70  %tmp = load i32, i32* %arrayidx77, align 4
71  %add78 = add nsw i32 %tmp, %out_l.055
72  %inc80 = add nuw nsw i32 %iter.054, 1
  ; The condition is the constant `false`, so the back edge to
  ; %for.cond1.preheader is never taken and control always goes to %for.end81.
73  br i1 false, label %for.cond1.preheader, label %for.end81
74
; Inner loop: the induction variable starts at 1 and is incremented by 1 each
; iteration; every iteration stores `undef` to c[%indvars.iv71].
75for.body17:                                       ; preds = %for.body17, %for.cond1.preheader
76  %indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
77  %arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
78  store i32 undef, i32* %arrayidx69, align 4
79  %indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
  ; The exit test compares the incremented index, truncated to i32, against 50:
  ; the loop repeats while it differs, so the last index stored to is 49.
80  %lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
81  %exitcond75 = icmp ne i32 %lftr.wideiv74, 50
82  br i1 %exitcond75, label %for.body17, label %for.cond15.for.cond12.loopexit_crit_edge
83
; Single exit block of the function.
84for.end81:                                        ; preds = %for.cond15.for.cond12.loopexit_crit_edge
85  ret void
86}
87