; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s

; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
; RUN: FileCheck %s -check-prefix=IR

; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
; RUN: -disable-output < %s | \
; RUN: FileCheck %s -check-prefix=KERNEL-IR

; REQUIRES: pollyacc

; This test sends @kernel_dynprog through three opt invocations that all use
; -polly-codegen-ppcg and verifies, one check prefix per invocation:
;   * the host-side code dumped by -polly-acc-dump-code,
;   * the host module printed with -S, and
;   * the kernel module dumped by -polly-acc-dump-kernel-ir.
;
; The function below carries two scalars (%out_l.055 and %iter.054) around an
; outer loop whose back edge is guarded by a constant-false condition, while
; the inner loop stores undef into c[1] .. c[49].

; Approximate C source:
; void kernel_dynprog(int c[50]) {
;     int iter = 0;
;     int outl = 0;
;
;     while(1) {
;         for(int indvar = 1 ; indvar <= 49; indvar++) {
;             c[indvar] = undef;
;         }
;         add78 = c[49] + outl;
;         inc80 = iter + 1;
;
;         if (true) break;
;
;         outl = add78;
;         iter = inc80;
;     }
; }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Host code expectations: a device buffer sized for all 50 elements of c is
; allocated, one kernel is launched, and the buffer is copied back and freed.
; CODE:      cudaCheckReturn(cudaMalloc((void **) &dev_MemRef_c, (50) * sizeof(i32)));

; The launch uses 2 blocks of 32 threads and passes only the device array.
; CODE:          {
; CODE-NEXT:       dim3 k0_dimBlock(32);
; CODE-NEXT:       dim3 k0_dimGrid(2);
; CODE-NEXT:       kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_c);
; CODE-NEXT:       cudaCheckKernel();
; CODE-NEXT:     }

; CODE:      cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_c));

; Kernel body expectations: the guard admits global indices 0 .. 48, i.e. 49
; instances of the statement generated from block %for.body17.
; CODE: # kernel0
; CODE-NEXT: if (32 * b0 + t0 <= 48)
; CODE-NEXT:     Stmt_for_body17(0, 32 * b0 + t0);

; Host module expectations: after the kernel is freed, the array is copied
; from the device back to the host.
; NOTE(review): the expected size of 196 bytes equals 49 * 4, which presumably
; corresponds to the 49 i32 elements c[1] .. c[49] written by the inner loop --
; confirm against how Polly computes the transfer size.
; IR-LABEL: call void @polly_freeKernel
; IR:       [[REGC:%.+]] = bitcast i32* %{{[0-9]+}} to i8*
; IR-NEXT:  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)

; Kernel module expectations: the kernel takes an i32 argument in addition to
; the array pointer and stores that argument into the addressed element of c.
; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32 %0) #0 {
; KERNEL-IR:          %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %10
; KERNEL-IR-NEXT:     store i32 %0, i32 addrspace(1)* %polly.access.MemRef_c, align 4

; Input function for the test; %c points to an array of 50 i32 values.
define void @kernel_dynprog([50 x i32]* %c) {
entry:
  ; Address of c[49]; it is loaded in the outer-loop latch below.
  %arrayidx77 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 49
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %for.cond15.for.cond12.loopexit_crit_edge, %entry
  ; Outer-loop header: both scalars start at 0 and receive the values computed
  ; in the latch when the back edge is taken.
  %out_l.055 = phi i32 [ 0, %entry ], [ %add78, %for.cond15.for.cond12.loopexit_crit_edge ]
  %iter.054 = phi i32 [ 0, %entry ], [ %inc80, %for.cond15.for.cond12.loopexit_crit_edge ]
  br label %for.body17

for.cond15.for.cond12.loopexit_crit_edge:         ; preds = %for.body17
  ; Outer-loop latch: add c[49] to the running scalar and bump the iteration
  ; counter.
  %tmp = load i32, i32* %arrayidx77, align 4
  %add78 = add nsw i32 %tmp, %out_l.055
  %inc80 = add nuw nsw i32 %iter.054, 1
  ; The condition is the constant false, so control always leaves through
  ; %for.end81 and the back edge to %for.cond1.preheader is never taken.
  br i1 false, label %for.cond1.preheader, label %for.end81

for.body17:                                       ; preds = %for.body17, %for.cond1.preheader
  ; Inner loop: the index starts at 1 and an undef value is stored to
  ; c[index] on every iteration.
  %indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
  %arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
  store i32 undef, i32* %arrayidx69, align 4
  %indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
  ; The exit test compares the incremented index, truncated to i32, with 50,
  ; so the last element written is c[49].
  %lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
  %exitcond75 = icmp ne i32 %lftr.wideiv74, 50
  br i1 %exitcond75, label %for.body17, label %for.cond15.for.cond12.loopexit_crit_edge

for.end81:                                        ; preds = %for.cond15.for.cond12.loopexit_crit_edge
  ret void
}