! Test OpenACC 'kernels' construct decomposition.

! { dg-additional-options "-fopt-info-omp-all" }
! { dg-additional-options "-fdump-tree-gimple" }
! { dg-additional-options "--param=openacc-kernels=decompose" }
! { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" }

! See also '../../c-c++-common/goacc/kernels-decompose-1.c'.

! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
! passed to 'incr' may be unset, and in that case, it will be set to [...]",
! so to maintain compatibility with earlier Tcl releases, we manually
! initialize counter variables:
! { dg-line l_dummy[variable c_loop_i 0] }
! { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid
! "WARNING: dg-line var l_dummy defined, but not used".

program main
  implicit none
  integer, parameter :: N = 1024
  integer, dimension (1:N) :: a
  integer :: i, sum

  !$acc kernels copyin(a(1:N)) copy(sum)
  ! { dg-bogus "optimized: assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } .-1 }
  !TODO Is this maybe the report that belongs to the XFAILed report further down? */

  ! Unannotated loop: forwarded to 'parloops' for analysis.
  !$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
  ! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
  ! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
  do i = 1, N
     sum = sum + a(i)
  end do

  ! Straight-line statements between loops become a 'gang-single' part.
  sum = sum + 1 ! { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" }
  a(1) = a(1) + 1

  ! Explicitly 'independent' loop: parallelized directly.
  !$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] }
  ! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i }
  ! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
  do i = 1, N
     sum = sum + a(i)
  end do

  ! Conditionally executed loop: cannot be parallelized.
  if (sum .gt. 10) then ! { dg-message "note: beginning 'parloops' part in OpenACC 'kernels' region" }
     !$acc loop ! { dg-line l_loop_i[incr c_loop_i] }
     ! { dg-missed "unparallelized loop nest in OpenACC 'kernels' region: it's executed conditionally" "" { target *-*-* } l_loop_i$c_loop_i }
     !TODO { dg-optimized "assigned OpenACC seq loop parallelism" "TODO" { xfail *-*-* } l_loop_i$c_loop_i }
     do i = 1, N
        sum = sum + a(i)
     end do
  end if

  ! 'auto' loop: forwarded to 'parloops' for analysis.
  !$acc loop auto ! { dg-line l_loop_i[incr c_loop_i] }
  ! { dg-message "note: forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis" "" { target *-*-* } l_loop_i$c_loop_i }
  ! { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i }
  do i = 1, N
     sum = sum + a(i)
  end do

  !$acc end kernels
end program main

! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(to:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(tofrom:sum \[len: [0-9]+\]\)$} 1 "gimple" } }

! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "gimple" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) independent$} 1 "gimple" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) auto$} 1 "gimple" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "gimple" } }

! Check that the OpenACC 'kernels' got decomposed into 'data' and an enclosed
! sequence of compute constructs.
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(to:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(tofrom:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
! As noted above, we get three "old-style" kernel regions, one gang-single region, and one parallelized loop region.
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels async\(-1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 3 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_parallelized async\(-1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:a\[_[0-9]+\] \[len: _[0-9]+\]\) map\(alloc:a \[pointer assign, bias: _[0-9]+\]\) map\(force_present:sum \[len: [0-9]+\]\)$} 1 "omp_oacc_kernels_decompose" } }
!
! 'data' plus five CCs.
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target } 6 "omp_oacc_kernels_decompose" } }

! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\)$} 2 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) independent$} 1 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop private\(i\) auto} 1 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma acc loop} 4 "omp_oacc_kernels_decompose" } }

! Each of the parallel regions is async, and there is a final call to
! __builtin_GOACC_wait.
! { dg-final { scan-tree-dump-times "__builtin_GOACC_wait" 1 "omp_oacc_kernels_decompose" } }