1! Exercise three levels of parallelism using SGEMM from BLAS. 2 3! { dg-do run } 4! { dg-additional-options "-fopenacc-dim=::128" } 5 6! Implicitly set vector_length to 128 using -fopenacc-dim. 7subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c) 8 integer :: m, n, k 9 real :: alpha, beta 10 real :: a(k,*), b(k,*), c(m,*) 11 12 integer :: i, j, l 13 real :: temp 14 15 !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp) 16 do j = 1, n 17 !$acc loop 18 do i = 1, m 19 temp = 0.0 20 !$acc loop reduction(+:temp) 21 do l = 1, k 22 temp = temp + a(l,i)*b(l,j) 23 end do 24 if(beta == 0.0) then 25 c(i,j) = alpha*temp 26 else 27 c(i,j) = alpha*temp + beta*c(i,j) 28 end if 29 end do 30 end do 31end subroutine openacc_sgemm 32 33subroutine host_sgemm (m, n, k, alpha, a, b, beta, c) 34 integer :: m, n, k 35 real :: alpha, beta 36 real :: a(k,*), b(k,*), c(m,*) 37 38 integer :: i, j, l 39 real :: temp 40 41 do j = 1, n 42 do i = 1, m 43 temp = 0.0 44 do l = 1, k 45 temp = temp + a(l,i)*b(l,j) 46 end do 47 if(beta == 0.0) then 48 c(i,j) = alpha*temp 49 else 50 c(i,j) = alpha*temp + beta*c(i,j) 51 end if 52 end do 53 end do 54end subroutine host_sgemm 55 56program main 57 integer, parameter :: M = 100, N = 50, K = 2000 58 real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N) 59 real alpha, beta 60 integer i, j 61 62 a(:,:) = 1.0 63 b(:,:) = 0.25 64 65 c(:,:) = 0.0 66 d(:,:) = 0.0 67 e(:,:) = 0.0 68 69 alpha = 1.05 70 beta = 1.25 71 72 call openacc_sgemm (M, N, K, alpha, a, b, beta, c) 73 call host_sgemm (M, N, K, alpha, a, b, beta, e) 74 75 do i = 1, m 76 do j = 1, n 77 if (c(i,j) /= e(i,j)) stop 1 78 end do 79 end do 80end program main 81