1! Exercise three levels of parallelism using SGEMM from BLAS. 2 3! { dg-do run } 4 5! Explicitly set vector_length to 128 using a vector_length clause. 6subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c) 7 integer :: m, n, k 8 real :: alpha, beta 9 real :: a(k,*), b(k,*), c(m,*) 10 11 integer :: i, j, l 12 real :: temp 13 14 !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp) 15 do j = 1, n 16 !$acc loop 17 do i = 1, m 18 temp = 0.0 19 !$acc loop reduction(+:temp) 20 do l = 1, k 21 temp = temp + a(l,i)*b(l,j) 22 end do 23 if(beta == 0.0) then 24 c(i,j) = alpha*temp 25 else 26 c(i,j) = alpha*temp + beta*c(i,j) 27 end if 28 end do 29 end do 30end subroutine openacc_sgemm_128 31 32subroutine host_sgemm (m, n, k, alpha, a, b, beta, c) 33 integer :: m, n, k 34 real :: alpha, beta 35 real :: a(k,*), b(k,*), c(m,*) 36 37 integer :: i, j, l 38 real :: temp 39 40 do j = 1, n 41 do i = 1, m 42 temp = 0.0 43 do l = 1, k 44 temp = temp + a(l,i)*b(l,j) 45 end do 46 if(beta == 0.0) then 47 c(i,j) = alpha*temp 48 else 49 c(i,j) = alpha*temp + beta*c(i,j) 50 end if 51 end do 52 end do 53end subroutine host_sgemm 54 55program main 56 integer, parameter :: M = 100, N = 50, K = 2000 57 real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N) 58 real alpha, beta 59 integer i, j 60 61 a(:,:) = 1.0 62 b(:,:) = 0.25 63 64 c(:,:) = 0.0 65 d(:,:) = 0.0 66 e(:,:) = 0.0 67 68 alpha = 1.05 69 beta = 1.25 70 71 call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d) 72 call host_sgemm (M, N, K, alpha, a, b, beta, e) 73 74 do i = 1, m 75 do j = 1, n 76 if (d(i,j) /= e(i,j)) stop 1 77 end do 78 end do 79end program main 80