1! Exercise three levels of parallelism using SGEMM from BLAS.
2
3! { dg-do run }
4! { dg-additional-options "-fopenacc-dim=::128" }
5
6! Implicitly set vector_length to 128 using -fopenacc-dim.
7subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
8  integer :: m, n, k
9  real :: alpha, beta
10  real :: a(k,*), b(k,*), c(m,*)
11
12  integer :: i, j, l
13  real :: temp
14
15  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
16  do j = 1, n
17     !$acc loop
18     do i = 1, m
19        temp = 0.0
20        !$acc loop reduction(+:temp)
21        do l = 1, k
22           temp = temp + a(l,i)*b(l,j)
23        end do
24        if(beta == 0.0) then
25           c(i,j) = alpha*temp
26        else
27           c(i,j) = alpha*temp + beta*c(i,j)
28        end if
29     end do
30  end do
31end subroutine openacc_sgemm
32
33subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
34  integer :: m, n, k
35  real :: alpha, beta
36  real :: a(k,*), b(k,*), c(m,*)
37
38  integer :: i, j, l
39  real :: temp
40
41  do j = 1, n
42     do i = 1, m
43        temp = 0.0
44        do l = 1, k
45           temp = temp + a(l,i)*b(l,j)
46        end do
47        if(beta == 0.0) then
48           c(i,j) = alpha*temp
49        else
50           c(i,j) = alpha*temp + beta*c(i,j)
51        end if
52     end do
53  end do
54end subroutine host_sgemm
55
56program main
57  integer, parameter :: M = 100, N = 50, K = 2000
58  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
59  real alpha, beta
60  integer i, j
61
62  a(:,:) = 1.0
63  b(:,:) = 0.25
64
65  c(:,:) = 0.0
66  d(:,:) = 0.0
67  e(:,:) = 0.0
68
69  alpha = 1.05
70  beta = 1.25
71
72  call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
73  call host_sgemm (M, N, K, alpha, a, b, beta, e)
74
75  do i = 1, m
76     do j = 1, n
77        if (c(i,j) /= e(i,j)) stop 1
78     end do
79  end do
80end program main
81