1! Exercise three levels of parallelism using SGEMM from BLAS.
2
3! { dg-do run }
4
5! Explicitly set vector_length to 128 using a vector_length clause.
6subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)
7  integer :: m, n, k
8  real :: alpha, beta
9  real :: a(k,*), b(k,*), c(m,*)
10
11  integer :: i, j, l
12  real :: temp
13
14  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp)
15  do j = 1, n
16     !$acc loop
17     do i = 1, m
18        temp = 0.0
19        !$acc loop reduction(+:temp)
20        do l = 1, k
21           temp = temp + a(l,i)*b(l,j)
22        end do
23        if(beta == 0.0) then
24           c(i,j) = alpha*temp
25        else
26           c(i,j) = alpha*temp + beta*c(i,j)
27        end if
28     end do
29  end do
30end subroutine openacc_sgemm_128
31
32subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
33  integer :: m, n, k
34  real :: alpha, beta
35  real :: a(k,*), b(k,*), c(m,*)
36
37  integer :: i, j, l
38  real :: temp
39
40  do j = 1, n
41     do i = 1, m
42        temp = 0.0
43        do l = 1, k
44           temp = temp + a(l,i)*b(l,j)
45        end do
46        if(beta == 0.0) then
47           c(i,j) = alpha*temp
48        else
49           c(i,j) = alpha*temp + beta*c(i,j)
50        end if
51     end do
52  end do
53end subroutine host_sgemm
54
55program main
56  integer, parameter :: M = 100, N = 50, K = 2000
57  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
58  real alpha, beta
59  integer i, j
60
61  a(:,:) = 1.0
62  b(:,:) = 0.25
63
64  c(:,:) = 0.0
65  d(:,:) = 0.0
66  e(:,:) = 0.0
67
68  alpha = 1.05
69  beta = 1.25
70
71  call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d)
72  call host_sgemm (M, N, K, alpha, a, b, beta, e)
73
74  do i = 1, m
75     do j = 1, n
76        if (d(i,j) /= e(i,j)) stop 1
77     end do
78  end do
79end program main
80