1! Test host_data interoperability with CUDA blas using modules.
2
3! { dg-do run { target openacc_nvidia_accel_selected } }
4! { dg-additional-options "-lcublas -Wall -Wextra" }
5! { dg-require-effective-target openacc_cublas }
6
7module cublas
8  interface
9     subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
10       use iso_c_binding
11       integer(kind=c_int), value :: N
12       real(kind=c_float), value :: alpha
13       type(*), dimension(*) :: x
14       integer(kind=c_int), value :: incx
15       type(*), dimension(*) :: y
16       integer(kind=c_int), value :: incy
17     end subroutine cublassaxpy
18  end interface
19
20contains
21  subroutine saxpy (nn, aa, xx, yy)
22    integer :: nn
23    real*4 :: aa, xx(nn), yy(nn)
24    integer i
25    !$acc routine
26
27    do i = 1, nn
28       yy(i) = yy(i) + aa * xx(i)
29    end do
30  end subroutine saxpy
31
32  subroutine validate_results (n, a, b)
33    integer :: n
34    real*4 :: a(n), b(n)
35
36    do i = 1, N
37       if (abs(a(i) - b(i)) > 0.0001) stop 1
38    end do
39  end subroutine validate_results
40end module cublas
41
42program test
43  use cublas
44  implicit none
45
46  integer, parameter :: N = 10
47  integer :: i
48  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
49
50  a = 2.0
51
52  do i = 1, N
53     x(i) = 4.0 * i
54     y(i) = 3.0
55     x_ref(i) = x(i)
56     y_ref(i) = y(i)
57  end do
58
59  call saxpy (N, a, x_ref, y_ref)
60
61  !$acc data copyin (x) copy (y)
62  !$acc host_data use_device (x, y)
63  call cublassaxpy(N, a, x, 1, y, 1)
64  !$acc end host_data
65  !$acc end data
66
67  call validate_results (N, y, y_ref)
68
69  !$acc data create (x) copyout (y)
70  !$acc parallel loop
71  do i = 1, N
72     y(i) = 3.0
73  end do
74  !$acc end parallel loop
75
76  !$acc host_data use_device (x, y)
77  call cublassaxpy(N, a, x, 1, y, 1)
78  !$acc end host_data
79  !$acc end data
80
81  call validate_results (N, y, y_ref)
82
83  y(:) = 3.0
84
85  !$acc data copyin (x) copyin (a) copy (y)
86  !$acc parallel present (x) pcopy (y) present (a)
87  call saxpy (N, a, x, y)
88  !$acc end parallel
89  !$acc end data
90
91  call validate_results (N, y, y_ref)
92
93  y(:) = 3.0
94
95  !$acc enter data copyin (x, a, y)
96  !$acc parallel present (x) pcopy (y) present (a)
97  call saxpy (N, a, x, y)
98  !$acc end parallel
99  !$acc exit data delete (x, a) copyout (y)
100
101  call validate_results (N, y, y_ref)
102end program test
103