1! Test host_data interoperability with CUDA blas using modules.
2
3! { dg-do run { target openacc_nvidia_accel_selected } }
4! { dg-additional-options "-lcublas -Wall -Wextra" }
5
6module cublas
7  interface
8     subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
9       use iso_c_binding
10       integer(kind=c_int), value :: N
11       real(kind=c_float), value :: alpha
12       type(*), dimension(*) :: x
13       integer(kind=c_int), value :: incx
14       type(*), dimension(*) :: y
15       integer(kind=c_int), value :: incy
16     end subroutine cublassaxpy
17  end interface
18
19contains
20  subroutine saxpy (nn, aa, xx, yy)
21    integer :: nn
22    real*4 :: aa, xx(nn), yy(nn)
23    integer i
24    !$acc routine
25
26    do i = 1, nn
27       yy(i) = yy(i) + aa * xx(i)
28    end do
29  end subroutine saxpy
30
31  subroutine validate_results (n, a, b)
32    integer :: n
33    real*4 :: a(n), b(n)
34
35    do i = 1, N
36       if (abs(a(i) - b(i)) > 0.0001) stop 1
37    end do
38  end subroutine validate_results
39end module cublas
40
41program test
42  use cublas
43  implicit none
44
45  integer, parameter :: N = 10
46  integer :: i
47  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
48
49  a = 2.0
50
51  do i = 1, N
52     x(i) = 4.0 * i
53     y(i) = 3.0
54     x_ref(i) = x(i)
55     y_ref(i) = y(i)
56  end do
57
58  call saxpy (N, a, x_ref, y_ref)
59
60  !$acc data copyin (x) copy (y)
61  !$acc host_data use_device (x, y)
62  call cublassaxpy(N, a, x, 1, y, 1)
63  !$acc end host_data
64  !$acc end data
65
66  call validate_results (N, y, y_ref)
67
68  !$acc data create (x) copyout (y)
69  !$acc parallel loop
70  do i = 1, N
71     y(i) = 3.0
72  end do
73  !$acc end parallel loop
74
75  !$acc host_data use_device (x, y)
76  call cublassaxpy(N, a, x, 1, y, 1)
77  !$acc end host_data
78  !$acc end data
79
80  call validate_results (N, y, y_ref)
81
82  y(:) = 3.0
83
84  !$acc data copyin (x) copyin (a) copy (y)
85  !$acc parallel present (x) pcopy (y) present (a)
86  call saxpy (N, a, x, y)
87  !$acc end parallel
88  !$acc end data
89
90  call validate_results (N, y, y_ref)
91
92  y(:) = 3.0
93
94  !$acc enter data copyin (x, a, y)
95  !$acc parallel present (x) pcopy (y) present (a)
96  call saxpy (N, a, x, y)
97  !$acc end parallel
98  !$acc exit data delete (x, a) copyout (y)
99
100  call validate_results (N, y, y_ref)
101end program test
102