1! Fixed-mode host_data interaction with CUDA BLAS.
2
3! { dg-do run { target openacc_nvidia_accel_selected } }
4! { dg-additional-options "-lcublas -Wall -Wextra" }
5
! Main program.  Computes a host reference result with saxpy, then
! checks that four OpenACC variants reproduce it:
!   1. cublassaxpy on device copies made by a structured data region;
!   2. cublassaxpy on device data that is created and initialized on
!      the device itself;
!   3. saxpy called from a parallel region inside a data region;
!   4. saxpy called from a parallel region bracketed by unstructured
!      enter data / exit data directives.
! cublassaxpy is presumably the cuBLAS SAXPY binding declared in
! cublas-fixed.h; every variant is expected to match the host saxpy.
      include "cublas-fixed.h"

      integer, parameter :: N = 10
      integer :: i
      real*4 :: x_ref(N), y_ref(N), x(N), y(N), a

      a = 2.0

      do i = 1, N
         x(i) = 4.0 * i
         y(i) = 3.0
         x_ref(i) = x(i)
         y_ref(i) = y(i)
      end do

! Host reference: y_ref = y_ref + a * x_ref.
      call saxpy (N, a, x_ref, y_ref)

! Variant 1: host_data use_device hands the device addresses of x and
! y to the library call; y is copied back at the end of the region.
!$acc data copyin (x) copy (y)
!$acc host_data use_device (x, y)
      call cublassaxpy(N, a, x, 1, y, 1)
!$acc end host_data
!$acc end data

      call validate_results (N, y, y_ref)

! Variant 2: create only allocates device storage for x, it does not
! transfer the host values.  Both operands are therefore initialized
! on the device before the library call reads them; otherwise the
! result would depend on whatever the device allocation contained.
!$acc data create (x) copyout (y)
!$acc parallel loop
      do i = 1, N
         x(i) = 4.0 * i
         y(i) = 3.0
      end do
!$acc end parallel loop

!$acc host_data use_device (x, y)
      call cublassaxpy(N, a, x, 1, y, 1)
!$acc end host_data
!$acc end data

      call validate_results (N, y, y_ref)

      y(:) = 3.0

! Variant 3: offloaded call of the acc routine saxpy on data that is
! already present on the device.
! NOTE(review): saxpy is called directly in the parallel region, not
! from a partitioned loop; confirm the result is still well defined
! if the implementation launches more than one gang.
!$acc data copyin (x) copyin (a) copy (y)
!$acc parallel present (x) pcopy (y) present (a)
      call saxpy (N, a, x, y)
!$acc end parallel
!$acc end data

      call validate_results (N, y, y_ref)

      y(:) = 3.0

! Variant 4: same computation with unstructured data lifetimes.
!$acc enter data copyin (x, a, y)
!$acc parallel present (x) pcopy (y) present (a)
      call saxpy (N, a, x, y)
!$acc end parallel
!$acc exit data delete (x, a) copyout (y)

      call validate_results (N, y, y_ref)
      end
65
! Single-precision a*x plus y: yy(i) = yy(i) + aa * xx(i), i = 1..nn.
!   nn  number of elements to process
!   aa  scalar multiplier
!   xx  input vector (read only)
!   yy  accumulator vector, updated in place
! Marked as an OpenACC routine so it can also be called from
! offloaded regions.
      subroutine saxpy (nn, aa, xx, yy)
      implicit none
      integer, intent(in) :: nn
      real*4, intent(in) :: aa, xx(nn)
      real*4, intent(inout) :: yy(nn)
      integer :: i
!$acc routine

      do i = 1, nn
         yy(i) = yy(i) + aa * xx(i)
      end do
      end subroutine saxpy
76
! Terminate with "stop 1" unless a(i) and b(i) agree to within an
! absolute tolerance of 0.0001 for every i = 1..n.
!   n  number of elements to compare
!   a  computed values
!   b  reference values
      subroutine validate_results (n, a, b)
      implicit none
      integer, intent(in) :: n
      real*4, intent(in) :: a(n), b(n)
      real*4, parameter :: tol = 0.0001
      integer :: i

      do i = 1, n
! Written as a negated "<=" so that a NaN difference, for which every
! ordered comparison is false, is reported as a mismatch.
         if (.not. (abs (a(i) - b(i)) <= tol)) stop 1
      end do
      end subroutine validate_results
85
86