1! Test host_data interoperability with CUDA blas using modules. 2 3! { dg-do run { target openacc_nvidia_accel_selected } } 4! { dg-additional-options "-lcublas -Wall -Wextra" } 5! { dg-require-effective-target openacc_cublas } 6 7module cublas 8 interface 9 subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy") 10 use iso_c_binding 11 integer(kind=c_int), value :: N 12 real(kind=c_float), value :: alpha 13 type(*), dimension(*) :: x 14 integer(kind=c_int), value :: incx 15 type(*), dimension(*) :: y 16 integer(kind=c_int), value :: incy 17 end subroutine cublassaxpy 18 end interface 19 20contains 21 subroutine saxpy (nn, aa, xx, yy) 22 integer :: nn 23 real*4 :: aa, xx(nn), yy(nn) 24 integer i 25 !$acc routine 26 27 do i = 1, nn 28 yy(i) = yy(i) + aa * xx(i) 29 end do 30 end subroutine saxpy 31 32 subroutine validate_results (n, a, b) 33 integer :: n 34 real*4 :: a(n), b(n) 35 36 do i = 1, N 37 if (abs(a(i) - b(i)) > 0.0001) stop 1 38 end do 39 end subroutine validate_results 40end module cublas 41 42program test 43 use cublas 44 implicit none 45 46 integer, parameter :: N = 10 47 integer :: i 48 real*4 :: x_ref(N), y_ref(N), x(N), y(N), a 49 50 a = 2.0 51 52 do i = 1, N 53 x(i) = 4.0 * i 54 y(i) = 3.0 55 x_ref(i) = x(i) 56 y_ref(i) = y(i) 57 end do 58 59 call saxpy (N, a, x_ref, y_ref) 60 61 !$acc data copyin (x) copy (y) 62 !$acc host_data use_device (x, y) 63 call cublassaxpy(N, a, x, 1, y, 1) 64 !$acc end host_data 65 !$acc end data 66 67 call validate_results (N, y, y_ref) 68 69 !$acc data create (x) copyout (y) 70 !$acc parallel loop 71 do i = 1, N 72 y(i) = 3.0 73 end do 74 !$acc end parallel loop 75 76 !$acc host_data use_device (x, y) 77 call cublassaxpy(N, a, x, 1, y, 1) 78 !$acc end host_data 79 !$acc end data 80 81 call validate_results (N, y, y_ref) 82 83 y(:) = 3.0 84 85 !$acc data copyin (x) copyin (a) copy (y) 86 !$acc parallel present (x) pcopy (y) present (a) 87 call saxpy (N, a, x, y) 88 !$acc end parallel 89 !$acc end data 90 91 call validate_results (N, y, y_ref) 92 93 y(:) = 3.0 94 95 !$acc enter data copyin (x, a, y) 96 !$acc parallel present (x) pcopy (y) present (a) 97 call saxpy (N, a, x, y) 98 !$acc end parallel 99 !$acc exit data delete (x, a) copyout (y) 100 101 call validate_results (N, y, y_ref) 102end program test 103