/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
/* { dg-require-effective-target openacc_cublas } */
/* { dg-require-effective-target openacc_cudart } */
5 
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
12 
/* Host reference SAXPY: for each index k in [0, n), overwrite y[k] with
   a * x[k] + y[k].  x is only read.  Nothing is touched when n <= 0.  */
void
saxpy (int n, float a, float *x, float *y)
{
    int idx = 0;

    while (idx < n)
    {
        y[idx] = a * x[idx] + y[idx];
        ++idx;
    }
}
23 
24 void
context_check(CUcontext ctx1)25 context_check (CUcontext ctx1)
26 {
27     CUcontext ctx2, ctx3;
28     CUresult r;
29 
30     r = cuCtxGetCurrent (&ctx2);
31     if (r != CUDA_SUCCESS)
32     {
33         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
34         exit (EXIT_FAILURE);
35     }
36 
37     if (ctx1 != ctx2)
38     {
39         fprintf (stderr, "new context established\n");
40         exit (EXIT_FAILURE);
41     }
42 
43     ctx3 = (CUcontext) acc_get_current_cuda_context ();
44 
45     if (ctx1 != ctx3)
46     {
47         fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
48         exit (EXIT_FAILURE);
49     }
50 
51     return;
52 }
53 
54 int
main(int argc,char ** argv)55 main (int argc, char **argv)
56 {
57     cublasStatus_t s;
58     cublasHandle_t h;
59     CUcontext pctx;
60     CUresult r;
61     int i;
62     const int N = 256;
63     float *h_X, *h_Y1, *h_Y2;
64     float *d_X,*d_Y;
65     float alpha = 2.0f;
66     float error_norm;
67     float ref_norm;
68 
69     /* Test 3 - OpenACC creates, cuBLAS shares.  */
70 
71     acc_set_device_num (0, acc_device_nvidia);
72 
73     r = cuCtxGetCurrent (&pctx);
74     if (r != CUDA_SUCCESS)
75     {
76         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
77         exit (EXIT_FAILURE);
78     }
79 
80     h_X = (float *) malloc (N * sizeof (float));
81     if (h_X == 0)
82     {
83         fprintf (stderr, "malloc failed: for h_X\n");
84         exit (EXIT_FAILURE);
85     }
86 
87     h_Y1 = (float *) malloc (N * sizeof (float));
88     if (h_Y1 == 0)
89     {
90         fprintf (stderr, "malloc failed: for h_Y1\n");
91         exit (EXIT_FAILURE);
92     }
93 
94     h_Y2 = (float *) malloc (N * sizeof (float));
95     if (h_Y2 == 0)
96     {
97         fprintf (stderr, "malloc failed: for h_Y2\n");
98         exit (EXIT_FAILURE);
99     }
100 
101     for (i = 0; i < N; i++)
102     {
103         h_X[i] = rand () / (float) RAND_MAX;
104         h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
105     }
106 
107     d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
108     if (d_X == NULL)
109     {
110         fprintf (stderr, "copyin error h_X\n");
111         exit (EXIT_FAILURE);
112     }
113 
114     d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
115     if (d_Y == NULL)
116     {
117         fprintf (stderr, "copyin error h_Y1\n");
118         exit (EXIT_FAILURE);
119     }
120 
121     context_check (pctx);
122 
123     s = cublasCreate (&h);
124     if (s != CUBLAS_STATUS_SUCCESS)
125     {
126         fprintf (stderr, "cublasCreate failed: %d\n", s);
127         exit (EXIT_FAILURE);
128     }
129 
130     context_check (pctx);
131 
132     s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
133     if (s != CUBLAS_STATUS_SUCCESS)
134     {
135         fprintf (stderr, "cublasSaxpy failed: %d\n", s);
136         exit (EXIT_FAILURE);
137     }
138 
139     context_check (pctx);
140 
141     acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
142 
143     context_check (pctx);
144 
145     saxpy (N, alpha, h_X, h_Y2);
146 
147     error_norm = 0;
148     ref_norm = 0;
149 
150     for (i = 0; i < N; ++i)
151     {
152         float diff;
153 
154         diff = h_Y1[i] - h_Y2[i];
155         error_norm += diff * diff;
156         ref_norm += h_Y2[i] * h_Y2[i];
157     }
158 
159     error_norm = (float) sqrt ((double) error_norm);
160     ref_norm = (float) sqrt ((double) ref_norm);
161 
162     if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
163     {
164         fprintf (stderr, "math error\n");
165         exit (EXIT_FAILURE);
166     }
167 
168     acc_delete (&h_X[0], N * sizeof (float));
169     acc_delete (&h_Y1[0], N * sizeof (float));
170 
171     free (h_X);
172     free (h_Y1);
173     free (h_Y2);
174 
175     context_check (pctx);
176 
177     s = cublasDestroy (h);
178     if (s != CUBLAS_STATUS_SUCCESS)
179     {
180         fprintf (stderr, "cublasDestroy failed: %d\n", s);
181         exit (EXIT_FAILURE);
182     }
183 
184     context_check (pctx);
185 
186     acc_shutdown (acc_device_nvidia);
187 
188     r = cuCtxGetCurrent (&pctx);
189     if (r != CUDA_SUCCESS)
190     {
191         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
192         exit (EXIT_FAILURE);
193     }
194 
195     if (pctx)
196     {
197         fprintf (stderr, "Unexpected context\n");
198         exit (EXIT_FAILURE);
199     }
200 
201     return EXIT_SUCCESS;
202 }
203