1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
3 
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
10 
/* Host reference implementation of single-precision "a*x plus y".

   For every index i in [0, n) this stores a * x[i] + y[i] back into y[i].
   X is only read; Y is updated in place.  Nothing happens when N is zero
   or negative.  */
void
saxpy (int n, float a, float *x, float *y)
{
    int i;

    for (i = 0; i < n; i++)
    {
        y[i] = a * x[i] + y[i];
    }
}
21 
22 void
context_check(CUcontext ctx1)23 context_check (CUcontext ctx1)
24 {
25     CUcontext ctx2, ctx3;
26     CUresult r;
27 
28     r = cuCtxGetCurrent (&ctx2);
29     if (r != CUDA_SUCCESS)
30     {
31         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
32         exit (EXIT_FAILURE);
33     }
34 
35     if (ctx1 != ctx2)
36     {
37         fprintf (stderr, "new context established\n");
38         exit (EXIT_FAILURE);
39     }
40 
41     ctx3 = (CUcontext) acc_get_current_cuda_context ();
42 
43     if (ctx1 != ctx3)
44     {
45         fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
46         exit (EXIT_FAILURE);
47     }
48 
49     return;
50 }
51 
52 int
main(int argc,char ** argv)53 main (int argc, char **argv)
54 {
55     cublasStatus_t s;
56     cublasHandle_t h;
57     CUcontext pctx;
58     CUresult r;
59     int i;
60     const int N = 256;
61     float *h_X, *h_Y1, *h_Y2;
62     float *d_X,*d_Y;
63     float alpha = 2.0f;
64     float error_norm;
65     float ref_norm;
66 
67     /* Test 3 - OpenACC creates, cuBLAS shares.  */
68 
69     acc_set_device_num (0, acc_device_nvidia);
70 
71     r = cuCtxGetCurrent (&pctx);
72     if (r != CUDA_SUCCESS)
73     {
74         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
75         exit (EXIT_FAILURE);
76     }
77 
78     h_X = (float *) malloc (N * sizeof (float));
79     if (h_X == 0)
80     {
81         fprintf (stderr, "malloc failed: for h_X\n");
82         exit (EXIT_FAILURE);
83     }
84 
85     h_Y1 = (float *) malloc (N * sizeof (float));
86     if (h_Y1 == 0)
87     {
88         fprintf (stderr, "malloc failed: for h_Y1\n");
89         exit (EXIT_FAILURE);
90     }
91 
92     h_Y2 = (float *) malloc (N * sizeof (float));
93     if (h_Y2 == 0)
94     {
95         fprintf (stderr, "malloc failed: for h_Y2\n");
96         exit (EXIT_FAILURE);
97     }
98 
99     for (i = 0; i < N; i++)
100     {
101         h_X[i] = rand () / (float) RAND_MAX;
102         h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
103     }
104 
105     d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
106     if (d_X == NULL)
107     {
108         fprintf (stderr, "copyin error h_X\n");
109         exit (EXIT_FAILURE);
110     }
111 
112     d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
113     if (d_Y == NULL)
114     {
115         fprintf (stderr, "copyin error h_Y1\n");
116         exit (EXIT_FAILURE);
117     }
118 
119     context_check (pctx);
120 
121     s = cublasCreate (&h);
122     if (s != CUBLAS_STATUS_SUCCESS)
123     {
124         fprintf (stderr, "cublasCreate failed: %d\n", s);
125         exit (EXIT_FAILURE);
126     }
127 
128     context_check (pctx);
129 
130     s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
131     if (s != CUBLAS_STATUS_SUCCESS)
132     {
133         fprintf (stderr, "cublasSaxpy failed: %d\n", s);
134         exit (EXIT_FAILURE);
135     }
136 
137     context_check (pctx);
138 
139     acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
140 
141     context_check (pctx);
142 
143     saxpy (N, alpha, h_X, h_Y2);
144 
145     error_norm = 0;
146     ref_norm = 0;
147 
148     for (i = 0; i < N; ++i)
149     {
150         float diff;
151 
152         diff = h_Y1[i] - h_Y2[i];
153         error_norm += diff * diff;
154         ref_norm += h_Y2[i] * h_Y2[i];
155     }
156 
157     error_norm = (float) sqrt ((double) error_norm);
158     ref_norm = (float) sqrt ((double) ref_norm);
159 
160     if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
161     {
162         fprintf (stderr, "math error\n");
163         exit (EXIT_FAILURE);
164     }
165 
166     free (h_X);
167     free (h_Y1);
168     free (h_Y2);
169 
170     acc_free (d_X);
171     acc_free (d_Y);
172 
173     context_check (pctx);
174 
175     s = cublasDestroy (h);
176     if (s != CUBLAS_STATUS_SUCCESS)
177     {
178         fprintf (stderr, "cublasDestroy failed: %d\n", s);
179         exit (EXIT_FAILURE);
180     }
181 
182     context_check (pctx);
183 
184     acc_shutdown (acc_device_nvidia);
185 
186     r = cuCtxGetCurrent (&pctx);
187     if (r != CUDA_SUCCESS)
188     {
189         fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
190         exit (EXIT_FAILURE);
191     }
192 
193     if (pctx)
194     {
195         fprintf (stderr, "Unexpected context\n");
196         exit (EXIT_FAILURE);
197     }
198 
199     return EXIT_SUCCESS;
200 }
201