1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
3
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <cuda.h>
7 #include <cuda_runtime_api.h>
8 #include <cublas_v2.h>
9 #include <openacc.h>
10
11 void
saxpy(int n,float a,float * x,float * y)12 saxpy (int n, float a, float *x, float *y)
13 {
14 int i;
15
16 for (i = 0; i < n; i++)
17 {
18 y[i] = a * x[i] + y[i];
19 }
20 }
21
22 void
context_check(CUcontext ctx1)23 context_check (CUcontext ctx1)
24 {
25 CUcontext ctx2, ctx3;
26 CUresult r;
27
28 r = cuCtxGetCurrent (&ctx2);
29 if (r != CUDA_SUCCESS)
30 {
31 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
32 exit (EXIT_FAILURE);
33 }
34
35 if (ctx1 != ctx2)
36 {
37 fprintf (stderr, "new context established\n");
38 exit (EXIT_FAILURE);
39 }
40
41 ctx3 = (CUcontext) acc_get_current_cuda_context ();
42
43 if (ctx1 != ctx3)
44 {
45 fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
46 exit (EXIT_FAILURE);
47 }
48
49 return;
50 }
51
52 int
main(int argc,char ** argv)53 main (int argc, char **argv)
54 {
55 cublasStatus_t s;
56 cublasHandle_t h;
57 CUcontext pctx;
58 CUresult r;
59 int i;
60 const int N = 256;
61 float *h_X, *h_Y1, *h_Y2;
62 float *d_X,*d_Y;
63 float alpha = 2.0f;
64 float error_norm;
65 float ref_norm;
66
67 /* Test 4 - OpenACC creates, cuBLAS shares. */
68
69 acc_set_device_num (0, acc_device_nvidia);
70
71 r = cuCtxGetCurrent (&pctx);
72 if (r != CUDA_SUCCESS)
73 {
74 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
75 exit (EXIT_FAILURE);
76 }
77
78 h_X = (float *) malloc (N * sizeof (float));
79 if (h_X == 0)
80 {
81 fprintf (stderr, "malloc failed: for h_X\n");
82 exit (EXIT_FAILURE);
83 }
84
85 h_Y1 = (float *) malloc (N * sizeof (float));
86 if (h_Y1 == 0)
87 {
88 fprintf (stderr, "malloc failed: for h_Y1\n");
89 exit (EXIT_FAILURE);
90 }
91
92 h_Y2 = (float *) malloc (N * sizeof (float));
93 if (h_Y2 == 0)
94 {
95 fprintf (stderr, "malloc failed: for h_Y2\n");
96 exit (EXIT_FAILURE);
97 }
98
99 for (i = 0; i < N; i++)
100 {
101 h_X[i] = rand () / (float) RAND_MAX;
102 h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
103 }
104
105 #pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
106 {
107 int i;
108
109 for (i = 0; i < N; i++)
110 {
111 h_Y2[i] = alpha * h_X[i] + h_Y2[i];
112 }
113 }
114
115 r = cuCtxGetCurrent (&pctx);
116 if (r != CUDA_SUCCESS)
117 {
118 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
119 exit (EXIT_FAILURE);
120 }
121
122 d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
123 if (d_X == NULL)
124 {
125 fprintf (stderr, "copyin error h_Y1\n");
126 exit (EXIT_FAILURE);
127 }
128
129 d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
130 if (d_Y == NULL)
131 {
132 fprintf (stderr, "copyin error h_Y1\n");
133 exit (EXIT_FAILURE);
134 }
135
136 s = cublasCreate (&h);
137 if (s != CUBLAS_STATUS_SUCCESS)
138 {
139 fprintf (stderr, "cublasCreate failed: %d\n", s);
140 exit (EXIT_FAILURE);
141 }
142
143 context_check (pctx);
144
145 s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
146 if (s != CUBLAS_STATUS_SUCCESS)
147 {
148 fprintf (stderr, "cublasSaxpy failed: %d\n", s);
149 exit (EXIT_FAILURE);
150 }
151
152 context_check (pctx);
153
154 acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
155
156 context_check (pctx);
157
158 error_norm = 0;
159 ref_norm = 0;
160
161 for (i = 0; i < N; ++i)
162 {
163 float diff;
164
165 diff = h_Y1[i] - h_Y2[i];
166 error_norm += diff * diff;
167 ref_norm += h_Y2[i] * h_Y2[i];
168 }
169
170 error_norm = (float) sqrt ((double) error_norm);
171 ref_norm = (float) sqrt ((double) ref_norm);
172
173 if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
174 {
175 fprintf (stderr, "math error\n");
176 exit (EXIT_FAILURE);
177 }
178
179 acc_delete (&h_X[0], N * sizeof (float));
180 acc_delete (&h_Y1[0], N * sizeof (float));
181
182 free (h_X);
183 free (h_Y1);
184 free (h_Y2);
185
186 context_check (pctx);
187
188 s = cublasDestroy (h);
189 if (s != CUBLAS_STATUS_SUCCESS)
190 {
191 fprintf (stderr, "cublasDestroy failed: %d\n", s);
192 exit (EXIT_FAILURE);
193 }
194
195 context_check (pctx);
196
197 acc_shutdown (acc_device_nvidia);
198
199 r = cuCtxGetCurrent (&pctx);
200 if (r != CUDA_SUCCESS)
201 {
202 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
203 exit (EXIT_FAILURE);
204 }
205
206 if (pctx)
207 {
208 fprintf (stderr, "Unexpected context\n");
209 exit (EXIT_FAILURE);
210 }
211
212 return EXIT_SUCCESS;
213 }
214