1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda -lcublas -lcudart" } */
3 /* { dg-require-effective-target openacc_cublas } */
4 /* { dg-require-effective-target openacc_cudart } */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <cuda.h>
9 #include <cuda_runtime_api.h>
10 #include <cublas_v2.h>
11 #include <openacc.h>
12
13 void
saxpy(int n,float a,float * x,float * y)14 saxpy (int n, float a, float *x, float *y)
15 {
16 int i;
17
18 for (i = 0; i < n; i++)
19 {
20 y[i] = a * x[i] + y[i];
21 }
22 }
23
24 void
context_check(CUcontext ctx1)25 context_check (CUcontext ctx1)
26 {
27 CUcontext ctx2, ctx3;
28 CUresult r;
29
30 r = cuCtxGetCurrent (&ctx2);
31 if (r != CUDA_SUCCESS)
32 {
33 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
34 exit (EXIT_FAILURE);
35 }
36
37 if (ctx1 != ctx2)
38 {
39 fprintf (stderr, "new context established\n");
40 exit (EXIT_FAILURE);
41 }
42
43 ctx3 = (CUcontext) acc_get_current_cuda_context ();
44
45 if (ctx1 != ctx3)
46 {
47 fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
48 exit (EXIT_FAILURE);
49 }
50
51 return;
52 }
53
54 int
main(int argc,char ** argv)55 main (int argc, char **argv)
56 {
57 cublasStatus_t s;
58 cublasHandle_t h;
59 CUcontext pctx;
60 CUresult r;
61 int i;
62 const int N = 256;
63 float *h_X, *h_Y1, *h_Y2;
64 float *d_X,*d_Y;
65 float alpha = 2.0f;
66 float error_norm;
67 float ref_norm;
68
69 /* Test 4 - OpenACC creates, cuBLAS shares. */
70
71 acc_set_device_num (0, acc_device_nvidia);
72
73 r = cuCtxGetCurrent (&pctx);
74 if (r != CUDA_SUCCESS)
75 {
76 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
77 exit (EXIT_FAILURE);
78 }
79
80 h_X = (float *) malloc (N * sizeof (float));
81 if (h_X == 0)
82 {
83 fprintf (stderr, "malloc failed: for h_X\n");
84 exit (EXIT_FAILURE);
85 }
86
87 h_Y1 = (float *) malloc (N * sizeof (float));
88 if (h_Y1 == 0)
89 {
90 fprintf (stderr, "malloc failed: for h_Y1\n");
91 exit (EXIT_FAILURE);
92 }
93
94 h_Y2 = (float *) malloc (N * sizeof (float));
95 if (h_Y2 == 0)
96 {
97 fprintf (stderr, "malloc failed: for h_Y2\n");
98 exit (EXIT_FAILURE);
99 }
100
101 for (i = 0; i < N; i++)
102 {
103 h_X[i] = rand () / (float) RAND_MAX;
104 h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
105 }
106
107 #pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
108 {
109 int i;
110
111 for (i = 0; i < N; i++)
112 {
113 h_Y2[i] = alpha * h_X[i] + h_Y2[i];
114 }
115 }
116
117 r = cuCtxGetCurrent (&pctx);
118 if (r != CUDA_SUCCESS)
119 {
120 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
121 exit (EXIT_FAILURE);
122 }
123
124 d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
125 if (d_X == NULL)
126 {
127 fprintf (stderr, "copyin error h_Y1\n");
128 exit (EXIT_FAILURE);
129 }
130
131 d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
132 if (d_Y == NULL)
133 {
134 fprintf (stderr, "copyin error h_Y1\n");
135 exit (EXIT_FAILURE);
136 }
137
138 s = cublasCreate (&h);
139 if (s != CUBLAS_STATUS_SUCCESS)
140 {
141 fprintf (stderr, "cublasCreate failed: %d\n", s);
142 exit (EXIT_FAILURE);
143 }
144
145 context_check (pctx);
146
147 s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
148 if (s != CUBLAS_STATUS_SUCCESS)
149 {
150 fprintf (stderr, "cublasSaxpy failed: %d\n", s);
151 exit (EXIT_FAILURE);
152 }
153
154 context_check (pctx);
155
156 acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
157
158 context_check (pctx);
159
160 error_norm = 0;
161 ref_norm = 0;
162
163 for (i = 0; i < N; ++i)
164 {
165 float diff;
166
167 diff = h_Y1[i] - h_Y2[i];
168 error_norm += diff * diff;
169 ref_norm += h_Y2[i] * h_Y2[i];
170 }
171
172 error_norm = (float) sqrt ((double) error_norm);
173 ref_norm = (float) sqrt ((double) ref_norm);
174
175 if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
176 {
177 fprintf (stderr, "math error\n");
178 exit (EXIT_FAILURE);
179 }
180
181 acc_delete (&h_X[0], N * sizeof (float));
182 acc_delete (&h_Y1[0], N * sizeof (float));
183
184 free (h_X);
185 free (h_Y1);
186 free (h_Y2);
187
188 context_check (pctx);
189
190 s = cublasDestroy (h);
191 if (s != CUBLAS_STATUS_SUCCESS)
192 {
193 fprintf (stderr, "cublasDestroy failed: %d\n", s);
194 exit (EXIT_FAILURE);
195 }
196
197 context_check (pctx);
198
199 acc_shutdown (acc_device_nvidia);
200
201 r = cuCtxGetCurrent (&pctx);
202 if (r != CUDA_SUCCESS)
203 {
204 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
205 exit (EXIT_FAILURE);
206 }
207
208 if (pctx)
209 {
210 fprintf (stderr, "Unexpected context\n");
211 exit (EXIT_FAILURE);
212 }
213
214 return EXIT_SUCCESS;
215 }
216