1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 //#define DEBUG_GER
19 
20 #include <stdio.h>
21 #include <string.h>
22 #include <clBLAS.h>
23 
24 #include <devinfo.h>
25 #include "clblas-internal.h"
26 #include "solution_seq.h"
27 
28 
29 clblasStatus
doGer(CLBlasKargs * kargs,clblasOrder order,size_t M,size_t N,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,int doConj,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)30 doGer(
31 	CLBlasKargs *kargs,
32 	clblasOrder order,
33     size_t M,
34     size_t N,
35     const cl_mem X,
36     size_t offx,
37     int incx,
38     const cl_mem Y,
39     size_t offy,
40     int incy,
41     cl_mem  A,
42     size_t offa,
43     size_t lda,
44 	int doConj,
45     cl_uint numCommandQueues,
46     cl_command_queue *commandQueues,
47     cl_uint numEventsInWaitList,
48     const cl_event *eventWaitList,
49     cl_event *events)
50 	{
51 		cl_int err;
52 		ListHead seq;
53         clblasStatus retCode = clblasSuccess;
54 
55 		if (!clblasInitialized) {
56         return clblasNotInitialized;
57 		}
58 
59 		/* Validate arguments */
60 
61 		if ((retCode = checkMemObjects(A, X, Y, true, A_MAT_ERRSET, X_VEC_ERRSET, Y_VEC_ERRSET))) {
62 			#ifdef DEBUG_GER
63 			printf("Invalid mem object..\n");
64 			#endif
65             return retCode;
66 		}
67 
68 		// Check wheather enough memory was allocated
69 
70 		if ((retCode = checkMatrixSizes(kargs->dtype, order, clblasNoTrans, M, N, A, offa, lda, A_MAT_ERRSET))) {
71 
72 			#ifdef DEBUG_GER
73 			printf("Invalid Size for A %d\n",retCode );
74 			#endif
75             return retCode;
76 		}
77 		if ((retCode = checkVectorSizes(kargs->dtype, M, X, offx, incx, X_VEC_ERRSET))) {
78 			#ifdef DEBUG_GER
79 			printf("Invalid Size for X\n");
80 			#endif
81             return retCode;
82 		}
83 		if ((retCode = checkVectorSizes(kargs->dtype, N, Y, offy, incy, Y_VEC_ERRSET))) {
84 			#ifdef DEBUG_GER
85 			printf("Invalid Size for Y\n");
86 			#endif
87             return retCode;
88 		}
89 		///////////////////////////////////////////////////////////////
90 
91 		if ((commandQueues == NULL) || (numCommandQueues == 0))
92 		{
93 			return clblasInvalidValue;
94 		}
95 
96 		/* numCommandQueues will be hardcoded to 1 as of now. No multi-gpu support */
97 		numCommandQueues = 1;
98 		if (commandQueues[0] == NULL)
99 		{
100 			return clblasInvalidCommandQueue;
101 		}
102 
103 		if ((numEventsInWaitList !=0) && (eventWaitList == NULL))
104 		{
105 			return clblasInvalidEventWaitList;
106 		}
107 
108 		/*
109  		 * ASSUMPTION:
110  		 * doTRMV assumes "commandQueue" of 0. The same is reflected in
111 		 * "makeSolutionSeq" as well. If either of them changes in future,
112 		 * this code needs to be revisited.
113   		 */
114 
115 		kargs->order = order;
116 		kargs->M = M;
117 		kargs->N = N;
118 		kargs->A = A;
119 		kargs->offa = offa;
120 		kargs->offA = offa;
121 		kargs->lda.matrix = lda;
122 		kargs->B = X;
123 		kargs->offBX = offx;
124 		kargs->ldb.vector = incx;	// Will be using this as incx
125 		kargs->C = Y;
126 		kargs->offCY = offy;
127 		kargs->ldc.vector = incy;	// Will be using this as incy
128 		kargs->offsetM = 0;
129 		kargs->offsetN = 0;
130 		kargs->scimage[0] = 0;
131 		kargs->scimage[1] = 0;
132 		kargs->K = (size_t)doConj; // Will be using K as doConj parameter
133 
134 		#ifdef DEBUG_GER
135 		printf("Calling makeSolutionSeq from DoGer: GER\n");
136 		#endif
137 
138 		listInitHead(&seq);
139 		err = makeSolutionSeq(CLBLAS_GER, kargs, numCommandQueues, commandQueues,
140         					  numEventsInWaitList, eventWaitList, events, &seq);
141 		if (err == CL_SUCCESS) {
142        		err = executeSolutionSeq(&seq);
143 		}
144 
145 		freeSolutionSeq(&seq);
146 
147 		return (clblasStatus)err;
148 	}
149 
150 
151 
152 
153 
154 clblasStatus
clblasSger(clblasOrder order,size_t M,size_t N,float alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)155 clblasSger(
156     clblasOrder order,
157     size_t M,
158     size_t N,
159     float alpha,
160     const cl_mem X,
161     size_t offx,
162     int incx,
163     const cl_mem Y,
164     size_t offy,
165     int incy,
166     cl_mem  A,
167     size_t offa,
168     size_t lda,
169     cl_uint numCommandQueues,
170     cl_command_queue *commandQueues,
171     cl_uint numEventsInWaitList,
172     const cl_event *eventWaitList,
173     cl_event *events)
174 	{
175 		CLBlasKargs kargs;
176 		int doConj;
177 
178 		#ifdef DEBUG_GER
179 		printf("\nSGER Called\n");
180 		#endif
181 
182 		memset(&kargs, 0, sizeof(kargs));
183 		kargs.dtype = TYPE_FLOAT;
184 		kargs.alpha.argFloat = alpha;
185 		doConj = 0;
186 
187 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
188 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
189 	}
190 
191 clblasStatus
clblasDger(clblasOrder order,size_t M,size_t N,double alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)192 clblasDger(
193     clblasOrder order,
194     size_t M,
195     size_t N,
196     double alpha,
197     const cl_mem X,
198     size_t offx,
199     int incx,
200     const cl_mem Y,
201     size_t offy,
202     int incy,
203     cl_mem A,
204     size_t offa,
205     size_t lda,
206     cl_uint numCommandQueues,
207     cl_command_queue *commandQueues,
208     cl_uint numEventsInWaitList,
209     const cl_event *eventWaitList,
210     cl_event *events)
211 	{
212 		CLBlasKargs kargs;
213 		int doConj;
214 
215 		#ifdef DEBUG_GER
216 		printf("\nDGER Called\n");
217 		#endif
218 
219 		memset(&kargs, 0, sizeof(kargs));
220 		kargs.dtype = TYPE_DOUBLE;
221 		kargs.alpha.argDouble = alpha;
222 		doConj = 0;
223 
224 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
225 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
226 	}
227 
228 clblasStatus
clblasCgeru(clblasOrder order,size_t M,size_t N,cl_float2 alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)229 clblasCgeru(
230     clblasOrder order,
231     size_t M,
232     size_t N,
233     cl_float2 alpha,
234     const cl_mem X,
235     size_t offx,
236     int incx,
237     const cl_mem Y,
238     size_t offy,
239     int  incy,
240     cl_mem A,
241     size_t offa,
242     size_t lda,
243     cl_uint numCommandQueues,
244     cl_command_queue *commandQueues,
245     cl_uint numEventsInWaitList,
246     const cl_event *eventWaitList,
247     cl_event *events)
248 	{
249 		CLBlasKargs kargs;
250 		int doConj;
251 
252 		#ifdef DEBUG_GER
253 		printf("\nCGERU Called\n");
254 		#endif
255 
256 		memset(&kargs, 0, sizeof(kargs));
257 		kargs.dtype = TYPE_COMPLEX_FLOAT;
258 		kargs.alpha.argFloatComplex = alpha;
259 		doConj = 0;
260 
261 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
262 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
263 	}
264 
265 clblasStatus
clblasZgeru(clblasOrder order,size_t M,size_t N,cl_double2 alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)266 clblasZgeru(
267     clblasOrder order,
268     size_t M,
269     size_t N,
270     cl_double2 alpha,
271     const cl_mem X,
272     size_t offx,
273     int incx,
274     const cl_mem Y,
275     size_t offy,
276     int incy,
277     cl_mem A,
278     size_t offa,
279     size_t lda,
280     cl_uint numCommandQueues,
281     cl_command_queue *commandQueues,
282     cl_uint numEventsInWaitList,
283     const cl_event *eventWaitList,
284     cl_event *events)
285 	{
286 		CLBlasKargs kargs;
287 		int doConj;
288 
289 		#ifdef DEBUG_GER
290 		printf("\nZGERU Called\n");
291 		#endif
292 
293 		memset(&kargs, 0, sizeof(kargs));
294 		kargs.dtype = TYPE_COMPLEX_DOUBLE;
295 		kargs.alpha.argDoubleComplex = alpha;
296 		doConj = 0;
297 
298 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
299 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
300 	}
301 
302 clblasStatus
clblasCgerc(clblasOrder order,size_t M,size_t N,cl_float2 alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)303 clblasCgerc(
304     clblasOrder order,
305     size_t M,
306     size_t N,
307     cl_float2 alpha,
308     const cl_mem X,
309     size_t offx,
310     int incx,
311     const cl_mem Y,
312     size_t offy,
313     int  incy,
314     cl_mem A,
315     size_t offa,
316     size_t lda,
317     cl_uint numCommandQueues,
318     cl_command_queue *commandQueues,
319     cl_uint numEventsInWaitList,
320     const cl_event *eventWaitList,
321     cl_event *events)
322 	{
323 		CLBlasKargs kargs;
324 		int doConj;
325 
326 		#ifdef DEBUG_GER
327 		printf("\nCGERC Called\n");
328 		#endif
329 
330 		memset(&kargs, 0, sizeof(kargs));
331 		kargs.dtype = TYPE_COMPLEX_FLOAT;
332 		kargs.alpha.argFloatComplex = alpha;
333 		doConj = 1;
334 
335 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
336 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
337 	}
338 
339 clblasStatus
clblasZgerc(clblasOrder order,size_t M,size_t N,cl_double2 alpha,const cl_mem X,size_t offx,int incx,const cl_mem Y,size_t offy,int incy,cl_mem A,size_t offa,size_t lda,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)340 clblasZgerc(
341     clblasOrder order,
342     size_t M,
343     size_t N,
344     cl_double2 alpha,
345     const cl_mem X,
346     size_t offx,
347     int incx,
348     const cl_mem Y,
349     size_t offy,
350     int incy,
351     cl_mem A,
352     size_t offa,
353     size_t lda,
354     cl_uint numCommandQueues,
355     cl_command_queue *commandQueues,
356     cl_uint numEventsInWaitList,
357     const cl_event *eventWaitList,
358     cl_event *events)
359 	{
360 		CLBlasKargs kargs;
361 		int doConj;
362 
363 		#ifdef DEBUG_GER
364 		printf("\nZGERC Called\n");
365 		#endif
366 
367 		memset(&kargs, 0, sizeof(kargs));
368 		kargs.dtype = TYPE_COMPLEX_DOUBLE;
369 		kargs.alpha.argDoubleComplex = alpha;
370 		doConj = 1;
371 
372 		return doGer(&kargs, order, M, N, X, offx, incx, Y, offy, incy, A, offa, lda, doConj,
373 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
374 	}
375 
376 
377