1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 //#define DEBUG_ROTG
18 
19 #include <stdio.h>
20 #include <string.h>
21 #include <clBLAS.h>
22 
23 #include <devinfo.h>
24 #include "clblas-internal.h"
25 #include "solution_seq.h"
26 
27 
28 clblasStatus
doRotg(CLBlasKargs * kargs,cl_mem A,size_t offA,cl_mem B,size_t offB,cl_mem C,size_t offC,cl_mem S,size_t offS,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)29 doRotg(
30 	CLBlasKargs *kargs,
31     cl_mem A,
32     size_t offA,
33     cl_mem B,
34     size_t offB,
35     cl_mem C,
36     size_t offC,
37     cl_mem S,
38     size_t offS,
39     cl_uint numCommandQueues,
40     cl_command_queue *commandQueues,
41     cl_uint numEventsInWaitList,
42     const cl_event *eventWaitList,
43     cl_event *events)
44 	{
45 		cl_int err;
46 		ListHead seq;
47         clblasStatus retCode = clblasSuccess;
48 
49         // C is of real type even for complex numbers
50         DataType cType = (kargs->dtype == TYPE_COMPLEX_FLOAT)? TYPE_FLOAT :
51                             ((kargs->dtype == TYPE_COMPLEX_DOUBLE)? TYPE_DOUBLE : (kargs->dtype));
52 
53 		if (!clblasInitialized) {
54         return clblasNotInitialized;
55 		}
56 
57 		/* Validate arguments */
58 
59         retCode = checkMemObjects(A, B, A, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET );
60 		if (retCode) {      // for mem objects A, B
61 			#ifdef DEBUG_ROTG
62             printf("Invalid mem object..\n");
63             #endif
64             return retCode;
65 		}
66 		retCode = checkMemObjects(C, S, C, false, X_VEC_ERRSET, Y_VEC_ERRSET, X_VEC_ERRSET );
67 		if (retCode) {      // for mem objects C, S
68 			#ifdef DEBUG_ROTG
69             printf("Invalid mem object..\n");
70             #endif
71             return retCode;
72 		}
73 
74 		// Check wheather enough memory was allocated
75 
76 		if ((retCode = checkVectorSizes(kargs->dtype, 1, A, offA, 1, X_VEC_ERRSET))) {
77 			#ifdef DEBUG_ROTG
78             printf("Invalid Size for A\n");
79             #endif
80             return retCode;
81 		}
82 		if ((retCode = checkVectorSizes(kargs->dtype, 1, B, offB, 1, Y_VEC_ERRSET))) {
83 			#ifdef DEBUG_ROTG
84             printf("Invalid Size for B\n");
85             #endif
86             return retCode;
87 		}
88 
89 		if ((retCode = checkVectorSizes(cType, 1, C, offC, 1, X_VEC_ERRSET))) {
90 			#ifdef DEBUG_ROTG
91             printf("Invalid Size for C\n");
92             #endif
93             return retCode;
94 		}
95 
96 		if ((retCode = checkVectorSizes(kargs->dtype, 1, S, offS, 1, Y_VEC_ERRSET))) {
97 			#ifdef DEBUG_ROTG
98             printf("Invalid Size for S\n");
99             #endif
100             return retCode;
101 		}
102 		///////////////////////////////////////////////////////////////
103 
104 		if ((commandQueues == NULL) || (numCommandQueues == 0))
105 		{
106 			return clblasInvalidValue;
107 		}
108 
109 		/* numCommandQueues will be hardcoded to 1 as of now. No multi-gpu support */
110 		numCommandQueues = 1;
111 		if (commandQueues[0] == NULL)
112 		{
113 			return clblasInvalidCommandQueue;
114 		}
115 
116 		if ((numEventsInWaitList !=0) && (eventWaitList == NULL))
117 		{
118 			return clblasInvalidEventWaitList;
119 		}
120 
121 		kargs->A = A;
122     	kargs->B = B;
123 		kargs->C = C;
124     	kargs->D = S;
125 		kargs->offa = offA;
126 		kargs->offb = offB;
127         kargs->offc = offC;
128         kargs->offd = offS;
129 
130 		listInitHead(&seq);
131 		err = makeSolutionSeq(CLBLAS_ROTG, kargs, numCommandQueues, commandQueues,
132         					        numEventsInWaitList, eventWaitList, events, &seq);
133 		if (err == CL_SUCCESS) {
134        		err = executeSolutionSeq(&seq);
135 		}
136 
137 		freeSolutionSeq(&seq);
138 
139 		return (clblasStatus)err;
140 	}
141 
142 
143 
144 clblasStatus
clblasSrotg(cl_mem SA,size_t offSA,cl_mem SB,size_t offSB,cl_mem C,size_t offC,cl_mem S,size_t offS,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)145 clblasSrotg(
146     cl_mem SA,
147     size_t offSA,
148     cl_mem SB,
149     size_t offSB,
150     cl_mem C,
151     size_t offC,
152     cl_mem S,
153     size_t offS,
154     cl_uint numCommandQueues,
155     cl_command_queue *commandQueues,
156     cl_uint numEventsInWaitList,
157     const cl_event *eventWaitList,
158     cl_event *events)
159 	{
160 		CLBlasKargs kargs;
161 
162 		memset(&kargs, 0, sizeof(kargs));
163 		kargs.dtype = TYPE_FLOAT;
164 
165 		return doRotg(&kargs, SA, offSA, SB, offSB, C, offC, S, offS,
166 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
167 	}
168 
169 clblasStatus
clblasDrotg(cl_mem DA,size_t offDA,cl_mem DB,size_t offDB,cl_mem C,size_t offC,cl_mem S,size_t offS,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)170 clblasDrotg(
171     cl_mem DA,
172     size_t offDA,
173     cl_mem DB,
174     size_t offDB,
175     cl_mem C,
176     size_t offC,
177     cl_mem S,
178     size_t offS,
179     cl_uint numCommandQueues,
180     cl_command_queue *commandQueues,
181     cl_uint numEventsInWaitList,
182     const cl_event *eventWaitList,
183     cl_event *events)
184 	{
185 		CLBlasKargs kargs;
186 
187 		memset(&kargs, 0, sizeof(kargs));
188 		kargs.dtype = TYPE_DOUBLE;
189 
190 		return doRotg(&kargs, DA, offDA, DB, offDB, C, offC, S, offS,
191 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
192 	}
193 
194 clblasStatus
clblasCrotg(cl_mem CA,size_t offCA,cl_mem CB,size_t offCB,cl_mem C,size_t offC,cl_mem S,size_t offS,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)195 clblasCrotg(
196     cl_mem CA,
197     size_t offCA,
198     cl_mem CB,
199     size_t offCB,
200     cl_mem C,
201     size_t offC,
202     cl_mem S,
203     size_t offS,
204     cl_uint numCommandQueues,
205     cl_command_queue *commandQueues,
206     cl_uint numEventsInWaitList,
207     const cl_event *eventWaitList,
208     cl_event *events)
209 	{
210 		CLBlasKargs kargs;
211 
212 		memset(&kargs, 0, sizeof(kargs));
213 		kargs.dtype = TYPE_COMPLEX_FLOAT;
214 
215 		return doRotg(&kargs, CA, offCA, CB, offCB, C, offC, S, offS,
216 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
217 	}
218 
219 clblasStatus
clblasZrotg(cl_mem CA,size_t offCA,cl_mem CB,size_t offCB,cl_mem C,size_t offC,cl_mem S,size_t offS,cl_uint numCommandQueues,cl_command_queue * commandQueues,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)220 clblasZrotg(
221     cl_mem CA,
222     size_t offCA,
223     cl_mem CB,
224     size_t offCB,
225     cl_mem C,
226     size_t offC,
227     cl_mem S,
228     size_t offS,
229     cl_uint numCommandQueues,
230     cl_command_queue *commandQueues,
231     cl_uint numEventsInWaitList,
232     const cl_event *eventWaitList,
233     cl_event *events)
234 	{
235 		CLBlasKargs kargs;
236 
237 		memset(&kargs, 0, sizeof(kargs));
238 		kargs.dtype = TYPE_COMPLEX_DOUBLE;
239 
240 		return doRotg(&kargs, CA, offCA, CB, offCB, C, offC, S, offS,
241 						numCommandQueues, commandQueues, numEventsInWaitList, eventWaitList, events);
242 	}
243