1 /************************************************************************
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17 #include <string.h>
18 #include <clBLAS.h>
19 #include <limits.h>
20
21 #include <functor.h>
22 #include <functor_selector.h>
23
24
// Exchange the values of the two lvalues a and b, both of type TYPE.
// Each argument is expanded twice, so it must not have side effects.
// The arguments are parenthesized so that expressions such as *p or v[i]
// keep their meaning inside the expansion, and the do/while(0) wrapper lets
// the macro be used wherever a single statement is expected.
#define SWAP(TYPE,a,b) do { TYPE swap_tmp_ = (a) ; (a) = (b) ; (b) = swap_tmp_ ; } while(0)
26
// Return 1 if the area starting at point (x,y) and of size (w,h) lies
// entirely within the array of size d1 x d2, and 0 otherwise.
//
// Any dimension or size of INT_MAX or more is rejected outright, and the
// starting point must itself be a valid position: 0 <= x < d1, 0 <= y < d2.
static int inside2d( size_t d1, size_t d2, int x, int y, size_t w, size_t h )
{
    // Very large dimensions are most likely a bug in the caller.
    const size_t limit = (size_t)INT_MAX ;
    if ( d1 >= limit || d2 >= limit || w >= limit || h >= limit ) {
        return 0 ;
    }

    // A negative origin is never inside the array.
    if ( x < 0 || y < 0 ) {
        return 0 ;
    }

    // Both coordinates are now known to be non-negative, so they can be
    // compared and subtracted as unsigned quantities.
    const size_t ux = (size_t)x ;
    const size_t uy = (size_t)y ;

    if ( ux >= d1 || uy >= d2 ) {
        return 0 ;
    }

    // The requested extent must fit in what remains after the origin.
    const int fits_w = ( w <= d1 - ux ) ;
    const int fits_h = ( h <= d2 - uy ) ;

    return ( fits_w && fits_h ) ? 1 : 0 ;
}
48
49 extern "C"
clblasFillVectorAsync(size_t nb_elem,size_t element_size,cl_mem A,size_t offA,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)50 clblasStatus clblasFillVectorAsync( size_t nb_elem,
51 size_t element_size,
52 cl_mem A, size_t offA,
53 const void * host,
54 cl_command_queue command_queue,
55 cl_uint numEventsInWaitList,
56 const cl_event *eventWaitList,
57 cl_event *event)
58 {
59
60 return (clblasStatus) clEnqueueFillBuffer(command_queue,
61 A,
62 host,
63 element_size,
64 offA*element_size,
65 nb_elem*element_size,
66 numEventsInWaitList,
67 eventWaitList,
68 event);
69 }
70
71
72
73 extern "C"
clblasFillVector(size_t nb_elem,size_t element_size,cl_mem A,size_t offA,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)74 clblasStatus clblasFillVector(
75 size_t nb_elem,
76 size_t element_size,
77 cl_mem A, size_t offA,
78 const void * host,
79 cl_command_queue command_queue,
80 cl_uint numEventsInWaitList,
81 const cl_event *eventWaitList)
82 {
83 cl_event event ;
84 cl_int err = clblasFillVectorAsync(
85 nb_elem,
86 element_size,
87 A, offA,
88 host,
89 command_queue,
90 numEventsInWaitList, eventWaitList,
91 &event) ;
92
93 if (err == clblasSuccess) {
94 err = clWaitForEvents(1,&event) ;
95 }
96
97 return (clblasStatus) err ;
98 }
99
100 extern "C"
clblasFillSubMatrixAsync(clblasOrder order,size_t element_size,cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,int xA,int yA,size_t nx,size_t ny,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)101 clblasStatus clblasFillSubMatrixAsync(
102 clblasOrder order,
103 size_t element_size,
104 cl_mem A, size_t offA, size_t ldA,
105 size_t nrA, size_t ncA,
106 int xA, int yA,
107 size_t nx, size_t ny,
108 const void *host,
109 cl_command_queue command_queue,
110 cl_uint numEventsInWaitList,
111 const cl_event *eventWaitList,
112 cl_event *event)
113 {
114 // Transform Row-major into equivalent ColumnMajor so X becomes the contiguous dimension.
115 if( order == clblasRowMajor )
116 {
117 SWAP(size_t, nrA, ncA);
118 SWAP(int, xA, yA);
119 SWAP(size_t, nx, ny);
120 }
121
122 // Check that the specified area is within the array
123 if ( !inside2d( nrA,ncA, xA,yA , nx, ny ) ) {
124 return clblasInvalidValue ;
125 }
126
127 // If the area to fill is contiguous then use clblasFillVector
128 if ( nx==ldA || ny==1 )
129 {
130 return clblasFillVectorAsync( nx*ny,
131 element_size,
132 A,
133 offA + xA + yA*ldA,
134 host,
135 command_queue,
136 numEventsInWaitList,
137 eventWaitList,
138 event) ;
139 }
140 else if (1)
141 {
142
143 clblasFill2DFunctor::Args args(A,
144 offA + xA + yA*ldA,
145 nx,ny,
146 ldA,
147 element_size,
148 host,
149 command_queue,
150 numEventsInWaitList,
151 eventWaitList,
152 event) ;
153
154 clblasFunctorSelector * fselector = clblasFunctorSelector::find(command_queue);
155
156 clblasFill2DFunctor * functor = fselector->select_fill2d_specific(args);
157
158 if (!functor)
159 return clblasInvalidValue ;
160
161 cl_int err = functor->execute(args);
162
163 functor->release();
164 return (clblasStatus) err ;
165 }
166 else
167 {
168 // Temporary: perform one fill per row
169 cl_int err ;
170 for( size_t i=0; i<ny ; i++ )
171 {
172 err = clblasFillVectorAsync( nx ,
173 element_size,
174 A,
175 offA + xA + (yA+i)*ldA,
176 host,
177 command_queue,
178 numEventsInWaitList,
179 eventWaitList,
180 event) ;
181 if (err!=clblasSuccess)
182 return (clblasStatus) err ;
183 }
184 return clblasSuccess ;
185 }
186 }
187
188 extern "C"
clblasFillSubMatrix(clblasOrder order,size_t element_size,cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,size_t nx,size_t ny,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)189 clblasStatus clblasFillSubMatrix(
190 clblasOrder order,
191 size_t element_size,
192 cl_mem A, size_t offA, size_t ldA,
193 size_t nrA, size_t ncA,
194 size_t xA, size_t yA,
195 size_t nx, size_t ny,
196 const void *host,
197 cl_command_queue command_queue,
198 cl_uint numEventsInWaitList,
199 const cl_event *eventWaitList)
200 {
201 cl_event event ;
202 cl_int err = clblasFillSubMatrixAsync(order,
203 element_size,
204 A, offA, ldA,
205 nrA, ncA,
206 xA, yA,
207 nx, ny,
208 host,
209 command_queue,
210 numEventsInWaitList,
211 eventWaitList,
212 &event
213 ) ;
214
215 if (err == clblasSuccess)
216 {
217 err = clWaitForEvents(1,&event) ;
218 }
219
220 return (clblasStatus)err ;
221 }
222
223
224 extern "C"
clblasFillMatrix(clblasOrder order,size_t element_size,cl_mem A,size_t offA,size_t ldA,size_t sxA,size_t syA,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)225 clblasStatus clblasFillMatrix( clblasOrder order,
226 size_t element_size,
227 cl_mem A, size_t offA, size_t ldA,
228 size_t sxA, size_t syA,
229 const void *host,
230 cl_command_queue command_queue,
231 cl_uint numEventsInWaitList,
232 const cl_event *eventWaitList)
233 {
234 return clblasFillSubMatrix( order,
235 element_size,
236 A, offA, ldA,
237 sxA, syA,
238 0, 0,
239 sxA, syA,
240 host,
241 command_queue,
242 numEventsInWaitList,
243 eventWaitList) ;
244 }
245
246
247 extern "C"
clblasFillMatrixAsync(clblasOrder order,size_t element_size,cl_mem A,size_t offA,size_t ldA,size_t sxA,size_t syA,const void * host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)248 clblasStatus clblasFillMatrixAsync( clblasOrder order,
249 size_t element_size,
250 cl_mem A, size_t offA, size_t ldA,
251 size_t sxA, size_t syA,
252 const void *host,
253 cl_command_queue command_queue,
254 cl_uint numEventsInWaitList,
255 const cl_event *eventWaitList,
256 cl_event *event)
257 {
258
259 return clblasFillSubMatrixAsync( order,
260 element_size,
261 A, offA, ldA,
262 sxA, syA,
263 0, 0,
264 sxA, syA,
265 host,
266 command_queue,
267 numEventsInWaitList,
268 eventWaitList,
269 event) ;
270
271 }
272
273