1 /* ************************************************************************
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17 #include<stdio.h>
18 #include<stdlib.h>
19 #include<limits.h>
20 #include<clBLAS.h>
21
/*
 * Exchange the values of the two lvalues a and b, both of type TYPE.
 * Arguments are parenthesized so that expression arguments (e.g. *p, v[i])
 * expand safely; each argument is evaluated twice, so avoid side effects.
 */
#define SWAP(TYPE, a, b) \
    do { TYPE swap_tmp_ = (a); (a) = (b); (b) = swap_tmp_; } while (0)
23
// Return true (1) if the area starting at point (x,y) and of size (w,h) lies
// entirely within the array of size d1 x d2, false (0) otherwise.
//
// The offsets are taken as size_t (the type every caller in this file passes)
// so that very large offsets are rejected instead of being truncated to int
// before the comparison.
static int inside2d(size_t d1, size_t d2, size_t x, size_t y, size_t w, size_t h)
{
    // Very very large dimensions are likely a bug
    const size_t max_dim = (size_t)INT_MAX;

    if (d1 >= max_dim || d2 >= max_dim || w >= max_dim || h >= max_dim) {
        return 0;
    }

    // The origin must be a valid position inside the array; this also
    // guarantees that the subtractions below cannot wrap around.
    if (x >= d1 || y >= d2) {
        return 0;
    }

    // The extent must fit in what remains after the origin.
    if (w > d1 - x || h > d2 - y) {
        return 0;
    }

    return 1;
}
46
clblasMatrixSizeInfo(clblasOrder order,size_t rows,size_t columns,size_t elemsize,size_t padding,size_t * ld,size_t * fullsize)47 clblasStatus clblasMatrixSizeInfo(clblasOrder order,
48 size_t rows,
49 size_t columns,
50 size_t elemsize,
51 size_t padding,
52 size_t * ld,
53 size_t * fullsize)
54 {
55 size_t x;
56 size_t y;
57
58 if( order == clblasRowMajor )
59 {
60 x = columns;
61 y = rows;
62 }
63 else
64 {
65 x = rows;
66 y = columns;
67 }
68
69 // set if not NULL
70 if( ld ) *ld = x + padding;
71 if( fullsize ) *fullsize = (size_t) ( (x + padding) * y * elemsize );
72
73 return clblasSuccess;
74 }
75
76
clblasCreateMatrix(cl_context context,clblasOrder order,size_t rows,size_t columns,size_t elemsize,size_t padding,size_t * ld,size_t * fullsize,cl_int * err)77 cl_mem clblasCreateMatrix(
78 cl_context context,
79 clblasOrder order,
80 size_t rows,
81 size_t columns,
82 size_t elemsize,
83 size_t padding,
84 size_t * ld,
85 size_t * fullsize,
86 cl_int * err)
87 {
88 size_t tmp_fullsize;
89 cl_mem_flags flags = CL_MEM_READ_WRITE;
90
91 clblasMatrixSizeInfo(
92 order,
93 rows,
94 columns,
95 elemsize,
96 padding,
97 ld,
98 &tmp_fullsize);
99
100 // set if not NULL
101 if(fullsize != NULL) *fullsize = tmp_fullsize;
102
103 return clCreateBuffer(
104 context,
105 flags,
106 tmp_fullsize,
107 NULL,
108 err);
109 }
110
111 /*
112 * Internal function:
113 * see clblasCreateMatrix()
114 */
clblasCreateMatrixWithLd(cl_context context,clblasOrder order,size_t rows,size_t columns,size_t elemsize,size_t ld,size_t * fullsize,cl_int * err)115 cl_mem clblasCreateMatrixWithLd(
116 cl_context context,
117 clblasOrder order,
118 size_t rows,
119 size_t columns,
120 size_t elemsize,
121 size_t ld,
122 size_t * fullsize,
123 cl_int * err)
124 {
125 int nbelem;
126 cl_mem_flags flags = CL_MEM_READ_WRITE;
127
128 // compute number of elements
129 if( order == clblasRowMajor )
130 {
131 // check ld
132 if( ld < columns )
133 {
134 *err = clblasInvalidValue;
135 return 0;
136 }
137
138 nbelem = rows * ld;
139 }
140 else if( order == clblasColumnMajor )
141 {
142 // check ld
143 if( ld < rows )
144 {
145 *err = clblasInvalidValue;
146 return 0;
147 }
148
149 nbelem = ld * columns;
150 }
151
152 // set if not NULL
153 if( fullsize ) *fullsize = (size_t) (nbelem * elemsize );
154
155 // allocate
156 return clCreateBuffer(
157 context,
158 flags,
159 *fullsize,
160 NULL,
161 err);
162 }
163
164
clblasCreateMatrixFromHost(cl_context context,clblasOrder order,size_t rows,size_t columns,size_t elemsize,size_t ld,void * host,size_t off_host,size_t ld_host,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_int * err)165 cl_mem clblasCreateMatrixFromHost(
166 cl_context context,
167 clblasOrder order,
168 size_t rows,
169 size_t columns,
170 size_t elemsize,
171 size_t ld,
172 void * host,
173 size_t off_host,
174 size_t ld_host,
175 cl_command_queue command_queue,
176 cl_uint numEventsInWaitList,
177 const cl_event *eventWaitList,
178 cl_int * err)
179 {
180 size_t fullsize;
181 cl_mem out;
182 size_t i;
183
184 out = clblasCreateMatrixWithLd(
185 context,
186 order,
187 rows,
188 columns,
189 elemsize,
190 ld,
191 &fullsize,
192 err);
193
194 if( ! *err )
195 {
196 printf("ok\n");
197 // TODO use ReadMatrix instead ?
198 if( order == clblasRowMajor )
199 {
200 for( i = 0; i < rows; i++ )
201 {
202 const size_t host_orig[3] = {off_host, off_host, 0};
203 const size_t buff_orig[3] = {0, 0, 0};
204 const size_t region[3] = {columns*elemsize, rows, 1};
205 *err = clEnqueueWriteBufferRect(
206 command_queue,
207 out,
208 CL_TRUE,
209 buff_orig,
210 host_orig,
211 region,
212 columns * elemsize,
213 0,
214 ld_host * elemsize,
215 0,
216 host,
217 numEventsInWaitList,
218 eventWaitList,
219 NULL);
220 }
221 }
222 }
223
224 return out;
225 }
226
227 /*
228 * Internal function:
229 * enqueue event in list and wait for it if blocking
230 */
emptyAction(cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event,cl_bool blocking)231 static clblasStatus emptyAction(
232 cl_command_queue command_queue,
233 cl_uint numEventsInWaitList,
234 const cl_event *eventWaitList,
235 cl_event *event,
236 cl_bool blocking)
237 {
238 cl_int err ;
239
240 err = clEnqueueBarrierWithWaitList(
241 command_queue,
242 numEventsInWaitList,
243 eventWaitList,
244 event);
245
246 if (err != clblasSuccess)
247 return (clblasStatus)err;
248
249 if(blocking)
250 return (clblasStatus)clWaitForEvents(1, event);
251 else
252 return (clblasStatus)err;
253 }
254
255 /*
256 * Internal function:
257 * Generic version of clblasWriteSubMatrix with blocking arg
258 * event must be non-NULL if blocking is set to CL_TRUE
259 */
_clblasWriteSubMatrix(clblasOrder order,size_t element_size,const void * A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event,cl_bool blocking)260 static clblasStatus _clblasWriteSubMatrix(
261 clblasOrder order,
262 size_t element_size,
263 const void *A, size_t offA, size_t ldA,
264 size_t nrA, size_t ncA,
265 size_t xA, size_t yA,
266 cl_mem B, size_t offB, size_t ldB,
267 size_t nrB, size_t ncB,
268 size_t xB, size_t yB,
269 size_t nx, size_t ny,
270 cl_command_queue command_queue,
271 cl_uint numEventsInWaitList,
272 const cl_event *eventWaitList,
273 cl_event *event,
274 cl_bool blocking)
275 {
276
277 if( order == clblasRowMajor )
278 {
279 SWAP(size_t, xA, yA);
280 SWAP(size_t, nrA, ncA);
281 SWAP(size_t, xB, yB);
282 SWAP(size_t, nrB, ncB);
283 SWAP(size_t, nx, ny);
284 }
285
286 // Check that the specified area is within the array A
287 if ( !inside2d( nrA,ncA, xA,yA , nx,ny ) ) {
288 return clblasInvalidValue ;
289 }
290
291 // Check that the specified area is within the array B
292 if ( !inside2d( nrB,ncB, xB,yB , nx,ny ) ) {
293 return clblasInvalidValue ;
294 }
295
296
297 if( nx == 0 || ny == 0 )
298 {
299 return emptyAction(
300 command_queue,
301 numEventsInWaitList,
302 eventWaitList,
303 event,
304 blocking);
305 }
306
307 {
308 const size_t origA[3] = { (xA+offA)*element_size, yA, 0 };
309 const size_t origB[3] = { (xB+offB)*element_size, yB, 0 };
310 const size_t region[3] = { nx * element_size, ny, 1 };
311
312 return (clblasStatus) clEnqueueWriteBufferRect(
313 command_queue,
314 B,
315 blocking,
316 origB,
317 origA,
318 region,
319 ldB * element_size,
320 0,
321 ldA * element_size,
322 0,
323 A,
324 numEventsInWaitList,
325 eventWaitList,
326 event);
327 }
328 }
329
clblasWriteSubMatrix(clblasOrder order,size_t element_size,const void * A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)330 clblasStatus clblasWriteSubMatrix(
331 clblasOrder order,
332 size_t element_size,
333 const void *A, size_t offA, size_t ldA,
334 size_t nrA, size_t ncA,
335 size_t xA, size_t yA,
336 cl_mem B, size_t offB, size_t ldB,
337 size_t nrB, size_t ncB,
338 size_t xB, size_t yB,
339 size_t nx, size_t ny,
340 cl_command_queue command_queue,
341 cl_uint numEventsInWaitList,
342 const cl_event *eventWaitList)
343 {
344 cl_event evt;
345
346 return _clblasWriteSubMatrix(
347 order,
348 element_size,
349 A, offA, ldA,
350 nrA, ncA,
351 xA, yA,
352 B, offB, ldB,
353 nrB, ncB,
354 xB, yB,
355 nx, ny,
356 command_queue,
357 numEventsInWaitList,
358 eventWaitList,
359 &evt,
360 CL_TRUE);
361 }
362
clblasWriteSubMatrixAsync(clblasOrder order,size_t element_size,const void * A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)363 clblasStatus clblasWriteSubMatrixAsync(
364 clblasOrder order,
365 size_t element_size,
366 const void *A, size_t offA, size_t ldA,
367 size_t nrA, size_t ncA,
368 size_t xA, size_t yA,
369 cl_mem B, size_t offB, size_t ldB,
370 size_t nrB, size_t ncB,
371 size_t xB, size_t yB,
372 size_t nx, size_t ny,
373 cl_command_queue command_queue,
374 cl_uint numEventsInWaitList,
375 const cl_event *eventWaitList,
376 cl_event *event)
377 {
378 return _clblasWriteSubMatrix(
379 order,
380 element_size,
381 A, offA, ldA,
382 nrA, ncA,
383 xA, yA,
384 B, offB, ldB,
385 nrB, ncB,
386 xB, yB,
387 nx, ny,
388 command_queue,
389 numEventsInWaitList,
390 eventWaitList,
391 event,
392 CL_FALSE);
393 }
394
395
396 /*
397 * Internal function:
398 * Generic version of clblasReadSubMatrix with blocking arg
399 * event must be non-NULL if blocking is set to CL_TRUE
400 */
_clblasReadSubMatrix(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,void * B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event,cl_bool blocking)401 static clblasStatus _clblasReadSubMatrix(
402 clblasOrder order,
403 size_t element_size,
404 const cl_mem A, size_t offA, size_t ldA,
405 size_t nrA, size_t ncA,
406 size_t xA, size_t yA,
407 void *B, size_t offB, size_t ldB,
408 size_t nrB, size_t ncB,
409 size_t xB, size_t yB,
410 size_t nx, size_t ny,
411 cl_command_queue command_queue,
412 cl_uint numEventsInWaitList,
413 const cl_event *eventWaitList,
414 cl_event *event,
415 cl_bool blocking)
416 {
417
418 if( order == clblasRowMajor )
419 {
420 SWAP(size_t, xA, yA);
421 SWAP(size_t, nrA, ncA);
422 SWAP(size_t, xB, yB);
423 SWAP(size_t, nrB, ncB);
424 SWAP(size_t, nx, ny);
425 }
426
427 if( nx == 0 || ny == 0 )
428 {
429 return emptyAction(
430 command_queue,
431 numEventsInWaitList,
432 eventWaitList,
433 event,
434 blocking);
435 }
436
437 // Check that the specified area is within the array A
438 if ( !inside2d( nrA,ncA, xA,yA , nx,ny ) ) {
439 return clblasInvalidValue ;
440 }
441
442 // Check that the specified area is within the array B
443 if ( !inside2d( nrB,ncB, xB,yB , nx,ny ) ) {
444 return clblasInvalidValue ;
445 }
446
447 {
448 const size_t origA[3] = { (xA+offA)*element_size, yA, 0 };
449 const size_t origB[3] = { (xB+offB)*element_size, yB, 0 };
450 const size_t region[3] = { nx * element_size, ny, 1 };
451
452 return (clblasStatus) clEnqueueReadBufferRect(
453 command_queue,
454 A,
455 blocking,
456 origA,
457 origB,
458 region,
459 ldA * element_size,
460 0,
461 ldB * element_size,
462 0,
463 B,
464 numEventsInWaitList,
465 eventWaitList,
466 event);
467 }
468 }
469
470
clblasReadSubMatrix(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,void * B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)471 clblasStatus clblasReadSubMatrix(
472 clblasOrder order,
473 size_t element_size,
474 const cl_mem A, size_t offA, size_t ldA,
475 size_t nrA, size_t ncA,
476 size_t xA, size_t yA,
477 void *B, size_t offB, size_t ldB,
478 size_t nrB, size_t ncB,
479 size_t xB, size_t yB,
480 size_t nx, size_t ny,
481 cl_command_queue command_queue,
482 cl_uint numEventsInWaitList,
483 const cl_event *eventWaitList)
484 {
485 cl_event evt;
486
487 return _clblasReadSubMatrix(
488 order,
489 element_size,
490 A, offA, ldA,
491 nrA, ncA,
492 xA, yA,
493 B, offB, ldB,
494 nrB, ncB,
495 xB, yB,
496 nx, ny,
497 command_queue,
498 numEventsInWaitList,
499 eventWaitList,
500 &evt,
501 CL_TRUE);
502 }
503
504
clblasReadSubMatrixAsync(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,void * B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)505 clblasStatus clblasReadSubMatrixAsync(
506 clblasOrder order,
507 size_t element_size,
508 const cl_mem A, size_t offA, size_t ldA,
509 size_t nrA, size_t ncA,
510 size_t xA, size_t yA,
511 void *B, size_t offB, size_t ldB,
512 size_t nrB, size_t ncB,
513 size_t xB, size_t yB,
514 size_t nx, size_t ny,
515 cl_command_queue command_queue,
516 cl_uint numEventsInWaitList,
517 const cl_event *eventWaitList,
518 cl_event *event)
519 {
520 return _clblasReadSubMatrix(
521 order,
522 element_size,
523 A, offA, ldA,
524 nrA, ncA,
525 xA, yA,
526 B, offB, ldB,
527 nrB, ncB,
528 xB, yB,
529 nx, ny,
530 command_queue,
531 numEventsInWaitList,
532 eventWaitList,
533 event,
534 CL_TRUE);
535 }
536
537
538 /*
539 * Internal function:
540 * Generic version of clblasCopySubMatrix with blocking arg
541 * event must be non-NULL if blocking is set to CL_TRUE
542 */
_clblasCopySubMatrix(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event,cl_bool blocking)543 static clblasStatus _clblasCopySubMatrix(
544 clblasOrder order,
545 size_t element_size,
546 const cl_mem A, size_t offA, size_t ldA,
547 size_t nrA, size_t ncA,
548 size_t xA, size_t yA,
549 cl_mem B, size_t offB, size_t ldB,
550 size_t nrB, size_t ncB,
551 size_t xB, size_t yB,
552 size_t nx, size_t ny,
553 cl_command_queue command_queue,
554 cl_uint numEventsInWaitList,
555 const cl_event *eventWaitList,
556 cl_event *event,
557 cl_bool blocking)
558 {
559 cl_int err;
560 if( order == clblasRowMajor )
561 {
562 SWAP(size_t, xA, yA);
563 SWAP(size_t, nrA, ncA);
564 SWAP(size_t, xB, yB);
565 SWAP(size_t, nrB, ncB);
566 SWAP(size_t, nx, ny);
567 }
568
569 if( nx == 0 || ny == 0 )
570 {
571 return emptyAction(
572 command_queue,
573 numEventsInWaitList,
574 eventWaitList,
575 event,
576 CL_FALSE);
577 }
578
579 // Check that the specified area is within the array A
580 if ( !inside2d( nrA,ncA, xA,yA , nx,ny ) ) {
581 return clblasInvalidValue ;
582 }
583
584 // Check that the specified area is within the array B
585 if ( !inside2d( nrB,ncB, xB,yB , nx,ny ) ) {
586 return clblasInvalidValue ;
587 }
588
589 {
590 const size_t origA[3] = { (xA+offA)*element_size, yA, 0 };
591 const size_t origB[3] = { (xB+offB)*element_size, yB, 0 };
592 const size_t region[3] = { nx * element_size, ny, 1 };
593
594 err = clEnqueueCopyBufferRect(
595 command_queue,
596 A,
597 B,
598 origA,
599 origB,
600 region,
601 ldA * element_size,
602 0,
603 ldB * element_size,
604 0,
605 numEventsInWaitList,
606 eventWaitList,
607 event);
608 }
609
610 if (err != clblasSuccess)
611 return (clblasStatus)err;
612
613 if(blocking)
614 return (clblasStatus)clWaitForEvents(1, event);
615 else
616 return (clblasStatus)err;
617 }
618
619
clblasCopySubMatrix(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)620 clblasStatus clblasCopySubMatrix(
621 clblasOrder order,
622 size_t element_size,
623 const cl_mem A, size_t offA, size_t ldA,
624 size_t nrA, size_t ncA,
625 size_t xA, size_t yA,
626 cl_mem B, size_t offB, size_t ldB,
627 size_t nrB, size_t ncB,
628 size_t xB, size_t yB,
629 size_t nx, size_t ny,
630 cl_command_queue command_queue,
631 cl_uint numEventsInWaitList,
632 const cl_event *eventWaitList)
633 {
634 cl_event evt;
635
636 return (clblasStatus) _clblasCopySubMatrix(
637 order,
638 element_size,
639 A, offA, ldA,
640 nrA, ncA,
641 xA, yA,
642 B, offB, ldB,
643 nrB, ncB,
644 xB, yB,
645 nx, ny,
646 command_queue,
647 numEventsInWaitList,
648 eventWaitList,
649 &evt,
650 CL_TRUE);
651 }
652
653
clblasCopySubMatrixAsync(clblasOrder order,size_t element_size,const cl_mem A,size_t offA,size_t ldA,size_t nrA,size_t ncA,size_t xA,size_t yA,cl_mem B,size_t offB,size_t ldB,size_t nrB,size_t ncB,size_t xB,size_t yB,size_t nx,size_t ny,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * event)654 clblasStatus clblasCopySubMatrixAsync(
655 clblasOrder order,
656 size_t element_size,
657 const cl_mem A, size_t offA, size_t ldA,
658 size_t nrA, size_t ncA,
659 size_t xA, size_t yA,
660 cl_mem B, size_t offB, size_t ldB,
661 size_t nrB, size_t ncB,
662 size_t xB, size_t yB,
663 size_t nx, size_t ny,
664 cl_command_queue command_queue,
665 cl_uint numEventsInWaitList,
666 const cl_event *eventWaitList,
667 cl_event *event)
668 {
669 return (clblasStatus) _clblasCopySubMatrix(
670 order,
671 element_size,
672 A, offA, ldA,
673 nrA, ncA,
674 xA, yA,
675 B, offB, ldB,
676 nrB, ncB,
677 xB, yB,
678 nx, ny,
679 command_queue,
680 numEventsInWaitList,
681 eventWaitList,
682 event,
683 CL_FALSE);
684 }
685
686
clblasWriteVector(size_t nb_elem,size_t element_size,const void * A,size_t offA,cl_mem B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)687 clblasStatus clblasWriteVector(
688 size_t nb_elem,
689 size_t element_size,
690 const void *A, size_t offA,
691 cl_mem B, size_t offB,
692 cl_command_queue command_queue,
693 cl_uint numEventsInWaitList,
694 const cl_event *eventWaitList)
695 {
696 return clblasWriteMatrix(
697 clblasColumnMajor,
698 nb_elem, 1,
699 element_size,
700 A, offA, nb_elem,
701 B, offB, nb_elem,
702 command_queue,
703 numEventsInWaitList,
704 eventWaitList);
705 }
706
707
clblasWriteVectorAsync(size_t nb_elem,size_t element_size,const void * A,size_t offA,cl_mem B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)708 clblasStatus clblasWriteVectorAsync(
709 size_t nb_elem,
710 size_t element_size,
711 const void *A, size_t offA,
712 cl_mem B, size_t offB,
713 cl_command_queue command_queue,
714 cl_uint numEventsInWaitList,
715 const cl_event *eventWaitList,
716 cl_event *events)
717 {
718 return clblasWriteMatrixAsync(
719 clblasColumnMajor,
720 nb_elem, 1,
721 element_size,
722 A, offA, nb_elem,
723 B, offB, nb_elem,
724 command_queue,
725 numEventsInWaitList,
726 eventWaitList,
727 events);
728 }
729
730
clblasReadVector(size_t nb_elem,size_t element_size,const cl_mem A,size_t offA,void * B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)731 clblasStatus clblasReadVector(
732 size_t nb_elem,
733 size_t element_size,
734 const cl_mem A, size_t offA,
735 void * B, size_t offB,
736 cl_command_queue command_queue,
737 cl_uint numEventsInWaitList,
738 const cl_event *eventWaitList)
739 {
740 return clblasReadMatrix(
741 clblasColumnMajor,
742 nb_elem, 1,
743 element_size,
744 A, offA, nb_elem,
745 B, offB, nb_elem,
746 command_queue,
747 numEventsInWaitList,
748 eventWaitList);
749 }
750
751
clblasReadVectorAsync(size_t nb_elem,size_t element_size,const cl_mem A,size_t offA,void * B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)752 clblasStatus clblasReadVectorAsync(
753 size_t nb_elem,
754 size_t element_size,
755 const cl_mem A, size_t offA,
756 void * B, size_t offB,
757 cl_command_queue command_queue,
758 cl_uint numEventsInWaitList,
759 const cl_event *eventWaitList,
760 cl_event *events)
761 {
762 return clblasReadMatrixAsync(
763 clblasColumnMajor,
764 nb_elem, 1,
765 element_size,
766 A, offA, nb_elem,
767 B, offB, nb_elem,
768 command_queue,
769 numEventsInWaitList,
770 eventWaitList,
771 events);
772 }
773
774
clblasCopyVector(size_t nb_elem,size_t element_size,const cl_mem A,size_t offA,cl_mem B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)775 clblasStatus clblasCopyVector(
776 size_t nb_elem,
777 size_t element_size,
778 const cl_mem A, size_t offA,
779 cl_mem B, size_t offB,
780 cl_command_queue command_queue,
781 cl_uint numEventsInWaitList,
782 const cl_event *eventWaitList)
783 {
784 return clblasCopyMatrix(
785 clblasColumnMajor,
786 nb_elem, 1,
787 element_size,
788 A, offA, nb_elem,
789 B, offB, nb_elem,
790 command_queue,
791 numEventsInWaitList,
792 eventWaitList);
793 }
794
795
clblasCopyVectorAsync(size_t nb_elem,size_t element_size,const cl_mem A,size_t offA,cl_mem B,size_t offB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)796 clblasStatus clblasCopyVectorAsync(
797 size_t nb_elem,
798 size_t element_size,
799 const cl_mem A, size_t offA,
800 cl_mem B, size_t offB,
801 cl_command_queue command_queue,
802 cl_uint numEventsInWaitList,
803 const cl_event *eventWaitList,
804 cl_event *events)
805 {
806 return clblasCopyMatrixAsync(
807 clblasColumnMajor,
808 nb_elem, 1,
809 element_size,
810 A, offA, nb_elem,
811 B, offB, nb_elem,
812 command_queue,
813 numEventsInWaitList,
814 eventWaitList,
815 events);
816 }
817
818
clblasWriteMatrix(clblasOrder order,size_t sx,size_t sy,size_t element_size,const void * A,size_t offA,size_t ldA,cl_mem B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)819 clblasStatus clblasWriteMatrix(
820 clblasOrder order,
821 size_t sx, size_t sy,
822 size_t element_size,
823 const void *A, size_t offA, size_t ldA,
824 cl_mem B, size_t offB, size_t ldB,
825 cl_command_queue command_queue,
826 cl_uint numEventsInWaitList,
827 const cl_event *eventWaitList)
828 {
829 return clblasWriteSubMatrix(
830 order,
831 element_size,
832 A, offA, ldA,
833 sx, sy,
834 0, 0,
835 B, offB, ldB,
836 sx, sy,
837 0, 0,
838 sx, sy,
839 command_queue,
840 numEventsInWaitList,
841 eventWaitList);
842 }
843
844
clblasWriteMatrixAsync(clblasOrder order,size_t sx,size_t sy,size_t element_size,const void * A,size_t offA,size_t ldA,cl_mem B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)845 clblasStatus clblasWriteMatrixAsync(
846 clblasOrder order,
847 size_t sx, size_t sy,
848 size_t element_size,
849 const void *A, size_t offA, size_t ldA,
850 cl_mem B, size_t offB, size_t ldB,
851 cl_command_queue command_queue,
852 cl_uint numEventsInWaitList,
853 const cl_event *eventWaitList,
854 cl_event *events)
855 {
856 return clblasWriteSubMatrixAsync(
857 order,
858 element_size,
859 A, offA, ldA,
860 sx, sy,
861 0, 0,
862 B, offB, ldB,
863 sx, sy,
864 0, 0,
865 sx, sy,
866 command_queue,
867 numEventsInWaitList,
868 eventWaitList,
869 events);
870 }
871
872
clblasReadMatrix(clblasOrder order,size_t sx,size_t sy,size_t element_size,const cl_mem A,size_t offA,size_t ldA,void * B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)873 clblasStatus clblasReadMatrix(
874 clblasOrder order,
875 size_t sx, size_t sy,
876 size_t element_size,
877 const cl_mem A, size_t offA, size_t ldA,
878 void * B, size_t offB, size_t ldB,
879 cl_command_queue command_queue,
880 cl_uint numEventsInWaitList,
881 const cl_event *eventWaitList)
882 {
883 return clblasReadSubMatrix(
884 order,
885 element_size,
886 A, offA, ldA,
887 sx, sy,
888 0, 0,
889 B, offB, ldB,
890 sx, sy,
891 0, 0,
892 sx, sy,
893 command_queue,
894 numEventsInWaitList,
895 eventWaitList);
896 }
897
898
clblasReadMatrixAsync(clblasOrder order,size_t sx,size_t sy,size_t element_size,const cl_mem A,size_t offA,size_t ldA,void * B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)899 clblasStatus clblasReadMatrixAsync(
900 clblasOrder order,
901 size_t sx, size_t sy,
902 size_t element_size,
903 const cl_mem A, size_t offA, size_t ldA,
904 void * B, size_t offB, size_t ldB,
905 cl_command_queue command_queue,
906 cl_uint numEventsInWaitList,
907 const cl_event *eventWaitList,
908 cl_event *events)
909 {
910 return clblasReadSubMatrixAsync(
911 order,
912 element_size,
913 A, offA, ldA,
914 sx, sy,
915 0, 0,
916 B, offB, ldB,
917 sx, sy,
918 0, 0,
919 sx, sy,
920 command_queue,
921 numEventsInWaitList,
922 eventWaitList,
923 events);
924 }
925
926
clblasCopyMatrix(clblasOrder order,size_t sx,size_t sy,size_t element_size,const cl_mem A,size_t offA,size_t ldA,cl_mem B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList)927 clblasStatus clblasCopyMatrix(
928 clblasOrder order,
929 size_t sx, size_t sy,
930 size_t element_size,
931 const cl_mem A, size_t offA, size_t ldA,
932 cl_mem B, size_t offB, size_t ldB,
933 cl_command_queue command_queue,
934 cl_uint numEventsInWaitList,
935 const cl_event *eventWaitList)
936 {
937 return clblasCopySubMatrix(
938 order,
939 element_size,
940 A, offA, ldA,
941 sx, sy,
942 0, 0,
943 B, offB, ldB,
944 sx, sy,
945 0, 0,
946 sx, sy,
947 command_queue,
948 numEventsInWaitList,
949 eventWaitList);
950 }
951
952
clblasCopyMatrixAsync(clblasOrder order,size_t sx,size_t sy,size_t element_size,const cl_mem A,size_t offA,size_t ldA,cl_mem B,size_t offB,size_t ldB,cl_command_queue command_queue,cl_uint numEventsInWaitList,const cl_event * eventWaitList,cl_event * events)953 clblasStatus clblasCopyMatrixAsync(
954 clblasOrder order,
955 size_t sx, size_t sy,
956 size_t element_size,
957 const cl_mem A, size_t offA, size_t ldA,
958 cl_mem B, size_t offB, size_t ldB,
959 cl_command_queue command_queue,
960 cl_uint numEventsInWaitList,
961 const cl_event *eventWaitList,
962 cl_event *events)
963 {
964 return clblasCopySubMatrixAsync(
965 order,
966 element_size,
967 A, offA, ldA,
968 sx, sy,
969 0, 0,
970 B, offB, ldB,
971 sx, sy,
972 0, 0,
973 sx, sy,
974 command_queue,
975 numEventsInWaitList,
976 eventWaitList,
977 events);
978 }
979
980