1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 
19 #ifndef CLBLAS_H_
20 #define CLBLAS_H_
21 
22 /**
23  * @mainpage OpenCL BLAS
24  *
25  * This is an implementation of
26  * <A HREF="http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms">
27  * Basic Linear Algebra Subprograms</A>, levels 1, 2 and 3 using
28  * <A HREF="http://www.khronos.org/opencl/">OpenCL</A> and optimized for
29  * the AMD GPU hardware.
30  */
31 
32 #if defined(__APPLE__) || defined(__MACOSX)
33 #include <OpenCL/cl.h>
34 #else
35 #include <CL/cl.h>
36 #endif
37 
38 #include <clBLAS-complex.h>
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 /**
45  * @defgroup OVERVIEW Overview
46  *
47  * This library provides an implementation of the Basic Linear Algebra Subprograms levels 1, 2 and 3,
48  * using OpenCL and optimized for AMD GPU hardware. It provides BLAS-1 functions
49  * SWAP, SCAL, COPY, AXPY, DOT, DOTU, DOTC, ROTG, ROTMG, ROT, ROTM, iAMAX, ASUM and NRM2,
50  * BLAS-2 functions GEMV, SYMV, TRMV, TRSV, HEMV, SYR, SYR2, HER, HER2, GER, GERU, GERC,
51  * TPMV, SPMV, HPMV, TPSV, SPR, SPR2, HPR, HPR2, GBMV, TBMV, SBMV, HBMV and TBSV
52  * and BLAS-3 functions GEMM, SYMM, TRMM, TRSM, HEMM, HERK, HER2K, SYRK and SYR2K.
53  *
54  * This library’s primary goal is to assist the end user to enqueue OpenCL
55  * kernels to process BLAS functions in an OpenCL-efficient manner, while
56  * keeping interfaces familiar to users who know how to use BLAS. All
57  * functions accept matrices through buffer objects.
58  *
59  * This library is entirely thread-safe with the exception of the following APIs:
60  * clblasSetup and clblasTeardown.
61  * Developers using the library can safely call any BLAS routine from different threads.
62  *
63  * @section deprecated
64  * This library provided support for the creation of scratch images to achieve better performance
65  * on older <a href="http://developer.amd.com/gpu/AMDAPPSDK/Pages/default.aspx">AMD APP SDKs</a>.
66  * However, memory buffers now give the same performance as scratch images in the current SDKs.
67  * Scratch images are being deprecated and users are advised not to use scratch images in
68  * new applications.
69  */
70 
71 /**
72  * @defgroup TYPES clblas types
73  */
74 /*@{*/
75 
76 
77 /** Specifies how the elements of a matrix are laid out in memory. */
78 typedef enum clblasOrder_ {
79     clblasRowMajor,           /**< The elements of every row are placed sequentially */
80     clblasColumnMajor         /**< The elements of every column are placed sequentially */
81 } clblasOrder;
82 
83 /** Specifies whether a matrix is used as is, transposed or conjugate-transposed. */
84 typedef enum clblasTranspose_ {
85     clblasNoTrans,           /**< Operate with the matrix itself. */
86     clblasTrans,             /**< Operate with the transpose of the matrix. */
87     clblasConjTrans          /**< Operate with the conjugate transpose of
88                                      the matrix. */
89 } clblasTranspose;
90 
91 /** Used by the Hermitian, symmetric and triangular matrix routines
92  * to specify whether the upper or the lower triangle is referenced.
93  */
94 typedef enum clblasUplo_ {
95     clblasUpper,               /**< The upper triangle is referenced. */
96     clblasLower                /**< The lower triangle is referenced. */
97 } clblasUplo;
98 
99 /** Used by the triangular matrix routines to specify whether the
100  * matrix is unit triangular.
101  */
102 typedef enum clblasDiag_ {
103     clblasUnit,               /**< The matrix is unit triangular. */
104     clblasNonUnit             /**< The matrix is non-unit triangular. */
105 } clblasDiag;
106 
107 /** Indicates on which side of matrix B the matrix A is placed during multiplication. */
108 typedef enum clblasSide_ {
109     clblasLeft,        /**< Multiply general matrix by symmetric,
110                                Hermitian or triangular matrix on the left. */
111     clblasRight        /**< Multiply general matrix by symmetric,
112                                Hermitian or triangular matrix on the right. */
113 } clblasSide;
114 
115 /**
116  *   @brief clblas error codes definition, incorporating OpenCL error
117  *   definitions.
118  *
119  *   This enumeration is a subset of the OpenCL error codes extended with
120  *   additional clblas-specific codes.  For example, CL_OUT_OF_HOST_MEMORY,
121  *   which is defined in cl.h, is aliased as clblasOutOfHostMemory.
122  */
123 typedef enum clblasStatus_ {
124     clblasSuccess                         = CL_SUCCESS,
125     clblasInvalidValue                    = CL_INVALID_VALUE,
126     clblasInvalidCommandQueue             = CL_INVALID_COMMAND_QUEUE,
127     clblasInvalidContext                  = CL_INVALID_CONTEXT,
128     clblasInvalidMemObject                = CL_INVALID_MEM_OBJECT,
129     clblasInvalidDevice                   = CL_INVALID_DEVICE,
130     clblasInvalidEventWaitList            = CL_INVALID_EVENT_WAIT_LIST,
131     clblasOutOfResources                  = CL_OUT_OF_RESOURCES,
132     clblasOutOfHostMemory                 = CL_OUT_OF_HOST_MEMORY,
133     clblasInvalidOperation                = CL_INVALID_OPERATION,
134     clblasCompilerNotAvailable            = CL_COMPILER_NOT_AVAILABLE,
135     clblasBuildProgramFailure             = CL_BUILD_PROGRAM_FAILURE,
136     /* Extended error codes; their values count upwards from -1024 */
137     clblasNotImplemented         = -1024, /**< Functionality is not implemented */
138     clblasNotInitialized,                 /**< clblas library is not initialized yet */
139     clblasInvalidMatA,                    /**< Matrix A is not a valid memory object */
140     clblasInvalidMatB,                    /**< Matrix B is not a valid memory object */
141     clblasInvalidMatC,                    /**< Matrix C is not a valid memory object */
142     clblasInvalidVecX,                    /**< Vector X is not a valid memory object */
143     clblasInvalidVecY,                    /**< Vector Y is not a valid memory object */
144     clblasInvalidDim,                     /**< An input dimension (M,N,K) is invalid */
145     clblasInvalidLeadDimA,                /**< Leading dimension A must not be less than the size of the first dimension */
146     clblasInvalidLeadDimB,                /**< Leading dimension B must not be less than the size of the second dimension */
147     clblasInvalidLeadDimC,                /**< Leading dimension C must not be less than the size of the third dimension */
148     clblasInvalidIncX,                    /**< The increment for a vector X must not be 0 */
149     clblasInvalidIncY,                    /**< The increment for a vector Y must not be 0 */
150     clblasInsufficientMemMatA,            /**< The memory object for Matrix A is too small */
151     clblasInsufficientMemMatB,            /**< The memory object for Matrix B is too small */
152     clblasInsufficientMemMatC,            /**< The memory object for Matrix C is too small */
153     clblasInsufficientMemVecX,            /**< The memory object for Vector X is too small */
154     clblasInsufficientMemVecY             /**< The memory object for Vector Y is too small */
155 } clblasStatus;
156 
157 
158 /*@}*/
159 
160 /**
161  * @defgroup VERSION Version information
162  */
163 /*@{*/
164 
165 /**
166  * @brief Get the version of the clblas library.
167  *
168  * @param[out] major        Location to store the library's major version.
169  * @param[out] minor        Location to store the library's minor version.
170  * @param[out] patch        Location to store the library's patch version.
171  *
172  * @return Always \b clblasSuccess.
173  *
174  * @ingroup VERSION
175  */
176 clblasStatus
177 clblasGetVersion(cl_uint* major, cl_uint* minor, cl_uint* patch);
178 
179 /*@}*/
180 
181 /**
182  * @defgroup INIT Initialize library
183  */
184 /*@{*/
185 
186 /**
187  * @brief Initialize the clblas library.
188  *
189  * Must be called before any other clblas API function is invoked.
190  * @note This function is not thread-safe.
191  *
192  * @return
193  *   - \b clblasSuccess on success;
194  *   - \b clblasOutOfHostMemory if there is not enough memory to allocate
195  *     the library's internal structures;
196  *   - \b clblasOutOfResources in case of a shortage of the requested resources.
197  *
198  * @ingroup INIT
199  */
200 clblasStatus
201 clblasSetup(void);
202 
203 /**
204  * @brief Finalize the usage of the clblas library.
205  *
206  * Frees all memory allocated for the computational kernels and other
207  * internal data.
208  * @note This function is not thread-safe.
209  *
210  * @ingroup INIT
211  */
212 void
213 clblasTeardown(void);
214 
215 /*@}*/
216 
217 /**
218  * @defgroup BLAS1 BLAS-1 functions
219  *
220  * The Level 1 Basic Linear Algebra Subprograms are functions that perform
221  * vector-vector operations.
222  */
223 /*@{*/
224 /*@}*/
225 
226 /**
227  * @defgroup SWAP SWAP  - Swap elements from 2 vectors
228  * @ingroup BLAS1
229  */
230 /*@{*/
231 
232 /**
233  * @brief Interchanges two vectors of float elements.
234  *
235  *
236  * @param[in] N         Number of elements in vector \b X.
237  * @param[in,out] X     Buffer object storing vector \b X.
238  * @param[in] offx      Offset of first element of vector \b X in buffer object.
239  *                      Counted in elements.
240  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
241  * @param[in,out] Y     Buffer object storing the vector \b Y.
242  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
243  *                      Counted in elements.
244  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
245  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
246  *                                task is to be performed.
247  * @param[in] commandQueues       OpenCL command queues.
248  * @param[in] numEventsInWaitList Number of events in the event wait list.
249  * @param[in] eventWaitList       Event wait list.
250  * @param[in] events     Event objects per each command queue that identify
251  *                       a particular kernel execution instance.
252  *
253  * @return
254  *   - \b clblasSuccess on success;
255  *   - \b clblasNotInitialized if clblasSetup() was not called;
256  *   - \b clblasInvalidValue if invalid parameters are passed:
257  *     - \b N is zero, or
258  *     - either \b incx or \b incy is zero, or
259  *     - the vector sizes along with the increments lead to
260  *       accessing outside of any of the buffers;
261  *   - \b clblasInvalidMemObject if either the \b X or the \b Y object is
262  *     invalid, or an image object rather than a buffer one;
263  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
264  *     internal structures;
265  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
266  *   - \b clblasInvalidContext if a context a passed command queue belongs
267  *     to was released;
268  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
269  *     call has not completed for any of the target devices;
270  *   - \b clblasCompilerNotAvailable if a compiler is not available;
271  *   - \b clblasBuildProgramFailure if there is a failure to build a program
272  *     executable.
273  *
274  * @ingroup SWAP
275  */
276 clblasStatus
277 clblasSswap(
278     size_t N,
279     cl_mem X,
280     size_t offx,
281     int incx,
282     cl_mem Y,
283     size_t offy,
284     int incy,
285     cl_uint numCommandQueues,
286     cl_command_queue *commandQueues,
287     cl_uint numEventsInWaitList,
288     const cl_event *eventWaitList,
289     cl_event *events);
290 
291 /**
292  * @example example_sswap.c
293  * Example of how to use the @ref clblasSswap function.
294  */
295 
296  /**
297  * @brief Interchanges two vectors of double elements.
298  *
299  *
300  * @param[in] N         Number of elements in vector \b X.
301  * @param[in,out] X     Buffer object storing vector \b X.
302  * @param[in] offx      Offset of first element of vector \b X in buffer object.
303  *                      Counted in elements.
304  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
305  * @param[in,out] Y     Buffer object storing the vector \b Y.
306  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
307  *                      Counted in elements.
308  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
309  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
310  *                                task is to be performed.
311  * @param[in] commandQueues       OpenCL command queues.
312  * @param[in] numEventsInWaitList Number of events in the event wait list.
313  * @param[in] eventWaitList       Event wait list.
314  * @param[in] events     Event objects per each command queue that identify
315  *                       a particular kernel execution instance.
316  *
317  * @return
318  *   - \b clblasSuccess on success;
319  *   - \b clblasInvalidDevice if a target device does not support
320  *     floating point arithmetic with double precision;
321  *   - the same error codes as the clblasSswap() function otherwise.
322  *
323  * @ingroup SWAP
324  */
325 clblasStatus
326 clblasDswap(
327     size_t N,
328     cl_mem X,
329     size_t offx,
330     int incx,
331     cl_mem Y,
332     size_t offy,
333     int incy,
334     cl_uint numCommandQueues,
335     cl_command_queue *commandQueues,
336     cl_uint numEventsInWaitList,
337     const cl_event *eventWaitList,
338     cl_event *events);
339 
340 /**
341  * @brief Interchanges two vectors of complex-float elements.
342  *
343  *
344  * @param[in] N         Number of elements in vector \b X.
345  * @param[in,out] X     Buffer object storing vector \b X.
346  * @param[in] offx      Offset of first element of vector \b X in buffer object.
347  *                      Counted in elements.
348  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
349  * @param[in,out] Y     Buffer object storing the vector \b Y.
350  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
351  *                      Counted in elements.
352  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
353  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
354  *                                task is to be performed.
355  * @param[in] commandQueues       OpenCL command queues.
356  * @param[in] numEventsInWaitList Number of events in the event wait list.
357  * @param[in] eventWaitList       Event wait list.
358  * @param[in] events     Event objects per each command queue that identify
359  *                       a particular kernel execution instance.
360  *
361  * @return
362  *   - \b clblasSuccess on success;
363  *   - the same error codes as the clblasSswap() function otherwise.
364  *
365  * @ingroup SWAP
366  */
367 clblasStatus
368 clblasCswap(
369     size_t N,
370     cl_mem X,
371     size_t offx,
372     int incx,
373     cl_mem Y,
374     size_t offy,
375     int incy,
376     cl_uint numCommandQueues,
377     cl_command_queue *commandQueues,
378     cl_uint numEventsInWaitList,
379     const cl_event *eventWaitList,
380     cl_event *events);
381 
382 /**
383  * @brief Interchanges two vectors of double-complex elements.
384  *
385  *
386  * @param[in] N         Number of elements in vector \b X.
387  * @param[in,out] X     Buffer object storing vector \b X.
388  * @param[in] offx      Offset of first element of vector \b X in buffer object.
389  *                      Counted in elements.
390  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
391  * @param[in,out] Y     Buffer object storing the vector \b Y.
392  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
393  *                      Counted in elements.
394  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
395  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
396  *                                task is to be performed.
397  * @param[in] commandQueues       OpenCL command queues.
398  * @param[in] numEventsInWaitList Number of events in the event wait list.
399  * @param[in] eventWaitList       Event wait list.
400  * @param[in] events     Event objects per each command queue that identify
401  *                       a particular kernel execution instance.
402  *
403  * @return
404  *   - \b clblasSuccess on success;
405  *   - the same error codes as the clblasDswap() function otherwise.
406  *
407  * @ingroup SWAP
408  */
409 clblasStatus
410 clblasZswap(
411     size_t N,
412     cl_mem X,
413     size_t offx,
414     int incx,
415     cl_mem Y,
416     size_t offy,
417     int incy,
418     cl_uint numCommandQueues,
419     cl_command_queue *commandQueues,
420     cl_uint numEventsInWaitList,
421     const cl_event *eventWaitList,
422     cl_event *events);
423 
424 /*@}*/
425 
426 
427 /**
428  * @defgroup SCAL SCAL  - Scales a vector by a constant
429  * @ingroup BLAS1
430  */
431 /*@{*/
432 
433 /**
434  * @brief Scales a float vector by a float constant.
435  *
436  *   - \f$ X \leftarrow \alpha X \f$
437  *
438  * @param[in] N         Number of elements in vector \b X.
439  * @param[in] alpha     The constant factor for vector \b X.
440  * @param[in,out] X     Buffer object storing vector \b X.
441  * @param[in] offx      Offset of first element of vector \b X in buffer object.
442  *                      Counted in elements.
443  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
444  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
445  *                                task is to be performed.
446  * @param[in] commandQueues       OpenCL command queues.
447  * @param[in] numEventsInWaitList Number of events in the event wait list.
448  * @param[in] eventWaitList       Event wait list.
449  * @param[in] events     Event objects per each command queue that identify
450  *                       a particular kernel execution instance.
451  *
452  * @return
453  *   - \b clblasSuccess on success;
454  *   - \b clblasNotInitialized if clblasSetup() was not called;
455  *   - \b clblasInvalidValue if invalid parameters are passed:
456  *     - \b N is zero, or
457  *     - \b incx is zero, or
458  *     - the vector size along with the increment leads to
459  *       accessing outside of the buffer;
460  *   - \b clblasInvalidMemObject if the \b X object is
461  *     invalid, or an image object rather than a buffer one;
462  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
463  *     internal structures;
464  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
465  *   - \b clblasInvalidContext if a context a passed command queue belongs
466  *     to was released;
467  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
468  *     call has not completed for any of the target devices;
469  *   - \b clblasCompilerNotAvailable if a compiler is not available;
470  *   - \b clblasBuildProgramFailure if there is a failure to build a program
471  *     executable.
472  *
473  * @ingroup SCAL
474  */
475 clblasStatus
476 clblasSscal(
477     size_t N,
478     cl_float alpha,
479     cl_mem X,
480     size_t offx,
481     int incx,
482     cl_uint numCommandQueues,
483     cl_command_queue *commandQueues,
484     cl_uint numEventsInWaitList,
485     const cl_event *eventWaitList,
486     cl_event *events);
487 /**
488  * @example example_sscal.c
489  * Example of how to use the @ref clblasSscal function.
490  */
491 
492  /**
493  * @brief Scales a double vector by a double constant.
494  *
495  *   - \f$ X \leftarrow \alpha X \f$
496  *
497  * @param[in] N         Number of elements in vector \b X.
498  * @param[in] alpha     The constant factor for vector \b X.
499  * @param[in,out] X     Buffer object storing vector \b X.
500  * @param[in] offx      Offset of first element of vector \b X in buffer object.
501  *                      Counted in elements.
502  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
503  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
504  *                                task is to be performed.
505  * @param[in] commandQueues       OpenCL command queues.
506  * @param[in] numEventsInWaitList Number of events in the event wait list.
507  * @param[in] eventWaitList       Event wait list.
508  * @param[in] events     Event objects per each command queue that identify
509  *                       a particular kernel execution instance.
510  *
511  * @return
512  *   - \b clblasSuccess on success;
513  *   - \b clblasInvalidDevice if a target device does not support
514  *     floating point arithmetic with double precision;
515  *   - the same error codes as the clblasSscal() function otherwise.
516  *
517  * @ingroup SCAL
518  */
519 clblasStatus
520 clblasDscal(
521     size_t N,
522     cl_double alpha,
523     cl_mem X,
524     size_t offx,
525     int incx,
526     cl_uint numCommandQueues,
527     cl_command_queue *commandQueues,
528     cl_uint numEventsInWaitList,
529     const cl_event *eventWaitList,
530     cl_event *events);
531 
532 /**
533  * @brief Scales a complex-float vector by a complex-float constant.
534  *
535  *   - \f$ X \leftarrow \alpha X \f$
536  *
537  * @param[in] N         Number of elements in vector \b X.
538  * @param[in] alpha     The constant factor for vector \b X.
539  * @param[in,out] X     Buffer object storing vector \b X.
540  * @param[in] offx      Offset of first element of vector \b X in buffer object.
541  *                      Counted in elements.
542  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
543  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
544  *                                task is to be performed.
545  * @param[in] commandQueues       OpenCL command queues.
546  * @param[in] numEventsInWaitList Number of events in the event wait list.
547  * @param[in] eventWaitList       Event wait list.
548  * @param[in] events     Event objects per each command queue that identify
549  *                       a particular kernel execution instance.
550  *
551  * @return
552  *   - \b clblasSuccess on success;
553  *   - the same error codes as the clblasSscal() function otherwise.
554  *
555  * @ingroup SCAL
556  */
557 clblasStatus
558 clblasCscal(
559     size_t N,
560     cl_float2 alpha,
561     cl_mem X,
562     size_t offx,
563     int incx,
564     cl_uint numCommandQueues,
565     cl_command_queue *commandQueues,
566     cl_uint numEventsInWaitList,
567     const cl_event *eventWaitList,
568     cl_event *events);
569 
570 /**
571  * @brief Scales a complex-double vector by a complex-double constant.
572  *
573  *   - \f$ X \leftarrow \alpha X \f$
574  *
575  * @param[in] N         Number of elements in vector \b X.
576  * @param[in] alpha     The constant factor for vector \b X.
577  * @param[in,out] X     Buffer object storing vector \b X.
578  * @param[in] offx      Offset of first element of vector \b X in buffer object.
579  *                      Counted in elements.
580  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
581  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
582  *                                task is to be performed.
583  * @param[in] commandQueues       OpenCL command queues.
584  * @param[in] numEventsInWaitList Number of events in the event wait list.
585  * @param[in] eventWaitList       Event wait list.
586  * @param[in] events     Event objects per each command queue that identify
587  *                       a particular kernel execution instance.
588  *
589  * @return
590  *   - \b clblasSuccess on success;
591  *   - the same error codes as the clblasDscal() function otherwise.
592  *
593  * @ingroup SCAL
594  */
595 clblasStatus
596 clblasZscal(
597     size_t N,
598     cl_double2 alpha,
599     cl_mem X,
600     size_t offx,
601     int incx,
602     cl_uint numCommandQueues,
603     cl_command_queue *commandQueues,
604     cl_uint numEventsInWaitList,
605     const cl_event *eventWaitList,
606     cl_event *events);
607 
608 /*@}*/
609 
610 /**
611  * @defgroup SSCAL SSCAL  - Scales a complex vector by a real constant
612  * @ingroup BLAS1
613  */
614 /*@{*/
615 
616 /**
617  * @brief Scales a complex-float vector by a float constant.
618  *
619  *   - \f$ X \leftarrow \alpha X \f$
620  *
621  * @param[in] N         Number of elements in vector \b X.
622  * @param[in] alpha     The constant factor for vector \b X.
623  * @param[in,out] X     Buffer object storing vector \b X.
624  * @param[in] offx      Offset of first element of vector \b X in buffer object.
625  *                      Counted in elements.
626  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
627  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
628  *                                task is to be performed.
629  * @param[in] commandQueues       OpenCL command queues.
630  * @param[in] numEventsInWaitList Number of events in the event wait list.
631  * @param[in] eventWaitList       Event wait list.
632  * @param[in] events     Event objects per each command queue that identify
633  *                       a particular kernel execution instance.
634  *
635  * @return
636  *   - \b clblasSuccess on success;
637  *   - \b clblasNotInitialized if clblasSetup() was not called;
638  *   - \b clblasInvalidValue if invalid parameters are passed:
639  *     - \b N is zero, or
640  *     - \b incx is zero, or
641  *     - the vector size along with the increment leads to
642  *       accessing outside of the buffer;
643  *   - \b clblasInvalidMemObject if the \b X object is
644  *     invalid, or an image object rather than a buffer one;
645  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
646  *     internal structures;
647  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
648  *   - \b clblasInvalidContext if a context a passed command queue belongs
649  *     to was released;
650  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
651  *     call has not completed for any of the target devices;
652  *   - \b clblasCompilerNotAvailable if a compiler is not available;
653  *   - \b clblasBuildProgramFailure if there is a failure to build a program
654  *     executable.
655  *
656  * @ingroup SSCAL
657  */
658 clblasStatus
659 clblasCsscal(
660     size_t N,
661     cl_float alpha,
662     cl_mem X,
663     size_t offx,
664     int incx,
665     cl_uint numCommandQueues,
666     cl_command_queue *commandQueues,
667     cl_uint numEventsInWaitList,
668     const cl_event *eventWaitList,
669     cl_event *events);
670 /**
671  * @example example_csscal.c
672  * Example of how to use the @ref clblasCsscal function.
673  */
674 
675 /**
676  * @brief Scales a complex-double vector by a double constant.
677  *
678  *   - \f$ X \leftarrow \alpha X \f$
679  *
680  * @param[in] N         Number of elements in vector \b X.
681  * @param[in] alpha     The constant factor for vector \b X.
682  * @param[in,out] X     Buffer object storing vector \b X.
683  * @param[in] offx      Offset of first element of vector \b X in buffer object.
684  *                      Counted in elements.
685  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
686  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
687  *                                task is to be performed.
688  * @param[in] commandQueues       OpenCL command queues.
689  * @param[in] numEventsInWaitList Number of events in the event wait list.
690  * @param[in] eventWaitList       Event wait list.
691  * @param[in] events     Event objects per each command queue that identify
692  *                       a particular kernel execution instance.
693  *
694  * @return
695  *   - \b clblasSuccess on success;
696  *   - \b clblasInvalidDevice if a target device does not support
697  *     floating point arithmetic with double precision;
698  *   - the same error codes as the clblasCsscal() function otherwise.
699  *
700  * @ingroup SSCAL
701  */
702 clblasStatus
703 clblasZdscal(
704     size_t N,
705     cl_double alpha,
706     cl_mem X,
707     size_t offx,
708     int incx,
709     cl_uint numCommandQueues,
710     cl_command_queue *commandQueues,
711     cl_uint numEventsInWaitList,
712     const cl_event *eventWaitList,
713     cl_event *events);
714 
715  /*@}*/
716 
717 
718 /**
719  * @defgroup COPY COPY  - Copies elements from vector X to vector Y
720  * @ingroup BLAS1
721  */
722 /*@{*/
723 
724 /**
725  * @brief Copies float elements from vector X to vector Y
726  *
727  *   - \f$ Y \leftarrow X \f$
728  *
729  * @param[in] N         Number of elements in vector \b X.
730  * @param[in] X         Buffer object storing vector \b X.
731  * @param[in] offx      Offset of first element of vector \b X in buffer object.
732  *                      Counted in elements.
733  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
734  * @param[out] Y        Buffer object storing the vector \b Y.
735  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
736  *                      Counted in elements.
737  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
738  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
739  *                                task is to be performed.
740  * @param[in] commandQueues       OpenCL command queues.
741  * @param[in] numEventsInWaitList Number of events in the event wait list.
742  * @param[in] eventWaitList       Event wait list.
743  * @param[in] events     Event objects per each command queue that identify
744  *                       a particular kernel execution instance.
745  *
746  * @return
747  *   - \b clblasSuccess on success;
748  *   - \b clblasNotInitialized if clblasSetup() was not called;
749  *   - \b clblasInvalidValue if invalid parameters are passed:
750  *     - \b N is zero, or
751  *     - either \b incx or \b incy is zero, or
752  *     - the vector sizes along with the increments lead to
753  *       accessing outside of any of the buffers;
754  *   - \b clblasInvalidMemObject if either \b X, or \b Y object is
755  *     Invalid, or an image object rather than the buffer one;
756  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
757  *     internal structures;
758  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
759  *   - \b clblasInvalidContext if a context a passed command queue belongs
760  *     to was released;
761  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
762  *     call has not completed for any of the target devices;
763  *   - \b clblasCompilerNotAvailable if a compiler is not available;
764  *   - \b clblasBuildProgramFailure if there is a failure to build a program
765  *     executable.
766  *
767  * @ingroup COPY
768  */
769 clblasStatus
770 clblasScopy(
771     size_t N,                       /* must be non-zero */
772     const cl_mem X,                 /* buffer object, not an image */
773     size_t offx,                    /* offset of X's first element, in elements */
774     int incx,                       /* increment for X; must not be zero */
775     cl_mem Y,                       /* output buffer object, not an image */
776     size_t offy,                    /* offset of Y's first element, in elements */
777     int incy,                       /* increment for Y; must not be zero */
778     cl_uint numCommandQueues,       /* number of OpenCL command queues to use */
779     cl_command_queue *commandQueues,
780     cl_uint numEventsInWaitList,    /* number of events in eventWaitList */
781     const cl_event *eventWaitList,
782     cl_event *events);
783 
784 /**
785  * @example example_scopy.c
786  * Example of how to use the @ref clblasScopy function.
787  */
788 
789  /**
790  * @brief Copies double-precision elements from vector X to vector Y.
791  *
792  *   - \f$ Y \leftarrow X \f$
793  *
794  * @param[in] N         Number of elements in vector \b X.
795  * @param[in] X         Buffer object storing the source vector \b X.
796  * @param[in] offx      Offset of first element of vector \b X in buffer object.
797  *                      Counted in elements.
798  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
799  * @param[out] Y        Buffer object storing the destination vector \b Y.
800  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
801  *                      Counted in elements.
802  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
803  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
804  *                                task is to be performed.
805  * @param[in] commandQueues       OpenCL command queues.
806  * @param[in] numEventsInWaitList Number of events in the event wait list.
807  * @param[in] eventWaitList       Event wait list.
808  * @param[in] events     Event objects, one per command queue, that identify
809  *                       a particular kernel execution instance.
810  *
811  * @return
812  *   - \b clblasSuccess on success;
813  *   - \b clblasInvalidDevice if a target device does not support
814  *     double-precision floating-point arithmetic;
815  *   - the same error codes as the clblasScopy() function otherwise.
816  *
817  * @ingroup COPY
818  */
819 clblasStatus
820 clblasDcopy(
821     size_t N,
822     const cl_mem X,
823     size_t offx,
824     int incx,
825     cl_mem Y,
826     size_t offy,
827     int incy,
828     cl_uint numCommandQueues,
829     cl_command_queue *commandQueues,
830     cl_uint numEventsInWaitList,
831     const cl_event *eventWaitList,
832     cl_event *events);
833 
834 /**
835  * @brief Copies complex-float elements from vector X to vector Y.
836  *
837  *   - \f$ Y \leftarrow X \f$
838  *
839  * @param[in] N         Number of elements in vector \b X.
840  * @param[in] X         Buffer object storing the source vector \b X.
841  * @param[in] offx      Offset of first element of vector \b X in buffer object.
842  *                      Counted in elements.
843  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
844  * @param[out] Y        Buffer object storing the destination vector \b Y.
845  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
846  *                      Counted in elements.
847  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
848  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
849  *                                task is to be performed.
850  * @param[in] commandQueues       OpenCL command queues.
851  * @param[in] numEventsInWaitList Number of events in the event wait list.
852  * @param[in] eventWaitList       Event wait list.
853  * @param[in] events     Event objects, one per command queue, that identify
854  *                       a particular kernel execution instance.
855  *
856  * @return
857  *   - \b clblasSuccess on success;
858  *   - the same error codes as the clblasScopy() function otherwise.
859  *
860  * @ingroup COPY
861  */
862 clblasStatus
863 clblasCcopy(
864     size_t N,
865     const cl_mem X,
866     size_t offx,
867     int incx,
868     cl_mem Y,
869     size_t offy,
870     int incy,
871     cl_uint numCommandQueues,
872     cl_command_queue *commandQueues,
873     cl_uint numEventsInWaitList,
874     const cl_event *eventWaitList,
875     cl_event *events);
876 
877 /**
878  * @brief Copies complex-double elements from vector X to vector Y.
879  *
880  *   - \f$ Y \leftarrow X \f$
881  *
882  * @param[in] N         Number of elements in vector \b X.
883  * @param[in] X         Buffer object storing the source vector \b X.
884  * @param[in] offx      Offset of first element of vector \b X in buffer object.
885  *                      Counted in elements.
886  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
887  * @param[out] Y        Buffer object storing the destination vector \b Y.
888  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
889  *                      Counted in elements.
890  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
891  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
892  *                                task is to be performed.
893  * @param[in] commandQueues       OpenCL command queues.
894  * @param[in] numEventsInWaitList Number of events in the event wait list.
895  * @param[in] eventWaitList       Event wait list.
896  * @param[in] events     Event objects, one per command queue, that identify
897  *                       a particular kernel execution instance.
898  *
899  * @return
900  *   - \b clblasSuccess on success;
901  *   - the same error codes as the clblasDcopy() function otherwise.
902  *
903  * @ingroup COPY
904  */
905 clblasStatus
906 clblasZcopy(
907     size_t N,
908     const cl_mem X,
909     size_t offx,
910     int incx,
911     cl_mem Y,
912     size_t offy,
913     int incy,
914     cl_uint numCommandQueues,
915     cl_command_queue *commandQueues,
916     cl_uint numEventsInWaitList,
917     const cl_event *eventWaitList,
918     cl_event *events);
919 
920  /*@}*/
921 
922 /**
923  * @defgroup AXPY AXPY  - Scale X and add to Y
924  * @ingroup BLAS1
925  */
926 /*@{*/
927 
928 /**
929  * @brief Scales vector X of float elements and adds it to Y.
930  *
931  *   - \f$ Y \leftarrow \alpha X + Y \f$
932  *
933  * @param[in] N         Number of elements in vector \b X.
934  * @param[in] alpha     The constant factor for vector \b X.
935  * @param[in] X         Buffer object storing vector \b X.
936  * @param[in] offx      Offset of first element of vector \b X in buffer object.
937  *                      Counted in elements.
938  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
939  * @param[in,out] Y     Buffer object storing vector \b Y; updated in place.
940  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
941  *                      Counted in elements.
942  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
943  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
944  *                                task is to be performed.
945  * @param[in] commandQueues       OpenCL command queues.
946  * @param[in] numEventsInWaitList Number of events in the event wait list.
947  * @param[in] eventWaitList       Event wait list.
948  * @param[in] events     Event objects, one per command queue, that identify
949  *                       a particular kernel execution instance.
950  *
951  * @return
952  *   - \b clblasSuccess on success;
953  *   - \b clblasNotInitialized if clblasSetup() was not called;
954  *   - \b clblasInvalidValue if invalid parameters are passed:
955  *     - \b N is zero, or
956  *     - either \b incx or \b incy is zero, or
957  *     - the vector sizes along with the increments lead to
958  *       accessing outside of any of the buffers;
959  *   - \b clblasInvalidMemObject if either the \b X or the \b Y object is
960  *     invalid, or is an image object rather than a buffer object;
961  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
962  *     internal structures;
963  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
964  *   - \b clblasInvalidContext if the context that a passed command queue
965  *     belongs to was released;
966  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
967  *     call has not completed for any of the target devices;
968  *   - \b clblasCompilerNotAvailable if a compiler is not available;
969  *   - \b clblasBuildProgramFailure if there is a failure to build a program
970  *     executable.
971  *
972  * @ingroup AXPY
973  */
974 clblasStatus
975 clblasSaxpy(
976     size_t N,
977     cl_float alpha,
978     const cl_mem X,
979     size_t offx,
980     int incx,
981     cl_mem Y,
982     size_t offy,
983     int incy,
984     cl_uint numCommandQueues,
985     cl_command_queue *commandQueues,
986     cl_uint numEventsInWaitList,
987     const cl_event *eventWaitList,
988     cl_event *events);
989 
990 /**
991  * @example example_saxpy.c
992  * Example of how to use the @ref clblasSaxpy function.
993  */
994 
995 /**
996  * @brief Scales vector X of double elements and adds it to Y.
997  *
998  *   - \f$ Y \leftarrow \alpha X + Y \f$
999  *
1000  * @param[in] N         Number of elements in vector \b X.
1001  * @param[in] alpha     The constant factor for vector \b X.
1002  * @param[in] X         Buffer object storing vector \b X.
1003  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1004  *                      Counted in elements.
1005  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1006  * @param[in,out] Y     Buffer object storing vector \b Y; updated in place.
1007  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1008  *                      Counted in elements.
1009  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1010  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1011  *                                task is to be performed.
1012  * @param[in] commandQueues       OpenCL command queues.
1013  * @param[in] numEventsInWaitList Number of events in the event wait list.
1014  * @param[in] eventWaitList       Event wait list.
1015  * @param[in] events     Event objects, one per command queue, that identify
1016  *                       a particular kernel execution instance.
1017  *
1018  * @return
1019  *   - \b clblasSuccess on success;
1020  *   - \b clblasInvalidDevice if a target device does not support
1021  *     double-precision floating-point arithmetic;
1022  *   - the same error codes as the clblasSaxpy() function otherwise.
1023  *
1024  * @ingroup AXPY
1025  */
1026 clblasStatus
1027 clblasDaxpy(
1028     size_t N,
1029     cl_double alpha,
1030     const cl_mem X,
1031     size_t offx,
1032     int incx,
1033     cl_mem Y,
1034     size_t offy,
1035     int incy,
1036     cl_uint numCommandQueues,
1037     cl_command_queue *commandQueues,
1038     cl_uint numEventsInWaitList,
1039     const cl_event *eventWaitList,
1040     cl_event *events);
1041 
1042 /**
1043  * @brief Scales vector X of complex-float elements and adds it to Y.
1044  *
1045  *   - \f$ Y \leftarrow \alpha X + Y \f$
1046  *
1047  * @param[in] N         Number of elements in vector \b X.
1048  * @param[in] alpha     The constant factor for vector \b X.
1049  * @param[in] X         Buffer object storing vector \b X.
1050  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1051  *                      Counted in elements.
1052  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1053  * @param[in,out] Y     Buffer object storing vector \b Y; updated in place.
1054  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1055  *                      Counted in elements.
1056  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1057  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1058  *                                task is to be performed.
1059  * @param[in] commandQueues       OpenCL command queues.
1060  * @param[in] numEventsInWaitList Number of events in the event wait list.
1061  * @param[in] eventWaitList       Event wait list.
1062  * @param[in] events     Event objects, one per command queue, that identify
1063  *                       a particular kernel execution instance.
1064  *
1065  * @return
1066  *   - \b clblasSuccess on success;
1067  *   - the same error codes as the clblasSaxpy() function otherwise.
1068  *
1069  * @ingroup AXPY
1070  */
1071 clblasStatus
1072 clblasCaxpy(
1073     size_t N,
1074     cl_float2 alpha,
1075     const cl_mem X,
1076     size_t offx,
1077     int incx,
1078     cl_mem Y,
1079     size_t offy,
1080     int incy,
1081     cl_uint numCommandQueues,
1082     cl_command_queue *commandQueues,
1083     cl_uint numEventsInWaitList,
1084     const cl_event *eventWaitList,
1085     cl_event *events);
1086 
1087 /**
1088  * @brief Scales vector X of double-complex elements and adds it to Y.
1089  *
1090  *   - \f$ Y \leftarrow \alpha X + Y \f$
1091  *
1092  * @param[in] N         Number of elements in vector \b X.
1093  * @param[in] alpha     The constant factor for vector \b X.
1094  * @param[in] X         Buffer object storing vector \b X.
1095  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1096  *                      Counted in elements.
1097  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1098  * @param[in,out] Y     Buffer object storing vector \b Y; updated in place.
1099  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1100  *                      Counted in elements.
1101  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1102  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1103  *                                task is to be performed.
1104  * @param[in] commandQueues       OpenCL command queues.
1105  * @param[in] numEventsInWaitList Number of events in the event wait list.
1106  * @param[in] eventWaitList       Event wait list.
1107  * @param[in] events     Event objects, one per command queue, that identify
1108  *                       a particular kernel execution instance.
1109  *
1110  * @return
1111  *   - \b clblasSuccess on success;
1112  *   - the same error codes as the clblasDaxpy() function otherwise.
1113  *
1114  * @ingroup AXPY
1115  */
1116 clblasStatus
1117 clblasZaxpy(
1118     size_t N,
1119     cl_double2 alpha,
1120     const cl_mem X,
1121     size_t offx,
1122     int incx,
1123     cl_mem Y,
1124     size_t offy,
1125     int incy,
1126     cl_uint numCommandQueues,
1127     cl_command_queue *commandQueues,
1128     cl_uint numEventsInWaitList,
1129     const cl_event *eventWaitList,
1130     cl_event *events);
1131 
1132 /*@}*/
1133 
1134 
1135 /**
1136  * @defgroup DOT DOT  - Dot product of two vectors
1137  * @ingroup BLAS1
1138  */
1139 /*@{*/
1140 
1141 /**
1142  * @brief Dot product of two vectors containing float elements.
1143  *
1144  * @param[in] N             Number of elements in vector \b X.
1145  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1146  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1147  *                          Counted in elements.
1148  * @param[in] X             Buffer object storing vector \b X.
1149  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1150  *                          Counted in elements.
1151  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1152  * @param[in] Y             Buffer object storing the vector \b Y.
1153  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1154  *                          Counted in elements.
1155  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1156  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1157  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1158  *                                task is to be performed.
1159  * @param[in] commandQueues       OpenCL command queues.
1160  * @param[in] numEventsInWaitList Number of events in the event wait list.
1161  * @param[in] eventWaitList       Event wait list.
1162  * @param[in] events     Event objects, one per command queue, that identify
1163  *                       a particular kernel execution instance.
1164  *
1165  * @return
1166  *   - \b clblasSuccess on success;
1167  *   - \b clblasNotInitialized if clblasSetup() was not called;
1168  *   - \b clblasInvalidValue if invalid parameters are passed:
1169  *     - \b N is zero, or
1170  *     - either \b incx or \b incy is zero, or
1171  *     - the vector sizes along with the increments lead to
1172  *       accessing outside of any of the buffers;
1173  *   - \b clblasInvalidMemObject if any of the \b X, \b Y or \b dotProduct objects
1174  *     is invalid, or is an image object rather than a buffer object;
1175  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
1176  *     internal structures;
1177  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
1178  *   - \b clblasInvalidContext if the context that a passed command queue
1179  *     belongs to was released;
1180  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
1181  *     call has not completed for any of the target devices;
1182  *   - \b clblasCompilerNotAvailable if a compiler is not available;
1183  *   - \b clblasBuildProgramFailure if there is a failure to build a program
1184  *     executable.
1185  *
1186  * @ingroup DOT
1187  */
1188 clblasStatus
1189 clblasSdot(
1190     size_t N,
1191     cl_mem dotProduct,
1192     size_t offDP,
1193     const cl_mem X,
1194     size_t offx,
1195     int incx,
1196     const cl_mem Y,
1197     size_t offy,
1198     int incy,
1199     cl_mem scratchBuff,
1200     cl_uint numCommandQueues,
1201     cl_command_queue *commandQueues,
1202     cl_uint numEventsInWaitList,
1203     const cl_event *eventWaitList,
1204     cl_event *events);
1205 
1206 /**
1207  * @example example_sdot.c
1208  * Example of how to use the @ref clblasSdot function.
1209  */
1210 
1211 /**
1212  * @brief Dot product of two vectors containing double elements.
1213  *
1214  * @param[in] N             Number of elements in vector \b X.
1215  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1216  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1217  *                          Counted in elements.
1218  * @param[in] X             Buffer object storing vector \b X.
1219  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1220  *                          Counted in elements.
1221  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1222  * @param[in] Y             Buffer object storing the vector \b Y.
1223  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1224  *                          Counted in elements.
1225  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1226  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1227  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1228  *                                task is to be performed.
1229  * @param[in] commandQueues       OpenCL command queues.
1230  * @param[in] numEventsInWaitList Number of events in the event wait list.
1231  * @param[in] eventWaitList       Event wait list.
1232  * @param[in] events     Event objects, one per command queue, that identify
1233  *                       a particular kernel execution instance.
1234  *
1235  * @return
1236  *   - \b clblasSuccess on success;
1237  *   - \b clblasInvalidDevice if a target device does not support
1238  *     double-precision floating-point arithmetic;
1239  *   - the same error codes as the clblasSdot() function otherwise.
1240  *
1241  * @ingroup DOT
1242  */
1243 clblasStatus
1244 clblasDdot(
1245     size_t N,
1246     cl_mem dotProduct,
1247     size_t offDP,
1248     const cl_mem X,
1249     size_t offx,
1250     int incx,
1251     const cl_mem Y,
1252     size_t offy,
1253     int incy,
1254     cl_mem scratchBuff,
1255     cl_uint numCommandQueues,
1256     cl_command_queue *commandQueues,
1257     cl_uint numEventsInWaitList,
1258     const cl_event *eventWaitList,
1259     cl_event *events);
1260 
1261 
1262 /**
1263  * @brief Dot product of two vectors containing float-complex elements.
1264  *
1265  * @param[in] N             Number of elements in vector \b X.
1266  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1267  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1268  *                          Counted in elements.
1269  * @param[in] X             Buffer object storing vector \b X.
1270  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1271  *                          Counted in elements.
1272  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1273  * @param[in] Y             Buffer object storing the vector \b Y.
1274  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1275  *                          Counted in elements.
1276  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1277  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1278  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1279  *                                task is to be performed.
1280  * @param[in] commandQueues       OpenCL command queues.
1281  * @param[in] numEventsInWaitList Number of events in the event wait list.
1282  * @param[in] eventWaitList       Event wait list.
1283  * @param[in] events     Event objects, one per command queue, that identify
1284  *                       a particular kernel execution instance.
1285  *
1286  * @return
1287  *   - \b clblasSuccess on success;
1288  *   - the same error codes as the clblasSdot() function otherwise.
1289  *
1290  * @ingroup DOT
1291  */
1292 
1293 clblasStatus
1294 clblasCdotu(
1295     size_t N,
1296     cl_mem dotProduct,
1297     size_t offDP,
1298     const cl_mem X,
1299     size_t offx,
1300     int incx,
1301     const cl_mem Y,
1302     size_t offy,
1303     int incy,
1304     cl_mem scratchBuff,
1305     cl_uint numCommandQueues,
1306     cl_command_queue *commandQueues,
1307     cl_uint numEventsInWaitList,
1308     const cl_event *eventWaitList,
1309     cl_event *events);
1310 
1311 
1312 /**
1313  * @brief Dot product of two vectors containing double-complex elements.
1314  *
1315  * @param[in] N             Number of elements in vector \b X.
1316  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1317  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1318  *                          Counted in elements.
1319  * @param[in] X             Buffer object storing vector \b X.
1320  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1321  *                          Counted in elements.
1322  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1323  * @param[in] Y             Buffer object storing the vector \b Y.
1324  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1325  *                          Counted in elements.
1326  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1327  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1328  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1329  *                                task is to be performed.
1330  * @param[in] commandQueues       OpenCL command queues.
1331  * @param[in] numEventsInWaitList Number of events in the event wait list.
1332  * @param[in] eventWaitList       Event wait list.
1333  * @param[in] events     Event objects, one per command queue, that identify
1334  *                       a particular kernel execution instance.
1335  *
1336  * @return
1337  *   - \b clblasSuccess on success;
1338  *   - \b clblasInvalidDevice if a target device does not support
1339  *     double-precision floating-point arithmetic;
1340  *   - the same error codes as the clblasSdot() function otherwise.
1341  *
1342  * @ingroup DOT
1343  */
1344 
1345 clblasStatus
1346 clblasZdotu(
1347     size_t N,
1348     cl_mem dotProduct,
1349     size_t offDP,
1350     const cl_mem X,
1351     size_t offx,
1352     int incx,
1353     const cl_mem Y,
1354     size_t offy,
1355     int incy,
1356     cl_mem scratchBuff,
1357     cl_uint numCommandQueues,
1358     cl_command_queue *commandQueues,
1359     cl_uint numEventsInWaitList,
1360     const cl_event *eventWaitList,
1361     cl_event *events);
1362 
1363 
1364 /**
1365  * @brief Dot product of two vectors containing float-complex elements, conjugating the first vector.
1366  *
1367  * @param[in] N             Number of elements in vector \b X.
1368  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1369  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1370  *                          Counted in elements.
1371  * @param[in] X             Buffer object storing vector \b X.
1372  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1373  *                          Counted in elements.
1374  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1375  * @param[in] Y             Buffer object storing the vector \b Y.
1376  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1377  *                          Counted in elements.
1378  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1379  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1380  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1381  *                                task is to be performed.
1382  * @param[in] commandQueues       OpenCL command queues.
1383  * @param[in] numEventsInWaitList Number of events in the event wait list.
1384  * @param[in] eventWaitList       Event wait list.
1385  * @param[in] events     Event objects, one per command queue, that identify
1386  *                       a particular kernel execution instance.
1387  *
1388  * @return
1389  *   - \b clblasSuccess on success;
1390  *   - the same error codes as the clblasSdot() function otherwise.
1391  *
1392  * @ingroup DOT
1393  */
1394 
1395 clblasStatus
1396 clblasCdotc(
1397     size_t N,
1398     cl_mem dotProduct,
1399     size_t offDP,
1400     const cl_mem X,
1401     size_t offx,
1402     int incx,
1403     const cl_mem Y,
1404     size_t offy,
1405     int incy,
1406     cl_mem scratchBuff,
1407     cl_uint numCommandQueues,
1408     cl_command_queue *commandQueues,
1409     cl_uint numEventsInWaitList,
1410     const cl_event *eventWaitList,
1411     cl_event *events);
1412 
1413 
1414 /**
1415  * @brief Dot product of two vectors containing double-complex elements, conjugating the first vector.
1416  *
1417  * @param[in] N             Number of elements in vector \b X.
1418  * @param[out] dotProduct   Buffer object that will contain the dot-product value.
1419  * @param[in] offDP         Offset to dot-product in \b dotProduct buffer object.
1420  *                          Counted in elements.
1421  * @param[in] X             Buffer object storing vector \b X.
1422  * @param[in] offx          Offset of first element of vector \b X in buffer object.
1423  *                          Counted in elements.
1424  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
1425  * @param[in] Y             Buffer object storing the vector \b Y.
1426  * @param[in] offy          Offset of first element of vector \b Y in buffer object.
1427  *                          Counted in elements.
1428  * @param[in] incy          Increment for the elements of \b Y. Must not be zero.
1429  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
1430  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1431  *                                task is to be performed.
1432  * @param[in] commandQueues       OpenCL command queues.
1433  * @param[in] numEventsInWaitList Number of events in the event wait list.
1434  * @param[in] eventWaitList       Event wait list.
1435  * @param[in] events     Event objects, one per command queue, that identify
1436  *                       a particular kernel execution instance.
1437  *
1438  * @return
1439  *   - \b clblasSuccess on success;
1440  *   - \b clblasInvalidDevice if a target device does not support
1441  *     double-precision floating-point arithmetic;
1442  *   - the same error codes as the clblasSdot() function otherwise.
1443  *
1444  * @ingroup DOT
1445  */
1446 
1447 clblasStatus
1448 clblasZdotc(
1449     size_t N,
1450     cl_mem dotProduct,
1451     size_t offDP,
1452     const cl_mem X,
1453     size_t offx,
1454     int incx,
1455     const cl_mem Y,
1456     size_t offy,
1457     int incy,
1458     cl_mem scratchBuff,
1459     cl_uint numCommandQueues,
1460     cl_command_queue *commandQueues,
1461     cl_uint numEventsInWaitList,
1462     const cl_event *eventWaitList,
1463     cl_event *events);
1464 
1465 /*@}*/
1466 
1467 
1468 /**
1469  * @defgroup ROTG ROTG  - Constructs Givens plane rotation
1470  * @ingroup BLAS1
1471  */
1472 /*@{*/
1473 
1474 /**
1475  * @brief Constructs a Givens plane rotation on float elements.
1476  *
1477  * @param[out] SA           Buffer object that contains SA.
1478  * @param[in] offSA         Offset to SA in \b SA buffer object.
1479  *                          Counted in elements.
1480  * @param[out] SB           Buffer object that contains SB.
1481  * @param[in] offSB         Offset to SB in \b SB buffer object.
1482  *                          Counted in elements.
1483  * @param[out] C            Buffer object that contains C.
1484  * @param[in] offC          Offset to C in \b C buffer object.
1485  *                          Counted in elements.
1486  * @param[out] S            Buffer object that contains S.
1487  * @param[in] offS          Offset to S in \b S buffer object.
1488  *                          Counted in elements.
1489  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1490  *                                task is to be performed.
1491  * @param[in] commandQueues       OpenCL command queues.
1492  * @param[in] numEventsInWaitList Number of events in the event wait list.
1493  * @param[in] eventWaitList       Event wait list.
1494  * @param[in] events     Event objects, one per command queue, that identify
1495  *                       a particular kernel execution instance.
1496  *
1497  * @return
1498  *   - \b clblasSuccess on success;
1499  *   - \b clblasNotInitialized if clblasSetup() was not called;
1500  *   - \b clblasInvalidMemObject if any of the \b SA, \b SB, \b C or \b S objects
1501  *     is invalid, or is an image object rather than a buffer object;
1502  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
1503  *     internal structures;
1504  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
1505  *   - \b clblasInvalidContext if the context that a passed command queue
1506  *     belongs to was released;
1507  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
1508  *     call has not completed for any of the target devices;
1509  *   - \b clblasCompilerNotAvailable if a compiler is not available;
1510  *   - \b clblasBuildProgramFailure if there is a failure to build a program
1511  *     executable.
1512  *
1513  * @ingroup ROTG
1514  */
1515 clblasStatus
1516 clblasSrotg(
1517     cl_mem SA,
1518     size_t offSA,
1519     cl_mem SB,
1520     size_t offSB,
1521     cl_mem C,
1522     size_t offC,
1523     cl_mem S,
1524     size_t offS,
1525     cl_uint numCommandQueues,
1526     cl_command_queue *commandQueues,
1527     cl_uint numEventsInWaitList,
1528     const cl_event *eventWaitList,
1529     cl_event *events);
1530 
1531 /**
1532  * @example example_srotg.c
1533  * Example of how to use the @ref clblasSrotg function.
1534  */
1535 
1536 /**
1537  * @brief construct givens plane rotation on double elements
1538  *
1539  * @param[out] DA           Buffer object that contains DA
1540  * @param[in] offDA         Offset to DA in \b DA buffer object.
1541  *                          Counted in elements.
1542  * @param[out] DB           Buffer object that contains DB
1543  * @param[in] offDB         Offset to DB in \b DB buffer object.
1544  *                          Counted in elements.
1545  * @param[out] C            Buffer object that contains C
1546  * @param[in] offC          Offset to C in \b C buffer object.
1547  *                          Counted in elements.
1548  * @param[out] S            Buffer object that contains S
1549  * @param[in] offS          Offset to S in \b S buffer object.
1550  *                          Counted in elements.
1551  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1552  *                                task is to be performed.
1553  * @param[in] commandQueues       OpenCL command queues.
1554  * @param[in] numEventsInWaitList Number of events in the event wait list.
1555  * @param[in] eventWaitList       Event wait list.
1556  * @param[in] events     Event objects per each command queue that identify
1557  *                       a particular kernel execution instance.
1558  *
1559  * @return
1560  *   - \b clblasSuccess on success;
1561  *   - \b clblasInvalidDevice if a target device does not support the
1562  *     floating point arithmetic with double precision;
1563  *   - the same error codes as the clblasSrotg() function otherwise.
1564  *
1565  * @ingroup ROTG
1566  */
1567 clblasStatus
1568 clblasDrotg(
1569     cl_mem DA,
1570     size_t offDA,
1571     cl_mem DB,
1572     size_t offDB,
1573     cl_mem C,
1574     size_t offC,
1575     cl_mem S,
1576     size_t offS,
1577     cl_uint numCommandQueues,
1578     cl_command_queue *commandQueues,
1579     cl_uint numEventsInWaitList,
1580     const cl_event *eventWaitList,
1581     cl_event *events);
1582 
1583 /**
1584  * @brief construct givens plane rotation on float-complex elements
1585  *
1586  * @param[out] CA           Buffer object that contains CA
1587  * @param[in] offCA         Offset to CA in \b CA buffer object.
1588  *                          Counted in elements.
1589  * @param[out] CB           Buffer object that contains CB
1590  * @param[in] offCB         Offset to CB in \b CB buffer object.
1591  *                          Counted in elements.
1592  * @param[out] C            Buffer object that contains C. C is real.
1593  * @param[in] offC          Offset to C in \b C buffer object.
1594  *                          Counted in elements.
1595  * @param[out] S            Buffer object that contains S
1596  * @param[in] offS          Offset to S in \b S buffer object.
1597  *                          Counted in elements.
1598  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1599  *                                task is to be performed.
1600  * @param[in] commandQueues       OpenCL command queues.
1601  * @param[in] numEventsInWaitList Number of events in the event wait list.
1602  * @param[in] eventWaitList       Event wait list.
1603  * @param[in] events     Event objects per each command queue that identify
1604  *                       a particular kernel execution instance.
1605  *
1606  * @return
1607  *   - \b clblasSuccess on success;
1608  *   - the same error codes as the clblasSrotg() function otherwise.
1609  *
1610  * @ingroup ROTG
1611  */
1612 clblasStatus
1613 clblasCrotg(
1614     cl_mem CA,
1615     size_t offCA,
1616     cl_mem CB,
1617     size_t offCB,
1618     cl_mem C,
1619     size_t offC,
1620     cl_mem S,
1621     size_t offS,
1622     cl_uint numCommandQueues,
1623     cl_command_queue *commandQueues,
1624     cl_uint numEventsInWaitList,
1625     const cl_event *eventWaitList,
1626     cl_event *events);
1627 
1628 /**
1629  * @brief construct givens plane rotation on double-complex elements
1630  *
1631  * @param[out] CA           Buffer object that contains CA
1632  * @param[in] offCA         Offset to CA in \b CA buffer object.
1633  *                          Counted in elements.
1634  * @param[out] CB           Buffer object that contains CB
1635  * @param[in] offCB         Offset to CB in \b CB buffer object.
1636  *                          Counted in elements.
1637  * @param[out] C            Buffer object that contains C. C is real.
1638  * @param[in] offC          Offset to C in \b C buffer object.
1639  *                          Counted in elements.
1640  * @param[out] S            Buffer object that contains S
1641  * @param[in] offS          Offset to S in \b S buffer object.
1642  *                          Counted in elements.
1643  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1644  *                                task is to be performed.
1645  * @param[in] commandQueues       OpenCL command queues.
1646  * @param[in] numEventsInWaitList Number of events in the event wait list.
1647  * @param[in] eventWaitList       Event wait list.
1648  * @param[in] events     Event objects per each command queue that identify
1649  *                       a particular kernel execution instance.
1650  *
1651  * @return
1652  *   - \b clblasSuccess on success;
1653  *   - the same error codes as the clblasDrotg() function otherwise.
1654  *
1655  * @ingroup ROTG
1656  */
1657 clblasStatus
1658 clblasZrotg(
1659     cl_mem CA,
1660     size_t offCA,
1661     cl_mem CB,
1662     size_t offCB,
1663     cl_mem C,
1664     size_t offC,
1665     cl_mem S,
1666     size_t offS,
1667     cl_uint numCommandQueues,
1668     cl_command_queue *commandQueues,
1669     cl_uint numEventsInWaitList,
1670     const cl_event *eventWaitList,
1671     cl_event *events);
1672 
1673 /*@}*/
1674 
1675 /**
 * @defgroup ROTMG ROTMG  - Constructs the modified Givens rotation
1677  * @ingroup BLAS1
1678  */
1679 /*@{*/
1680 
1681 /**
1682  * @brief construct the modified givens rotation on float elements
1683  *
1684  * @param[out] SD1          Buffer object that contains SD1
1685  * @param[in] offSD1        Offset to SD1 in \b SD1 buffer object.
1686  *                          Counted in elements.
1687  * @param[out] SD2          Buffer object that contains SD2
1688  * @param[in] offSD2        Offset to SD2 in \b SD2 buffer object.
1689  *                          Counted in elements.
1690  * @param[out] SX1          Buffer object that contains SX1
1691  * @param[in] offSX1        Offset to SX1 in \b SX1 buffer object.
1692  *                          Counted in elements.
1693  * @param[in] SY1           Buffer object that contains SY1
1694  * @param[in] offSY1        Offset to SY1 in \b SY1 buffer object.
1695  *                          Counted in elements.
1696  * @param[out] SPARAM       Buffer object that contains SPARAM array of minimum length 5
1697                             SPARAM(0) = SFLAG
1698                             SPARAM(1) = SH11
1699                             SPARAM(2) = SH21
1700                             SPARAM(3) = SH12
1701                             SPARAM(4) = SH22
1702 
1703  * @param[in] offSparam     Offset to SPARAM in \b SPARAM buffer object.
1704  *                          Counted in elements.
1705  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1706  *                                task is to be performed.
1707  * @param[in] commandQueues       OpenCL command queues.
1708  * @param[in] numEventsInWaitList Number of events in the event wait list.
1709  * @param[in] eventWaitList       Event wait list.
1710  * @param[in] events     Event objects per each command queue that identify
1711  *                       a particular kernel execution instance.
1712  *
1713  * @return
1714  *   - \b clblasSuccess on success;
1715  *   - \b clblasNotInitialized if clblasSetup() was not called;
1716  *   - \b clblasInvalidMemObject if either \b SX1, \b SY1, \b SD1, \b SD2 or \b SPARAM object is
1717  *     Invalid, or an image object rather than the buffer one;
1718  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
1719  *     internal structures;
1720  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
1721  *   - \b clblasInvalidContext if a context a passed command queue belongs
1722  *     to was released;
1723  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
1724  *     call has not completed for any of the target devices;
1725  *   - \b clblasCompilerNotAvailable if a compiler is not available;
1726  *   - \b clblasBuildProgramFailure if there is a failure to build a program
1727  *     executable.
1728  *
1729  * @ingroup ROTMG
1730  */
1731 clblasStatus
1732 clblasSrotmg(
1733     cl_mem SD1,
1734     size_t offSD1,
1735     cl_mem SD2,
1736     size_t offSD2,
1737     cl_mem SX1,
1738     size_t offSX1,
1739     const cl_mem SY1,
1740     size_t offSY1,
1741     cl_mem SPARAM,
1742     size_t offSparam,
1743     cl_uint numCommandQueues,
1744     cl_command_queue *commandQueues,
1745     cl_uint numEventsInWaitList,
1746     const cl_event *eventWaitList,
1747     cl_event *events);
1748 
1749 /**
1750  * @example example_srotmg.c
1751  * Example of how to use the @ref clblasSrotmg function.
1752  */
1753 
1754 /**
1755  * @brief construct the modified givens rotation on double elements
1756  *
1757  * @param[out] DD1          Buffer object that contains DD1
1758  * @param[in] offDD1        Offset to DD1 in \b DD1 buffer object.
1759  *                          Counted in elements.
1760  * @param[out] DD2          Buffer object that contains DD2
1761  * @param[in] offDD2        Offset to DD2 in \b DD2 buffer object.
1762  *                          Counted in elements.
1763  * @param[out] DX1          Buffer object that contains DX1
1764  * @param[in] offDX1        Offset to DX1 in \b DX1 buffer object.
1765  *                          Counted in elements.
1766  * @param[in] DY1           Buffer object that contains DY1
1767  * @param[in] offDY1        Offset to DY1 in \b DY1 buffer object.
1768  *                          Counted in elements.
1769  * @param[out] DPARAM       Buffer object that contains DPARAM array of minimum length 5
1770                             DPARAM(0) = DFLAG
1771                             DPARAM(1) = DH11
1772                             DPARAM(2) = DH21
1773                             DPARAM(3) = DH12
1774                             DPARAM(4) = DH22
1775 
1776  * @param[in] offDparam     Offset to DPARAM in \b DPARAM buffer object.
1777  *                          Counted in elements.
1778  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1779  *                                task is to be performed.
1780  * @param[in] commandQueues       OpenCL command queues.
1781  * @param[in] numEventsInWaitList Number of events in the event wait list.
1782  * @param[in] eventWaitList       Event wait list.
1783  * @param[in] events     Event objects per each command queue that identify
1784  *                       a particular kernel execution instance.
1785  *
1786  * @return
1787  *   - \b clblasSuccess on success;
1788  *   - \b clblasInvalidDevice if a target device does not support the
1789  *     floating point arithmetic with double precision;
1790  *   - the same error codes as the clblasSrotmg() function otherwise.
1791  *
1792  * @ingroup ROTMG
1793  */
1794 clblasStatus
1795 clblasDrotmg(
1796     cl_mem DD1,
1797     size_t offDD1,
1798     cl_mem DD2,
1799     size_t offDD2,
1800     cl_mem DX1,
1801     size_t offDX1,
1802     const cl_mem DY1,
1803     size_t offDY1,
1804     cl_mem DPARAM,
1805     size_t offDparam,
1806     cl_uint numCommandQueues,
1807     cl_command_queue *commandQueues,
1808     cl_uint numEventsInWaitList,
1809     const cl_event *eventWaitList,
1810     cl_event *events);
1811 
1812 /*@}*/
1813 
1814 
1815 /**
 * @defgroup ROT ROT  - Apply Givens rotation
1817  * @ingroup BLAS1
1818  */
1819 /*@{*/
1820 
1821 /**
1822  * @brief applies a plane rotation for float elements
1823  *
1824  * @param[in] N         Number of elements in vector \b X and \b Y.
1825  * @param[out] X        Buffer object storing vector \b X.
1826  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1827  *                      Counted in elements.
1828  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1829  * @param[out] Y        Buffer object storing the vector \b Y.
1830  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1831  *                      Counted in elements.
1832  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1833  * @param[in] C         C specifies the cosine, cos.
1834  * @param[in] S         S specifies the sine, sin.
1835  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1836  *                                task is to be performed.
1837  * @param[in] commandQueues       OpenCL command queues.
1838  * @param[in] numEventsInWaitList Number of events in the event wait list.
1839  * @param[in] eventWaitList       Event wait list.
1840  * @param[in] events     Event objects per each command queue that identify
1841  *                       a particular kernel execution instance.
1842  *
1843  * @return
1844  *   - \b clblasSuccess on success;
1845  *   - \b clblasNotInitialized if clblasSetup() was not called;
1846  *   - \b clblasInvalidValue if invalid parameters are passed:
1847  *     - \b N is zero, or
1848  *     - either \b incx or \b incy is zero, or
1849  *     - the vector sizes along with the increments lead to
1850  *       accessing outside of any of the buffers;
1851  *   - \b clblasInvalidMemObject if either \b X, or \b Y object is
1852  *     Invalid, or an image object rather than the buffer one;
1853  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
1854  *     internal structures;
1855  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
1856  *   - \b clblasInvalidContext if a context a passed command queue belongs
1857  *     to was released;
1858  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
1859  *     call has not completed for any of the target devices;
1860  *   - \b clblasCompilerNotAvailable if a compiler is not available;
1861  *   - \b clblasBuildProgramFailure if there is a failure to build a program
1862  *     executable.
1863  *
1864  * @ingroup ROT
1865  */
1866 clblasStatus
1867 clblasSrot(
1868     size_t N,
1869     cl_mem X,
1870     size_t offx,
1871     int incx,
1872     cl_mem Y,
1873     size_t offy,
1874     int incy,
1875     cl_float C,
1876     cl_float S,
1877     cl_uint numCommandQueues,
1878     cl_command_queue *commandQueues,
1879     cl_uint numEventsInWaitList,
1880     const cl_event *eventWaitList,
1881     cl_event *events);
1882 
1883 /**
1884  * @example example_srot.c
1885  * Example of how to use the @ref clblasSrot function.
1886  */
1887 
1888 /**
1889  * @brief applies a plane rotation for double elements
1890  *
1891  * @param[in] N         Number of elements in vector \b X and \b Y.
1892  * @param[out] X        Buffer object storing vector \b X.
1893  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1894  *                      Counted in elements.
1895  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1896  * @param[out] Y        Buffer object storing the vector \b Y.
1897  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1898  *                      Counted in elements.
1899  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1900  * @param[in] C         C specifies the cosine, cos.
1901  * @param[in] S         S specifies the sine, sin.
1902  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1903  *                                task is to be performed.
1904  * @param[in] commandQueues       OpenCL command queues.
1905  * @param[in] numEventsInWaitList Number of events in the event wait list.
1906  * @param[in] eventWaitList       Event wait list.
1907  * @param[in] events     Event objects per each command queue that identify
1908  *                       a particular kernel execution instance.
1909  *
1910  * @return
1911  *   - \b clblasSuccess on success;
1912  *   - \b clblasInvalidDevice if a target device does not support the
1913  *     floating point arithmetic with double precision;
1914  *   - the same error codes as the clblasSrot() function otherwise.
1915  *
1916  * @ingroup ROT
1917  */
1918 clblasStatus
1919 clblasDrot(
1920     size_t N,
1921     cl_mem X,
1922     size_t offx,
1923     int incx,
1924     cl_mem Y,
1925     size_t offy,
1926     int incy,
1927     cl_double C,
1928     cl_double S,
1929     cl_uint numCommandQueues,
1930     cl_command_queue *commandQueues,
1931     cl_uint numEventsInWaitList,
1932     const cl_event *eventWaitList,
1933     cl_event *events);
1934 
1935 /**
1936  * @brief applies a plane rotation for float-complex elements
1937  *
1938  * @param[in] N         Number of elements in vector \b X and \b Y.
1939  * @param[out] X        Buffer object storing vector \b X.
1940  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1941  *                      Counted in elements.
1942  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1943  * @param[out] Y        Buffer object storing the vector \b Y.
1944  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1945  *                      Counted in elements.
1946  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1947  * @param[in] C         C specifies the cosine, cos. This number is real
1948  * @param[in] S         S specifies the sine, sin. This number is real
1949  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1950  *                                task is to be performed.
1951  * @param[in] commandQueues       OpenCL command queues.
1952  * @param[in] numEventsInWaitList Number of events in the event wait list.
1953  * @param[in] eventWaitList       Event wait list.
1954  * @param[in] events     Event objects per each command queue that identify
1955  *                       a particular kernel execution instance.
1956  *
1957  * @return
1958  *   - \b clblasSuccess on success;
1959  *   - the same error codes as the clblasSrot() function otherwise.
1960  *
1961  * @ingroup ROT
1962  */
1963 clblasStatus
1964 clblasCsrot(
1965     size_t N,
1966     cl_mem X,
1967     size_t offx,
1968     int incx,
1969     cl_mem Y,
1970     size_t offy,
1971     int incy,
1972     cl_float C,
1973     cl_float S,
1974     cl_uint numCommandQueues,
1975     cl_command_queue *commandQueues,
1976     cl_uint numEventsInWaitList,
1977     const cl_event *eventWaitList,
1978     cl_event *events);
1979 
1980 /**
1981  * @brief applies a plane rotation for double-complex elements
1982  *
1983  * @param[in] N         Number of elements in vector \b X and \b Y.
1984  * @param[out] X        Buffer object storing vector \b X.
1985  * @param[in] offx      Offset of first element of vector \b X in buffer object.
1986  *                      Counted in elements.
1987  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
1988  * @param[out] Y        Buffer object storing the vector \b Y.
1989  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
1990  *                      Counted in elements.
1991  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
1992  * @param[in] C         C specifies the cosine, cos. This number is real
1993  * @param[in] S         S specifies the sine, sin. This number is real
1994  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
1995  *                                task is to be performed.
1996  * @param[in] commandQueues       OpenCL command queues.
1997  * @param[in] numEventsInWaitList Number of events in the event wait list.
1998  * @param[in] eventWaitList       Event wait list.
1999  * @param[in] events     Event objects per each command queue that identify
2000  *                       a particular kernel execution instance.
2001  *
2002  * @return
2003  *   - \b clblasSuccess on success;
2004  *   - \b clblasInvalidDevice if a target device does not support the
2005  *     floating point arithmetic with double precision;
2006  *   - the same error codes as the clblasSrot() function otherwise.
2007  *
2008  * @ingroup ROT
2009  */
2010 clblasStatus
2011 clblasZdrot(
2012     size_t N,
2013     cl_mem X,
2014     size_t offx,
2015     int incx,
2016     cl_mem Y,
2017     size_t offy,
2018     int incy,
2019     cl_double C,
2020     cl_double S,
2021     cl_uint numCommandQueues,
2022     cl_command_queue *commandQueues,
2023     cl_uint numEventsInWaitList,
2024     const cl_event *eventWaitList,
2025     cl_event *events);
2026 
2027 /*@}*/
2028 
2029 /**
 * @defgroup ROTM ROTM  - Apply modified Givens rotation for points in the plane
2031  * @ingroup BLAS1
2032  */
2033 /*@{*/
2034 
2035 /**
2036  * @brief modified givens rotation for float elements
2037  *
2038  * @param[in] N         Number of elements in vector \b X and \b Y.
2039  * @param[out] X        Buffer object storing vector \b X.
2040  * @param[in] offx      Offset of first element of vector \b X in buffer object.
2041  *                      Counted in elements.
2042  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
2043  * @param[out] Y        Buffer object storing the vector \b Y.
2044  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
2045  *                      Counted in elements.
2046  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
2047  * @param[in] SPARAM    Buffer object that contains SPARAM array of minimum length 5
2048  *                      SPARAM(1)=SFLAG
2049  *                      SPARAM(2)=SH11
2050  *                      SPARAM(3)=SH21
2051  *                      SPARAM(4)=SH12
2052  *                      SPARAM(5)=SH22
2053  * @param[in] offSparam Offset of first element of array \b SPARAM in buffer object.
2054  *                      Counted in elements.
2055  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2056  *                                task is to be performed.
2057  * @param[in] commandQueues       OpenCL command queues.
2058  * @param[in] numEventsInWaitList Number of events in the event wait list.
2059  * @param[in] eventWaitList       Event wait list.
2060  * @param[in] events     Event objects per each command queue that identify
2061  *                       a particular kernel execution instance.
2062  *
2063  * @return
2064  *   - \b clblasSuccess on success;
2065  *   - \b clblasNotInitialized if clblasSetup() was not called;
2066  *   - \b clblasInvalidValue if invalid parameters are passed:
2067  *     - \b N is zero, or
2068  *     - either \b incx or \b incy is zero, or
2069  *     - the vector sizes along with the increments lead to
2070  *       accessing outside of any of the buffers;
2071  *   - \b clblasInvalidMemObject if either \b X, \b Y or \b SPARAM object is
2072  *     Invalid, or an image object rather than the buffer one;
2073  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
2074  *     internal structures;
2075  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
2076  *   - \b clblasInvalidContext if a context a passed command queue belongs
2077  *     to was released;
2078  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
2079  *     call has not completed for any of the target devices;
2080  *   - \b clblasCompilerNotAvailable if a compiler is not available;
2081  *   - \b clblasBuildProgramFailure if there is a failure to build a program
2082  *     executable.
2083  *
2084  * @ingroup ROTM
2085  */
2086 clblasStatus
2087 clblasSrotm(
2088     size_t N,
2089     cl_mem X,
2090     size_t offx,
2091     int incx,
2092     cl_mem Y,
2093     size_t offy,
2094     int incy,
2095     const cl_mem SPARAM,
2096     size_t offSparam,
2097     cl_uint numCommandQueues,
2098     cl_command_queue *commandQueues,
2099     cl_uint numEventsInWaitList,
2100     const cl_event *eventWaitList,
2101     cl_event *events);
2102 
2103 /**
2104  * @example example_srotm.c
2105  * Example of how to use the @ref clblasSrotm function.
2106  */
2107 
2108 /**
2109  * @brief modified givens rotation for double elements
2110  *
2111  * @param[in] N         Number of elements in vector \b X and \b Y.
2112  * @param[out] X        Buffer object storing vector \b X.
2113  * @param[in] offx      Offset of first element of vector \b X in buffer object.
2114  *                      Counted in elements.
2115  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
2116  * @param[out] Y        Buffer object storing the vector \b Y.
2117  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
2118  *                      Counted in elements.
2119  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
2120  * @param[in] DPARAM    Buffer object that contains SPARAM array of minimum length 5
2121  *                      DPARAM(1)=DFLAG
2122  *                      DPARAM(2)=DH11
2123  *                      DPARAM(3)=DH21
2124  *                      DPARAM(4)=DH12
2125  *                      DPARAM(5)=DH22
2126  * @param[in] offDparam Offset of first element of array \b DPARAM in buffer object.
2127  *                      Counted in elements.
2128  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2129  *                                task is to be performed.
2130  * @param[in] commandQueues       OpenCL command queues.
2131  * @param[in] numEventsInWaitList Number of events in the event wait list.
2132  * @param[in] eventWaitList       Event wait list.
2133  * @param[in] events     Event objects per each command queue that identify
2134  *                       a particular kernel execution instance.
2135  *
2136 * @return
2137  *   - \b clblasSuccess on success;
2138  *   - \b clblasInvalidDevice if a target device does not support the
2139  *     floating point arithmetic with double precision;
2140  *   - the same error codes as the clblasSrotm() function otherwise.
2141  *
2142  * @ingroup ROTM
2143  */
2144 clblasStatus
2145 clblasDrotm(
2146     size_t N,
2147     cl_mem X,
2148     size_t offx,
2149     int incx,
2150     cl_mem Y,
2151     size_t offy,
2152     int incy,
2153     const cl_mem DPARAM,
2154     size_t offDparam,
2155     cl_uint numCommandQueues,
2156     cl_command_queue *commandQueues,
2157     cl_uint numEventsInWaitList,
2158     const cl_event *eventWaitList,
2159     cl_event *events);
2160 
2161 /*@}*/
2162 
2163 /**
2164  * @defgroup NRM2 NRM2  - Euclidean norm of a vector
2165  * @ingroup BLAS1
2166  */
2167 /*@{*/
2168 
2169 /**
2170  * @brief computes the euclidean norm of vector containing float elements
2171  *
2172  *  NRM2 = sqrt( X' * X )
2173  *
2174  * @param[in] N             Number of elements in vector \b X.
2175  * @param[out] NRM2         Buffer object that will contain the NRM2 value
2176  * @param[in] offNRM2       Offset to NRM2 value in \b NRM2 buffer object.
2177  *                          Counted in elements.
2178  * @param[in] X             Buffer object storing vector \b X.
2179  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2180  *                          Counted in elements.
2181  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2182  * @param[in] scratchBuff	Temporary cl_mem scratch buffer object that can hold minimum of (2*N) elements
2183  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2184  *                                task is to be performed.
2185  * @param[in] commandQueues       OpenCL command queues.
2186  * @param[in] numEventsInWaitList Number of events in the event wait list.
2187  * @param[in] eventWaitList       Event wait list.
2188  * @param[in] events     Event objects per each command queue that identify
2189  *                       a particular kernel execution instance.
2190  *
2191  * @return
2192  *   - \b clblasSuccess on success;
2193  *   - \b clblasNotInitialized if clblasSetup() was not called;
2194  *   - \b clblasInvalidValue if invalid parameters are passed:
2195  *     - \b N is zero, or
2196  *     - either \b incx is zero, or
2197  *     - the vector sizes along with the increments lead to
2198  *       accessing outside of any of the buffers;
2199  *   - \b clblasInvalidMemObject if any of \b X or \b NRM2 or \b scratchBuff object is
2200  *     Invalid, or an image object rather than the buffer one;
2201  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
2202  *     internal structures;
2203  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
2204  *   - \b clblasInvalidContext if a context a passed command queue belongs
2205  *     to was released;
2206  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
2207  *     call has not completed for any of the target devices;
2208  *   - \b clblasCompilerNotAvailable if a compiler is not available;
2209  *   - \b clblasBuildProgramFailure if there is a failure to build a program
2210  *     executable.
2211  *
2212  * @ingroup NRM2
2213  */
2214 clblasStatus
2215 clblasSnrm2(
2216     size_t N,
2217     cl_mem NRM2,
2218     size_t offNRM2,
2219     const cl_mem X,
2220     size_t offx,
2221     int incx,
2222     cl_mem scratchBuff,
2223     cl_uint numCommandQueues,
2224     cl_command_queue *commandQueues,
2225     cl_uint numEventsInWaitList,
2226     const cl_event *eventWaitList,
2227     cl_event *events);
2228 
2229 /**
2230  * @example example_snrm2.c
2231  * Example of how to use the @ref clblasSnrm2 function.
2232  */
2233 
2234 /**
2235  * @brief computes the euclidean norm of vector containing double elements
2236  *
2237  *  NRM2 = sqrt( X' * X )
2238  *
2239  * @param[in] N             Number of elements in vector \b X.
2240  * @param[out] NRM2         Buffer object that will contain the NRM2 value
2241  * @param[in] offNRM2       Offset to NRM2 value in \b NRM2 buffer object.
2242  *                          Counted in elements.
2243  * @param[in] X             Buffer object storing vector \b X.
2244  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2245  *                          Counted in elements.
2246  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2247  * @param[in] scratchBuff	Temporary cl_mem scratch buffer object that can hold minimum of (2*N) elements
2248  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2249  *                                task is to be performed.
2250  * @param[in] commandQueues       OpenCL command queues.
2251  * @param[in] numEventsInWaitList Number of events in the event wait list.
2252  * @param[in] eventWaitList       Event wait list.
2253  * @param[in] events     Event objects per each command queue that identify
2254  *                       a particular kernel execution instance.
2255  *
2256  * @return
2257  *   - \b clblasSuccess on success;
2258  *   - \b clblasInvalidDevice if a target device does not support the
2259  *     floating point arithmetic with double precision;
2260  *   - the same error codes as the clblasSnrm2() function otherwise.
2261  *
2262  * @ingroup NRM2
2263  */
2264 clblasStatus
2265 clblasDnrm2(
2266     size_t N,
2267     cl_mem NRM2,
2268     size_t offNRM2,
2269     const cl_mem X,
2270     size_t offx,
2271     int incx,
2272     cl_mem scratchBuff,
2273     cl_uint numCommandQueues,
2274     cl_command_queue *commandQueues,
2275     cl_uint numEventsInWaitList,
2276     const cl_event *eventWaitList,
2277     cl_event *events);
2278 
2279 /**
2280  * @brief Computes the Euclidean norm of a vector containing float-complex elements.
2281  *
2282  *  NRM2 = sqrt( X**H * X )
2283  *
2284  * @param[in] N             Number of elements in vector \b X.
2285  * @param[out] NRM2         Buffer object that will contain the NRM2 value.
2286  *                          Note that the result of Scnrm2 is a real value.
2287  * @param[in] offNRM2       Offset to NRM2 value in \b NRM2 buffer object.
2288  *                          Counted in elements.
2289  * @param[in] X             Buffer object storing vector \b X.
2290  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2291  *                          Counted in elements.
2292  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2293  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object that can hold a minimum of (2*N) elements.
2294  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2295  *                                task is to be performed.
2296  * @param[in] commandQueues       OpenCL command queues.
2297  * @param[in] numEventsInWaitList Number of events in the event wait list.
2298  * @param[in] eventWaitList       Event wait list.
2299  * @param[in] events     Event objects per each command queue that identify
2300  *                       a particular kernel execution instance.
2301  *
2302  * @return
2303  *   - \b clblasSuccess on success;
2304  *   - the same error codes as the clblasSnrm2() function otherwise.
2305  *
2306  * @ingroup NRM2
2307  */
2308 clblasStatus
2309 clblasScnrm2(
2310     size_t N,
2311     cl_mem NRM2,
2312     size_t offNRM2,
2313     const cl_mem X,
2314     size_t offx,
2315     int incx,
2316     cl_mem scratchBuff,
2317     cl_uint numCommandQueues,
2318     cl_command_queue *commandQueues,
2319     cl_uint numEventsInWaitList,
2320     const cl_event *eventWaitList,
2321     cl_event *events);
2322 
2323 /**
2324  * @brief Computes the Euclidean norm of a vector containing double-complex elements.
2325  *
2326  *  NRM2 = sqrt( X**H * X )
2327  *
2328  * @param[in] N             Number of elements in vector \b X.
2329  * @param[out] NRM2         Buffer object that will contain the NRM2 value.
2330  *                          Note that the result of Dznrm2 is a real value.
2331  * @param[in] offNRM2       Offset to NRM2 value in \b NRM2 buffer object.
2332  *                          Counted in elements.
2333  * @param[in] X             Buffer object storing vector \b X.
2334  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2335  *                          Counted in elements.
2336  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2337  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object that can hold a minimum of (2*N) elements.
2338  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2339  *                                task is to be performed.
2340  * @param[in] commandQueues       OpenCL command queues.
2341  * @param[in] numEventsInWaitList Number of events in the event wait list.
2342  * @param[in] eventWaitList       Event wait list.
2343  * @param[in] events     Event objects per each command queue that identify
2344  *                       a particular kernel execution instance.
2345  *
2346  * @return
2347  *   - \b clblasSuccess on success;
2348  *   - \b clblasInvalidDevice if a target device does not support
2349  *     floating point arithmetic with double precision;
2350  *   - the same error codes as the clblasSnrm2() function
2351  *     otherwise.
2352  *
2353  * @ingroup NRM2
2354  */
2355 clblasStatus
2356 clblasDznrm2(
2357     size_t N,
2358     cl_mem NRM2,
2359     size_t offNRM2,
2360     const cl_mem X,
2361     size_t offx,
2362     int incx,
2363     cl_mem scratchBuff,
2364     cl_uint numCommandQueues,
2365     cl_command_queue *commandQueues,
2366     cl_uint numEventsInWaitList,
2367     const cl_event *eventWaitList,
2368     cl_event *events);
2369 
2370 /*@}*/
2371 
2372 /**
2373  * @defgroup iAMAX iAMAX  - Index of max absolute value
2374  * @ingroup BLAS1
2375  */
2376 /*@{*/
2377 
2378 /**
2379  * @brief Index of the max absolute value in a float array.
2380  *
2381  * @param[in] N             Number of elements in vector \b X.
2382  * @param[out] iMax         Buffer object storing the index of first absolute max.
2383  *                          The index will be of type unsigned int.
2384  * @param[in] offiMax       Offset for storing the index in the buffer \b iMax.
2385  *                          Counted in elements.
2386  * @param[in] X             Buffer object storing vector \b X.
2387  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2388  *                          Counted in elements.
2389  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2390  * @param[in] scratchBuff   Temporary cl_mem object to store intermediate results.
2391  *                          It should be able to hold a minimum of (2*N) elements.
2392  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2393  *                                task is to be performed.
2394  * @param[in] commandQueues       OpenCL command queues.
2395  * @param[in] numEventsInWaitList Number of events in the event wait list.
2396  * @param[in] eventWaitList       Event wait list.
2397  * @param[in] events     Event objects per each command queue that identify
2398  *                       a particular kernel execution instance.
2399  *
2400  * @return
2401  *   - \b clblasSuccess on success;
2402  *   - \b clblasNotInitialized if clblasSetup() was not called;
2403  *   - \b clblasInvalidValue if invalid parameters are passed:
2404  *     - \b N is zero, or
2405  *     - \b incx is zero, or
2406  *     - the vector sizes along with the increments lead to
2407  *       accessing outside of any of the buffers;
2408  *   - \b clblasInvalidMemObject if any of the \b iMax, \b X or \b scratchBuff objects is
2409  *     invalid, or is an image object rather than a buffer one;
2410  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
2411  *     internal structures;
2412  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
2413  *   - \b clblasInvalidContext if the context that the passed command queue belongs
2414  *     to was released;
2415  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
2416  *     call has not completed for any of the target devices;
2417  *   - \b clblasCompilerNotAvailable if a compiler is not available;
2418  *   - \b clblasBuildProgramFailure if there is a failure to build a program
2419  *     executable.
2420  *
2421  * @ingroup iAMAX
2422  */
2423 clblasStatus
2424 clblasiSamax(
2425     size_t N,
2426     cl_mem iMax,
2427     size_t offiMax,
2428     const cl_mem X,
2429     size_t offx,
2430     int incx,
2431     cl_mem scratchBuff,
2432     cl_uint numCommandQueues,
2433     cl_command_queue *commandQueues,
2434     cl_uint numEventsInWaitList,
2435     const cl_event *eventWaitList,
2436     cl_event *events);
2437 /**
2438  * @example example_isamax.c
2439  * Example of how to use the @ref clblasiSamax function.
2440  */
2441 
2442 
2443 /**
2444  * @brief Index of the max absolute value in a double array.
2445  *
2446  * @param[in] N             Number of elements in vector \b X.
2447  * @param[out] iMax         Buffer object storing the index of first absolute max.
2448  *                          The index will be of type unsigned int.
2449  * @param[in] offiMax       Offset for storing the index in the buffer \b iMax.
2450  *                          Counted in elements.
2451  * @param[in] X             Buffer object storing vector \b X.
2452  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2453  *                          Counted in elements.
2454  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2455  * @param[in] scratchBuff   Temporary cl_mem object to store intermediate results.
2456  *                          It should be able to hold a minimum of (2*N) elements.
2457  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2458  *                                task is to be performed.
2459  * @param[in] commandQueues       OpenCL command queues.
2460  * @param[in] numEventsInWaitList Number of events in the event wait list.
2461  * @param[in] eventWaitList       Event wait list.
2462  * @param[in] events     Event objects per each command queue that identify
2463  *                       a particular kernel execution instance.
2464  *
2465  * @return
2466  *   - \b clblasSuccess on success;
2467  *   - \b clblasInvalidDevice if a target device does not support
2468  *     floating point arithmetic with double precision;
2469  *   - the same error codes as the clblasiSamax() function otherwise.
2470  *
2471  * @ingroup iAMAX
2472  */
2473 clblasStatus
2474 clblasiDamax(
2475     size_t N,
2476     cl_mem iMax,
2477     size_t offiMax,
2478     const cl_mem X,
2479     size_t offx,
2480     int incx,
2481     cl_mem scratchBuff,
2482     cl_uint numCommandQueues,
2483     cl_command_queue *commandQueues,
2484     cl_uint numEventsInWaitList,
2485     const cl_event *eventWaitList,
2486     cl_event *events);
2487 
2488 /**
2489  * @brief Index of the max absolute value in a complex float array.
2490  *
2491  * @param[in] N             Number of elements in vector \b X.
2492  * @param[out] iMax         Buffer object storing the index of first absolute max.
2493  *                          The index will be of type unsigned int.
2494  * @param[in] offiMax       Offset for storing the index in the buffer \b iMax.
2495  *                          Counted in elements.
2496  * @param[in] X             Buffer object storing vector \b X.
2497  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2498  *                          Counted in elements.
2499  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2500  * @param[in] scratchBuff   Temporary cl_mem object to store intermediate results.
2501  *                          It should be able to hold a minimum of (2*N) elements.
2502  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2503  *                                task is to be performed.
2504  * @param[in] commandQueues       OpenCL command queues.
2505  * @param[in] numEventsInWaitList Number of events in the event wait list.
2506  * @param[in] eventWaitList       Event wait list.
2507  * @param[in] events     Event objects per each command queue that identify
2508  *                       a particular kernel execution instance.
2509  *
2510  * @return
2511  *   - \b clblasSuccess on success;
2512  *   - the same error codes as the clblasiSamax() function otherwise.
2513  *
2514  * @ingroup iAMAX
2515  */
2516 clblasStatus
2517 clblasiCamax(
2518     size_t N,
2519     cl_mem iMax,
2520     size_t offiMax,
2521     const cl_mem X,
2522     size_t offx,
2523     int incx,
2524     cl_mem scratchBuff,
2525     cl_uint numCommandQueues,
2526     cl_command_queue *commandQueues,
2527     cl_uint numEventsInWaitList,
2528     const cl_event *eventWaitList,
2529     cl_event *events);
2530 
2531 /**
2532  * @brief Index of the max absolute value in a complex double array.
2533  *
2534  * @param[in] N             Number of elements in vector \b X.
2535  * @param[out] iMax         Buffer object storing the index of first absolute max.
2536  *                          The index will be of type unsigned int.
2537  * @param[in] offiMax       Offset for storing the index in the buffer \b iMax.
2538  *                          Counted in elements.
2539  * @param[in] X             Buffer object storing vector \b X.
2540  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2541  *                          Counted in elements.
2542  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2543  * @param[in] scratchBuff   Temporary cl_mem object to store intermediate results.
2544  *                          It should be able to hold a minimum of (2*N) elements.
2545  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2546  *                                task is to be performed.
2547  * @param[in] commandQueues       OpenCL command queues.
2548  * @param[in] numEventsInWaitList Number of events in the event wait list.
2549  * @param[in] eventWaitList       Event wait list.
2550  * @param[in] events     Event objects per each command queue that identify
2551  *                       a particular kernel execution instance.
2552  *
2553  * @return
2554  *   - \b clblasSuccess on success;
2555  *   - \b clblasInvalidDevice if a target device does not support
2556  *     floating point arithmetic with double precision;
2557  *   - the same error codes as the clblasiSamax() function
2558  *     otherwise.
2559  *
2560  * @ingroup iAMAX
2561  */
2562 clblasStatus
2563 clblasiZamax(
2564     size_t N,
2565     cl_mem iMax,
2566     size_t offiMax,
2567     const cl_mem X,
2568     size_t offx,
2569     int incx,
2570     cl_mem scratchBuff,
2571     cl_uint numCommandQueues,
2572     cl_command_queue *commandQueues,
2573     cl_uint numEventsInWaitList,
2574     const cl_event *eventWaitList,
2575     cl_event *events);
2576 
2577 /*@}*/
2578 
2579 /**
2580  * @defgroup ASUM ASUM  - Sum of absolute values
2581  * @ingroup BLAS1
2582  */
2583 /*@{*/
2584 
2585 /**
2586  * @brief Absolute sum of values of a vector containing float elements.
2587  *
2588  * @param[in] N             Number of elements in vector \b X.
2589  * @param[out] asum         Buffer object that will contain the absolute sum value.
2590  * @param[in] offAsum       Offset to absolute sum in \b asum buffer object.
2591  *                          Counted in elements.
2592  * @param[in] X             Buffer object storing vector \b X.
2593  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2594  *                          Counted in elements.
2595  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2596  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
2597  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2598  *                                task is to be performed.
2599  * @param[in] commandQueues       OpenCL command queues.
2600  * @param[in] numEventsInWaitList Number of events in the event wait list.
2601  * @param[in] eventWaitList       Event wait list.
2602  * @param[in] events     Event objects per each command queue that identify
2603  *                       a particular kernel execution instance.
2604  *
2605  * @return
2606  *   - \b clblasSuccess on success;
2607  *   - \b clblasNotInitialized if clblasSetup() was not called;
2608  *   - \b clblasInvalidValue if invalid parameters are passed:
2609  *     - \b N is zero, or
2610  *     - \b incx is zero, or
2611  *     - the vector sizes along with the increments lead to
2612  *       accessing outside of any of the buffers;
2613  *   - \b clblasInvalidMemObject if any of the \b X, \b asum or \b scratchBuff objects is
2614  *     invalid, or is an image object rather than a buffer one;
2615  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
2616  *     internal structures;
2617  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
2618  *   - \b clblasInvalidContext if the context that a passed command queue belongs
2619  *     to was released;
2620  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
2621  *     call has not completed for any of the target devices;
2622  *   - \b clblasCompilerNotAvailable if a compiler is not available;
2623  *   - \b clblasBuildProgramFailure if there is a failure to build a program
2624  *     executable.
2625  *
2626  * @ingroup ASUM
2627  */
2628
2629 clblasStatus
2630 clblasSasum(
2631     size_t N,
2632     cl_mem asum,
2633     size_t offAsum,
2634     const cl_mem X,
2635     size_t offx,
2636     int incx,
2637     cl_mem scratchBuff,
2638     cl_uint numCommandQueues,
2639     cl_command_queue *commandQueues,
2640     cl_uint numEventsInWaitList,
2641     const cl_event *eventWaitList,
2642     cl_event *events);
2643 
2644 /**
2645  * @example example_sasum.c
2646  * Example of how to use the @ref clblasSasum function.
2647  */
2648 
2649 /**
2650  * @brief Absolute sum of values of a vector containing double elements.
2651  *
2652  * @param[in] N             Number of elements in vector \b X.
2653  * @param[out] asum         Buffer object that will contain the absolute sum value.
2654  * @param[in] offAsum       Offset to absolute sum in \b asum buffer object.
2655  *                          Counted in elements.
2656  * @param[in] X             Buffer object storing vector \b X.
2657  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2658  *                          Counted in elements.
2659  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2660  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
2661  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2662  *                                task is to be performed.
2663  * @param[in] commandQueues       OpenCL command queues.
2664  * @param[in] numEventsInWaitList Number of events in the event wait list.
2665  * @param[in] eventWaitList       Event wait list.
2666  * @param[in] events     Event objects per each command queue that identify
2667  *                       a particular kernel execution instance.
2668  *
2669  * @return
2670  *   - \b clblasSuccess on success;
2671  *   - \b clblasInvalidDevice if a target device does not support
2672  *     floating point arithmetic with double precision;
2673  *   - the same error codes as the clblasSasum() function otherwise.
2674  *
2675  * @ingroup ASUM
2676  */
2677
2678 clblasStatus
2679 clblasDasum(
2680     size_t N,
2681     cl_mem asum,
2682     size_t offAsum,
2683     const cl_mem X,
2684     size_t offx,
2685     int incx,
2686     cl_mem scratchBuff,
2687     cl_uint numCommandQueues,
2688     cl_command_queue *commandQueues,
2689     cl_uint numEventsInWaitList,
2690     const cl_event *eventWaitList,
2691     cl_event *events);
2692 
2693 
2694 /**
2695  * @brief Absolute sum of values of a vector containing float-complex elements.
2696  *
2697  * @param[in] N             Number of elements in vector \b X.
2698  * @param[out] asum         Buffer object that will contain the absolute sum value.
2699  * @param[in] offAsum       Offset to absolute sum in \b asum buffer object.
2700  *                          Counted in elements.
2701  * @param[in] X             Buffer object storing vector \b X.
2702  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2703  *                          Counted in elements.
2704  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2705  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
2706  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2707  *                                task is to be performed.
2708  * @param[in] commandQueues       OpenCL command queues.
2709  * @param[in] numEventsInWaitList Number of events in the event wait list.
2710  * @param[in] eventWaitList       Event wait list.
2711  * @param[in] events     Event objects per each command queue that identify
2712  *                       a particular kernel execution instance.
2713  *
2714  * @return
2715  *   - \b clblasSuccess on success;
2716  *   - the same error codes as the clblasSasum() function otherwise.
2717  *
2718  * @ingroup ASUM
2719  */
2720
2721 clblasStatus
2722 clblasScasum(
2723     size_t N,
2724     cl_mem asum,
2725     size_t offAsum,
2726     const cl_mem X,
2727     size_t offx,
2728     int incx,
2729     cl_mem scratchBuff,
2730     cl_uint numCommandQueues,
2731     cl_command_queue *commandQueues,
2732     cl_uint numEventsInWaitList,
2733     const cl_event *eventWaitList,
2734     cl_event *events);
2735 
2736 
2737 /**
2738  * @brief Absolute sum of values of a vector containing double-complex elements.
2739  *
2740  * @param[in] N             Number of elements in vector \b X.
2741  * @param[out] asum         Buffer object that will contain the absolute sum value.
2742  * @param[in] offAsum       Offset to absolute sum in \b asum buffer object.
2743  *                          Counted in elements.
2744  * @param[in] X             Buffer object storing vector \b X.
2745  * @param[in] offx          Offset of first element of vector \b X in buffer object.
2746  *                          Counted in elements.
2747  * @param[in] incx          Increment for the elements of \b X. Must not be zero.
2748  * @param[in] scratchBuff   Temporary cl_mem scratch buffer object of minimum size N.
2749  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2750  *                                task is to be performed.
2751  * @param[in] commandQueues       OpenCL command queues.
2752  * @param[in] numEventsInWaitList Number of events in the event wait list.
2753  * @param[in] eventWaitList       Event wait list.
2754  * @param[in] events     Event objects per each command queue that identify
2755  *                       a particular kernel execution instance.
2756  *
2757  * @return
2758  *   - \b clblasSuccess on success;
2759  *   - \b clblasInvalidDevice if a target device does not support
2760  *     floating point arithmetic with double precision;
2761  *   - the same error codes as the clblasSasum() function otherwise.
2762  *
2763  * @ingroup ASUM
2764  */
2765
2766 clblasStatus
2767 clblasDzasum(
2768     size_t N,
2769     cl_mem asum,
2770     size_t offAsum,
2771     const cl_mem X,
2772     size_t offx,
2773     int incx,
2774     cl_mem scratchBuff,
2775     cl_uint numCommandQueues,
2776     cl_command_queue *commandQueues,
2777     cl_uint numEventsInWaitList,
2778     const cl_event *eventWaitList,
2779     cl_event *events);
2780 
2781 /*@}*/
2782 
2783 /**
2784  * @defgroup BLAS2 BLAS-2 functions
2785  *
2786  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
2787  * matrix-vector operations.
2788  */
2789 /*@{*/
2790 /*@}*/
2791 
2792 
2793 /**
2794  * @defgroup GEMV GEMV  - General matrix-Vector multiplication
2795  * @ingroup BLAS2
2796  */
2797 /*@{*/
2798 
2799 /**
2800  * @brief Matrix-vector product with a general rectangular matrix and
2801  *        float elements. Extended version.
2802  *
2803  * Matrix-vector products:
2804  *   - \f$ y \leftarrow \alpha A x + \beta y \f$
2805  *   - \f$ y \leftarrow \alpha A^T x + \beta y \f$
2806  *
2807  * @param[in] order     Row/column order.
2808  * @param[in] transA    How matrix \b A is to be transposed.
2809  * @param[in] M         Number of rows in matrix \b A.
2810  * @param[in] N         Number of columns in matrix \b A.
2811  * @param[in] alpha     The factor of matrix \b A.
2812  * @param[in] A         Buffer object storing matrix \b A.
2813  * @param[in] offA      Offset of the first element of the matrix \b A in
2814  *                      the buffer object. Counted in elements.
2815  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
2816  *                      than \b N when the \b order parameter is set to
2817  *                      \b clblasRowMajor,\n or less than \b M when the
2818  *                      parameter is set to \b clblasColumnMajor.
2819  * @param[in] x         Buffer object storing vector \b x.
2820  * @param[in] offx      Offset of first element of vector \b x in buffer object.
2821  *                      Counted in elements.
2822  * @param[in] incx      Increment for the elements of \b x. It cannot be zero.
2823  * @param[in] beta      The factor of the vector \b y.
2824  * @param[in,out] y     Buffer object storing the vector \b y.
2825  * @param[in] offy      Offset of first element of vector \b y in buffer object.
2826  *                      Counted in elements.
2827  * @param[in] incy      Increment for the elements of \b y. It cannot be zero.
2828  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2829  *                                task is to be performed.
2830  * @param[in] commandQueues       OpenCL command queues.
2831  * @param[in] numEventsInWaitList Number of events in the event wait list.
2832  * @param[in] eventWaitList       Event wait list.
2833  * @param[in] events     Event objects per each command queue that identify
2834  *                       a particular kernel execution instance.
2835  *
2836  * @return
2837  *   - \b clblasSuccess on success;
2838  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
2839  *     object;
2840  *   - other \b clblasStatus error codes otherwise.
2841  *
2842  * @ingroup GEMV
2843  */
2844 clblasStatus
2845 clblasSgemv(
2846     clblasOrder order,
2847     clblasTranspose transA,
2848     size_t M,
2849     size_t N,
2850     cl_float alpha,
2851     const cl_mem A,
2852     size_t offA,
2853     size_t lda,
2854     const cl_mem x,
2855     size_t offx,
2856     int incx,
2857     cl_float beta,
2858     cl_mem y,
2859     size_t offy,
2860     int incy,
2861     cl_uint numCommandQueues,
2862     cl_command_queue *commandQueues,
2863     cl_uint numEventsInWaitList,
2864     const cl_event *eventWaitList,
2865     cl_event *events);
2866 
2867 /**
2868  * @example example_sgemv.c
2869  * This is an example of how to use the @ref clblasSgemv function.
2870  */
2871 
2872 /**
2873  * @brief Matrix-vector product with a general rectangular matrix and
2874  *        double elements. Extended version.
2875  *
2876  * Matrix-vector products:
2877  *   - \f$ y \leftarrow \alpha A x + \beta y \f$
2878  *   - \f$ y \leftarrow \alpha A^T x + \beta y \f$
2879  *
2880  * @param[in] order     Row/column order.
2881  * @param[in] transA    How matrix \b A is to be transposed.
2882  * @param[in] M         Number of rows in matrix \b A.
2883  * @param[in] N         Number of columns in matrix \b A.
2884  * @param[in] alpha     The factor of matrix \b A.
2885  * @param[in] A         Buffer object storing matrix \b A.
2886  * @param[in] offA      Offset of the first element of the matrix \b A in
2887  *                      the buffer object. Counted in elements.
2888  * @param[in] lda       Leading dimension of matrix \b A. For a detailed description,
2889  *                      see clblasSgemv().
2890  * @param[in] x         Buffer object storing vector \b x.
2891  * @param[in] offx      Offset of first element of vector \b x in buffer object.
2892  *                      Counted in elements.
2893  * @param[in] incx      Increment for the elements of \b x. It cannot be zero.
2894  * @param[in] beta      The factor of the vector \b y.
2895  * @param[in,out] y     Buffer object storing the vector \b y.
2896  * @param[in] offy      Offset of first element of vector \b y in buffer object.
2897  *                      Counted in elements.
2898  * @param[in] incy      Increment for the elements of \b y. It cannot be zero.
2899  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2900  *                                task is to be performed.
2901  * @param[in] commandQueues       OpenCL command queues.
2902  * @param[in] numEventsInWaitList Number of events in the event wait list.
2903  * @param[in] eventWaitList       Event wait list.
2904  * @param[in] events     Event objects per each command queue that identify
2905  *                       a particular kernel execution instance.
2906  *
2907  * @return
2908  *   - \b clblasSuccess on success;
2909  *   - \b clblasInvalidDevice if a target device does not support
2910  *     floating point arithmetic with double precision;
2911  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
2912  *     object;
2913  *   - the same error codes as the clblasSgemv() function otherwise.
2914  *
2915  * @ingroup GEMV
2916  */
2917 clblasStatus
2918 clblasDgemv(
2919     clblasOrder order,
2920     clblasTranspose transA,
2921     size_t M,
2922     size_t N,
2923     cl_double alpha,
2924     const cl_mem A,
2925     size_t offA,
2926     size_t lda,
2927     const cl_mem x,
2928     size_t offx,
2929     int incx,
2930     cl_double beta,
2931     cl_mem y,
2932     size_t offy,
2933     int incy,
2934     cl_uint numCommandQueues,
2935     cl_command_queue *commandQueues,
2936     cl_uint numEventsInWaitList,
2937     const cl_event *eventWaitList,
2938     cl_event *events);
2939 
2940 /**
2941  * @brief Matrix-vector product with a general rectangular matrix and
2942  *        float complex elements. Extended version.
2943  *
2944  * Matrix-vector products:
2945  *   - \f$ y \leftarrow \alpha A x + \beta y \f$
2946  *   - \f$ y \leftarrow \alpha A^T x + \beta y \f$
2947  *
2948  * @param[in] order     Row/column order.
2949  * @param[in] transA    How matrix \b A is to be transposed.
2950  * @param[in] M         Number of rows in matrix \b A.
2951  * @param[in] N         Number of columns in matrix \b A.
2952  * @param[in] alpha     The factor of matrix \b A.
2953  * @param[in] A         Buffer object storing matrix \b A.
2954  * @param[in] offA      Offset of the first element of the matrix \b A in
2955  *                      the buffer object. Counted in elements.
2956  * @param[in] lda       Leading dimension of matrix \b A. For a detailed description,
2957  *                      see clblasSgemv().
2958  * @param[in] x         Buffer object storing vector \b x.
2959  * @param[in] offx      Offset of first element of vector \b x in buffer object.
2960  *                      Counted in elements.
2961  * @param[in] incx      Increment for the elements of \b x. It cannot be zero.
2962  * @param[in] beta      The factor of the vector \b y.
2963  * @param[in,out] y     Buffer object storing the vector \b y.
2964  * @param[in] offy      Offset of first element of vector \b y in buffer object.
2965  *                      Counted in elements.
2966  * @param[in] incy      Increment for the elements of \b y. It cannot be zero.
2967  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
2968  *                                task is to be performed.
2969  * @param[in] commandQueues       OpenCL command queues.
2970  * @param[in] numEventsInWaitList Number of events in the event wait list.
2971  * @param[in] eventWaitList       Event wait list.
2972  * @param[in] events     Event objects per each command queue that identify
2973  *                       a particular kernel execution instance.
2974  *
2975  * @return
2976  *   - \b clblasSuccess on success;
2977  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
2978  *     object;
2979  *   - the same error codes as the clblasSgemv() function otherwise.
2980  *
2981  * @ingroup GEMV
2982  */
2983 clblasStatus
2984 clblasCgemv(
2985     clblasOrder order,
2986     clblasTranspose transA,
2987     size_t M,
2988     size_t N,
2989     FloatComplex alpha,
2990     const cl_mem A,
2991     size_t offA,
2992     size_t lda,
2993     const cl_mem x,
2994     size_t offx,
2995     int incx,
2996     FloatComplex beta,
2997     cl_mem y,
2998     size_t offy,
2999     int incy,
3000     cl_uint numCommandQueues,
3001     cl_command_queue *commandQueues,
3002     cl_uint numEventsInWaitList,
3003     const cl_event *eventWaitList,
3004     cl_event *events);
3005 
3006 /**
3007  * @brief Matrix-vector product with a general rectangular matrix and
3008  *        double complex elements. Extended version.
3009  *
3010  * Matrix-vector products:
3011  *   - \f$ y \leftarrow \alpha A x + \beta y \f$
3012  *   - \f$ y \leftarrow \alpha A^T x + \beta y \f$
3013  *
3014  * @param[in] order     Row/column order.
3015  * @param[in] transA    How matrix \b A is to be transposed.
3016  * @param[in] M         Number of rows in matrix \b A.
3017  * @param[in] N         Number of columns in matrix \b A.
3018  * @param[in] alpha     The factor of matrix \b A.
3019  * @param[in] A         Buffer object storing matrix \b A.
3020  * @param[in] offA      Offset of the first element of the matrix \b A in
3021  *                      the buffer object. Counted in elements.
3022  * @param[in] lda       Leading dimension of matrix \b A. For a detailed description,
3023  *                      see clblasSgemv().
3024  * @param[in] x         Buffer object storing vector \b x.
3025  * @param[in] offx      Offset of first element of vector \b x in buffer object.
3026  *                      Counted in elements.
3027  * @param[in] incx      Increment for the elements of \b x. It cannot be zero.
3028  * @param[in] beta      The factor of the vector \b y.
3029  * @param[in,out] y     Buffer object storing the vector \b y.
3030  * @param[in] offy      Offset of first element of vector \b y in buffer object.
3031  *                      Counted in elements.
3032  * @param[in] incy      Increment for the elements of \b y. It cannot be zero.
3033  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3034  *                                task is to be performed.
3035  * @param[in] commandQueues       OpenCL command queues.
3036  * @param[in] numEventsInWaitList Number of events in the event wait list.
3037  * @param[in] eventWaitList       Event wait list.
3038  * @param[out] events    Event objects per each command queue that identify
3039  *                       a particular kernel execution instance.
3040  *
3041  * @return
3042  *   - \b clblasSuccess on success;
3043  *   - \b clblasInvalidDevice if a target device does not support the
3044  *     floating point arithmetic with double precision;
3045  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
3046  *     object;
3047  *   - the same error codes as the clblasSgemv() function otherwise.
3048  *
3049  * @ingroup GEMV
3050  */
3051 clblasStatus
3052 clblasZgemv(
3053     clblasOrder order,
3054     clblasTranspose transA,
3055     size_t M,
3056     size_t N,
3057     DoubleComplex alpha,
3058     const cl_mem A,
3059     size_t offA,
3060     size_t lda,
3061     const cl_mem x,
3062     size_t offx,
3063     int incx,
3064     DoubleComplex beta,
3065     cl_mem y,
3066     size_t offy,
3067     int incy,
3068     cl_uint numCommandQueues,
3069     cl_command_queue *commandQueues,
3070     cl_uint numEventsInWaitList,
3071     const cl_event *eventWaitList,
3072     cl_event *events);
3073 
3074 /*@}*/
3075 
3076 /**
3077  * @defgroup SYMV SYMV  - Symmetric matrix-Vector multiplication
3078  * @ingroup BLAS2
3079  */
3080 
3081 /*@{*/
3082 
3083 /**
3084  * @brief Matrix-vector product with a symmetric matrix and float elements.
3085  *
3086  *
3087  * Matrix-vector products:
3088  * - \f$ y \leftarrow \alpha A x + \beta y \f$
3089  *
3090  * @param[in] order     Row/column order.
3091  * @param[in] uplo      The triangle in matrix being referenced.
3092  * @param[in] N         Number of rows and columns in matrix \b A.
3093  * @param[in] alpha     The factor of matrix \b A.
3094  * @param[in] A         Buffer object storing matrix \b A.
3095  * @param[in] offA      Offset of the first element of the matrix \b A in
3096  *                      the buffer object. Counted in elements.
3097  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3098  *                      than \b N.
3099  * @param[in] x         Buffer object storing vector \b x.
3100  * @param[in] offx      Offset of first element of vector \b x in buffer object.
3101  *                      Counted in elements.
3102  * @param[in] incx      Increment for the elements of vector \b x. It cannot be zero.
3103  * @param[in] beta      The factor of vector \b y.
3104  * @param[in,out] y     Buffer object storing vector \b y.
3105  * @param[in] offy      Offset of first element of vector \b y in buffer object.
3106  *                      Counted in elements.
3107  * @param[in] incy      Increment for the elements of vector \b y. It cannot be zero.
3108  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3109  *                                task is to be performed.
3110  * @param[in] commandQueues       OpenCL command queues.
3111  * @param[in] numEventsInWaitList Number of events in the event wait list.
3112  * @param[in] eventWaitList       Event wait list.
3113  * @param[out] events    Event objects per each command queue that identify
3114  *                       a particular kernel execution instance.
3115  *
3116  * @return
3117  *   - \b clblasSuccess on success;
3118  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
3119  *     object;
3120  *   - the same error codes as the clblasSgemv() function otherwise.
3121  *
3122  * @ingroup SYMV
3123  */
3124 clblasStatus
3125 clblasSsymv(
3126     clblasOrder order,
3127     clblasUplo uplo,
3128     size_t N,
3129     cl_float alpha,
3130     const cl_mem A,
3131     size_t offA,
3132     size_t lda,
3133     const cl_mem x,
3134     size_t offx,
3135     int incx,
3136     cl_float beta,
3137     cl_mem y,
3138     size_t offy,
3139     int incy,
3140     cl_uint numCommandQueues,
3141     cl_command_queue *commandQueues,
3142     cl_uint numEventsInWaitList,
3143     const cl_event *eventWaitList,
3144     cl_event *events);
3145 
3146 /**
3147  * @example example_ssymv.c
3148  * This is an example of how to use the @ref clblasSsymv function.
3149  */
3150 
3151 /**
3152  * @brief Matrix-vector product with a symmetric matrix and double elements.
3153  *
3154  *
3155  * Matrix-vector products:
3156  * - \f$ y \leftarrow \alpha A x + \beta y \f$
3157  *
3158  * @param[in] order     Row/column order.
3159  * @param[in] uplo      The triangle in matrix being referenced.
3160  * @param[in] N         Number of rows and columns in matrix \b A.
3161  * @param[in] alpha     The factor of matrix \b A.
3162  * @param[in] A         Buffer object storing matrix \b A.
3163  * @param[in] offA      Offset of the first element of the matrix \b A in
3164  *                      the buffer object. Counted in elements.
3165  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3166  *                      than \b N.
3167  * @param[in] x         Buffer object storing vector \b x.
3168  * @param[in] offx      Offset of first element of vector \b x in buffer object.
3169  *                      Counted in elements.
3170  * @param[in] incx      Increment for the elements of vector \b x. It cannot be zero.
3171  * @param[in] beta      The factor of vector \b y.
3172  * @param[in,out] y     Buffer object storing vector \b y.
3173  * @param[in] offy      Offset of first element of vector \b y in buffer object.
3174  *                      Counted in elements.
3175  * @param[in] incy      Increment for the elements of vector \b y. It cannot be zero.
3176  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3177  *                                task is to be performed.
3178  * @param[in] commandQueues       OpenCL command queues.
3179  * @param[in] numEventsInWaitList Number of events in the event wait list.
3180  * @param[in] eventWaitList       Event wait list.
3181  * @param[out] events    Event objects per each command queue that identify
3182  *                       a particular kernel execution instance.
3183  *
3184  * @return
3185  *   - \b clblasSuccess on success;
3186  *   - \b clblasInvalidDevice if a target device does not support floating
3187  *     point arithmetic with double precision;
3188  *   - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
3189  *     object;
3190  *   - the same error codes as the clblasSsymv() function otherwise.
3191  *
3192  * @ingroup SYMV
3193  */
3194 clblasStatus
3195 clblasDsymv(
3196     clblasOrder order,
3197     clblasUplo uplo,
3198     size_t N,
3199     cl_double alpha,
3200     const cl_mem A,
3201     size_t offA,
3202     size_t lda,
3203     const cl_mem x,
3204     size_t offx,
3205     int incx,
3206     cl_double beta,
3207     cl_mem y,
3208     size_t offy,
3209     int incy,
3210     cl_uint numCommandQueues,
3211     cl_command_queue *commandQueues,
3212     cl_uint numEventsInWaitList,
3213     const cl_event *eventWaitList,
3214     cl_event *events);
3215 
3216 /*@}*/
3217 
3218 
3219 /**
3220  * @defgroup HEMV HEMV  - Hermitian matrix-vector multiplication
3221  * @ingroup BLAS2
3222  */
3223 /*@{*/
3224 
3225 /**
3226  * @brief Matrix-vector product with a hermitian matrix and float-complex elements.
3227  *
3228  * Matrix-vector products:
3229  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
3230  *
3231  * @param[in] order     Row/column order.
3232  * @param[in] uplo      The triangle in matrix being referenced.
3233  * @param[in] N         Number of rows and columns in matrix \b A.
3234  * @param[in] alpha     The factor of matrix \b A.
3235  * @param[in] A         Buffer object storing matrix \b A.
3236  * @param[in] offa      Offset in number of elements for first element in matrix \b A.
3237  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3238  *                      than \b N.
3239  * @param[in] X         Buffer object storing vector \b X.
3240  * @param[in] offx      Offset of first element of vector \b X in buffer object.
3241  *                      Counted in elements.
3242  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
3243  * @param[in] beta      The factor of vector \b Y.
3244  * @param[in,out] Y     Buffer object storing vector \b Y.
3245  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
3246  *                      Counted in elements.
3247  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
3248  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3249  *                                task is to be performed.
3250  * @param[in] commandQueues       OpenCL command queues.
3251  * @param[in] numEventsInWaitList Number of events in the event wait list.
3252  * @param[in] eventWaitList       Event wait list.
3253  * @param[out] events    Event objects per each command queue that identify
3254  *                       a particular kernel execution instance.
3255  *
3256  * @return
3257  *   - \b clblasSuccess on success;
3258  *   - \b clblasNotInitialized if clblasSetup() was not called;
3259  *   - \b clblasInvalidValue if invalid parameters are passed:
3260  *     - \b N is zero, or
3261  *     - either \b incx or \b incy is zero, or
3262  *     - any of the leading dimensions is invalid, or
3263  *     - the matrix sizes or the vector sizes along with the increments lead to
3264  *       accessing outside of any of the buffers;
3265  *   - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is
3266  *     invalid, or an image object rather than the buffer one;
3267  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
3268  *     internal structures;
3269  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
3270  *   - \b clblasInvalidContext if a context a passed command queue belongs to
3271  *     was released;
3272  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
3273  *     call has not completed for any of the target devices;
3274  *   - \b clblasCompilerNotAvailable if a compiler is not available;
3275  *   - \b clblasBuildProgramFailure if there is a failure to build a program
3276  *     executable.
3277  *
3278  * @ingroup HEMV
3279  */
3280 clblasStatus
3281 clblasChemv(
3282     clblasOrder order,
3283     clblasUplo uplo,
3284     size_t N,
3285     FloatComplex alpha,
3286     const cl_mem A,
3287     size_t offa,
3288     size_t lda,
3289     const cl_mem X,
3290     size_t offx,
3291     int incx,
3292     FloatComplex beta,
3293     cl_mem Y,
3294     size_t offy,
3295     int incy,
3296     cl_uint numCommandQueues,
3297     cl_command_queue *commandQueues,
3298     cl_uint numEventsInWaitList,
3299     const cl_event *eventWaitList,
3300     cl_event *events);
3301 
3302 /**
3303  * @brief Matrix-vector product with a hermitian matrix and double-complex elements.
3304  *
3305  * Matrix-vector products:
3306  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
3307  *
3308  * @param[in] order     Row/column order.
3309  * @param[in] uplo      The triangle in matrix being referenced.
3310  * @param[in] N         Number of rows and columns in matrix \b A.
3311  * @param[in] alpha     The factor of matrix \b A.
3312  * @param[in] A         Buffer object storing matrix \b A.
3313  * @param[in] offa      Offset in number of elements for first element in matrix \b A.
3314  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3315  *                      than \b N.
3316  * @param[in] X         Buffer object storing vector \b X.
3317  * @param[in] offx      Offset of first element of vector \b X in buffer object.
3318  *                      Counted in elements.
3319  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
3320  * @param[in] beta      The factor of vector \b Y.
3321  * @param[in,out] Y     Buffer object storing vector \b Y.
3322  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
3323  *                      Counted in elements.
3324  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
3325  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3326  *                                task is to be performed.
3327  * @param[in] commandQueues       OpenCL command queues.
3328  * @param[in] numEventsInWaitList Number of events in the event wait list.
3329  * @param[in] eventWaitList       Event wait list.
3330  * @param[out] events    Event objects per each command queue that identify
3331  *                       a particular kernel execution instance.
3332  *
3333  * @return
3334  *   - \b clblasSuccess on success;
3335  *   - \b clblasInvalidDevice if a target device does not support floating
3336  *     point arithmetic with double precision;
3337  *   - the same error codes as the clblasChemv() function otherwise.
3338  *
3339  * @ingroup HEMV
3340  */
3341 clblasStatus
3342 clblasZhemv(
3343     clblasOrder order,
3344     clblasUplo uplo,
3345     size_t N,
3346     DoubleComplex alpha,
3347     const cl_mem A,
3348     size_t offa,
3349     size_t lda,
3350     const cl_mem X,
3351     size_t offx,
3352     int incx,
3353     DoubleComplex beta,
3354     cl_mem Y,
3355     size_t offy,
3356     int incy,
3357     cl_uint numCommandQueues,
3358     cl_command_queue *commandQueues,
3359     cl_uint numEventsInWaitList,
3360     const cl_event *eventWaitList,
3361     cl_event *events);
3362 /**
3363  * @example example_zhemv.cpp
3364  * Example of how to use the @ref clblasZhemv function.
3365  */
3366 /*@}*/
3367 
3368 
3369 
3370 /**
3371  * @defgroup TRMV TRMV  - Triangular matrix vector multiply
3372  * @ingroup BLAS2
3373  */
3374 /*@{*/
3375 
3376 /**
3377  * @brief Matrix-vector product with a triangular matrix and
3378  * float elements.
3379  *
3380  * Matrix-vector products (\b X is overwritten with the result):
3381  *   - \f$ X \leftarrow  A X \f$
3382  *   - \f$ X \leftarrow  A^T X \f$
3383  *
3384  * @param[in] order             Row/column order.
3385  * @param[in] uplo              The triangle in matrix being referenced.
3386  * @param[in] trans             How matrix \b A is to be transposed.
3387  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3388  * @param[in] N                 Number of rows/columns in matrix \b A.
3389  * @param[in] A                 Buffer object storing matrix \b A.
3390  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3391  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3392  *                              than \b N.
3393  * @param[in,out] X             Buffer object storing vector \b X.
3394  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3395  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3396  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
3397  *                              minimum of (1 + (N-1)*abs(incx)) elements.
3398  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3399  *                                task is to be performed.
3400  * @param[in] commandQueues       OpenCL command queues.
3401  * @param[in] numEventsInWaitList Number of events in the event wait list.
3402  * @param[in] eventWaitList       Event wait list.
3403  * @param[out] events    Event objects per each command queue that identify
3404  *                       a particular kernel execution instance.
3405  *
3406  * @return
3407  *   - \b clblasSuccess on success;
3408  *   - \b clblasNotInitialized if clblasSetup() was not called;
3409  *   - \b clblasInvalidValue if invalid parameters are passed:
3410  *     - either \b N or \b incx is zero, or
3411  *     - the leading dimension is invalid;
3412  *   - \b clblasInvalidMemObject if either \b A or \b X object is
3413  *     invalid, or an image object rather than the buffer one;
3414  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
3415  *     internal structures;
3416  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
3417  *   - \b clblasInvalidContext if a context a passed command queue belongs
3418  *     to was released;
3419  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
3420  *     call has not completed for any of the target devices;
3421  *   - \b clblasCompilerNotAvailable if a compiler is not available;
3422  *   - \b clblasBuildProgramFailure if there is a failure to build a program
3423  *     executable.
3424  *
3425  * @ingroup TRMV
3426  */
3427 clblasStatus
3428 clblasStrmv(
3429     clblasOrder order,
3430     clblasUplo uplo,
3431     clblasTranspose trans,
3432     clblasDiag diag,
3433     size_t N,
3434     const cl_mem A,
3435     size_t offa,
3436     size_t lda,
3437     cl_mem X,
3438     size_t offx,
3439     int incx,
3440 	cl_mem scratchBuff,
3441     cl_uint numCommandQueues,
3442     cl_command_queue *commandQueues,
3443     cl_uint numEventsInWaitList,
3444     const cl_event *eventWaitList,
3445     cl_event *events);
3446 
3447 /**
3448  * @example example_strmv.c
3449  * Example of how to use the @ref clblasStrmv function.
3450  */
3451 
3452 /**
3453  * @brief Matrix-vector product with a triangular matrix and
3454  * double elements.
3455  *
3456  * Matrix-vector products (\b X is overwritten with the result):
3457  *   - \f$ X \leftarrow  A X \f$
3458  *   - \f$ X \leftarrow  A^T X \f$
3459  *
3460  * @param[in] order             Row/column order.
3461  * @param[in] uplo              The triangle in matrix being referenced.
3462  * @param[in] trans             How matrix \b A is to be transposed.
3463  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3464  * @param[in] N                 Number of rows/columns in matrix \b A.
3465  * @param[in] A                 Buffer object storing matrix \b A.
3466  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3467  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3468  *                              than \b N.
3469  * @param[in,out] X             Buffer object storing vector \b X.
3470  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3471  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3472  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
3473  *                              minimum of (1 + (N-1)*abs(incx)) elements.
3474  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3475  *                                task is to be performed.
3476  * @param[in] commandQueues       OpenCL command queues.
3477  * @param[in] numEventsInWaitList Number of events in the event wait list.
3478  * @param[in] eventWaitList       Event wait list.
3479  * @param[out] events    Event objects per each command queue that identify
3480  *                       a particular kernel execution instance.
3481  *
3482  * @return
3483  *   - \b clblasSuccess on success;
3484  *   - \b clblasInvalidDevice if a target device does not support floating
3485  *     point arithmetic with double precision;
3486  *   - the same error codes as the clblasStrmv() function otherwise.
3487  *
3488  * @ingroup TRMV
3489  */
3490 clblasStatus
3491 clblasDtrmv(
3492     clblasOrder order,
3493     clblasUplo uplo,
3494     clblasTranspose trans,
3495     clblasDiag diag,
3496     size_t N,
3497     const cl_mem A,
3498     size_t offa,
3499     size_t lda,
3500     cl_mem X,
3501     size_t offx,
3502     int incx,
3503 	cl_mem scratchBuff,
3504     cl_uint numCommandQueues,
3505     cl_command_queue *commandQueues,
3506     cl_uint numEventsInWaitList,
3507     const cl_event *eventWaitList,
3508     cl_event *events);
3509 
3510 /**
3511  * @brief Matrix-vector product with a triangular matrix and
3512  * float complex elements.
3513  *
3514  * Matrix-vector products (\b X is overwritten with the result):
3515  *   - \f$ X \leftarrow  A X \f$
3516  *   - \f$ X \leftarrow  A^T X \f$
3517  *
3518  * @param[in] order             Row/column order.
3519  * @param[in] uplo              The triangle in matrix being referenced.
3520  * @param[in] trans             How matrix \b A is to be transposed.
3521  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3522  * @param[in] N                 Number of rows/columns in matrix \b A.
3523  * @param[in] A                 Buffer object storing matrix \b A.
3524  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3525  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3526  *                              than \b N.
3527  * @param[in,out] X             Buffer object storing vector \b X.
3528  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3529  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3530  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
3531  *                              minimum of (1 + (N-1)*abs(incx)) elements.
3532  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3533  *                                task is to be performed.
3534  * @param[in] commandQueues       OpenCL command queues.
3535  * @param[in] numEventsInWaitList Number of events in the event wait list.
3536  * @param[in] eventWaitList       Event wait list.
3537  * @param[out] events    Event objects per each command queue that identify
3538  *                       a particular kernel execution instance.
3539  *
3540  * @return The same result as the clblasStrmv() function.
3541  * @ingroup TRMV
3542  */
3543 clblasStatus
3544 clblasCtrmv(
3545     clblasOrder order,
3546     clblasUplo uplo,
3547     clblasTranspose trans,
3548     clblasDiag diag,
3549     size_t N,
3550     const cl_mem A,
3551     size_t offa,
3552     size_t lda,
3553     cl_mem X,
3554     size_t offx,
3555     int incx,
3556 	cl_mem scratchBuff,
3557     cl_uint numCommandQueues,
3558     cl_command_queue *commandQueues,
3559     cl_uint numEventsInWaitList,
3560     const cl_event *eventWaitList,
3561     cl_event *events);
3562 
3563 /**
3564  * @brief Matrix-vector product with a triangular matrix and
3565  * double complex elements.
3566  *
3567  * Matrix-vector products (\b X is overwritten with the result):
3568  *   - \f$ X \leftarrow  A X \f$
3569  *   - \f$ X \leftarrow  A^T X \f$
3570  *
3571  * @param[in] order             Row/column order.
3572  * @param[in] uplo              The triangle in matrix being referenced.
3573  * @param[in] trans             How matrix \b A is to be transposed.
3574  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3575  * @param[in] N                 Number of rows/columns in matrix \b A.
3576  * @param[in] A                 Buffer object storing matrix \b A.
3577  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3578  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3579  *                              than \b N.
3580  * @param[in,out] X             Buffer object storing vector \b X.
3581  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3582  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3583  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
3584  *                              minimum of (1 + (N-1)*abs(incx)) elements.
3585  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3586  *                                task is to be performed.
3587  * @param[in] commandQueues       OpenCL command queues.
3588  * @param[in] numEventsInWaitList Number of events in the event wait list.
3589  * @param[in] eventWaitList       Event wait list.
3590  * @param[out] events    Event objects per each command queue that identify
3591  *                       a particular kernel execution instance.
3592  *
3593  * @return The same result as the clblasDtrmv() function.
3594  * @ingroup TRMV
3595  */
3596 clblasStatus
3597 clblasZtrmv(
3598     clblasOrder order,
3599     clblasUplo uplo,
3600     clblasTranspose trans,
3601     clblasDiag diag,
3602     size_t N,
3603     const cl_mem A,
3604     size_t offa,
3605     size_t lda,
3606     cl_mem X,
3607     size_t offx,
3608     int incx,
3609 	cl_mem scratchBuff,
3610     cl_uint numCommandQueues,
3611     cl_command_queue *commandQueues,
3612     cl_uint numEventsInWaitList,
3613     const cl_event *eventWaitList,
3614     cl_event *events);
3615 
3616 
3617 /*@}*/
3618 
3619 /**
3620  * @defgroup TRSV TRSV  - Triangular matrix vector Solve
3621  * @ingroup BLAS2
3622  */
3623 /*@{*/
3624 
3625 /**
3626  * @brief Solves a triangular system of equations with float elements.
3627  *
3628  * Systems solved in place (\b X is overwritten with the solution):
3629  *   - \f$ X \leftarrow  A^{-1} X \f$
3630  *   - \f$ X \leftarrow  A^{-T} X \f$
3631  *
3632  * @param[in] order             Row/column order.
3633  * @param[in] uplo              The triangle in matrix being referenced.
3634  * @param[in] trans             How matrix \b A is to be transposed.
3635  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3636  * @param[in] N                 Number of rows/columns in matrix \b A.
3637  * @param[in] A                 Buffer object storing matrix \b A.
3638  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3639  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3640  *                              than \b N.
3641  * @param[in,out] X             Buffer object storing vector \b X.
3642  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3643  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3644  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3645  *                                task is to be performed.
3646  * @param[in] commandQueues       OpenCL command queues.
3647  * @param[in] numEventsInWaitList Number of events in the event wait list.
3648  * @param[in] eventWaitList       Event wait list.
3649  * @param[out] events    Event objects per each command queue that identify
3650  *                       a particular kernel execution instance.
3651  *
3652  * @return
3653  *   - \b clblasSuccess on success;
3654  *   - \b clblasNotInitialized if clblasSetup() was not called;
3655  *   - \b clblasInvalidValue if invalid parameters are passed:
3656  *     - either \b N or \b incx is zero, or
3657  *     - the leading dimension is invalid;
3658  *   - \b clblasInvalidMemObject if either \b A or \b X object is
3659  *     invalid, or an image object rather than the buffer one;
3660  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
3661  *     internal structures;
3662  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
3663  *   - \b clblasInvalidContext if a context a passed command queue belongs
3664  *     to was released;
3665  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
3666  *     call has not completed for any of the target devices;
3667  *   - \b clblasCompilerNotAvailable if a compiler is not available;
3668  *   - \b clblasBuildProgramFailure if there is a failure to build a program
3669  *     executable.
3670  *
3671  * @ingroup TRSV
3672  */
3673 clblasStatus
3674 clblasStrsv(
3675     clblasOrder order,
3676     clblasUplo uplo,
3677     clblasTranspose trans,
3678     clblasDiag diag,
3679     size_t N,
3680     const cl_mem A,
3681     size_t offa,
3682     size_t lda,
3683     cl_mem X,
3684     size_t offx,
3685     int incx,
3686     cl_uint numCommandQueues,
3687     cl_command_queue *commandQueues,
3688     cl_uint numEventsInWaitList,
3689     const cl_event *eventWaitList,
3690     cl_event *events);
3691 
3692 /**
3693  * @example example_strsv.c
3694  * Example of how to use the @ref clblasStrsv function.
3695  */
3696 
3697 
3698 /**
3699  * @brief Solves a triangular system of equations with double elements.
3700  *
3701  * Systems solved in place (\b X is overwritten with the solution):
3702  *   - \f$ X \leftarrow  A^{-1} X \f$
3703  *   - \f$ X \leftarrow  A^{-T} X \f$
3704  *
3705  * @param[in] order             Row/column order.
3706  * @param[in] uplo              The triangle in matrix being referenced.
3707  * @param[in] trans             How matrix \b A is to be transposed.
3708  * @param[in] diag              Specify whether matrix \b A is unit triangular.
3709  * @param[in] N                 Number of rows/columns in matrix \b A.
3710  * @param[in] A                 Buffer object storing matrix \b A.
3711  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
3712  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
3713  *                              than \b N.
3714  * @param[in,out] X             Buffer object storing vector \b X.
3715  * @param[in] offx              Offset in number of elements for first element in vector \b X.
3716  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
3717  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3718  *                                task is to be performed.
3719  * @param[in] commandQueues       OpenCL command queues.
3720  * @param[in] numEventsInWaitList Number of events in the event wait list.
3721  * @param[in] eventWaitList       Event wait list.
3722  * @param[out] events    Event objects per each command queue that identify
3723  *                       a particular kernel execution instance.
3724  *
3725  * @return
3726  *   - \b clblasSuccess on success;
3727  *   - \b clblasInvalidDevice if a target device does not support floating
3728  *     point arithmetic with double precision;
3729  *   - the same error codes as the clblasStrsv() function otherwise.
3730  *
3731  * @ingroup TRSV
3732  */
3733 clblasStatus
3734 clblasDtrsv(
3735     clblasOrder order,
3736     clblasUplo uplo,
3737     clblasTranspose trans,
3738     clblasDiag diag,
3739     size_t N,
3740     const cl_mem A,
3741     size_t offa,
3742     size_t lda,
3743     cl_mem X,
3744     size_t offx,
3745     int incx,
3746     cl_uint numCommandQueues,
3747     cl_command_queue *commandQueues,
3748     cl_uint numEventsInWaitList,
3749     const cl_event *eventWaitList,
3750     cl_event *events);
3751 
3752 
3753 /**
3754  * @brief Solves triangular matrix problems with float-complex elements.
3755  *
3756  * Triangular solves:
3757  *   - \f$ A X \leftarrow  X \f$
3758  *   - \f$ A^T X \leftarrow  X \f$
3759  *
3760  * @param[in] order               Row/column order.
3761  * @param[in] uplo                The triangle in matrix \b A being referenced.
3762  * @param[in] trans               How matrix \b A is to be transposed.
3763  * @param[in] diag                Specifies whether matrix \b A is unit triangular.
3764  * @param[in] N                   Number of rows/columns in matrix \b A.
3765  * @param[in] A                   Buffer object storing matrix \b A.
3766  * @param[in] offa                Offset in number of elements for the first element in matrix \b A.
3767  * @param[in] lda                 Leading dimension of matrix \b A. It cannot be less
3768  *                                than \b N.
3769  * @param[in,out] X               Buffer object storing vector \b X.
3770  * @param[in] offx                Offset in number of elements for the first element in vector \b X.
3771  * @param[in] incx                Increment for the elements of \b X. Must not be zero.
3772  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3773  *                                task is to be performed.
3774  * @param[in] commandQueues       OpenCL command queues.
3775  * @param[in] numEventsInWaitList Number of events in the event wait list.
3776  * @param[in] eventWaitList       Event wait list.
3777  * @param[out] events             Event objects per each command queue that identify
3778  *                                a particular kernel execution instance.
3779  *
3780  * @return The same result as the clblasStrsv() function.
3781  *
3782  * @ingroup TRSV
3783  */
3784 clblasStatus
3785 clblasCtrsv(
3786     clblasOrder order,
3787     clblasUplo uplo,
3788     clblasTranspose trans,
3789     clblasDiag diag,
3790     size_t N,
3791     const cl_mem A,
3792     size_t offa,
3793     size_t lda,
3794     cl_mem X,
3795     size_t offx,
3796     int incx,
3797     cl_uint numCommandQueues,
3798     cl_command_queue *commandQueues,
3799     cl_uint numEventsInWaitList,
3800     const cl_event *eventWaitList,
3801     cl_event *events);
3802 
3803 
3804 /**
3805  * @brief Solves triangular matrix problems with double-complex elements.
3806  *
3807  * Triangular solves:
3808  *   - \f$ A X \leftarrow  X \f$
3809  *   - \f$ A^T X \leftarrow  X \f$
3810  *
3811  * @param[in] order               Row/column order.
3812  * @param[in] uplo                The triangle in matrix \b A being referenced.
3813  * @param[in] trans               How matrix \b A is to be transposed.
3814  * @param[in] diag                Specifies whether matrix \b A is unit triangular.
3815  * @param[in] N                   Number of rows/columns in matrix \b A.
3816  * @param[in] A                   Buffer object storing matrix \b A.
3817  * @param[in] offa                Offset in number of elements for the first element in matrix \b A.
3818  * @param[in] lda                 Leading dimension of matrix \b A. It cannot be less
3819  *                                than \b N.
3820  * @param[in,out] X               Buffer object storing vector \b X.
3821  * @param[in] offx                Offset in number of elements for the first element in vector \b X.
3822  * @param[in] incx                Increment for the elements of \b X. Must not be zero.
3823  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3824  *                                task is to be performed.
3825  * @param[in] commandQueues       OpenCL command queues.
3826  * @param[in] numEventsInWaitList Number of events in the event wait list.
3827  * @param[in] eventWaitList       Event wait list.
3828  * @param[out] events             Event objects per each command queue that identify
3829  *                                a particular kernel execution instance.
3830  *
3831  * @return The same result as the clblasDtrsv() function.
3832  *
3833  * @ingroup TRSV
3834  */
3835 clblasStatus
3836 clblasZtrsv(
3837     clblasOrder order,
3838     clblasUplo uplo,
3839     clblasTranspose trans,
3840     clblasDiag diag,
3841     size_t N,
3842     const cl_mem A,
3843     size_t offa,
3844     size_t lda,
3845     cl_mem X,
3846     size_t offx,
3847     int incx,
3848     cl_uint numCommandQueues,
3849     cl_command_queue *commandQueues,
3850     cl_uint numEventsInWaitList,
3851     const cl_event *eventWaitList,
3852     cl_event *events);
3853 
3854 /*@}*/
3855 
3856 /**
3857  * @defgroup GER GER   - General matrix rank 1 operation
3858  * @ingroup BLAS2
3859  */
3860 /*@{*/
3861 
3862 /**
3863  * @brief Vector-vector product with float elements; performs a
3864  * rank 1 update of matrix \b A.
3865  *
3866  * Rank 1 operation:
3867  *   - \f$ A \leftarrow \alpha X Y^T + A \f$
3868  *
3869  * @param[in] order     Row/column order.
3870  * @param[in] M         Number of rows in matrix \b A.
3871  * @param[in] N         Number of columns in matrix \b A.
3872  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^T \f$.
3873  * @param[in] X         Buffer object storing vector \b X.
3874  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
3875  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
3876  * @param[in] Y         Buffer object storing vector \b Y.
3877  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
3878  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
3879  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
3880  *                      overwritten by the updated matrix.
3881  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
3882  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3883  *                      than \b N when the \b order parameter is set to
3884  *                      \b clblasRowMajor,\n or less than \b M when the
3885  *                      parameter is set to \b clblasColumnMajor.
3886  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3887  *                                task is to be performed.
3888  * @param[in] commandQueues       OpenCL command queues.
3889  * @param[in] numEventsInWaitList Number of events in the event wait list.
3890  * @param[in] eventWaitList       Event wait list.
3891  * @param[out] events   Event objects per each command queue that identify
3892  *                      a particular kernel execution instance.
3893  *
3894  * @return
3895  *   - \b clblasSuccess on success;
3896  *   - \b clblasNotInitialized if clblasSetup() was not called;
3897  *   - \b clblasInvalidValue if invalid parameters are passed:
3898  *     - \b M or \b N is zero, or
3899  *     - either \b incx or \b incy is zero, or
3900  *     - a leading dimension is invalid;
3901  *   - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid,
3902  *     or is an image object rather than a buffer object;
3903  *   - \b clblasOutOfResources if an image-based function implementation is
3904  *     used and no suitable scratch image is available;
3905  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
3906  *     internal structures;
3907  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
3908  *   - \b clblasInvalidContext if the context that a passed command queue
3909  *     belongs to was released;
3910  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
3911  *     call has not completed for any of the target devices;
3912  *   - \b clblasCompilerNotAvailable if a compiler is not available;
3913  *   - \b clblasBuildProgramFailure if there is a failure to build a program
3914  *     executable.
3915  *
3916  * @ingroup GER
3917  */
3918 clblasStatus
3919 clblasSger(
3920     clblasOrder order,
3921     size_t M,
3922     size_t N,
3923     cl_float alpha,
3924     const cl_mem X,
3925     size_t offx,
3926     int incx,
3927     const cl_mem Y,
3928     size_t offy,
3929     int incy,
3930     cl_mem A,
3931     size_t offa,
3932     size_t lda,
3933     cl_uint numCommandQueues,
3934     cl_command_queue *commandQueues,
3935     cl_uint numEventsInWaitList,
3936     const cl_event *eventWaitList,
3937     cl_event *events);
3938 
3939 /**
3940  * @example example_sger.c
3941  * Example of how to use the @ref clblasSger function.
3942  */
3943 
3944 
3945 /**
3946  * @brief Vector-vector product with double elements; performs a
3947  * rank 1 update of matrix \b A.
3948  *
3949  * Rank 1 operation:
3950  *   - \f$ A \leftarrow \alpha X Y^T + A \f$
3951  *
3952  * @param[in] order     Row/column order.
3953  * @param[in] M         Number of rows in matrix \b A.
3954  * @param[in] N         Number of columns in matrix \b A.
3955  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^T \f$.
3956  * @param[in] X         Buffer object storing vector \b X.
3957  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
3958  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
3959  * @param[in] Y         Buffer object storing vector \b Y.
3960  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
3961  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
3962  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
3963  *                      overwritten by the updated matrix.
3964  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
3965  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
3966  *                      than \b N when the \b order parameter is set to
3967  *                      \b clblasRowMajor,\n or less than \b M when the
3968  *                      parameter is set to \b clblasColumnMajor.
3969  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
3970  *                                task is to be performed.
3971  * @param[in] commandQueues       OpenCL command queues.
3972  * @param[in] numEventsInWaitList Number of events in the event wait list.
3973  * @param[in] eventWaitList       Event wait list.
3974  * @param[out] events   Event objects per each command queue that identify
3975  *                      a particular kernel execution instance.
3976  *
3977  * @return
3978  *   - \b clblasSuccess on success;
3979  *   - \b clblasInvalidDevice if a target device does not support floating
3980  *     point arithmetic with double precision;
3981  *   - the same error codes as the clblasSger() function otherwise.
3982  *
3983  * @ingroup GER
3984  */
3985 clblasStatus
3986 clblasDger(
3987     clblasOrder order,
3988     size_t M,
3989     size_t N,
3990     cl_double alpha,
3991     const cl_mem X,
3992     size_t offx,
3993     int incx,
3994     const cl_mem Y,
3995     size_t offy,
3996     int incy,
3997     cl_mem A,
3998     size_t offa,
3999     size_t lda,
4000     cl_uint numCommandQueues,
4001     cl_command_queue *commandQueues,
4002     cl_uint numEventsInWaitList,
4003     const cl_event *eventWaitList,
4004     cl_event *events);
4005 /*@}*/
4006 
4007 /**
4008  * @defgroup GERU GERU  - General matrix rank 1 operation (unconjugated)
4009  * @ingroup BLAS2
4010  */
4011 /*@{*/
4012 
4013 /**
4014  * @brief Vector-vector product with float complex elements; performs a
4015  * rank 1 update of matrix \b A.
4016  *
4017  * Rank 1 operation:
4018  *   - \f$ A \leftarrow \alpha X Y^T + A \f$
4019  *
4020  * @param[in] order     Row/column order.
4021  * @param[in] M         Number of rows in matrix \b A.
4022  * @param[in] N         Number of columns in matrix \b A.
4023  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^T \f$.
4024  * @param[in] X         Buffer object storing vector \b X.
4025  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4026  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4027  * @param[in] Y         Buffer object storing vector \b Y.
4028  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4029  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4030  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
4031  *                      overwritten by the updated matrix.
4032  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4033  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4034  *                      than \b N when the \b order parameter is set to
4035  *                      \b clblasRowMajor,\n or less than \b M when the
4036  *                      parameter is set to \b clblasColumnMajor.
4037  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4038  *                                task is to be performed.
4039  * @param[in] commandQueues       OpenCL command queues.
4040  * @param[in] numEventsInWaitList Number of events in the event wait list.
4041  * @param[in] eventWaitList       Event wait list.
4042  * @param[out] events   Event objects per each command queue that identify
4043  *                      a particular kernel execution instance.
4044  *
4045  * @return
4046  *   - \b clblasSuccess on success;
4047  *   - \b clblasNotInitialized if clblasSetup() was not called;
4048  *   - \b clblasInvalidValue if invalid parameters are passed:
4049  *     - \b M or \b N is zero, or
4050  *     - either \b incx or \b incy is zero, or
4051  *     - a leading dimension is invalid;
4052  *   - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid,
4053  *     or is an image object rather than a buffer object;
4054  *   - \b clblasOutOfResources if an image-based function implementation is
4055  *     used and no suitable scratch image is available;
4056  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4057  *     internal structures;
4058  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4059  *   - \b clblasInvalidContext if the context that a passed command queue
4060  *     belongs to was released;
4061  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4062  *     call has not completed for any of the target devices;
4063  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4064  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4065  *     executable.
4066  *
4067  * @ingroup GERU
4068  */
4069 clblasStatus
4070 clblasCgeru(
4071     clblasOrder order,
4072     size_t M,
4073     size_t N,
4074     cl_float2 alpha,
4075     const cl_mem X,
4076     size_t offx,
4077     int incx,
4078     const cl_mem Y,
4079     size_t offy,
4080     int incy,
4081     cl_mem A ,
4082     size_t offa,
4083     size_t lda,
4084     cl_uint numCommandQueues,
4085     cl_command_queue *commandQueues,
4086     cl_uint numEventsInWaitList,
4087     const cl_event *eventWaitList,
4088     cl_event *events);
4089 
4090 /**
4091  * @brief Vector-vector product with double complex elements; performs a
4092  * rank 1 update of matrix \b A.
4093  *
4094  * Rank 1 operation:
4095  *   - \f$ A \leftarrow \alpha X Y^T + A \f$
4096  *
4097  * @param[in] order     Row/column order.
4098  * @param[in] M         Number of rows in matrix \b A.
4099  * @param[in] N         Number of columns in matrix \b A.
4100  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^T \f$.
4101  * @param[in] X         Buffer object storing vector \b X.
4102  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4103  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4104  * @param[in] Y         Buffer object storing vector \b Y.
4105  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4106  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4107  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
4108  *                      overwritten by the updated matrix.
4109  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4110  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4111  *                      than \b N when the \b order parameter is set to
4112  *                      \b clblasRowMajor,\n or less than \b M when the
4113  *                      parameter is set to \b clblasColumnMajor.
4114  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4115  *                                task is to be performed.
4116  * @param[in] commandQueues       OpenCL command queues.
4117  * @param[in] numEventsInWaitList Number of events in the event wait list.
4118  * @param[in] eventWaitList       Event wait list.
4119  * @param[out] events   Event objects per each command queue that identify
4120  *                      a particular kernel execution instance.
4121  *
4122  * @return
4123  *   - \b clblasSuccess on success;
4124  *   - \b clblasInvalidDevice if a target device does not support floating
4125  *     point arithmetic with double precision;
4126  *   - the same error codes as the clblasCgeru() function otherwise.
4127  *
4128  * @ingroup GERU
4129  */
4130 clblasStatus
4131 clblasZgeru(
4132     clblasOrder order,
4133     size_t M,
4134     size_t N,
4135     cl_double2 alpha,
4136     const cl_mem X,
4137     size_t offx,
4138     int incx,
4139     const cl_mem Y,
4140     size_t offy,
4141     int incy,
4142     cl_mem A,
4143     size_t offa,
4144     size_t lda,
4145     cl_uint numCommandQueues,
4146     cl_command_queue *commandQueues,
4147     cl_uint numEventsInWaitList,
4148     const cl_event *eventWaitList,
4149     cl_event *events);
4150 /*@}*/
4151 
4152 /**
4153  * @defgroup GERC GERC  - General matrix rank 1 operation (conjugated)
4154  * @ingroup BLAS2
4155  */
4156 /*@{*/
4157 
4158 /**
4159  * @brief Vector-vector product with float complex elements; performs a
4160  * rank 1 update of matrix \b A.
4161  *
4162  * Rank 1 operation:
4163  *   - \f$ A \leftarrow \alpha X Y^H + A \f$
4164  *
4165  * @param[in] order     Row/column order.
4166  * @param[in] M         Number of rows in matrix \b A.
4167  * @param[in] N         Number of columns in matrix \b A.
4168  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^H \f$.
4169  * @param[in] X         Buffer object storing vector \b X.
4170  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4171  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4172  * @param[in] Y         Buffer object storing vector \b Y.
4173  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4174  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4175  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
4176  *                      overwritten by the updated matrix.
4177  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4178  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4179  *                      than \b N when the \b order parameter is set to
4180  *                      \b clblasRowMajor,\n or less than \b M when the
4181  *                      parameter is set to \b clblasColumnMajor.
4182  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4183  *                                task is to be performed.
4184  * @param[in] commandQueues       OpenCL command queues.
4185  * @param[in] numEventsInWaitList Number of events in the event wait list.
4186  * @param[in] eventWaitList       Event wait list.
4187  * @param[out] events   Event objects per each command queue that identify
4188  *                      a particular kernel execution instance.
4189  *
4190  * @return
4191  *   - \b clblasSuccess on success;
4192  *   - \b clblasNotInitialized if clblasSetup() was not called;
4193  *   - \b clblasInvalidValue if invalid parameters are passed:
4194  *     - \b M or \b N is zero, or
4195  *     - either \b incx or \b incy is zero, or
4196  *     - a leading dimension is invalid;
4197  *   - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid,
4198  *     or is an image object rather than a buffer object;
4199  *   - \b clblasOutOfResources if an image-based function implementation is
4200  *     used and no suitable scratch image is available;
4201  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4202  *     internal structures;
4203  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4204  *   - \b clblasInvalidContext if the context that a passed command queue
4205  *     belongs to was released;
4206  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4207  *     call has not completed for any of the target devices;
4208  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4209  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4210  *     executable.
4211  *
4212  * @ingroup GERC
4213  */
4214 
4215 clblasStatus
4216 clblasCgerc(
4217     clblasOrder order,
4218     size_t M,
4219     size_t N,
4220     cl_float2 alpha,
4221     const cl_mem X,
4222     size_t offx,
4223     int incx,
4224     const cl_mem Y,
4225     size_t offy,
4226     int incy,
4227     cl_mem A ,
4228     size_t offa,
4229     size_t lda,
4230     cl_uint numCommandQueues,
4231     cl_command_queue *commandQueues,
4232     cl_uint numEventsInWaitList,
4233     const cl_event *eventWaitList,
4234     cl_event *events);
4235 
4236 /**
4237  * @brief Vector-vector product with double complex elements; performs a
4238  * rank 1 update of matrix \b A.
4239  *
4240  * Rank 1 operation:
4241  *   - \f$ A \leftarrow \alpha X Y^H + A \f$
4242  *
4243  * @param[in] order     Row/column order.
4244  * @param[in] M         Number of rows in matrix \b A.
4245  * @param[in] N         Number of columns in matrix \b A.
4246  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ X Y^H \f$.
4247  * @param[in] X         Buffer object storing vector \b X.
4248  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4249  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4250  * @param[in] Y         Buffer object storing vector \b Y.
4251  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4252  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4253  * @param[in,out] A     Buffer object storing matrix \b A. On exit, A is
4254  *                      overwritten by the updated matrix.
4255  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4256  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4257  *                      than \b N when the \b order parameter is set to
4258  *                      \b clblasRowMajor,\n or less than \b M when the
4259  *                      parameter is set to \b clblasColumnMajor.
4260  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4261  *                                task is to be performed.
4262  * @param[in] commandQueues       OpenCL command queues.
4263  * @param[in] numEventsInWaitList Number of events in the event wait list.
4264  * @param[in] eventWaitList       Event wait list.
4265  * @param[out] events   Event objects per each command queue that identify
4266  *                      a particular kernel execution instance.
4267  *
4268  * @return
4269  *   - \b clblasSuccess on success;
4270  *   - \b clblasInvalidDevice if a target device does not support floating
4271  *     point arithmetic with double precision;
4272  *   - the same error codes as the clblasCgerc() function otherwise.
4273  *
4274  * @ingroup GERC
4275  */
4276 clblasStatus
4277 clblasZgerc(
4278     clblasOrder order,
4279     size_t M,
4280     size_t N,
4281     cl_double2 alpha,
4282     const cl_mem X,
4283     size_t offx,
4284     int incx,
4285     const cl_mem Y,
4286     size_t offy,
4287     int incy,
4288     cl_mem A,
4289     size_t offa,
4290     size_t lda,
4291     cl_uint numCommandQueues,
4292     cl_command_queue *commandQueues,
4293     cl_uint numEventsInWaitList,
4294     const cl_event *eventWaitList,
4295     cl_event *events);
4296 
4297 
4298 /*@}*/
4299 
4300 /**
4301  * @defgroup SYR SYR   - Symmetric rank 1 update
4302  *
4303  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
4304  * symmetric rank 1 update operations.
4305  * @ingroup BLAS2
4306  */
4307 
4308 /*@{*/
4309 /**
4310  * @brief Symmetric rank 1 operation with a general triangular matrix and
4311  * float elements.
4312  *
4313  * Symmetric rank 1 operation:
4314  *   - \f$ A \leftarrow \alpha x x^T + A \f$
4315  *
4316  * @param[in] order     Row/column order.
4317  * @param[in] uplo      The triangle in matrix \b A being referenced.
4318  * @param[in] N         Number of columns in matrix \b A.
4319  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ x x^T \f$.
4320  * @param[in] X         Buffer object storing vector \b X.
4321  * @param[in] offx      Offset of first element of vector \b X in buffer object.
4322  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4323  * @param[in,out] A     Buffer object storing matrix \b A.
4324  * @param[in] offa      Offset of first element of matrix \b A in buffer object.
4325  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4326  *                      than \b N.
4327  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4328  *                                task is to be performed.
4329  * @param[in] commandQueues       OpenCL command queues.
4330  * @param[in] numEventsInWaitList Number of events in the event wait list.
4331  * @param[in] eventWaitList       Event wait list.
4332  * @param[out] events   Event objects per each command queue that identify
4333  *                      a particular kernel execution instance.
4334  *
4335  * @return
4336  *   - \b clblasSuccess on success;
4337  *   - \b clblasNotInitialized if clblasSetup() was not called;
4338  *   - \b clblasInvalidValue if invalid parameters are passed:
4339  *     - \b N is zero, or
4340  *     - \b incx is zero, or
4341  *     - the leading dimension is invalid;
4342  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
4343  *     invalid, or is an image object rather than a buffer object;
4344  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4345  *     internal structures;
4346  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4347  *   - \b clblasInvalidContext if the context that a passed command queue
4348  *     belongs to was released;
4349  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4350  *     call has not completed for any of the target devices;
4351  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4352  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4353  *     executable.
4354  *
4355  * @ingroup SYR
4356  */
4357 clblasStatus
4358 clblasSsyr(
4359     clblasOrder order,
4360     clblasUplo uplo,
4361     size_t N,
4362     cl_float alpha,
4363     const cl_mem X,
4364     size_t offx,
4365     int incx,
4366     cl_mem A,
4367     size_t offa,
4368     size_t lda,
4369     cl_uint numCommandQueues,
4370     cl_command_queue* commandQueues,
4371     cl_uint numEventsInWaitList,
4372     const cl_event* eventWaitList,
4373     cl_event* events);
4374 
4375 /**
4376  * @brief Symmetric rank 1 operation with a general triangular matrix and
4377  * double elements.
4378  *
4379  * Symmetric rank 1 operation:
4380  *   - \f$ A \leftarrow \alpha x x^T + A \f$
4381  *
4382  * @param[in] order     Row/column order.
4383  * @param[in] uplo      The triangle in matrix \b A being referenced.
4384  * @param[in] N         Number of columns in matrix \b A.
4385  * @param[in] alpha     Scalar \f$ \alpha \f$ that multiplies \f$ x x^T \f$.
4386  * @param[in] X         Buffer object storing vector \b X.
4387  * @param[in] offx      Offset of first element of vector \b X in buffer object.
4388  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4389  * @param[in,out] A     Buffer object storing matrix \b A.
4390  * @param[in] offa      Offset of first element of matrix \b A in buffer object.
4391  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4392  *                      than \b N.
4393  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4394  *                                task is to be performed.
4395  * @param[in] commandQueues       OpenCL command queues.
4396  * @param[in] numEventsInWaitList Number of events in the event wait list.
4397  * @param[in] eventWaitList       Event wait list.
4398  * @param[out] events   Event objects per each command queue that identify
4399  *                      a particular kernel execution instance.
4400  *
4401  * @return
4402  *   - \b clblasSuccess on success;
4403  *   - \b clblasInvalidDevice if a target device does not support floating
4404  *     point arithmetic with double precision;
4405  *   - the same error codes as the clblasSsyr() function otherwise.
4406  *
4407  * @ingroup SYR
4408  */
4409 
4410 clblasStatus
4411 clblasDsyr(
4412     clblasOrder order,
4413     clblasUplo uplo,
4414     size_t N,
4415     cl_double alpha,
4416     const cl_mem X,
4417     size_t offx,
4418     int incx,
4419     cl_mem A,
4420     size_t offa,
4421     size_t lda,
4422     cl_uint numCommandQueues,
4423     cl_command_queue* commandQueues,
4424     cl_uint numEventsInWaitList,
4425     const cl_event* eventWaitList,
4426     cl_event* events);
4427 /*@}*/
4428 
4429 
4430 /**
4431  * @defgroup HER HER   - Hermitian rank 1 operation
4432  *
4433  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
4434  * Hermitian rank 1 operations.
4435  * @ingroup BLAS2
4436  */
4437 
4438 /*@{*/
4439 /**
4440  * @brief Hermitian rank 1 operation with a general triangular matrix and
4441  * float-complex elements.
4442  *
4443  * Hermitian rank 1 operation:
4444  *   - \f$ A \leftarrow \alpha X X^H + A \f$
4445  *
4446  * @param[in] order     Row/column order.
4447  * @param[in] uplo      The triangle in matrix being referenced.
4448  * @param[in] N         Number of columns in matrix \b A.
4449  * @param[in] alpha     Scalar factor applied to \f$ X X^H \f$ (a scalar float value).
4450  * @param[in] X         Buffer object storing vector \b X.
4451  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4452  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4453  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4454  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4455  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4456  *                      than \b N.
4457  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4458  *                                task is to be performed.
4459  * @param[in] commandQueues       OpenCL command queues.
4460  * @param[in] numEventsInWaitList Number of events in the event wait list.
4461  * @param[in] eventWaitList       Event wait list.
4462  * @param[in] events     Event objects per each command queue that identify
4463  *                       a particular kernel execution instance.
4464  *
4465  * @return
4466  *   - \b clblasSuccess on success;
4467  *   - \b clblasNotInitialized if clblasSetup() was not called;
4468  *   - \b clblasInvalidValue if invalid parameters are passed:
4469  *     - \b N is zero, or
4470  *     - \b incx is zero, or
4471  *     - the leading dimension is invalid;
4472  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
4473  *     invalid, or an image object rather than the buffer one;
4474  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4475  *     internal structures;
4476  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4477  *   - \b clblasInvalidContext if the context that a passed command queue
4478  *     belongs to was released;
4479  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4480  *     call has not completed for any of the target devices;
4481  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4482  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4483  *     executable.
4484  *
4485  * @ingroup HER
4486  */
4487 clblasStatus
4488 clblasCher(
4489 	clblasOrder order,
4490     clblasUplo uplo,
4491     size_t N,
4492     cl_float alpha,
4493     const cl_mem X,
4494     size_t offx,
4495     int incx,
4496     cl_mem A,
4497     size_t offa,
4498     size_t lda,
4499     cl_uint numCommandQueues,
4500     cl_command_queue* commandQueues,
4501     cl_uint numEventsInWaitList,
4502     const cl_event* eventWaitList,
4503     cl_event* events);
4504 /**
4505  * @example example_cher.c
4506  * Example of how to use the @ref clblasCher function.
4507  */
4508 
4509 /**
4510  * @brief Hermitian rank 1 operation with a general triangular matrix and
4511  * double-complex elements.
4512  *
4513  * Hermitian rank 1 operation:
4514  *   - \f$ A \leftarrow \alpha X X^H + A \f$
4515  *
4516  * @param[in] order     Row/column order.
4517  * @param[in] uplo      The triangle in matrix being referenced.
4518  * @param[in] N         Number of columns in matrix \b A.
4519  * @param[in] alpha     Scalar factor applied to \f$ X X^H \f$ (a scalar double value).
4520  * @param[in] X         Buffer object storing vector \b X.
4521  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4522  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4523  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4524  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4525  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4526  *                      than \b N.
4527  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4528  *                                task is to be performed.
4529  * @param[in] commandQueues       OpenCL command queues.
4530  * @param[in] numEventsInWaitList Number of events in the event wait list.
4531  * @param[in] eventWaitList       Event wait list.
4532  * @param[in] events     Event objects per each command queue that identify
4533  *                       a particular kernel execution instance.
4534  *
4535  * @return
4536  *   - \b clblasSuccess on success;
4537  *   - \b clblasInvalidDevice if a target device does not support floating
4538  *     point arithmetic with double precision;
4539  *   - the same error codes as the clblasCher() function otherwise.
4540  *
4541  * @ingroup HER
4542  */
4543 clblasStatus
4544 clblasZher(
4545     clblasOrder order,
4546     clblasUplo uplo,
4547     size_t N,
4548     cl_double alpha,
4549     const cl_mem X,
4550     size_t offx,
4551     int incx,
4552 	cl_mem A,
4553     size_t offa,
4554     size_t lda,
4555     cl_uint numCommandQueues,
4556     cl_command_queue* commandQueues,
4557     cl_uint numEventsInWaitList,
4558     const cl_event* eventWaitList,
4559     cl_event* events);
4560 /*@}*/
4561 
4562 /**
4563  * @defgroup SYR2 SYR2  - Symmetric rank 2 update
4564  *
4565  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
4566  * symmetric rank 2 update operations.
4567  * @ingroup BLAS2
4568  */
4569 
4570 /*@{*/
4571 /**
4572  * @brief Symmetric rank 2 operation with a general triangular matrix and
4573  * float elements.
4574  *
4575  * Symmetric rank 2 operation:
4576  *   - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$
4577  *
4578  * @param[in] order     Row/column order.
4579  * @param[in] uplo      The triangle in matrix being referenced.
4580  * @param[in] N         Number of columns in matrix \b A.
4581  * @param[in] alpha     Scalar factor applied to both \f$ x y^T \f$ and \f$ y x^T \f$.
4582  * @param[in] X         Buffer object storing vector \b X.
4583  * @param[in] offx      Offset of first element of vector \b X in buffer object.
4584  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4585  * @param[in] Y         Buffer object storing vector \b Y.
4586  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
4587  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4588  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4589  * @param[in] offa      Offset of first element of matrix \b A in buffer object.
4590  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4591  *                      than \b N.
4592  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4593  *                                task is to be performed.
4594  * @param[in] commandQueues       OpenCL command queues.
4595  * @param[in] numEventsInWaitList Number of events in the event wait list.
4596  * @param[in] eventWaitList       Event wait list.
4597  * @param[in] events     Event objects per each command queue that identify
4598  *                       a particular kernel execution instance.
4599  *
4600  * @return
4601  *   - \b clblasSuccess on success;
4602  *   - \b clblasNotInitialized if clblasSetup() was not called;
4603  *   - \b clblasInvalidValue if invalid parameters are passed:
4604  *     - \b N is zero, or
4605  *     - either \b incx or \b incy is zero, or
4606  *     - the leading dimension is invalid;
4607  *   - \b clblasInvalidMemObject if any of the \b A, \b X, or \b Y objects is
4608  *     invalid, or an image object rather than the buffer one;
4609  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4610  *     internal structures;
4611  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4612  *   - \b clblasInvalidContext if the context that a passed command queue
4613  *     belongs to was released;
4614  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4615  *     call has not completed for any of the target devices;
4616  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4617  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4618  *     executable.
4619  *
4620  * @ingroup SYR2
4621  */
4622 
4623 clblasStatus
4624 clblasSsyr2(
4625     clblasOrder order,
4626     clblasUplo uplo,
4627     size_t N,
4628     cl_float alpha,
4629     const cl_mem X,
4630     size_t offx,
4631     int  incx,
4632 	const cl_mem Y,
4633     size_t offy,
4634     int incy,
4635     cl_mem A,
4636     size_t offa,
4637     size_t lda,
4638     cl_uint numCommandQueues,
4639     cl_command_queue* commandQueues,
4640     cl_uint numEventsInWaitList,
4641     const cl_event* eventWaitList,
4642     cl_event* events);
4643 
4644 /**
4645  * @brief Symmetric rank 2 operation with a general triangular matrix and
4646  * double elements.
4647  *
4648  * Symmetric rank 2 operation:
4649  *   - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$
4650  *
4651  * @param[in] order     Row/column order.
4652  * @param[in] uplo      The triangle in matrix being referenced.
4653  * @param[in] N         Number of columns in matrix \b A.
4654  * @param[in] alpha     Scalar factor applied to both \f$ x y^T \f$ and \f$ y x^T \f$.
4655  * @param[in] X         Buffer object storing vector \b X.
4656  * @param[in] offx      Offset of first element of vector \b X in buffer object.
4657  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4658  * @param[in] Y         Buffer object storing vector \b Y.
4659  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
4660  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4661  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4662  * @param[in] offa      Offset of first element of matrix \b A in buffer object.
4663  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4664  *                      than \b N.
4665  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4666  *                                task is to be performed.
4667  * @param[in] commandQueues       OpenCL command queues.
4668  * @param[in] numEventsInWaitList Number of events in the event wait list.
4669  * @param[in] eventWaitList       Event wait list.
4670  * @param[in] events     Event objects per each command queue that identify
4671  *                       a particular kernel execution instance.
4672  *
4673  * @return
4674  *   - \b clblasSuccess on success;
4675  *   - \b clblasNotInitialized if clblasSetup() was not called;
4676  *   - \b clblasInvalidValue if invalid parameters are passed:
4677  *     - \b N is zero, or
4678  *     - either \b incx or \b incy is zero, or
4679  *     - the leading dimension is invalid;
4680  *   - \b clblasInvalidMemObject if any of the \b A, \b X, or \b Y objects is
4681  *     invalid, or an image object rather than the buffer one;
4682  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4683  *     internal structures;
4684  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4685  *   - \b clblasInvalidContext if the context that a passed command queue
4686  *     belongs to was released;
4687  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4688  *     call has not completed for any of the target devices;
4689  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4690  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4691  *     executable.
4692  *
4693  * @ingroup SYR2
4694  */
4695 
4696 clblasStatus
4697 clblasDsyr2(
4698     clblasOrder order,
4699     clblasUplo uplo,
4700     size_t N,
4701     cl_double alpha,
4702     const cl_mem X,
4703     size_t offx,
4704     int incx,
4705     const cl_mem Y,
4706     size_t offy,
4707     int incy,
4708     cl_mem A,
4709     size_t offa,
4710     size_t lda,
4711     cl_uint numCommandQueues,
4712     cl_command_queue* commandQueues,
4713     cl_uint numEventsInWaitList,
4714     const cl_event* eventWaitList,
4715     cl_event* events);
4716 
4717 /*@}*/
4718 
4719 /**
4720  * @defgroup HER2 HER2  - Hermitian rank 2 update
4721  *
4722  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
4723  * Hermitian rank 2 update operations.
4724  * @ingroup BLAS2
4725  */
4726 
4727 /*@{*/
4728 /**
4729  * @brief Hermitian rank 2 operation with a general triangular matrix and
4730  * float-complex elements.
4731  *
4732  * Hermitian rank 2 operation:
4733  *   - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$
4734  *
4735  * @param[in] order     Row/column order.
4736  * @param[in] uplo      The triangle in matrix being referenced.
4737  * @param[in] N         Number of columns in matrix \b A.
4738  * @param[in] alpha     Scalar factor applied to \f$ X Y^H \f$; its conjugate is applied to \f$ Y X^H \f$.
4739  * @param[in] X         Buffer object storing vector \b X.
4740  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4741  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4742  * @param[in] Y         Buffer object storing vector \b Y.
4743  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4744  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4745  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4746  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4747  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4748  *                      than \b N.
4749  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4750  *                                task is to be performed.
4751  * @param[in] commandQueues       OpenCL command queues.
4752  * @param[in] numEventsInWaitList Number of events in the event wait list.
4753  * @param[in] eventWaitList       Event wait list.
4754  * @param[in] events     Event objects per each command queue that identify
4755  *                       a particular kernel execution instance.
4756  *
4757  * @return
4758  *   - \b clblasSuccess on success;
4759  *   - \b clblasNotInitialized if clblasSetup() was not called;
4760  *   - \b clblasInvalidValue if invalid parameters are passed:
4761  *     - \b N is zero, or
4762  *     - either \b incx or \b incy is zero, or
4763  *     - the leading dimension is invalid;
4764  *   - \b clblasInvalidMemObject if any of the \b A, \b X, or \b Y objects is
4765  *     invalid, or an image object rather than the buffer one;
4766  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4767  *     internal structures;
4768  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4769  *   - \b clblasInvalidContext if the context that a passed command queue
4770  *     belongs to was released;
4771  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4772  *     call has not completed for any of the target devices;
4773  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4774  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4775  *     executable.
4776  *
4777  * @ingroup HER2
4778  */
4779 clblasStatus
4780 clblasCher2(
4781 	clblasOrder order,
4782     clblasUplo uplo,
4783     size_t N,
4784     cl_float2 alpha,
4785     const cl_mem X,
4786     size_t offx,
4787     int incx,
4788 	const cl_mem Y,
4789     size_t offy,
4790     int incy,
4791     cl_mem A,
4792     size_t offa,
4793     size_t lda,
4794     cl_uint numCommandQueues,
4795     cl_command_queue* commandQueues,
4796     cl_uint numEventsInWaitList,
4797     const cl_event* eventWaitList,
4798     cl_event* events);
4799 
4800 
4801 /**
4802  * @brief Hermitian rank 2 operation with a general triangular matrix and
4803  * double-complex elements.
4804  *
4805  * Hermitian rank 2 operation:
4806  *   - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$
4807  *
4808  * @param[in] order     Row/column order.
4809  * @param[in] uplo      The triangle in matrix being referenced.
4810  * @param[in] N         Number of columns in matrix \b A.
4811  * @param[in] alpha     Scalar factor applied to \f$ X Y^H \f$; its conjugate is applied to \f$ Y X^H \f$.
4812  * @param[in] X         Buffer object storing vector \b X.
4813  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
4814  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
4815  * @param[in] Y         Buffer object storing vector \b Y.
4816  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
4817  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
4818  * @param[in,out] A     Buffer object storing matrix \b A; it is read and then updated with the result.
4819  * @param[in] offa      Offset in number of elements for the first element in matrix \b A.
4820  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
4821  *                      than \b N.
4822  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4823  *                                task is to be performed.
4824  * @param[in] commandQueues       OpenCL command queues.
4825  * @param[in] numEventsInWaitList Number of events in the event wait list.
4826  * @param[in] eventWaitList       Event wait list.
4827  * @param[in] events     Event objects per each command queue that identify
4828  *                       a particular kernel execution instance.
4829  *
4830  * @return
4831  *   - \b clblasSuccess on success;
4832  *   - \b clblasInvalidDevice if a target device does not support floating
4833  *     point arithmetic with double precision;
4834  *   - the same error codes as the clblasCher2() function otherwise.
4835  *
4836  * @ingroup HER2
4837  */
4838 clblasStatus
4839 clblasZher2(
4840     clblasOrder order,
4841     clblasUplo uplo,
4842     size_t N,
4843     cl_double2 alpha,
4844     const cl_mem X,
4845     size_t offx,
4846     int incx,
4847     const cl_mem Y,
4848     size_t offy,
4849     int incy,
4850 	cl_mem A,
4851     size_t offa,
4852     size_t lda,
4853     cl_uint numCommandQueues,
4854     cl_command_queue* commandQueues,
4855     cl_uint numEventsInWaitList,
4856     const cl_event* eventWaitList,
4857     cl_event* events);
4858 
4859 /**
4860  * @example example_zher2.c
4861  * Example of how to use the @ref clblasZher2 function.
4862  */
4863 
4864 /*@}*/
4865 
4866 /**
4867  * @defgroup TPMV TPMV  - Triangular packed matrix-vector multiply
4868  * @ingroup BLAS2
4869  */
4870 /*@{*/
4871 
4872 /**
4873  * @brief Matrix-vector product with a packed triangular matrix and
4874  * float elements.
4875  *
4876  * Matrix-vector products:
4877  *   - \f$ X \leftarrow  A X \f$
4878  *   - \f$ X \leftarrow  A^T X \f$
4879  *
4880  * @param[in] order             Row/column order.
4881  * @param[in] uplo              The triangle in matrix being referenced.
4882  * @param[in] trans             How matrix \b AP is to be transposed.
4883  * @param[in] diag              Specify whether matrix \b AP is unit triangular.
4884  * @param[in] N                 Number of rows/columns in matrix \b AP.
4885  * @param[in] AP                Buffer object storing matrix \b AP in packed format.
4886  * @param[in] offa              Offset in number of elements for first element in matrix \b AP.
4887  * @param[in,out] X             Buffer object storing vector \b X; it is overwritten with the product.
4888  * @param[in] offx              Offset in number of elements for first element in vector \b X.
4889  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
4890  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
4891  *                              minimum of (1 + (N-1)*abs(incx)) elements.
4892  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4893  *                                task is to be performed.
4894  * @param[in] commandQueues       OpenCL command queues.
4895  * @param[in] numEventsInWaitList Number of events in the event wait list.
4896  * @param[in] eventWaitList       Event wait list.
4897  * @param[in] events     Event objects per each command queue that identify
4898  *                       a particular kernel execution instance.
4899  *
4900  * @return
4901  *   - \b clblasSuccess on success;
4902  *   - \b clblasNotInitialized if clblasSetup() was not called;
4903  *   - \b clblasInvalidValue if invalid parameters are passed:
4904  *     - either \b N or \b incx is zero;
4905  *   - \b clblasInvalidMemObject if either the \b AP or \b X object is
4906  *     invalid, or an image object rather than the buffer one;
4907  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
4908  *     internal structures;
4909  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
4910  *   - \b clblasInvalidContext if the context that a passed command queue
4911  *     belongs to was released;
4912  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
4913  *     call has not completed for any of the target devices;
4914  *   - \b clblasCompilerNotAvailable if a compiler is not available;
4915  *   - \b clblasBuildProgramFailure if there is a failure to build a program
4916  *     executable.
4917  *
4918  * @ingroup TPMV
4919  */
4920 clblasStatus
4921 clblasStpmv(
4922     clblasOrder order,
4923     clblasUplo uplo,
4924     clblasTranspose trans,
4925     clblasDiag diag,
4926     size_t N,
4927     const cl_mem AP,
4928     size_t offa,
4929     cl_mem X,
4930     size_t offx,
4931     int incx,
4932 	cl_mem scratchBuff,
4933     cl_uint numCommandQueues,
4934     cl_command_queue *commandQueues,
4935     cl_uint numEventsInWaitList,
4936     const cl_event *eventWaitList,
4937     cl_event *events);
4938 
4939 /**
4940  * @example example_stpmv.c
4941  * Example of how to use the @ref clblasStpmv function.
4942  */
4943 
4944 /**
4945  * @brief Matrix-vector product with a packed triangular matrix and
4946  * double elements.
4947  *
4948  * Matrix-vector products:
4949  *   - \f$ X \leftarrow  A X \f$
4950  *   - \f$ X \leftarrow  A^T X \f$
4951  *
4952  * @param[in] order             Row/column order.
4953  * @param[in] uplo              The triangle in matrix being referenced.
4954  * @param[in] trans             How matrix \b AP is to be transposed.
4955  * @param[in] diag              Specify whether matrix \b AP is unit triangular.
4956  * @param[in] N                 Number of rows/columns in matrix \b AP.
4957  * @param[in] AP                Buffer object storing matrix \b AP in packed format.
4958  * @param[in] offa              Offset in number of elements for first element in matrix \b AP.
4959  * @param[in,out] X             Buffer object storing vector \b X; it is overwritten with the product.
4960  * @param[in] offx              Offset in number of elements for first element in vector \b X.
4961  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
4962  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
4963  *                              minimum of (1 + (N-1)*abs(incx)) elements.
4964  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
4965  *                                task is to be performed.
4966  * @param[in] commandQueues       OpenCL command queues.
4967  * @param[in] numEventsInWaitList Number of events in the event wait list.
4968  * @param[in] eventWaitList       Event wait list.
4969  * @param[in] events     Event objects per each command queue that identify
4970  *                       a particular kernel execution instance.
4971  *
4972  * @return
4973  *   - \b clblasSuccess on success;
4974  *   - \b clblasInvalidDevice if a target device does not support floating
4975  *     point arithmetic with double precision;
4976  *   - the same error codes as the clblasStpmv() function otherwise.
4977  *
4978  * @ingroup TPMV
4979  */
4980 clblasStatus
4981 clblasDtpmv(
4982     clblasOrder order,
4983     clblasUplo uplo,
4984     clblasTranspose trans,
4985     clblasDiag diag,
4986     size_t N,
4987     const cl_mem AP,
4988     size_t offa,
4989     cl_mem X,
4990     size_t offx,
4991     int incx,
4992 	cl_mem scratchBuff,
4993     cl_uint numCommandQueues,
4994     cl_command_queue *commandQueues,
4995     cl_uint numEventsInWaitList,
4996     const cl_event *eventWaitList,
4997     cl_event *events);
4998 
4999 /**
5000  * @brief Matrix-vector product with a packed triangular matrix and
5001  * float-complex elements.
5002  *
5003  * Matrix-vector products:
5004  *   - \f$ X \leftarrow  A X \f$
5005  *   - \f$ X \leftarrow  A^T X \f$
5006  *
5007  * @param[in] order             Row/column order.
5008  * @param[in] uplo              The triangle in matrix being referenced.
5009  * @param[in] trans             How matrix \b AP is to be transposed.
5010  * @param[in] diag              Specify whether matrix \b AP is unit triangular.
5011  * @param[in] N                 Number of rows/columns in matrix \b AP.
5012  * @param[in] AP                Buffer object storing matrix \b AP in packed format.
5013  * @param[in] offa              Offset in number of elements for first element in matrix \b AP.
5014  * @param[in,out] X             Buffer object storing vector \b X; it is overwritten with the product.
5015  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5016  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5017  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
5018  *                              minimum of (1 + (N-1)*abs(incx)) elements.
5019  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5020  *                                task is to be performed.
5021  * @param[in] commandQueues       OpenCL command queues.
5022  * @param[in] numEventsInWaitList Number of events in the event wait list.
5023  * @param[in] eventWaitList       Event wait list.
5024  * @param[in] events     Event objects per each command queue that identify
5025  *                       a particular kernel execution instance.
5026  *
5027  * @return The same result as the clblasStpmv() function.
5028  * @ingroup TPMV
5029  */
5030 clblasStatus
5031 clblasCtpmv(
5032     clblasOrder order,
5033     clblasUplo uplo,
5034     clblasTranspose trans,
5035     clblasDiag diag,
5036     size_t N,
5037     const cl_mem AP,
5038     size_t offa,
5039     cl_mem X,
5040     size_t offx,
5041     int incx,
5042 	cl_mem scratchBuff,
5043     cl_uint numCommandQueues,
5044     cl_command_queue *commandQueues,
5045     cl_uint numEventsInWaitList,
5046     const cl_event *eventWaitList,
5047     cl_event *events);
5048 
5049 /**
5050  * @brief Matrix-vector product with a packed triangular matrix and
5051  * double-complex elements.
5052  *
5053  * Matrix-vector products:
5054  *   - \f$ X \leftarrow  A X \f$
5055  *   - \f$ X \leftarrow  A^T X \f$
5056  *
5057  * @param[in] order             Row/column order.
5058  * @param[in] uplo              The triangle in matrix being referenced.
5059  * @param[in] trans             How matrix \b AP is to be transposed.
5060  * @param[in] diag              Specify whether matrix \b AP is unit triangular.
5061  * @param[in] N                 Number of rows/columns in matrix \b AP.
5062  * @param[in] AP                Buffer object storing matrix \b AP in packed format.
5063  * @param[in] offa              Offset in number of elements for first element in matrix \b AP.
5064  * @param[in,out] X             Buffer object storing vector \b X; it is overwritten with the product.
5065  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5066  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5067  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
5068  *                              minimum of (1 + (N-1)*abs(incx)) elements.
5069  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5070  *                                task is to be performed.
5071  * @param[in] commandQueues       OpenCL command queues.
5072  * @param[in] numEventsInWaitList Number of events in the event wait list.
5073  * @param[in] eventWaitList       Event wait list.
5074  * @param[in] events     Event objects per each command queue that identify
5075  *                       a particular kernel execution instance.
5076  *
5077  * @return The same result as the clblasDtpmv() function.
5078  * @ingroup TPMV
5079  */
5080 clblasStatus
5081 clblasZtpmv(
5082     clblasOrder order,
5083     clblasUplo uplo,
5084     clblasTranspose trans,
5085     clblasDiag diag,
5086     size_t N,
5087     const cl_mem AP,
5088     size_t offa,
5089     cl_mem X,
5090     size_t offx,
5091     int incx,
5092 	cl_mem scratchBuff,
5093     cl_uint numCommandQueues,
5094     cl_command_queue *commandQueues,
5095     cl_uint numEventsInWaitList,
5096     const cl_event *eventWaitList,
5097     cl_event *events);
5098 /*@}*/
5099 
5100 
5101 
5102 /**
5103  * @defgroup TPSV TPSV  - Triangular packed matrix vector solve
5104  * @ingroup BLAS2
5105  */
5106 /*@{*/
5107 
5108 /**
5109  * @brief Solving triangular packed matrix problems with float elements.
5110  *
5111  * Triangular systems being solved:
5112  *   - \f$ A X \leftarrow  X \f$
5113  *   - \f$ A^T X \leftarrow  X \f$
5114  *
5115  * @param[in] order             Row/column order.
5116  * @param[in] uplo              The triangle in matrix being referenced.
5117  * @param[in] trans             How matrix \b A is to be transposed.
5118  * @param[in] diag              Specify whether matrix \b A is unit triangular.
5119  * @param[in] N                 Number of rows/columns in matrix \b A.
5120  * @param[in] A                 Buffer object storing matrix \b A in packed format.
5121  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
5122  * @param[in,out] X             Buffer object storing vector \b X.
5123  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5124  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5125  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5126  *                                task is to be performed.
5127  * @param[in] commandQueues       OpenCL command queues.
5128  * @param[in] numEventsInWaitList Number of events in the event wait list.
5129  * @param[in] eventWaitList       Event wait list.
5130  * @param[in] events     Event objects per each command queue that identify
5131  *                       a particular kernel execution instance.
5132  *
5133  * @return
5134  *   - \b clblasSuccess on success;
5135  *   - \b clblasNotInitialized if clblasSetup() was not called;
5136  *   - \b clblasInvalidValue if invalid parameters are passed:
5137  *     - either \b N or \b incx is zero, or
5138  *     - the leading dimension is invalid;
5139  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
5140  *     invalid, or an image object rather than the buffer one;
5141  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5142  *     internal structures;
5143  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5144  *   - \b clblasInvalidContext if the context that a passed command queue
5145  *     belongs to was released;
5146  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5147  *     call has not completed for any of the target devices;
5148  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5149  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5150  *     executable.
5151  *
5152  * @ingroup TPSV
5153  */
5154 
5155 clblasStatus
5156 clblasStpsv(
5157     clblasOrder order,
5158     clblasUplo uplo,
5159     clblasTranspose trans,
5160     clblasDiag diag,
5161     size_t N,
5162     const cl_mem A,
5163     size_t offa,
5164     cl_mem X,
5165     size_t offx,
5166     int incx,
5167     cl_uint numCommandQueues,
5168     cl_command_queue *commandQueues,
5169     cl_uint numEventsInWaitList,
5170     const cl_event *eventWaitList,
5171     cl_event *events);
5172 
5173 /**
5174  * @example example_stpsv.c
5175  * Example of how to use the @ref clblasStpsv function.
5176  */
5177 
5178 /**
5179  * @brief Solving triangular packed matrix problems with double elements.
5180  *
5181  * Triangular systems solved:
5182  *   - \f$ A X \leftarrow  X \f$
5183  *   - \f$ A^T X \leftarrow  X \f$
5184  *
5185  * @param[in] order             Row/column order.
5186  * @param[in] uplo              The triangle in matrix being referenced.
5187  * @param[in] trans             How matrix \b A is to be transposed.
5188  * @param[in] diag              Specify whether matrix \b A is unit triangular.
5189  * @param[in] N                 Number of rows/columns in matrix \b A.
5190  * @param[in] A                 Buffer object storing matrix \b A in packed format.
5191  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
5192  * @param[in,out] X             Buffer object storing vector \b X.
5193  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5194  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5195  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5196  *                                task is to be performed.
5197  * @param[in] commandQueues       OpenCL command queues.
5198  * @param[in] numEventsInWaitList Number of events in the event wait list.
5199  * @param[in] eventWaitList       Event wait list.
5200  * @param[in] events     Event objects per each command queue that identify
5201  *                       a particular kernel execution instance.
5202  *
5203  * @return
5204  *   - \b clblasSuccess on success;
5205  *   - \b clblasNotInitialized if clblasSetup() was not called;
5206  *   - \b clblasInvalidValue if invalid parameters are passed:
5207  *     - either \b N or \b incx is zero, or
5208  *     - the leading dimension is invalid;
5209  *   - \b clblasInvalidMemObject if either \b A or \b X object is
5210  *     invalid, or an image object rather than the buffer one;
5211  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5212  *     internal structures;
5213  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5214  *   - \b clblasInvalidContext if a context a passed command queue belongs
5215  *     to was released;
5216  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5217  *     call has not completed for any of the target devices;
5218  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5219  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5220  *     executable.
5221  *
5222  * @ingroup TPSV
5223  */
5224 
5225 clblasStatus
5226 clblasDtpsv(
5227     clblasOrder order,
5228     clblasUplo uplo,
5229     clblasTranspose trans,
5230     clblasDiag diag,
5231     size_t N,
5232     const cl_mem A,
5233     size_t offa,
5234     cl_mem X,
5235     size_t offx,
5236     int incx,
5237     cl_uint numCommandQueues,
5238     cl_command_queue *commandQueues,
5239     cl_uint numEventsInWaitList,
5240     const cl_event *eventWaitList,
5241     cl_event *events);
5242 
5243 /**
5244  * @brief Solving triangular packed matrix problems with float complex elements.
5245  *
5246  * Triangular systems solved:
5247  *   - \f$ A X \leftarrow  X \f$
5248  *   - \f$ A^T X \leftarrow  X \f$
5249  *
5250  * @param[in] order             Row/column order.
5251  * @param[in] uplo              The triangle in matrix being referenced.
5252  * @param[in] trans             How matrix \b A is to be transposed.
5253  * @param[in] diag              Specify whether matrix \b A is unit triangular.
5254  * @param[in] N                 Number of rows/columns in matrix \b A.
5255  * @param[in] A                 Buffer object storing matrix \b A in packed format.
5256  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
5257  * @param[in,out] X             Buffer object storing vector \b X.
5258  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5259  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5260  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5261  *                                task is to be performed.
5262  * @param[in] commandQueues       OpenCL command queues.
5263  * @param[in] numEventsInWaitList Number of events in the event wait list.
5264  * @param[in] eventWaitList       Event wait list.
5265  * @param[in] events     Event objects per each command queue that identify
5266  *                       a particular kernel execution instance.
5267  *
5268  * @return
5269  *   - \b clblasSuccess on success;
5270  *   - \b clblasNotInitialized if clblasSetup() was not called;
5271  *   - \b clblasInvalidValue if invalid parameters are passed:
5272  *     - either \b N or \b incx is zero, or
5273  *     - the leading dimension is invalid;
5274  *   - \b clblasInvalidMemObject if either \b A or \b X object is
5275  *     invalid, or an image object rather than the buffer one;
5276  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5277  *     internal structures;
5278  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5279  *   - \b clblasInvalidContext if a context a passed command queue belongs
5280  *     to was released;
5281  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5282  *     call has not completed for any of the target devices;
5283  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5284  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5285  *     executable.
5286  *
5287  * @ingroup TPSV
5288  */
5289 
5290 clblasStatus
5291 clblasCtpsv(
5292     clblasOrder order,
5293     clblasUplo uplo,
5294     clblasTranspose trans,
5295     clblasDiag diag,
5296     size_t N,
5297     const cl_mem A,
5298     size_t offa,
5299     cl_mem X,
5300     size_t offx,
5301     int incx,
5302     cl_uint numCommandQueues,
5303     cl_command_queue *commandQueues,
5304     cl_uint numEventsInWaitList,
5305     const cl_event *eventWaitList,
5306     cl_event *events);
5307 
5308 /**
5309  * @brief Solving triangular packed matrix problems with double complex elements.
5310  *
5311  * Triangular systems solved:
5312  *   - \f$ A X \leftarrow  X \f$
5313  *   - \f$ A^T X \leftarrow  X \f$
5314  *
5315  * @param[in] order             Row/column order.
5316  * @param[in] uplo              The triangle in matrix being referenced.
5317  * @param[in] trans             How matrix \b A is to be transposed.
5318  * @param[in] diag              Specify whether matrix \b A is unit triangular.
5319  * @param[in] N                 Number of rows/columns in matrix \b A.
5320  * @param[in] A                 Buffer object storing matrix \b A in packed format.
5321  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
5322  * @param[in,out] X             Buffer object storing vector \b X.
5323  * @param[in] offx              Offset in number of elements for first element in vector \b X.
5324  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
5325  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5326  *                                task is to be performed.
5327  * @param[in] commandQueues       OpenCL command queues.
5328  * @param[in] numEventsInWaitList Number of events in the event wait list.
5329  * @param[in] eventWaitList       Event wait list.
5330  * @param[in] events     Event objects per each command queue that identify
5331  *                       a particular kernel execution instance.
5332  *
5333  * @return
5334  *   - \b clblasSuccess on success;
5335  *   - \b clblasNotInitialized if clblasSetup() was not called;
5336  *   - \b clblasInvalidValue if invalid parameters are passed:
5337  *     - either \b N or \b incx is zero, or
5338  *     - the leading dimension is invalid;
5339  *   - \b clblasInvalidMemObject if either \b A or \b X object is
5340  *     invalid, or an image object rather than the buffer one;
5341  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5342  *     internal structures;
5343  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5344  *   - \b clblasInvalidContext if a context a passed command queue belongs
5345  *     to was released;
5346  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5347  *     call has not completed for any of the target devices;
5348  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5349  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5350  *     executable.
5351  *
5352  * @ingroup TPSV
5353  */
5354 
5355 clblasStatus
5356 clblasZtpsv(
5357     clblasOrder order,
5358     clblasUplo uplo,
5359     clblasTranspose trans,
5360     clblasDiag diag,
5361     size_t N,
5362     const cl_mem A,
5363     size_t offa,
5364     cl_mem X,
5365     size_t offx,
5366     int incx,
5367     cl_uint numCommandQueues,
5368     cl_command_queue *commandQueues,
5369     cl_uint numEventsInWaitList,
5370     const cl_event *eventWaitList,
5371     cl_event *events);
5372 /*@}*/
5373 
5374 
5375 /**
5376  * @defgroup SPMV SPMV  - Symmetric packed matrix vector multiply
5377  * @ingroup BLAS2
5378  */
5379 
5380 /*@{*/
5381 
5382 /**
5383  * @brief Matrix-vector product with a symmetric packed-matrix and float elements.
5384  *
5385  * Matrix-vector products:
5386  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
5387  *
5388  * @param[in] order     Row/column order.
5389  * @param[in] uplo      The triangle in matrix being referenced.
5390  * @param[in] N         Number of rows and columns in matrix \b AP.
5391  * @param[in] alpha     The factor of matrix \b AP.
5392  * @param[in] AP        Buffer object storing matrix \b AP.
5393  * @param[in] offa      Offset in number of elements for first element in matrix \b AP.
5394  * @param[in] X         Buffer object storing vector \b X.
5395  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5396  *                      Counted in elements.
5397  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
5398  * @param[in] beta      The factor of vector \b Y.
5399  * @param[in,out] Y     Buffer object storing vector \b Y.
5400  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
5401  *                      Counted in elements.
5402  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
5403  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5404  *                                task is to be performed.
5405  * @param[in] commandQueues       OpenCL command queues.
5406  * @param[in] numEventsInWaitList Number of events in the event wait list.
5407  * @param[in] eventWaitList       Event wait list.
5408  * @param[in] events     Event objects per each command queue that identify
5409  *                       a particular kernel execution instance.
5410  *
5411  * @return
5412  *   - \b clblasSuccess on success;
5413  *   - \b clblasNotInitialized if clblasSetup() was not called;
5414  *   - \b clblasInvalidValue if invalid parameters are passed:
5415  *     - \b N is zero, or
5416  *     - either \b incx or \b incy is zero, or
5417  *     - the matrix sizes or the vector sizes along with the increments lead to
5418  *       accessing outside of any of the buffers;
5419  *   - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
5420  *     invalid, or an image object rather than the buffer one;
5421  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5422  *     internal structures;
5423  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5424  *   - \b clblasInvalidContext if a context a passed command queue belongs to
5425  *     was released;
5426  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5427  *     call has not completed for any of the target devices;
5428  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5429  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5430  *     executable.
5431  *
5432  * @ingroup SPMV
5433  */
5434 clblasStatus
5435 clblasSspmv(
5436     clblasOrder order,
5437     clblasUplo uplo,
5438     size_t N,
5439     cl_float alpha,
5440     const cl_mem AP,
5441     size_t offa,
5442     const cl_mem X,
5443     size_t offx,
5444     int incx,
5445     cl_float beta,
5446     cl_mem Y,
5447     size_t offy,
5448     int incy,
5449     cl_uint numCommandQueues,
5450     cl_command_queue *commandQueues,
5451     cl_uint numEventsInWaitList,
5452     const cl_event *eventWaitList,
5453     cl_event *events);
5454 
5455 /**
5456  * @example example_sspmv.c
5457  * This is an example of how to use the @ref clblasSspmv function.
5458  */
5459 
5460 /**
5461  * @brief Matrix-vector product with a symmetric packed-matrix and double elements.
5462  *
5463  * Matrix-vector products:
5464  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
5465  *
5466  * @param[in] order     Row/column order.
5467  * @param[in] uplo      The triangle in matrix being referenced.
5468  * @param[in] N         Number of rows and columns in matrix \b AP.
5469  * @param[in] alpha     The factor of matrix \b AP.
5470  * @param[in] AP        Buffer object storing matrix \b AP.
5471  * @param[in] offa      Offset in number of elements for first element in matrix \b AP.
5472  * @param[in] X         Buffer object storing vector \b X.
5473  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5474  *                      Counted in elements.
5475  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
5476  * @param[in] beta      The factor of vector \b Y.
5477  * @param[in,out] Y     Buffer object storing vector \b Y.
5478  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
5479  *                      Counted in elements.
5480  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
5481  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5482  *                                task is to be performed.
5483  * @param[in] commandQueues       OpenCL command queues.
5484  * @param[in] numEventsInWaitList Number of events in the event wait list.
5485  * @param[in] eventWaitList       Event wait list.
5486  * @param[in] events     Event objects per each command queue that identify
5487  *                       a particular kernel execution instance.
5488  *
5489  * @return
5490  *   - \b clblasSuccess on success;
5491  *   - \b clblasInvalidDevice if a target device does not support floating
5492  *     point arithmetic with double precision;
5493  *   - the same error codes as the clblasSspmv() function otherwise.
5494  *
5495  * @ingroup SPMV
5496  */
5497 clblasStatus
5498 clblasDspmv(
5499     clblasOrder order,
5500     clblasUplo uplo,
5501     size_t N,
5502     cl_double alpha,
5503     const cl_mem AP,
5504     size_t offa,
5505     const cl_mem X,
5506     size_t offx,
5507     int incx,
5508     cl_double beta,
5509     cl_mem Y,
5510     size_t offy,
5511     int incy,
5512     cl_uint numCommandQueues,
5513     cl_command_queue *commandQueues,
5514     cl_uint numEventsInWaitList,
5515     const cl_event *eventWaitList,
5516     cl_event *events);
5517 /*@}*/
5518 
5519 
5520 
5521 /**
5522  * @defgroup HPMV HPMV  - Hermitian packed matrix-vector multiplication
5523  * @ingroup BLAS2
5524  */
5525 
5526 /*@{*/
5527 
5528 /**
5529  * @brief Matrix-vector product with a packed Hermitian matrix and float-complex elements.
5530  *
5531  * Matrix-vector products:
5532  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
5533  *
5534  * @param[in] order     Row/column order.
5535  * @param[in] uplo      The triangle in matrix being referenced.
5536  * @param[in] N         Number of rows and columns in matrix \b AP.
5537  * @param[in] alpha     The factor of matrix \b AP.
5538  * @param[in] AP        Buffer object storing packed matrix \b AP.
5539  * @param[in] offa      Offset in number of elements for first element in matrix \b AP.
5540  * @param[in] X         Buffer object storing vector \b X.
5541  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5542  *                      Counted in elements.
5543  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
5544  * @param[in] beta      The factor of vector \b Y.
5545  * @param[in,out] Y     Buffer object storing vector \b Y.
5546  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
5547  *                      Counted in elements.
5548  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
5549  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5550  *                                task is to be performed.
5551  * @param[in] commandQueues       OpenCL command queues.
5552  * @param[in] numEventsInWaitList Number of events in the event wait list.
5553  * @param[in] eventWaitList       Event wait list.
5554  * @param[in] events     Event objects per each command queue that identify
5555  *                       a particular kernel execution instance.
5556  *
5557  * @return
5558  *   - \b clblasSuccess on success;
5559  *   - \b clblasNotInitialized if clblasSetup() was not called;
5560  *   - \b clblasInvalidValue if invalid parameters are passed:
5561  *     - \b N is zero, or
5562  *     - either \b incx or \b incy is zero, or
5563  *     - the matrix sizes or the vector sizes along with the increments lead to
5564  *       accessing outside of any of the buffers;
5565  *   - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
5566  *     invalid, or an image object rather than the buffer one;
5567  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5568  *     internal structures;
5569  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5570  *   - \b clblasInvalidContext if a context a passed command queue belongs to
5571  *     was released;
5572  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5573  *     call has not completed for any of the target devices;
5574  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5575  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5576  *     executable.
5577  *
5578  * @ingroup HPMV
5579  */
5580 clblasStatus
5581 clblasChpmv(
5582     clblasOrder order,
5583     clblasUplo uplo,
5584     size_t N,
5585     cl_float2 alpha,
5586     const cl_mem AP,
5587     size_t offa,
5588     const cl_mem X,
5589     size_t offx,
5590     int incx,
5591     cl_float2 beta,
5592     cl_mem Y,
5593     size_t offy,
5594     int incy,
5595     cl_uint numCommandQueues,
5596     cl_command_queue *commandQueues,
5597     cl_uint numEventsInWaitList,
5598     const cl_event *eventWaitList,
5599     cl_event *events);
5600 
5601 /**
5602  * @example example_chpmv.c
5603  * This is an example of how to use the @ref clblasChpmv function.
5604  */
5605 
5606 
5607 /**
5608  * @brief Matrix-vector product with a packed Hermitian matrix and double-complex elements.
5609  *
5610  * Matrix-vector products:
5611  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
5612  *
5613  * @param[in] order     Row/column order.
5614  * @param[in] uplo      The triangle in matrix being referenced.
5615  * @param[in] N         Number of rows and columns in matrix \b AP.
5616  * @param[in] alpha     The factor of matrix \b AP.
5617  * @param[in] AP        Buffer object storing packed matrix \b AP.
5618  * @param[in] offa      Offset in number of elements for first element in matrix \b AP.
5619  * @param[in] X         Buffer object storing vector \b X.
5620  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5621  *                      Counted in elements.
5622  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
5623  * @param[in] beta      The factor of vector \b Y.
5624  * @param[in,out] Y     Buffer object storing vector \b Y.
5625  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
5626  *                      Counted in elements.
5627  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
5628  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5629  *                                task is to be performed.
5630  * @param[in] commandQueues       OpenCL command queues.
5631  * @param[in] numEventsInWaitList Number of events in the event wait list.
5632  * @param[in] eventWaitList       Event wait list.
5633  * @param[in] events     Event objects per each command queue that identify
5634  *                       a particular kernel execution instance.
5635  *
5636  * @return
5637  *   - \b clblasSuccess on success;
5638  *   - \b clblasInvalidDevice if a target device does not support floating
5639  *     point arithmetic with double precision;
5640  *   - the same error codes as the clblasChpmv() function otherwise.
5641  *
5642  * @ingroup HPMV
5643  */
5644 clblasStatus
5645 clblasZhpmv(
5646     clblasOrder order,
5647     clblasUplo uplo,
5648     size_t N,
5649     cl_double2 alpha,
5650     const cl_mem AP,
5651     size_t offa,
5652     const cl_mem X,
5653     size_t offx,
5654     int incx,
5655     cl_double2 beta,
5656     cl_mem Y,
5657     size_t offy,
5658     int incy,
5659     cl_uint numCommandQueues,
5660     cl_command_queue *commandQueues,
5661     cl_uint numEventsInWaitList,
5662     const cl_event *eventWaitList,
5663     cl_event *events);
5664 /*@}*/
5665 
5666 
5667 /**
5668  * @defgroup SPR SPR   - Symmetric packed matrix rank 1 update
5669  *
5670  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
5671  * symmetric rank 1 update operations on packed matrices
5672  * @ingroup BLAS2
5673  */
5674 
5675 /*@{*/
5676 /**
5677  * @brief Symmetric rank 1 operation with a general triangular packed-matrix and
5678  * float elements.
5679  *
5680  * Symmetric rank 1 operation:
5681  *   - \f$ A \leftarrow \alpha X X^T + A \f$
5682  *
5683  * @param[in] order     Row/column order.
5684  * @param[in] uplo      The triangle in matrix being referenced.
5685  * @param[in] N         Number of columns in matrix \b A.
5686  * @param[in] alpha     The factor of matrix \b A.
5687  * @param[in] X         Buffer object storing vector \b X.
5688  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5689  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
5690  * @param[in,out] AP    Buffer object storing packed-matrix \b AP.
5691  * @param[in] offa      Offset of first element of matrix \b AP in buffer object.
5692  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5693  *                                task is to be performed.
5694  * @param[in] commandQueues       OpenCL command queues.
5695  * @param[in] numEventsInWaitList Number of events in the event wait list.
5696  * @param[in] eventWaitList       Event wait list.
5697  * @param[in] events     Event objects per each command queue that identify
5698  *                       a particular kernel execution instance.
5699  *
5700  * @return
5701  *   - \b clblasSuccess on success;
5702  *   - \b clblasNotInitialized if clblasSetup() was not called;
5703  *   - \b clblasInvalidValue if invalid parameters are passed:
5704  *     - \b N is zero, or
5705  *     - \b incx is zero;
5706  *   - \b clblasInvalidMemObject if either \b AP or \b X object is
5707  *     invalid, or an image object rather than the buffer one;
5708  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5709  *     internal structures;
5710  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5711  *   - \b clblasInvalidContext if a context a passed command queue belongs
5712  *     to was released;
5713  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5714  *     call has not completed for any of the target devices;
5715  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5716  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5717  *     executable.
5718  *
5719  * @ingroup SPR
5720  */
5721 clblasStatus
5722 clblasSspr(
5723     clblasOrder order,
5724     clblasUplo uplo,
5725     size_t N,
5726     cl_float alpha,
5727     const cl_mem X,
5728     size_t offx,
5729     int incx,
5730     cl_mem AP,
5731     size_t offa,
5732     cl_uint numCommandQueues,
5733     cl_command_queue* commandQueues,
5734     cl_uint numEventsInWaitList,
5735     const cl_event* eventWaitList,
5736     cl_event* events);
5737 /**
5738  * @example example_sspr.c
5739  * Example of how to use the @ref clblasSspr function.
5740  */
5741 
5742 /**
5743  * @brief Symmetric rank 1 operation with a general triangular packed-matrix and
5744  * double elements.
5745  *
5746  * Symmetric rank 1 operation:
5747  *   - \f$ A \leftarrow \alpha X X^T + A \f$
5748  *
5749  * @param[in] order     Row/column order.
5750  * @param[in] uplo      The triangle in matrix being referenced.
5751  * @param[in] N         Number of columns in matrix \b A.
5752  * @param[in] alpha     The factor of matrix \b A.
5753  * @param[in] X         Buffer object storing vector \b X.
5754  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5755  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
5756  * @param[in,out] AP    Buffer object storing packed-matrix \b AP.
5757  * @param[in] offa      Offset of first element of matrix \b AP in buffer object.
5758  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5759  *                                task is to be performed.
5760  * @param[in] commandQueues       OpenCL command queues.
5761  * @param[in] numEventsInWaitList Number of events in the event wait list.
5762  * @param[in] eventWaitList       Event wait list.
5763  * @param[in] events     Event objects per each command queue that identify
5764  *                       a particular kernel execution instance.
5765  *
5766  * @return
5767  *   - \b clblasSuccess on success;
5768  *   - \b clblasInvalidDevice if a target device does not support floating
5769  *     point arithmetic with double precision;
5770  *   - the same error codes as the clblasSspr() function otherwise.
5771  *
5772  * @ingroup SPR
5773  */
5774 
5775 clblasStatus
5776 clblasDspr(
5777     clblasOrder order,
5778     clblasUplo uplo,
5779     size_t N,
5780     cl_double alpha,
5781     const cl_mem X,
5782     size_t offx,
5783     int incx,
5784     cl_mem AP,
5785     size_t offa,
5786     cl_uint numCommandQueues,
5787     cl_command_queue* commandQueues,
5788     cl_uint numEventsInWaitList,
5789     const cl_event* eventWaitList,
5790     cl_event* events);
5791 /*@}*/
5792 
5793 /**
5794  * @defgroup HPR HPR   - Hermitian packed matrix rank 1 update
5795  *
5796  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
5797  * Hermitian rank 1 update operations on packed matrices
5798  * @ingroup BLAS2
5799  */
5800 
5801 /*@{*/
5802 /**
5803  * @brief Hermitian rank 1 operation with a general triangular packed-matrix and
5804  * float-complex elements.
5805  *
5806  * Hermitian rank 1 operation:
5807  *   - \f$ A \leftarrow \alpha X X^H + A \f$
5808  *
5809  * @param[in] order     Row/column order.
5810  * @param[in] uplo      The triangle in matrix being referenced.
5811  * @param[in] N         Number of columns in matrix \b A.
5812  * @param[in] alpha     The factor of matrix \b A (a scalar float value).
5813  * @param[in] X         Buffer object storing vector \b X.
5814  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
5815  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
5816  * @param[in,out] AP    Buffer object storing matrix \b AP.
5817  * @param[in] offa      Offset in number of elements for the first element in matrix \b AP.
5818  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5819  *                                task is to be performed.
5820  * @param[in] commandQueues       OpenCL command queues.
5821  * @param[in] numEventsInWaitList Number of events in the event wait list.
5822  * @param[in] eventWaitList       Event wait list.
5823  * @param[in] events     Event objects per each command queue that identify
5824  *                       a particular kernel execution instance.
5825  *
5826  * @return
5827  *   - \b clblasSuccess on success;
5828  *   - \b clblasNotInitialized if clblasSetup() was not called;
5829  *   - \b clblasInvalidValue if invalid parameters are passed:
5830  *     - \b N is zero, or
5831  *     - \b incx is zero;
5832  *   - \b clblasInvalidMemObject if either \b AP or \b X object is
5833  *     invalid, or an image object rather than the buffer one;
5834  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5835  *     internal structures;
5836  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5837  *   - \b clblasInvalidContext if a context a passed command queue belongs
5838  *     to was released;
5839  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5840  *     call has not completed for any of the target devices;
5841  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5842  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5843  *     executable.
5844  *
5845  * @ingroup HPR
5846  */
5847 clblasStatus
5848 clblasChpr(
5849     clblasOrder order,
5850     clblasUplo uplo,
5851     size_t N,
5852     cl_float alpha,
5853     const cl_mem X,
5854     size_t offx,
5855     int  incx,
5856     cl_mem AP,
5857     size_t offa,
5858     cl_uint numCommandQueues,
5859     cl_command_queue* commandQueues,
5860     cl_uint numEventsInWaitList,
5861     const cl_event* eventWaitList,
5862     cl_event* events);
5863 /**
5864  * @example example_chpr.c
5865  * Example of how to use the @ref clblasChpr function.
5866  */
5867 
5868 /**
5869  * @brief Hermitian rank 1 operation with a general triangular packed-matrix and
5870  * double-complex elements.
5871  *
5872  * Hermitian rank 1 operation:
5873  *   - \f$ A \leftarrow \alpha X X^H + A \f$
5874  *
5875  * @param[in] order     Row/column order.
5876  * @param[in] uplo      The triangle in matrix being referenced.
5877  * @param[in] N         Number of columns in matrix \b A.
5878  * @param[in] alpha     The factor of matrix \b A (a scalar double value).
5879  * @param[in] X         Buffer object storing vector \b X.
5880  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
5881  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
5882  * @param[in,out] AP    Buffer object storing matrix \b AP.
5883  * @param[in] offa      Offset in number of elements for the first element in matrix \b AP.
5884  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5885  *                                task is to be performed.
5886  * @param[in] commandQueues       OpenCL command queues.
5887  * @param[in] numEventsInWaitList Number of events in the event wait list.
5888  * @param[in] eventWaitList       Event wait list.
5889  * @param[in] events     Event objects per each command queue that identify
5890  *                       a particular kernel execution instance.
5891  *
5892  * @return
5893  *   - \b clblasSuccess on success;
5894  *   - \b clblasInvalidDevice if a target device does not support floating
5895  *     point arithmetic with double precision;
5896  *   - the same error codes as the clblasChpr() function otherwise.
5897  *
5898  * @ingroup HPR
5899  */
5900 clblasStatus
5901 clblasZhpr(
5902     clblasOrder order,
5903     clblasUplo uplo,
5904     size_t N,
5905     cl_double alpha,
5906     const cl_mem X,
5907     size_t offx,
5908     int incx,
5909     cl_mem AP,
5910     size_t offa,
5911     cl_uint numCommandQueues,
5912     cl_command_queue* commandQueues,
5913     cl_uint numEventsInWaitList,
5914     const cl_event* eventWaitList,
5915     cl_event* events);
5916 /*@}*/
5917 
5918 /**
5919  * @defgroup SPR2 SPR2  - Symmetric packed matrix rank 2 update
5920  *
5921  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
5922  * symmetric rank 2 update operations on packed matrices.
5923  * @ingroup BLAS2
5924  */
5925 
5926 /*@{*/
5927 /**
5928  * @brief Symmetric rank 2 operation with a general triangular packed-matrix and
5929  * float elements.
5930  *
5931  * Symmetric rank 2 operation:
5932  *   - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$
5933  *
5934  * @param[in] order     Row/column order.
5935  * @param[in] uplo      The triangle in matrix being referenced.
5936  * @param[in] N         Number of columns in matrix \b A.
5937  * @param[in] alpha     The factor of matrix \b A.
5938  * @param[in] X         Buffer object storing vector \b X.
5939  * @param[in] offx      Offset of first element of vector \b X in buffer object.
5940  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
5941  * @param[in] Y         Buffer object storing vector \b Y.
5942  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
5943  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
5944  * @param[in,out] AP    Buffer object storing packed-matrix \b AP; updated in place.
5945  * @param[in] offa      Offset of first element of matrix \b AP in buffer object.
5946  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
5947  *                                task is to be performed.
5948  * @param[in] commandQueues       OpenCL command queues.
5949  * @param[in] numEventsInWaitList Number of events in the event wait list.
5950  * @param[in] eventWaitList       Event wait list.
5951  * @param[in] events     Event objects per each command queue that identify
5952  *                       a particular kernel execution instance.
5953  *
5954  * @return
5955  *   - \b clblasSuccess on success;
5956  *   - \b clblasNotInitialized if clblasSetup() was not called;
5957  *   - \b clblasInvalidValue if invalid parameters are passed:
5958  *     - \b N is zero, or
5959  *     - either \b incx or \b incy is zero;
5960  *   - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
5961  *     invalid, or an image object rather than the buffer one;
5962  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
5963  *     internal structures;
5964  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
5965  *   - \b clblasInvalidContext if the context that a passed command queue
5966  *     belongs to was released;
5967  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
5968  *     call has not completed for any of the target devices;
5969  *   - \b clblasCompilerNotAvailable if a compiler is not available;
5970  *   - \b clblasBuildProgramFailure if there is a failure to build a program
5971  *     executable.
5972  *
5973  * @ingroup SPR2
5974  */
5975 
5976 clblasStatus
5977 clblasSspr2(
5978 	clblasOrder order,
5979     clblasUplo uplo,
5980     size_t N,
5981     cl_float alpha,
5982     const cl_mem X,
5983     size_t offx,
5984     int incx,
5985 	const cl_mem Y,
5986     size_t offy,
5987     int incy,
5988     cl_mem AP,
5989     size_t offa,
5990     cl_uint numCommandQueues,
5991     cl_command_queue* commandQueues,
5992     cl_uint numEventsInWaitList,
5993     const cl_event* eventWaitList,
5994     cl_event* events);
5995 /**
5996  * @example example_sspr2.c
5997  * Example of how to use the @ref clblasSspr2 function.
5998  */
5999 
6000 /**
6001  * @brief Symmetric rank 2 operation with a general triangular packed-matrix and
6002  * double elements.
6003  *
6004  * Symmetric rank 2 operation:
6005  *   - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$
6006  *
6007  * @param[in] order     Row/column order.
6008  * @param[in] uplo      The triangle in matrix being referenced.
6009  * @param[in] N         Number of columns in matrix \b A.
6010  * @param[in] alpha     The factor of matrix \b A.
6011  * @param[in] X         Buffer object storing vector \b X.
6012  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6013  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6014  * @param[in] Y         Buffer object storing vector \b Y.
6015  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6016  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6017  * @param[in,out] AP    Buffer object storing packed-matrix \b AP; updated in place.
6018  * @param[in] offa      Offset of first element of matrix \b AP in buffer object.
6019  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6020  *                                task is to be performed.
6021  * @param[in] commandQueues       OpenCL command queues.
6022  * @param[in] numEventsInWaitList Number of events in the event wait list.
6023  * @param[in] eventWaitList       Event wait list.
6024  * @param[in] events     Event objects per each command queue that identify
6025  *                       a particular kernel execution instance.
6026  *
6027  * @return
6028  *   - \b clblasSuccess on success;
6029  *   - \b clblasInvalidDevice if a target device does not support floating
6030  *     point arithmetic with double precision;
6031  *   - the same error codes as the clblasSspr2() function otherwise.
6032  *
6033  * @ingroup SPR2
6034  */
6035 
6036 clblasStatus
6037 clblasDspr2(
6038     clblasOrder order,
6039     clblasUplo uplo,
6040     size_t N,
6041     cl_double alpha,
6042     const cl_mem X,
6043     size_t offx,
6044     int incx,
6045     const cl_mem Y,
6046     size_t offy,
6047     int incy,
6048 	cl_mem AP,
6049     size_t offa,
6050     cl_uint numCommandQueues,
6051     cl_command_queue* commandQueues,
6052     cl_uint numEventsInWaitList,
6053     const cl_event* eventWaitList,
6054     cl_event* events);
6055 /*@}*/
6056 
6057 /**
6058  * @defgroup HPR2 HPR2  - Hermitian packed matrix rank 2 update
6059  *
6060  * The Level 2 Basic Linear Algebra Subprograms are functions that perform
6061  * Hermitian rank 2 update operations on packed matrices.
6062  * @ingroup BLAS2
6063  */
6064 
6065 /*@{*/
6066 /**
6067  * @brief Hermitian rank 2 operation with a general triangular packed-matrix and
6068  * float-complex elements.
6069  *
6070  * Hermitian rank 2 operation:
6071  *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
6072  *
6073  * @param[in] order     Row/column order.
6074  * @param[in] uplo      The triangle in matrix being referenced.
6075  * @param[in] N         Number of columns in matrix \b A.
6076  * @param[in] alpha     The factor of matrix \b A.
6077  * @param[in] X         Buffer object storing vector \b X.
6078  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
6079  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6080  * @param[in] Y         Buffer object storing vector \b Y.
6081  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
6082  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6083  * @param[in,out] AP    Buffer object storing packed-matrix \b AP; updated in place.
6084  * @param[in] offa      Offset in number of elements for the first element in matrix \b AP.
6085  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6086  *                                task is to be performed.
6087  * @param[in] commandQueues       OpenCL command queues.
6088  * @param[in] numEventsInWaitList Number of events in the event wait list.
6089  * @param[in] eventWaitList       Event wait list.
6090  * @param[in] events     Event objects per each command queue that identify
6091  *                       a particular kernel execution instance.
6092  *
6093  * @return
6094  *   - \b clblasSuccess on success;
6095  *   - \b clblasNotInitialized if clblasSetup() was not called;
6096  *   - \b clblasInvalidValue if invalid parameters are passed:
6097  *     - \b N is zero, or
6098  *     - either \b incx or \b incy is zero;
6099  *   - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
6100  *     invalid, or an image object rather than the buffer one;
6101  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
6102  *     internal structures;
6103  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
6104  *   - \b clblasInvalidContext if the context that a passed command queue
6105  *     belongs to was released;
6106  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
6107  *     call has not completed for any of the target devices;
6108  *   - \b clblasCompilerNotAvailable if a compiler is not available;
6109  *   - \b clblasBuildProgramFailure if there is a failure to build a program
6110  *     executable.
6111  *
6112  * @ingroup HPR2
6113  */
6114 clblasStatus
6115 clblasChpr2(
6116 	clblasOrder order,
6117     clblasUplo uplo,
6118     size_t N,
6119     cl_float2 alpha,
6120     const cl_mem X,
6121     size_t offx,
6122     int incx,
6123 	const cl_mem Y,
6124     size_t offy,
6125     int incy,
6126     cl_mem AP,
6127     size_t offa,
6128     cl_uint numCommandQueues,
6129     cl_command_queue* commandQueues,
6130     cl_uint numEventsInWaitList,
6131     const cl_event* eventWaitList,
6132     cl_event* events);
6133 
6134 
6135 /**
6136  * @brief Hermitian rank 2 operation with a general triangular packed-matrix and
6137  * double-complex elements.
6138  *
6139  * Hermitian rank 2 operation:
6140  *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
6141  *
6142  * @param[in] order     Row/column order.
6143  * @param[in] uplo      The triangle in matrix being referenced.
6144  * @param[in] N         Number of columns in matrix \b A.
6145  * @param[in] alpha     The factor of matrix \b A.
6146  * @param[in] X         Buffer object storing vector \b X.
6147  * @param[in] offx      Offset in number of elements for the first element in vector \b X.
6148  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6149  * @param[in] Y         Buffer object storing vector \b Y.
6150  * @param[in] offy      Offset in number of elements for the first element in vector \b Y.
6151  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6152  * @param[in,out] AP    Buffer object storing packed-matrix \b AP; updated in place.
6153  * @param[in] offa      Offset in number of elements for the first element in matrix \b AP.
6154  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6155  *                                task is to be performed.
6156  * @param[in] commandQueues       OpenCL command queues.
6157  * @param[in] numEventsInWaitList Number of events in the event wait list.
6158  * @param[in] eventWaitList       Event wait list.
6159  * @param[in] events     Event objects per each command queue that identify
6160  *                       a particular kernel execution instance.
6161  *
6162  * @return
6163  *   - \b clblasSuccess on success;
6164  *   - \b clblasInvalidDevice if a target device does not support floating
6165  *     point arithmetic with double precision;
6166  *   - the same error codes as the clblasChpr2() function otherwise.
6167  *
6168  * @ingroup HPR2
6169  */
6170 clblasStatus
6171 clblasZhpr2(
6172     clblasOrder order,
6173     clblasUplo uplo,
6174     size_t N,
6175     cl_double2 alpha,
6176     const cl_mem X,
6177     size_t offx,
6178     int incx,
6179     const cl_mem Y,
6180     size_t offy,
6181     int incy,
6182 	cl_mem AP,
6183     size_t offa,
6184     cl_uint numCommandQueues,
6185     cl_command_queue* commandQueues,
6186     cl_uint numEventsInWaitList,
6187     const cl_event* eventWaitList,
6188     cl_event* events);
6189 
6190 /**
6191  * @example example_zhpr2.c
6192  * Example of how to use the @ref clblasZhpr2 function.
6193  */
6194 /*@}*/
6195 
6196 
6197 
6198 /**
6199  * @defgroup GBMV GBMV  - General banded matrix-vector multiplication
6200  * @ingroup BLAS2
6201  */
6202 /*@{*/
6203 
6204 /**
6205  * @brief Matrix-vector product with a general rectangular banded matrix and
6206  * float elements.
6207  *
6208  * Matrix-vector products:
6209  *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6210  *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
6211  *
6212  * @param[in] order     Row/column order.
6213  * @param[in] trans     How matrix \b A is to be transposed.
6214  * @param[in] M         Number of rows in banded matrix \b A.
6215  * @param[in] N         Number of columns in banded matrix \b A.
6216  * @param[in] KL        Number of sub-diagonals in banded matrix \b A.
6217  * @param[in] KU        Number of super-diagonals in banded matrix \b A.
6218  * @param[in] alpha     The factor of banded matrix \b A.
6219  * @param[in] A         Buffer object storing banded matrix \b A.
6220  * @param[in] offa      Offset in number of elements for the first element in banded matrix \b A.
6221  * @param[in] lda       Leading dimension of banded matrix \b A. It cannot be less
6222  *                      than ( \b KL + \b KU + 1 )
6223  * @param[in] X         Buffer object storing vector \b X.
6224  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6225  *                      Counted in elements.
6226  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6227  * @param[in] beta      The factor of the vector \b Y.
6228  * @param[in,out] Y     Buffer object storing the vector \b Y; updated in place.
6229  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6230  *                      Counted in elements.
6231  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6232  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6233  *                                task is to be performed.
6234  * @param[in] commandQueues       OpenCL command queues.
6235  * @param[in] numEventsInWaitList Number of events in the event wait list.
6236  * @param[in] eventWaitList       Event wait list.
6237  * @param[in] events     Event objects per each command queue that identify
6238  *                       a particular kernel execution instance.
6239  *
6240  * @return
6241  *   - \b clblasSuccess on success;
6242  *   - \b clblasNotInitialized if clblasSetup() was not called;
6243  *   - \b clblasInvalidValue if invalid parameters are passed:
6244  *     - either \b M or \b N is zero, or
6245  *     - \b KL is greater than \b M - 1, or
6246  *     - \b KU is greater than \b N - 1, or
6247  *     - either \b incx or \b incy is zero, or
6248  *     - any of the leading dimensions is invalid, or
6249  *     - the matrix size or the vector sizes along with the increments lead to
6250  *       accessing outside of any of the buffers;
6251  *   - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is
6252  *     invalid, or an image object rather than the buffer one;
6253  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
6254  *     internal structures;
6255  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
6256  *   - \b clblasInvalidContext if the context that a passed command queue
6257  *     belongs to was released;
6258  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
6259  *     call has not completed for any of the target devices;
6260  *   - \b clblasCompilerNotAvailable if a compiler is not available;
6261  *   - \b clblasBuildProgramFailure if there is a failure to build a program
6262  *     executable.
6263  *
6264  * @ingroup GBMV
6265  */
6266 clblasStatus
6267 clblasSgbmv(
6268     clblasOrder order,
6269     clblasTranspose trans,
6270     size_t M,
6271     size_t N,
6272     size_t KL,
6273     size_t KU,
6274     cl_float alpha,
6275     const cl_mem A,
6276     size_t offa,
6277     size_t lda,
6278     const cl_mem X,
6279     size_t offx,
6280     int incx,
6281     cl_float beta,
6282     cl_mem Y,
6283     size_t offy,
6284     int incy,
6285     cl_uint numCommandQueues,
6286     cl_command_queue *commandQueues,
6287     cl_uint numEventsInWaitList,
6288     const cl_event *eventWaitList,
6289     cl_event *events);
6290 /**
6291  * @example example_sgbmv.c
6292  * Example of how to use the @ref clblasSgbmv function.
6293  */
6294 
6295 
6296 /**
6297  * @brief Matrix-vector product with a general rectangular banded matrix and
6298  * double elements.
6299  *
6300  * Matrix-vector products:
6301  *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6302  *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
6303  *
6304  * @param[in] order     Row/column order.
6305  * @param[in] trans     How matrix \b A is to be transposed.
6306  * @param[in] M         Number of rows in banded matrix \b A.
6307  * @param[in] N         Number of columns in banded matrix \b A.
6308  * @param[in] KL        Number of sub-diagonals in banded matrix \b A.
6309  * @param[in] KU        Number of super-diagonals in banded matrix \b A.
6310  * @param[in] alpha     The factor of banded matrix \b A.
6311  * @param[in] A         Buffer object storing banded matrix \b A.
6312  * @param[in] offa      Offset in number of elements for the first element in banded matrix \b A.
6313  * @param[in] lda       Leading dimension of banded matrix \b A. It cannot be less
6314  *                      than ( \b KL + \b KU + 1 )
6315  * @param[in] X         Buffer object storing vector \b X.
6316  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6317  *                      Counted in elements.
6318  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6319  * @param[in] beta      The factor of the vector \b Y.
6320  * @param[in,out] Y     Buffer object storing the vector \b Y; updated in place.
6321  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6322  *                      Counted in elements.
6323  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6324  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6325  *                                task is to be performed.
6326  * @param[in] commandQueues       OpenCL command queues.
6327  * @param[in] numEventsInWaitList Number of events in the event wait list.
6328  * @param[in] eventWaitList       Event wait list.
6329  * @param[in] events     Event objects per each command queue that identify
6330  *                       a particular kernel execution instance.
6331  *
6332  * @return
6333  *   - \b clblasSuccess on success;
6334  *   - \b clblasInvalidDevice if a target device does not support floating
6335  *     point arithmetic with double precision;
6336  *   - the same error codes as the clblasSgbmv() function otherwise.
6337  *
6338  * @ingroup GBMV
6339  */
6340 clblasStatus
6341 clblasDgbmv(
6342     clblasOrder order,
6343     clblasTranspose trans,
6344     size_t M,
6345     size_t N,
6346     size_t KL,
6347     size_t KU,
6348     cl_double alpha,
6349     const cl_mem A,
6350     size_t offa,
6351     size_t lda,
6352     const cl_mem X,
6353     size_t offx,
6354     int incx,
6355     cl_double beta,
6356     cl_mem Y,
6357     size_t offy,
6358     int incy,
6359     cl_uint numCommandQueues,
6360     cl_command_queue *commandQueues,
6361     cl_uint numEventsInWaitList,
6362     const cl_event *eventWaitList,
6363     cl_event *events);
6364 
6365 
6366 /**
6367  * @brief Matrix-vector product with a general rectangular banded matrix and
6368  * float-complex elements.
6369  *
6370  * Matrix-vector products:
6371  *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6372  *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
6373  *
6374  * @param[in] order     Row/column order.
6375  * @param[in] trans     How matrix \b A is to be transposed.
6376  * @param[in] M         Number of rows in banded matrix \b A.
6377  * @param[in] N         Number of columns in banded matrix \b A.
6378  * @param[in] KL        Number of sub-diagonals in banded matrix \b A.
6379  * @param[in] KU        Number of super-diagonals in banded matrix \b A.
6380  * @param[in] alpha     The factor of banded matrix \b A.
6381  * @param[in] A         Buffer object storing banded matrix \b A.
6382  * @param[in] offa      Offset in number of elements for the first element in banded matrix \b A.
6383  * @param[in] lda       Leading dimension of banded matrix \b A. It cannot be less
6384  *                      than ( \b KL + \b KU + 1 )
6385  * @param[in] X         Buffer object storing vector \b X.
6386  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6387  *                      Counted in elements.
6388  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6389  * @param[in] beta      The factor of the vector \b Y.
6390  * @param[in,out] Y     Buffer object storing the vector \b Y; updated in place.
6391  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6392  *                      Counted in elements.
6393  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6394  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6395  *                                task is to be performed.
6396  * @param[in] commandQueues       OpenCL command queues.
6397  * @param[in] numEventsInWaitList Number of events in the event wait list.
6398  * @param[in] eventWaitList       Event wait list.
6399  * @param[in] events     Event objects per each command queue that identify
6400  *                       a particular kernel execution instance.
6401  *
6402  * @return The same result as the clblasSgbmv() function.
6403  *
6404  * @ingroup GBMV
6405  */
6406 clblasStatus
6407 clblasCgbmv(
6408     clblasOrder order,
6409     clblasTranspose trans,
6410     size_t M,
6411     size_t N,
6412     size_t KL,
6413     size_t KU,
6414     cl_float2 alpha,
6415     const cl_mem A,
6416     size_t offa,
6417     size_t lda,
6418     const cl_mem X,
6419     size_t offx,
6420     int incx,
6421     cl_float2 beta,
6422     cl_mem Y,
6423     size_t offy,
6424     int incy,
6425     cl_uint numCommandQueues,
6426     cl_command_queue *commandQueues,
6427     cl_uint numEventsInWaitList,
6428     const cl_event *eventWaitList,
6429     cl_event *events);
6430 
6431 
6432 /**
6433  * @brief Matrix-vector product with a general rectangular banded matrix and
6434  * double-complex elements.
6435  *
6436  * Matrix-vector products:
6437  *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6438  *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
6439  *
6440  * @param[in] order     Row/column order.
6441  * @param[in] trans     How matrix \b A is to be transposed.
6442  * @param[in] M         Number of rows in banded matrix \b A.
6443  * @param[in] N         Number of columns in banded matrix \b A.
6444  * @param[in] KL        Number of sub-diagonals in banded matrix \b A.
6445  * @param[in] KU        Number of super-diagonals in banded matrix \b A.
6446  * @param[in] alpha     The factor of banded matrix \b A.
6447  * @param[in] A         Buffer object storing banded matrix \b A.
6448  * @param[in] offa      Offset in number of elements for the first element in banded matrix \b A.
6449  * @param[in] lda       Leading dimension of banded matrix \b A. It cannot be less
6450  *                      than ( \b KL + \b KU + 1 )
6451  * @param[in] X         Buffer object storing vector \b X.
6452  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6453  *                      Counted in elements.
6454  * @param[in] incx      Increment for the elements of \b X. Must not be zero.
6455  * @param[in] beta      The factor of the vector \b Y.
6456  * @param[in,out] Y     Buffer object storing the vector \b Y; updated in place.
6457  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6458  *                      Counted in elements.
6459  * @param[in] incy      Increment for the elements of \b Y. Must not be zero.
6460  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6461  *                                task is to be performed.
6462  * @param[in] commandQueues       OpenCL command queues.
6463  * @param[in] numEventsInWaitList Number of events in the event wait list.
6464  * @param[in] eventWaitList       Event wait list.
6465  * @param[in] events     Event objects per each command queue that identify
6466  *                       a particular kernel execution instance.
6467  *
6468  * @return The same result as the clblasDgbmv() function.
6469  *
6470  * @ingroup GBMV
6471  */
6472 clblasStatus
6473 clblasZgbmv(
6474     clblasOrder order,
6475     clblasTranspose trans,
6476     size_t M,
6477     size_t N,
6478     size_t KL,
6479     size_t KU,
6480     cl_double2 alpha,
6481     const cl_mem A,
6482     size_t offa,
6483     size_t lda,
6484     const cl_mem X,
6485     size_t offx,
6486     int incx,
6487     cl_double2 beta,
6488     cl_mem Y,
6489     size_t offy,
6490     int incy,
6491     cl_uint numCommandQueues,
6492     cl_command_queue *commandQueues,
6493     cl_uint numEventsInWaitList,
6494     const cl_event *eventWaitList,
6495     cl_event *events);
6496 /*@}*/
6497 
6498 
6499 /**
6500  * @defgroup TBMV TBMV  - Triangular banded matrix vector multiply
6501  * @ingroup BLAS2
6502  */
6503 /*@{*/
6504 
6505 /**
6506  * @brief Matrix-vector product with a triangular banded matrix and
6507  * float elements.
6508  *
6509  * Matrix-vector products:
6510  *   - \f$ X \leftarrow  A X \f$
6511  *   - \f$ X \leftarrow  A^T X \f$
6512  *
6513  * @param[in] order             Row/column order.
6514  * @param[in] uplo              The triangle in matrix being referenced.
6515  * @param[in] trans             How matrix \b A is to be transposed.
6516  * @param[in] diag              Specify whether matrix \b A is unit triangular.
6517  * @param[in] N                 Number of rows/columns in banded matrix \b A.
6518  * @param[in] K                 Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
6519  * @param[in] A                 Buffer object storing matrix \b A.
6520  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
6521  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
6522  *                              than ( \b K + 1 )
6523  * @param[in,out] X             Buffer object storing vector \b X; updated in place.
6524  * @param[in] offx              Offset in number of elements for first element in vector \b X.
6525  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
6526  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
6527  *                              minimum of (1 + (N-1)*abs(incx)) elements
6528  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6529  *                                task is to be performed.
6530  * @param[in] commandQueues       OpenCL command queues.
6531  * @param[in] numEventsInWaitList Number of events in the event wait list.
6532  * @param[in] eventWaitList       Event wait list.
6533  * @param[in] events     Event objects per each command queue that identify
6534  *                       a particular kernel execution instance.
6535  *
6536  * @return
6537  *   - \b clblasSuccess on success;
6538  *   - \b clblasNotInitialized if clblasSetup() was not called;
6539  *   - \b clblasInvalidValue if invalid parameters are passed:
6540  *     - either \b N or \b incx is zero, or
6541  *     - \b K is greater than \b N - 1, or
6542  *     - the leading dimension is invalid;
6543  *   - \b clblasInvalidMemObject if either \b A or \b X object is
6544  *     invalid, or an image object rather than the buffer one;
6545  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
6546  *     internal structures;
6547  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
6548  *   - \b clblasInvalidContext if the context that a passed command queue
6549  *     belongs to was released;
6550  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
6551  *     call has not completed for any of the target devices;
6552  *   - \b clblasCompilerNotAvailable if a compiler is not available;
6553  *   - \b clblasBuildProgramFailure if there is a failure to build a program
6554  *     executable.
6555  *
6556  * @ingroup TBMV
6557  */
6558 clblasStatus
6559 clblasStbmv(
6560     clblasOrder order,
6561     clblasUplo uplo,
6562     clblasTranspose trans,
6563     clblasDiag diag,
6564     size_t N,
6565     size_t K,
6566     const cl_mem A,
6567     size_t offa,
6568     size_t lda,
6569     cl_mem X,
6570     size_t offx,
6571     int incx,
6572     cl_mem scratchBuff,
6573     cl_uint numCommandQueues,
6574     cl_command_queue *commandQueues,
6575     cl_uint numEventsInWaitList,
6576     const cl_event *eventWaitList,
6577     cl_event *events);
6578 /**
6579  * @example example_stbmv.c
6580  * Example of how to use the @ref clblasStbmv function.
6581  */
6582 
6583 
6584 /**
6585  * @brief Matrix-vector product with a triangular banded matrix and
6586  * double elements.
6587  *
6588  * Matrix-vector products:
6589  *   - \f$ X \leftarrow  A X \f$
6590  *   - \f$ X \leftarrow  A^T X \f$
6591  *
6592  * @param[in] order             Row/column order.
6593  * @param[in] uplo              The triangle in matrix being referenced.
6594  * @param[in] trans             How matrix \b A is to be transposed.
6595  * @param[in] diag              Specify whether matrix \b A is unit triangular.
6596  * @param[in] N                 Number of rows/columns in banded matrix \b A.
6597  * @param[in] K                 Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
6598  * @param[in] A                 Buffer object storing matrix \b A.
6599  * @param[in] offa              Offset in number of elements for first element in matrix \b A.
6600  * @param[in] lda               Leading dimension of matrix \b A. It cannot be less
6601  *                              than ( \b K + 1 )
6602  * @param[in,out] X             Buffer object storing vector \b X; updated in place.
6603  * @param[in] offx              Offset in number of elements for first element in vector \b X.
6604  * @param[in] incx              Increment for the elements of \b X. Must not be zero.
6605  * @param[in] scratchBuff       Temporary cl_mem scratch buffer object which can hold a
6606  *                              minimum of (1 + (N-1)*abs(incx)) elements
6607  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6608  *                                task is to be performed.
6609  * @param[in] commandQueues       OpenCL command queues.
6610  * @param[in] numEventsInWaitList Number of events in the event wait list.
6611  * @param[in] eventWaitList       Event wait list.
6612  * @param[in] events     Event objects per each command queue that identify
6613  *                       a particular kernel execution instance.
6614  *
6615  * @return
6616  *   - \b clblasSuccess on success;
6617  *   - \b clblasInvalidDevice if a target device does not support floating
6618  *     point arithmetic with double precision;
6619  *   - the same error codes as the clblasStbmv() function otherwise.
6620  *
6621  * @ingroup TBMV
6622  */
6623 clblasStatus
6624 clblasDtbmv(
6625     clblasOrder order,
6626     clblasUplo uplo,
6627     clblasTranspose trans,
6628     clblasDiag diag,
6629     size_t N,
6630     size_t K,
6631     const cl_mem A,
6632     size_t offa,
6633     size_t lda,
6634     cl_mem X,
6635     size_t offx,
6636     int incx,
6637     cl_mem scratchBuff,
6638     cl_uint numCommandQueues,
6639     cl_command_queue *commandQueues,
6640     cl_uint numEventsInWaitList,
6641     const cl_event *eventWaitList,
6642     cl_event *events);
6643 
6644 
6645 /**
6646  * @brief Matrix-vector product with a triangular banded matrix and
6647  * float-complex elements.
6648  *
6649  * Matrix-vector products (the result overwrites \b X):
6650  *   - \f$ X \leftarrow  A X \f$
6651  *   - \f$ X \leftarrow  A^T X \f$
6652  *
6653  * @param[in] order				Row/column order.
6654  * @param[in] uplo				The triangle of matrix \b A being referenced.
6655  * @param[in] trans				How matrix \b A is to be transposed.
6656  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
6657  * @param[in] N					Number of rows/columns in banded matrix \b A.
6658  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
6659  * @param[in] A					Buffer object storing matrix \b A.
6660  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
6661  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
6662  *								than ( \b K + 1 ).
6663  * @param[in,out] X				Buffer object storing vector \b X; read as input and overwritten with the product.
6664  * @param[in] offx				Offset in number of elements for first element in vector \b X.
6665  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
6666  * @param[in] scratchBuff		Temporary cl_mem scratch buffer object that can hold at
6667  *								least (1 + (N-1)*abs(incx)) elements.
6668  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6669  *                                task is to be performed.
6670  * @param[in] commandQueues       OpenCL command queues.
6671  * @param[in] numEventsInWaitList Number of events in the event wait list.
6672  * @param[in] eventWaitList       Event wait list.
6673  * @param[in] events     Event objects per each command queue that identify
6674  *                       a particular kernel execution instance.
6675  *
6676  * @return The same status codes as the clblasStbmv() function.
6677  *
6678  * @ingroup TBMV
6679  */
6680 clblasStatus
6681 clblasCtbmv(
6682     clblasOrder order,
6683     clblasUplo uplo,
6684     clblasTranspose trans,
6685     clblasDiag diag,
6686     size_t N,
6687     size_t K,
6688     const cl_mem A,
6689     size_t offa,
6690     size_t lda,
6691     cl_mem X,
6692     size_t offx,
6693     int incx,
6694     cl_mem scratchBuff,
6695     cl_uint numCommandQueues,
6696     cl_command_queue *commandQueues,
6697     cl_uint numEventsInWaitList,
6698     const cl_event *eventWaitList,
6699     cl_event *events);
6700 
6701 
6702 /**
6703  * @brief Matrix-vector product with a triangular banded matrix and
6704  * double-complex elements.
6705  *
6706  * Matrix-vector products (the result overwrites \b X):
6707  *   - \f$ X \leftarrow  A X \f$
6708  *   - \f$ X \leftarrow  A^T X \f$
6709  *
6710  * @param[in] order				Row/column order.
6711  * @param[in] uplo				The triangle of matrix \b A being referenced.
6712  * @param[in] trans				How matrix \b A is to be transposed.
6713  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
6714  * @param[in] N					Number of rows/columns in banded matrix \b A.
6715  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
6716  * @param[in] A					Buffer object storing matrix \b A.
6717  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
6718  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
6719  *								than ( \b K + 1 ).
6720  * @param[in,out] X				Buffer object storing vector \b X; read as input and overwritten with the product.
6721  * @param[in] offx				Offset in number of elements for first element in vector \b X.
6722  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
6723  * @param[in] scratchBuff		Temporary cl_mem scratch buffer object that can hold at
6724  *								least (1 + (N-1)*abs(incx)) elements.
6725  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6726  *                                task is to be performed.
6727  * @param[in] commandQueues       OpenCL command queues.
6728  * @param[in] numEventsInWaitList Number of events in the event wait list.
6729  * @param[in] eventWaitList       Event wait list.
6730  * @param[in] events     Event objects per each command queue that identify
6731  *                       a particular kernel execution instance.
6732  *
6733  * @return The same status codes as the clblasDtbmv() function.
6734  *
6735  * @ingroup TBMV
6736  */
6737 clblasStatus
6738 clblasZtbmv(
6739     clblasOrder order,
6740     clblasUplo uplo,
6741     clblasTranspose trans,
6742     clblasDiag diag,
6743     size_t N,
6744     size_t K,
6745     const cl_mem A,
6746     size_t offa,
6747     size_t lda,
6748     cl_mem X,
6749     size_t offx,
6750     int incx,
6751     cl_mem scratchBuff,
6752     cl_uint numCommandQueues,
6753     cl_command_queue *commandQueues,
6754     cl_uint numEventsInWaitList,
6755     const cl_event *eventWaitList,
6756     cl_event *events);
6757 /*@}*/
6758 
6759 
6760 /**
6761  * @defgroup SBMV SBMV  - Symmetric banded matrix-vector multiplication
6762  * @ingroup BLAS2
6763  */
6764 /*@{*/
6765 
6766 /**
6767  * @brief Matrix-vector product with a symmetric banded matrix and float elements.
6768  *
6769  * Matrix-vector products:
6770  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6771  *
6772  * @param[in] order     Row/column order.
6773  * @param[in] uplo      The triangle of matrix \b A being referenced.
6774  * @param[in] N         Number of rows and columns in banded matrix \b A.
6775  * @param[in] K			Number of sub-diagonals/super-diagonals in banded matrix \b A.
6776  * @param[in] alpha     The factor of matrix \b A.
6777  * @param[in] A			Buffer object storing matrix \b A.
6778  * @param[in] offa		Offset in number of elements for first element in matrix \b A.
6779  * @param[in] lda		Leading dimension of matrix \b A. It cannot be less
6780  *						than ( \b K + 1 ).
6781  * @param[in] X         Buffer object storing vector \b X.
6782  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6783  *                      Counted in elements.
6784  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
6785  * @param[in] beta      The factor of vector \b Y.
6786  * @param[in,out] Y     Buffer object storing vector \b Y; scaled by \b beta and updated in place.
6787  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6788  *                      Counted in elements.
6789  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
6790  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6791  *                                task is to be performed.
6792  * @param[in] commandQueues       OpenCL command queues.
6793  * @param[in] numEventsInWaitList Number of events in the event wait list.
6794  * @param[in] eventWaitList       Event wait list.
6795  * @param[in] events     Event objects per each command queue that identify
6796  *                       a particular kernel execution instance.
6797  *
6798  * @return
6799  *   - \b clblasSuccess on success;
6800  *   - \b clblasNotInitialized if clblasSetup() was not called;
6801  *   - \b clblasInvalidValue if invalid parameters are passed:
6802  *     - either \b N or \b incx is zero,
6803  *     - \b K is greater than \b N - 1, or
6804  *     - the leading dimension is invalid;
6805  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
6806  *     invalid, or is an image object rather than a buffer object;
6807  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
6808  *     internal structures;
6809  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
6810  *   - \b clblasInvalidContext if the context that a passed command queue
6811  *     belongs to was released;
6812  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
6813  *     call has not completed for any of the target devices;
6814  *   - \b clblasCompilerNotAvailable if a compiler is not available;
6815  *   - \b clblasBuildProgramFailure if there is a failure to build a program
6816  *     executable.
6817  *
6818  * @ingroup SBMV
6819  */
6820 clblasStatus
6821 clblasSsbmv(
6822     clblasOrder order,
6823     clblasUplo uplo,
6824     size_t N,
6825     size_t K,
6826     cl_float alpha,
6827     const cl_mem A,
6828     size_t offa,
6829     size_t lda,
6830     const cl_mem X,
6831     size_t offx,
6832     int incx,
6833     cl_float beta,
6834     cl_mem Y,
6835     size_t offy,
6836     int incy,
6837     cl_uint numCommandQueues,
6838     cl_command_queue *commandQueues,
6839     cl_uint numEventsInWaitList,
6840     const cl_event *eventWaitList,
6841     cl_event *events);
6842 /**
6843  * @example example_ssbmv.c
6844  * This is an example of how to use the @ref clblasSsbmv function.
6845  */
6846 
6847 
6848 /**
6849  * @brief Matrix-vector product with a symmetric banded matrix and double elements.
6850  *
6851  * Matrix-vector products:
6852  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6853  *
6854  * @param[in] order     Row/column order.
6855  * @param[in] uplo      The triangle of matrix \b A being referenced.
6856  * @param[in] N         Number of rows and columns in banded matrix \b A.
6857  * @param[in] K			Number of sub-diagonals/super-diagonals in banded matrix \b A.
6858  * @param[in] alpha     The factor of matrix \b A.
6859  * @param[in] A			Buffer object storing matrix \b A.
6860  * @param[in] offa		Offset in number of elements for first element in matrix \b A.
6861  * @param[in] lda		Leading dimension of matrix \b A. It cannot be less
6862  *						than ( \b K + 1 ).
6863  * @param[in] X         Buffer object storing vector \b X.
6864  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6865  *                      Counted in elements.
6866  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
6867  * @param[in] beta      The factor of vector \b Y.
6868  * @param[in,out] Y     Buffer object storing vector \b Y; scaled by \b beta and updated in place.
6869  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6870  *                      Counted in elements.
6871  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
6872  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6873  *                                task is to be performed.
6874  * @param[in] commandQueues       OpenCL command queues.
6875  * @param[in] numEventsInWaitList Number of events in the event wait list.
6876  * @param[in] eventWaitList       Event wait list.
6877  * @param[in] events     Event objects per each command queue that identify
6878  *                       a particular kernel execution instance.
6879  *
6880  * @return
6881  *   - \b clblasSuccess on success;
6882  *   - \b clblasInvalidDevice if a target device does not support
6883  *     double-precision floating point arithmetic;
6884  *   - the same error codes as the clblasSsbmv() function otherwise.
6885  *
6886  * @ingroup SBMV
6887  */
6888 clblasStatus
6889 clblasDsbmv(
6890     clblasOrder order,
6891     clblasUplo uplo,
6892     size_t N,
6893     size_t K,
6894     cl_double alpha,
6895     const cl_mem A,
6896     size_t offa,
6897     size_t lda,
6898     const cl_mem X,
6899     size_t offx,
6900     int incx,
6901     cl_double beta,
6902     cl_mem Y,
6903     size_t offy,
6904     int incy,
6905     cl_uint numCommandQueues,
6906     cl_command_queue *commandQueues,
6907     cl_uint numEventsInWaitList,
6908     const cl_event *eventWaitList,
6909     cl_event *events);
6910 
6911 /*@}*/
6912 
6913 
6914 /**
6915  * @defgroup HBMV HBMV  - Hermitian banded matrix-vector multiplication
6916  * @ingroup BLAS2
6917  */
6918 /*@{*/
6919 
6920 /**
6921  * @brief Matrix-vector product with a Hermitian banded matrix and float-complex elements.
6922  *
6923  * Matrix-vector products:
6924  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
6925  *
6926  * @param[in] order     Row/column order.
6927  * @param[in] uplo      The triangle of matrix \b A being referenced.
6928  * @param[in] N         Number of rows and columns in banded matrix \b A.
6929  * @param[in] K			Number of sub-diagonals/super-diagonals in banded matrix \b A.
6930  * @param[in] alpha     The factor of matrix \b A.
6931  * @param[in] A			Buffer object storing matrix \b A.
6932  * @param[in] offa		Offset in number of elements for first element in matrix \b A.
6933  * @param[in] lda		Leading dimension of matrix \b A. It cannot be less
6934  *						than ( \b K + 1 ).
6935  * @param[in] X         Buffer object storing vector \b X.
6936  * @param[in] offx      Offset of first element of vector \b X in buffer object.
6937  *                      Counted in elements.
6938  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
6939  * @param[in] beta      The factor of vector \b Y.
6940  * @param[in,out] Y     Buffer object storing vector \b Y; scaled by \b beta and updated in place.
6941  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
6942  *                      Counted in elements.
6943  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
6944  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
6945  *                                task is to be performed.
6946  * @param[in] commandQueues       OpenCL command queues.
6947  * @param[in] numEventsInWaitList Number of events in the event wait list.
6948  * @param[in] eventWaitList       Event wait list.
6949  * @param[in] events     Event objects per each command queue that identify
6950  *                       a particular kernel execution instance.
6951  *
6952  * @return
6953  *   - \b clblasSuccess on success;
6954  *   - \b clblasNotInitialized if clblasSetup() was not called;
6955  *   - \b clblasInvalidValue if invalid parameters are passed:
6956  *     - either \b N or \b incx is zero,
6957  *     - \b K is greater than \b N - 1, or
6958  *     - the leading dimension is invalid;
6959  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
6960  *     invalid, or is an image object rather than a buffer object;
6961  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
6962  *     internal structures;
6963  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
6964  *   - \b clblasInvalidContext if the context that a passed command queue
6965  *     belongs to was released;
6966  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
6967  *     call has not completed for any of the target devices;
6968  *   - \b clblasCompilerNotAvailable if a compiler is not available;
6969  *   - \b clblasBuildProgramFailure if there is a failure to build a program
6970  *     executable.
6971  *
6972  * @ingroup HBMV
6973  */
6974 clblasStatus
6975 clblasChbmv(
6976     clblasOrder order,
6977     clblasUplo uplo,
6978     size_t N,
6979     size_t K,
6980     cl_float2 alpha,
6981     const cl_mem A,
6982     size_t offa,
6983     size_t lda,
6984     const cl_mem X,
6985     size_t offx,
6986     int incx,
6987     cl_float2 beta,
6988     cl_mem Y,
6989     size_t offy,
6990     int incy,
6991     cl_uint numCommandQueues,
6992     cl_command_queue *commandQueues,
6993     cl_uint numEventsInWaitList,
6994     const cl_event *eventWaitList,
6995     cl_event *events);
6996 /**
6997  * @example example_chbmv.c
6998  * This is an example of how to use the @ref clblasChbmv function.
6999  */
7000 
7001 
7002 /**
7003  * @brief Matrix-vector product with a Hermitian banded matrix and double-complex elements.
7004  *
7005  * Matrix-vector products:
7006  * - \f$ Y \leftarrow \alpha A X + \beta Y \f$
7007  *
7008  * @param[in] order     Row/column order.
7009  * @param[in] uplo      The triangle of matrix \b A being referenced.
7010  * @param[in] N         Number of rows and columns in banded matrix \b A.
7011  * @param[in] K			Number of sub-diagonals/super-diagonals in banded matrix \b A.
7012  * @param[in] alpha     The factor of matrix \b A.
7013  * @param[in] A			Buffer object storing matrix \b A.
7014  * @param[in] offa		Offset in number of elements for first element in matrix \b A.
7015  * @param[in] lda		Leading dimension of matrix \b A. It cannot be less
7016  *						than ( \b K + 1 ).
7017  * @param[in] X         Buffer object storing vector \b X.
7018  * @param[in] offx      Offset of first element of vector \b X in buffer object.
7019  *                      Counted in elements.
7020  * @param[in] incx      Increment for the elements of vector \b X. It cannot be zero.
7021  * @param[in] beta      The factor of vector \b Y.
7022  * @param[in,out] Y     Buffer object storing vector \b Y; scaled by \b beta and updated in place.
7023  * @param[in] offy      Offset of first element of vector \b Y in buffer object.
7024  *                      Counted in elements.
7025  * @param[in] incy      Increment for the elements of vector \b Y. It cannot be zero.
7026  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7027  *                                task is to be performed.
7028  * @param[in] commandQueues       OpenCL command queues.
7029  * @param[in] numEventsInWaitList Number of events in the event wait list.
7030  * @param[in] eventWaitList       Event wait list.
7031  * @param[in] events     Event objects per each command queue that identify
7032  *                       a particular kernel execution instance.
7033  *
7034  * @return
7035  *   - \b clblasSuccess on success;
7036  *   - \b clblasInvalidDevice if a target device does not support
7037  *     double-precision floating point arithmetic;
7038  *   - the same error codes as the clblasChbmv() function otherwise.
7039  *
7040  * @ingroup HBMV
7041  */
7042 clblasStatus
7043 clblasZhbmv(
7044     clblasOrder order,
7045     clblasUplo uplo,
7046     size_t N,
7047     size_t K,
7048     cl_double2 alpha,
7049     const cl_mem A,
7050     size_t offa,
7051     size_t lda,
7052     const cl_mem X,
7053     size_t offx,
7054     int incx,
7055     cl_double2 beta,
7056     cl_mem Y,
7057     size_t offy,
7058     int incy,
7059     cl_uint numCommandQueues,
7060     cl_command_queue *commandQueues,
7061     cl_uint numEventsInWaitList,
7062     const cl_event *eventWaitList,
7063     cl_event *events);
7064 
7065 /*@}*/
7066 
7067 
7068 /**
7069  * @defgroup TBSV TBSV  - Solving triangular banded matrix
7070  * @ingroup BLAS2
7071  */
7072 /*@{*/
7073 
7074 /**
7075  * @brief Solves triangular banded matrix problems with float elements.
7076  *
7077  * Equations solved for \b X (the right-hand side is passed in \b X):
7078  *   - \f$ A X \leftarrow  X \f$
7079  *   - \f$ A^T X \leftarrow  X \f$
7080  *
7081  * @param[in] order				Row/column order.
7082  * @param[in] uplo				The triangle of matrix \b A being referenced.
7083  * @param[in] trans				How matrix \b A is to be transposed.
7084  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
7085  * @param[in] N					Number of rows/columns in banded matrix \b A.
7086  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
7087  * @param[in] A					Buffer object storing matrix \b A.
7088  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
7089  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
7090  *								than ( \b K + 1 ).
7091  * @param[in,out] X				Buffer object storing vector \b X; holds the right-hand side on entry and the result on exit.
7092  * @param[in] offx				Offset in number of elements for first element in vector \b X.
7093  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
7094  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7095  *                                task is to be performed.
7096  * @param[in] commandQueues       OpenCL command queues.
7097  * @param[in] numEventsInWaitList Number of events in the event wait list.
7098  * @param[in] eventWaitList       Event wait list.
7099  * @param[in] events     Event objects per each command queue that identify
7100  *                       a particular kernel execution instance.
7101  *
7102  * @return
7103  *   - \b clblasSuccess on success;
7104  *   - \b clblasNotInitialized if clblasSetup() was not called;
7105  *   - \b clblasInvalidValue if invalid parameters are passed:
7106  *     - either \b N or \b incx is zero,
7107  *     - \b K is greater than \b N - 1, or
7108  *     - the leading dimension is invalid;
7109  *   - \b clblasInvalidMemObject if either the \b A or \b X object is
7110  *     invalid, or is an image object rather than a buffer object;
7111  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
7112  *     internal structures;
7113  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
7114  *   - \b clblasInvalidContext if the context that a passed command queue
7115  *     belongs to was released;
7116  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
7117  *     call has not completed for any of the target devices;
7118  *   - \b clblasCompilerNotAvailable if a compiler is not available;
7119  *   - \b clblasBuildProgramFailure if there is a failure to build a program
7120  *     executable.
7121  *
7122  * @ingroup TBSV
7123  */
7124  clblasStatus
7125 clblasStbsv(
7126     clblasOrder order,
7127     clblasUplo uplo,
7128     clblasTranspose trans,
7129     clblasDiag diag,
7130     size_t N,
7131     size_t K,
7132     const cl_mem A,
7133     size_t offa,
7134     size_t lda,
7135     cl_mem X,
7136     size_t offx,
7137     int incx,
7138     cl_uint numCommandQueues,
7139     cl_command_queue *commandQueues,
7140     cl_uint numEventsInWaitList,
7141     const cl_event *eventWaitList,
7142     cl_event *events);
7143 /**
7144  * @example example_stbsv.c
7145  * This is an example of how to use the @ref clblasStbsv function.
7146  */
7147 
7148 
7149 /**
7150  * @brief Solves triangular banded matrix problems with double elements.
7151  *
7152  * Equations solved for \b X (the right-hand side is passed in \b X):
7153  *   - \f$ A X \leftarrow  X \f$
7154  *   - \f$ A^T X \leftarrow  X \f$
7155  *
7156  * @param[in] order				Row/column order.
7157  * @param[in] uplo				The triangle of matrix \b A being referenced.
7158  * @param[in] trans				How matrix \b A is to be transposed.
7159  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
7160  * @param[in] N					Number of rows/columns in banded matrix \b A.
7161  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
7162  * @param[in] A					Buffer object storing matrix \b A.
7163  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
7164  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
7165  *								than ( \b K + 1 ).
7166  * @param[in,out] X				Buffer object storing vector \b X; holds the right-hand side on entry and the result on exit.
7167  * @param[in] offx				Offset in number of elements for first element in vector \b X.
7168  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
7169  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7170  *                                task is to be performed.
7171  * @param[in] commandQueues       OpenCL command queues.
7172  * @param[in] numEventsInWaitList Number of events in the event wait list.
7173  * @param[in] eventWaitList       Event wait list.
7174  * @param[in] events     Event objects per each command queue that identify
7175  *                       a particular kernel execution instance.
7176  *
7177  * @return
7178  *   - \b clblasSuccess on success;
7179  *   - \b clblasInvalidDevice if a target device does not support
7180  *     double-precision floating point arithmetic;
7181  *   - the same error codes as the clblasStbsv() function otherwise.
7182  *
7183  * @ingroup TBSV
7184  */
7185 clblasStatus
7186 clblasDtbsv(
7187     clblasOrder order,
7188     clblasUplo uplo,
7189     clblasTranspose trans,
7190     clblasDiag diag,
7191     size_t N,
7192     size_t K,
7193     const cl_mem A,
7194     size_t offa,
7195     size_t lda,
7196     cl_mem X,
7197     size_t offx,
7198     int incx,
7199     cl_uint numCommandQueues,
7200     cl_command_queue *commandQueues,
7201     cl_uint numEventsInWaitList,
7202     const cl_event *eventWaitList,
7203     cl_event *events);
7204 
7205 /**
7206  * @brief Solves triangular banded matrix problems with float-complex elements.
7207  *
7208  * Equations solved for \b X (the right-hand side is passed in \b X):
7209  *   - \f$ A X \leftarrow  X \f$
7210  *   - \f$ A^T X \leftarrow  X \f$
7211  *
7212  * @param[in] order				Row/column order.
7213  * @param[in] uplo				The triangle of matrix \b A being referenced.
7214  * @param[in] trans				How matrix \b A is to be transposed.
7215  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
7216  * @param[in] N					Number of rows/columns in banded matrix \b A.
7217  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
7218  * @param[in] A					Buffer object storing matrix \b A.
7219  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
7220  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
7221  *								than ( \b K + 1 ).
7222  * @param[in,out] X				Buffer object storing vector \b X; holds the right-hand side on entry and the result on exit.
7223  * @param[in] offx				Offset in number of elements for first element in vector \b X.
7224  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
7225  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7226  *                                task is to be performed.
7227  * @param[in] commandQueues       OpenCL command queues.
7228  * @param[in] numEventsInWaitList Number of events in the event wait list.
7229  * @param[in] eventWaitList       Event wait list.
7230  * @param[in] events     Event objects per each command queue that identify
7231  *                       a particular kernel execution instance.
7232  *
7233  * @return The same status codes as the clblasStbsv() function.
7234  *
7235  * @ingroup TBSV
7236  */
7237 clblasStatus
7238 clblasCtbsv(
7239     clblasOrder order,
7240     clblasUplo uplo,
7241     clblasTranspose trans,
7242     clblasDiag diag,
7243     size_t N,
7244     size_t K,
7245     const cl_mem A,
7246     size_t offa,
7247     size_t lda,
7248     cl_mem X,
7249     size_t offx,
7250     int incx,
7251     cl_uint numCommandQueues,
7252     cl_command_queue *commandQueues,
7253     cl_uint numEventsInWaitList,
7254     const cl_event *eventWaitList,
7255     cl_event *events);
7256 
7257 /**
7258  * @brief Solves triangular banded matrix problems with double-complex elements.
7259  *
7260  * Equations solved for \b X (the right-hand side is passed in \b X):
7261  *   - \f$ A X \leftarrow  X \f$
7262  *   - \f$ A^T X \leftarrow  X \f$
7263  *
7264  * @param[in] order				Row/column order.
7265  * @param[in] uplo				The triangle of matrix \b A being referenced.
7266  * @param[in] trans				How matrix \b A is to be transposed.
7267  * @param[in] diag				Specifies whether matrix \b A is unit triangular.
7268  * @param[in] N					Number of rows/columns in banded matrix \b A.
7269  * @param[in] K					Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
7270  * @param[in] A					Buffer object storing matrix \b A.
7271  * @param[in] offa				Offset in number of elements for first element in matrix \b A.
7272  * @param[in] lda				Leading dimension of matrix \b A. It cannot be less
7273  *								than ( \b K + 1 ).
7274  * @param[in,out] X				Buffer object storing vector \b X; holds the right-hand side on entry and the result on exit.
7275  * @param[in] offx				Offset in number of elements for first element in vector \b X.
7276  * @param[in] incx				Increment for the elements of \b X. Must not be zero.
7277  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7278  *                                task is to be performed.
7279  * @param[in] commandQueues       OpenCL command queues.
7280  * @param[in] numEventsInWaitList Number of events in the event wait list.
7281  * @param[in] eventWaitList       Event wait list.
7282  * @param[in] events     Event objects per each command queue that identify
7283  *                       a particular kernel execution instance.
7284  *
7285  * @return The same status codes as the clblasDtbsv() function.
7286  *
7287  * @ingroup TBSV
7288  */
7289 clblasStatus
7290 clblasZtbsv(
7291     clblasOrder order,
7292     clblasUplo uplo,
7293     clblasTranspose trans,
7294     clblasDiag diag,
7295     size_t N,
7296     size_t K,
7297     const cl_mem A,
7298     size_t offa,
7299     size_t lda,
7300     cl_mem X,
7301     size_t offx,
7302     int incx,
7303     cl_uint numCommandQueues,
7304     cl_command_queue *commandQueues,
7305     cl_uint numEventsInWaitList,
7306     const cl_event *eventWaitList,
7307     cl_event *events);
7308 
7309 /*@}*/
7310 
7311 
7312 /**
7313  * @defgroup BLAS3 BLAS-3 functions
7314  *
7315  * The Level 3 Basic Linear Algebra Subprograms are functions that perform
7316  * matrix-matrix operations.
7317  */
7318 /*@{*/
7319 /*@}*/
7320 
7321 /**
7322  * @defgroup GEMM GEMM - General matrix-matrix multiplication
7323  * @ingroup BLAS3
7324  */
7325 /*@{*/
7326 
7327 /**
7328  * @brief Matrix-matrix product of general rectangular matrices with float
7329  *        elements. Extended version.
7330  *
7331  * Matrix-matrix products:
7332  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
7333  *   - \f$ C \leftarrow \alpha A^T B + \beta C \f$
7334  *   - \f$ C \leftarrow \alpha A B^T + \beta C \f$
7335  *   - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
7336  *
7337  * @param[in] order     Row/column order.
7338  * @param[in] transA    How matrix \b A is to be transposed.
7339  * @param[in] transB    How matrix \b B is to be transposed.
7340  * @param[in] M         Number of rows in matrix \b A.
7341  * @param[in] N         Number of columns in matrix \b B.
7342  * @param[in] K         Number of columns in matrix \b A and rows in matrix \b B.
7343  * @param[in] alpha     The factor of matrix \b A.
7344  * @param[in] A         Buffer object storing matrix \b A.
7345  * @param[in] offA      Offset of the first element of the matrix \b A in the
7346  *                      buffer object. Counted in elements.
7347  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
7348  *                      than \b K when the \b order parameter is set to
7349  *                      \b clblasRowMajor,\n or less than \b M when the
7350  *                      parameter is set to \b clblasColumnMajor.
7351  * @param[in] B         Buffer object storing matrix \b B.
7352  * @param[in] offB      Offset of the first element of the matrix \b B in the
7353  *                      buffer object. Counted in elements.
7354  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
7355  *                      than \b N when the \b order parameter is set to
7356  *                      \b clblasRowMajor,\n or less than \b K
7357  *                      when it is set to \b clblasColumnMajor.
7358  * @param[in] beta      The factor of matrix \b C.
7359  * @param[in,out] C     Buffer object storing matrix \b C; scaled by \b beta and updated in place.
7360  * @param[in]  offC     Offset of the first element of the matrix \b C in the
7361  *                      buffer object. Counted in elements.
7362  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
7363  *                      than \b N when the \b order parameter is set to
7364  *                      \b clblasRowMajor,\n or less than \b M when
7365  *                      it is set to \b clblasColumnMajor.
7366  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7367  *                                task is to be performed.
7368  * @param[in] commandQueues       OpenCL command queues.
7369  * @param[in] numEventsInWaitList Number of events in the event wait list.
7370  * @param[in] eventWaitList       Event wait list.
7371  * @param[in] events     Event objects per each command queue that identify
7372  *                       a particular kernel execution instance.
7373  *
7374  * @return
7375  *   - \b clblasSuccess on success;
7376  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
7377  *        the size of the respective buffer object;
7378  *   - the same error codes as clblasSgemm() otherwise (NOTE(review): self-reference; confirm the intended list).
7379  *
7380  * @ingroup GEMM
7381  */
7382 clblasStatus
7383 clblasSgemm(
7384     clblasOrder order,
7385     clblasTranspose transA,
7386     clblasTranspose transB,
7387     size_t M,
7388     size_t N,
7389     size_t K,
7390     cl_float alpha,
7391     const cl_mem A,
7392     size_t offA,
7393     size_t lda,
7394     const cl_mem B,
7395     size_t offB,
7396     size_t ldb,
7397     cl_float beta,
7398     cl_mem C,
7399     size_t offC,
7400     size_t ldc,
7401     cl_uint numCommandQueues,
7402     cl_command_queue *commandQueues,
7403     cl_uint numEventsInWaitList,
7404     const cl_event *eventWaitList,
7405     cl_event *events);
7406 
7407 /**
7408  * @example example_sgemm.c
7409  * This is an example of how to use the @ref clblasSgemm function.
7410  */
7411 
7412 /**
7413  * @brief Matrix-matrix product of general rectangular matrices with double
7414  *        elements. Extended version.
7415  *
7416  * Matrix-matrix products:
7417  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
7418  *   - \f$ C \leftarrow \alpha A^T B + \beta C \f$
7419  *   - \f$ C \leftarrow \alpha A B^T + \beta C \f$
7420  *   - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
7421  *
7422  * @param[in] order     Row/column order.
7423  * @param[in] transA    How matrix \b A is to be transposed.
7424  * @param[in] transB    How matrix \b B is to be transposed.
7425  * @param[in] M         Number of rows in matrix \b A.
7426  * @param[in] N         Number of columns in matrix \b B.
7427  * @param[in] K         Number of columns in matrix \b A and rows in matrix \b B.
7428  * @param[in] alpha     The factor of matrix \b A.
7429  * @param[in] A         Buffer object storing matrix \b A.
7430  * @param[in] offA      Offset of the first element of the matrix \b A in the
7431  *                      buffer object. Counted in elements.
7432  * @param[in] lda       Leading dimension of matrix \b A. For detailed description,
7433  *                      see clblasSgemm().
7434  * @param[in] B         Buffer object storing matrix \b B.
7435  * @param[in] offB      Offset of the first element of the matrix \b B in the
7436  *                      buffer object. Counted in elements.
7437  * @param[in] ldb       Leading dimension of matrix \b B. For detailed description,
7438  *                      see clblasSgemm().
7439  * @param[in] beta      The factor of matrix \b C.
7440  * @param[out] C        Buffer object storing matrix \b C.
7441  * @param[in] offC      Offset of the first element of the matrix \b C in the
7442  *                      buffer object. Counted in elements.
7443  * @param[in] ldc       Leading dimension of matrix \b C. For detailed description,
7444  *                      see clblasSgemm().
7445  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7446  *                                task is to be performed.
7447  * @param[in] commandQueues       OpenCL command queues.
7448  * @param[in] numEventsInWaitList Number of events in the event wait list.
7449  * @param[in] eventWaitList       Event wait list.
7450  * @param[in] events     Event objects per each command queue that identify
7451  *                       a particular kernel execution instance.
7452  *
7453  * @return
7454  *   - \b clblasSuccess on success;
7455  *   - \b clblasInvalidDevice if a target device does not support floating
7456  *        point arithmetic with double precision;
7457  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
7458  *        the size of the respective buffer object;
7459  *   - the same error codes as the clblasSgemm() function otherwise.
7460  *
7461  * @ingroup GEMM
7462  */
7463 clblasStatus
7464 clblasDgemm(
7465     clblasOrder order,
7466     clblasTranspose transA,
7467     clblasTranspose transB,
7468     size_t M,
7469     size_t N,
7470     size_t K,
7471     cl_double alpha,
7472     const cl_mem A,
7473     size_t offA,
7474     size_t lda,
7475     const cl_mem B,
7476     size_t offB,
7477     size_t ldb,
7478     cl_double beta,
7479     cl_mem C,
7480     size_t offC,
7481     size_t ldc,
7482     cl_uint numCommandQueues,
7483     cl_command_queue *commandQueues,
7484     cl_uint numEventsInWaitList,
7485     const cl_event *eventWaitList,
7486     cl_event *events);
7487 
7488 /**
7489  * @brief Matrix-matrix product of general rectangular matrices with float
7490  *        complex elements. Extended version.
7491  *
7492  * Matrix-matrix products:
7493  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
7494  *   - \f$ C \leftarrow \alpha A^T B + \beta C \f$
7495  *   - \f$ C \leftarrow \alpha A B^T + \beta C \f$
7496  *   - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
7497  *
7498  * @param[in] order     Row/column order.
7499  * @param[in] transA    How matrix \b A is to be transposed.
7500  * @param[in] transB    How matrix \b B is to be transposed.
7501  * @param[in] M         Number of rows in matrix \b A.
7502  * @param[in] N         Number of columns in matrix \b B.
7503  * @param[in] K         Number of columns in matrix \b A and rows in matrix \b B.
7504  * @param[in] alpha     The factor of matrix \b A.
7505  * @param[in] A         Buffer object storing matrix \b A.
7506  * @param[in] offA      Offset of the first element of the matrix \b A in the
7507  *                      buffer object. Counted in elements.
7508  * @param[in] lda       Leading dimension of matrix \b A. For detailed description,
7509  *                      see clblasSgemm().
7510  * @param[in] B         Buffer object storing matrix \b B.
7511  * @param[in] offB      Offset of the first element of the matrix \b B in the
7512  *                      buffer object. Counted in elements.
7513  * @param[in] ldb       Leading dimension of matrix \b B. For detailed description,
7514  *                      see clblasSgemm().
7515  * @param[in] beta      The factor of matrix \b C.
7516  * @param[out] C        Buffer object storing matrix \b C.
7517  * @param[in] offC      Offset of the first element of the matrix \b C in the
7518  *                      buffer object. Counted in elements.
7519  * @param[in] ldc       Leading dimension of matrix \b C. For detailed description,
7520  *                      see clblasSgemm().
7521  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7522  *                                task is to be performed.
7523  * @param[in] commandQueues       OpenCL command queues.
7524  * @param[in] numEventsInWaitList Number of events in the event wait list.
7525  * @param[in] eventWaitList       Event wait list.
7526  * @param[in] events     Event objects per each command queue that identify
7527  *                       a particular kernel execution instance.
7528  *
7529  * @return
7530  *   - \b clblasSuccess on success;
7531  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
7532  *        the size of the respective buffer object;
7533  *   - the same error codes as the clblasSgemm() function otherwise.
7534  *
7535  * @ingroup GEMM
7536  */
7537 clblasStatus
7538 clblasCgemm(
7539     clblasOrder order,
7540     clblasTranspose transA,
7541     clblasTranspose transB,
7542     size_t M,
7543     size_t N,
7544     size_t K,
7545     FloatComplex alpha,
7546     const cl_mem A,
7547     size_t offA,
7548     size_t lda,
7549     const cl_mem B,
7550     size_t offB,
7551     size_t ldb,
7552     FloatComplex beta,
7553     cl_mem C,
7554     size_t offC,
7555     size_t ldc,
7556     cl_uint numCommandQueues,
7557     cl_command_queue *commandQueues,
7558     cl_uint numEventsInWaitList,
7559     const cl_event *eventWaitList,
7560     cl_event *events);
7561 
7562 /**
7563  * @brief Matrix-matrix product of general rectangular matrices with double
7564  *        complex elements. Extended version.
7565  *
7566  * Matrix-matrix products:
7567  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
7568  *   - \f$ C \leftarrow \alpha A^T B + \beta C \f$
7569  *   - \f$ C \leftarrow \alpha A B^T + \beta C \f$
7570  *   - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
7571  *
7572  * @param[in] order     Row/column order.
7573  * @param[in] transA    How matrix \b A is to be transposed.
7574  * @param[in] transB    How matrix \b B is to be transposed.
7575  * @param[in] M         Number of rows in matrix \b A.
7576  * @param[in] N         Number of columns in matrix \b B.
7577  * @param[in] K         Number of columns in matrix \b A and rows in matrix \b B.
7578  * @param[in] alpha     The factor of matrix \b A.
7579  * @param[in] A         Buffer object storing matrix \b A.
7580  * @param[in] offA      Offset of the first element of the matrix \b A in the
7581  *                      buffer object. Counted in elements.
7582  * @param[in] lda       Leading dimension of matrix \b A. For detailed description,
7583  *                      see clblasSgemm().
7584  * @param[in] B         Buffer object storing matrix \b B.
7585  * @param[in] offB      Offset of the first element of the matrix \b B in the
7586  *                      buffer object. Counted in elements.
7587  * @param[in] ldb       Leading dimension of matrix \b B. For detailed description,
7588  *                      see clblasSgemm().
7589  * @param[in] beta      The factor of matrix \b C.
7590  * @param[out] C        Buffer object storing matrix \b C.
7591  * @param[in] offC      Offset of the first element of the matrix \b C in the
7592  *                      buffer object. Counted in elements.
7593  * @param[in] ldc       Leading dimension of matrix \b C. For detailed description,
7594  *                      see clblasSgemm().
7595  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7596  *                                task is to be performed.
7597  * @param[in] commandQueues       OpenCL command queues.
7598  * @param[in] numEventsInWaitList Number of events in the event wait list.
7599  * @param[in] eventWaitList       Event wait list.
7600  * @param[in] events     Event objects per each command queue that identify
7601  *                       a particular kernel execution instance.
7602  *
7603  * @return
7604  *   - \b clblasSuccess on success;
7605  *   - \b clblasInvalidDevice if a target device does not support floating
7606  *        point arithmetic with double precision;
7607  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
7608  *        the size of the respective buffer object;
7609  *   - the same error codes as the clblasSgemm() function otherwise.
7610  *
7611  * @ingroup GEMM
7612  */
7613 clblasStatus
7614 clblasZgemm(
7615     clblasOrder order,
7616     clblasTranspose transA,
7617     clblasTranspose transB,
7618     size_t M,
7619     size_t N,
7620     size_t K,
7621     DoubleComplex alpha,
7622     const cl_mem A,
7623     size_t offA,
7624     size_t lda,
7625     const cl_mem B,
7626     size_t offB,
7627     size_t ldb,
7628     DoubleComplex beta,
7629     cl_mem C,
7630     size_t offC,
7631     size_t ldc,
7632     cl_uint numCommandQueues,
7633     cl_command_queue *commandQueues,
7634     cl_uint numEventsInWaitList,
7635     const cl_event *eventWaitList,
7636     cl_event *events);
7637 
7638 /*@}*/
7639 
7640 /**
7641  * @defgroup TRMM TRMM - Triangular matrix-matrix multiplication
7642  * @ingroup BLAS3
7643  */
7644 /*@{*/
7645 
7646 /**
7647  * @brief Multiplying a matrix by a triangular matrix with float elements.
7648  *        Extended version.
7649  *
7650  * Matrix-triangular matrix products:
7651  *   - \f$ B \leftarrow \alpha A B \f$
7652  *   - \f$ B \leftarrow \alpha A^T B \f$
7653  *   - \f$ B \leftarrow \alpha B A \f$
7654  *   - \f$ B \leftarrow \alpha B A^T \f$
7655  *
7656  * where \b A is an upper or lower triangular matrix.
7657  *
7658  * @param[in] order     Row/column order.
7659  * @param[in] side      The side of triangular matrix.
7660  * @param[in] uplo      The triangle in matrix being referenced.
7661  * @param[in] transA    How matrix \b A is to be transposed.
7662  * @param[in] diag      Specify whether matrix is unit triangular.
7663  * @param[in] M         Number of rows in matrix \b B.
7664  * @param[in] N         Number of columns in matrix \b B.
7665  * @param[in] alpha     The factor of matrix \b A.
7666  * @param[in] A         Buffer object storing matrix \b A.
7667  * @param[in] offA      Offset of the first element of the matrix \b A in the
7668  *                      buffer object. Counted in elements.
7669  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
7670  *                      than \b M when the \b side parameter is set to
7671  *                      \b clblasLeft,\n or less than \b N when it is set
7672  *                      to \b clblasRight.
7673  * @param[out] B        Buffer object storing matrix \b B.
7674  * @param[in] offB      Offset of the first element of the matrix \b B in the
7675  *                      buffer object. Counted in elements.
7676  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
7677  *                      than \b N when the \b order parameter is set to
7678  *                      \b clblasRowMajor,\n or less than \b M
7679  *                      when it is set to \b clblasColumnMajor.
7680  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7681  *                                task is to be performed.
7682  * @param[in] commandQueues       OpenCL command queues.
7683  * @param[in] numEventsInWaitList Number of events in the event wait list.
7684  * @param[in] eventWaitList       Event wait list.
7685  * @param[in] events     Event objects per each command queue that identify
7686  *                       a particular kernel execution instance.
7687  *
7688  * @return
7689  *   - \b clblasSuccess on success;
7690  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
7691  *        of the respective buffer object;
7692  *   - the same error codes as clblasStrmm() otherwise.
7693  *
7694  * @ingroup TRMM
7695  */
7696 clblasStatus
7697 clblasStrmm(
7698     clblasOrder order,
7699     clblasSide side,
7700     clblasUplo uplo,
7701     clblasTranspose transA,
7702     clblasDiag diag,
7703     size_t M,
7704     size_t N,
7705     cl_float alpha,
7706     const cl_mem A,
7707     size_t offA,
7708     size_t lda,
7709     cl_mem B,
7710     size_t offB,
7711     size_t ldb,
7712     cl_uint numCommandQueues,
7713     cl_command_queue *commandQueues,
7714     cl_uint numEventsInWaitList,
7715     const cl_event *eventWaitList,
7716     cl_event *events);
7717 
7718 /**
7719  * @example example_strmm.c
7720  * This is an example of how to use the @ref clblasStrmm function.
7721  */
7722 
7723 /**
7724  * @brief Multiplying a matrix by a triangular matrix with double elements.
7725  *        Extended version.
7726  *
7727  * Matrix-triangular matrix products:
7728  *   - \f$ B \leftarrow \alpha A B \f$
7729  *   - \f$ B \leftarrow \alpha A^T B \f$
7730  *   - \f$ B \leftarrow \alpha B A \f$
7731  *   - \f$ B \leftarrow \alpha B A^T \f$
7732  *
7733  * where \b A is an upper or lower triangular matrix.
7734  *
7735  * @param[in] order     Row/column order.
7736  * @param[in] side      The side of triangular matrix.
7737  * @param[in] uplo      The triangle in matrix being referenced.
7738  * @param[in] transA    How matrix \b A is to be transposed.
7739  * @param[in] diag      Specify whether matrix is unit triangular.
7740  * @param[in] M         Number of rows in matrix \b B.
7741  * @param[in] N         Number of columns in matrix \b B.
7742  * @param[in] alpha     The factor of matrix \b A.
7743  * @param[in] A         Buffer object storing matrix \b A.
7744  * @param[in] offA      Offset of the first element of the matrix \b A in the
7745  *                      buffer object. Counted in elements.
7746  * @param[in] lda       Leading dimension of matrix \b A. For detailed
7747  *                      description, see clblasStrmm().
7748  * @param[out] B        Buffer object storing matrix \b B.
7749  * @param[in] offB      Offset of the first element of the matrix \b B in the
7750  *                      buffer object. Counted in elements.
7751  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
7752  *                      description, see clblasStrmm().
7753  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7754  *                                task is to be performed.
7755  * @param[in] commandQueues       OpenCL command queues.
7756  * @param[in] numEventsInWaitList Number of events in the event wait list.
7757  * @param[in] eventWaitList       Event wait list.
7758  * @param[in] events     Event objects per each command queue that identify
7759  *                       a particular kernel execution instance.
7760  *
7761  * @return
7762  *   - \b clblasSuccess on success;
7763  *   - \b clblasInvalidDevice if a target device does not support floating
7764  *     point arithmetic with double precision;
7765  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
7766  *        of the respective buffer object;
7767  *   - the same error codes as the clblasStrmm() function otherwise.
7768  *
7769  * @ingroup TRMM
7770  */
7771 clblasStatus
7772 clblasDtrmm(
7773     clblasOrder order,
7774     clblasSide side,
7775     clblasUplo uplo,
7776     clblasTranspose transA,
7777     clblasDiag diag,
7778     size_t M,
7779     size_t N,
7780     cl_double alpha,
7781     const cl_mem A,
7782     size_t offA,
7783     size_t lda,
7784     cl_mem B,
7785     size_t offB,
7786     size_t ldb,
7787     cl_uint numCommandQueues,
7788     cl_command_queue *commandQueues,
7789     cl_uint numEventsInWaitList,
7790     const cl_event *eventWaitList,
7791     cl_event *events);
7792 
7793 /**
7794  * @brief Multiplying a matrix by a triangular matrix with float complex
7795  *        elements. Extended version.
7796  *
7797  * Matrix-triangular matrix products:
7798  *   - \f$ B \leftarrow \alpha A B \f$
7799  *   - \f$ B \leftarrow \alpha A^T B \f$
7800  *   - \f$ B \leftarrow \alpha B A \f$
7801  *   - \f$ B \leftarrow \alpha B A^T \f$
7802  *
7803  * where \b A is an upper or lower triangular matrix.
7804  * @param[in] order     Row/column order.
7805  * @param[in] side      The side of triangular matrix.
7806  * @param[in] uplo      The triangle in matrix being referenced.
7807  * @param[in] transA    How matrix \b A is to be transposed.
7808  * @param[in] diag      Specify whether matrix is unit triangular.
7809  * @param[in] M         Number of rows in matrix \b B.
7810  * @param[in] N         Number of columns in matrix \b B.
7811  * @param[in] alpha     The factor of matrix \b A.
7812  * @param[in] A         Buffer object storing matrix \b A.
7813  * @param[in] offA      Offset of the first element of the matrix \b A in the
7814  *                      buffer object. Counted in elements.
7815  * @param[in] lda       Leading dimension of matrix \b A. For detailed
7816  *                      description, see clblasStrmm().
7817  * @param[out] B        Buffer object storing matrix \b B.
7818  * @param[in] offB      Offset of the first element of the matrix \b B in the
7819  *                      buffer object. Counted in elements.
7820  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
7821  *                      description, see clblasStrmm().
7822  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7823  *                                task is to be performed.
7824  * @param[in] commandQueues       OpenCL command queues.
7825  * @param[in] numEventsInWaitList Number of events in the event wait list.
7826  * @param[in] eventWaitList       Event wait list.
7827  * @param[in] events     Event objects per each command queue that identify
7828  *                       a particular kernel execution instance.
7829  *
7830  * @return
7831  *   - \b clblasSuccess on success;
7832  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
7833  *        of the respective buffer object;
7834  *   - the same error codes as clblasStrmm() otherwise.
7835  *
7836  * @ingroup TRMM
7837  */
7838 clblasStatus
7839 clblasCtrmm(
7840     clblasOrder order,
7841     clblasSide side,
7842     clblasUplo uplo,
7843     clblasTranspose transA,
7844     clblasDiag diag,
7845     size_t M,
7846     size_t N,
7847     FloatComplex alpha,
7848     const cl_mem A,
7849     size_t offA,
7850     size_t lda,
7851     cl_mem B,
7852     size_t offB,
7853     size_t ldb,
7854     cl_uint numCommandQueues,
7855     cl_command_queue *commandQueues,
7856     cl_uint numEventsInWaitList,
7857     const cl_event *eventWaitList,
7858     cl_event *events);
7859 
7860 /**
7861  * @brief Multiplying a matrix by a triangular matrix with double complex
7862  *        elements. Extended version.
7863  *
7864  * Matrix-triangular matrix products:
7865  *   - \f$ B \leftarrow \alpha A B \f$
7866  *   - \f$ B \leftarrow \alpha A^T B \f$
7867  *   - \f$ B \leftarrow \alpha B A \f$
7868  *   - \f$ B \leftarrow \alpha B A^T \f$
7869  *
7870  * where \b A is an upper or lower triangular matrix.
7871  *
7872  * @param[in] order     Row/column order.
7873  * @param[in] side      The side of triangular matrix.
7874  * @param[in] uplo      The triangle in matrix being referenced.
7875  * @param[in] transA    How matrix \b A is to be transposed.
7876  * @param[in] diag      Specify whether matrix is unit triangular.
7877  * @param[in] M         Number of rows in matrix \b B.
7878  * @param[in] N         Number of columns in matrix \b B.
7879  * @param[in] alpha     The factor of matrix \b A.
7880  * @param[in] A         Buffer object storing matrix \b A.
7881  * @param[in] offA      Offset of the first element of the matrix \b A in the
7882  *                      buffer object. Counted in elements.
7883  * @param[in] lda       Leading dimension of matrix \b A. For detailed
7884  *                      description, see clblasStrmm().
7885  * @param[out] B        Buffer object storing matrix \b B.
7886  * @param[in] offB      Offset of the first element of the matrix \b B in the
7887  *                      buffer object. Counted in elements.
7888  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
7889  *                      description, see clblasStrmm().
7890  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7891  *                                task is to be performed.
7892  * @param[in] commandQueues       OpenCL command queues.
7893  * @param[in] numEventsInWaitList Number of events in the event wait list.
7894  * @param[in] eventWaitList       Event wait list.
7895  * @param[in] events     Event objects per each command queue that identify
7896  *                       a particular kernel execution instance.
7897  *
7898  * @return
7899  *   - \b clblasSuccess on success;
7900  *   - \b clblasInvalidDevice if a target device does not support floating
7901  *     point arithmetic with double precision;
7902  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
7903  *        of the respective buffer object;
7904  *   - the same error codes as the clblasStrmm() function otherwise.
7905  *
7906  * @ingroup TRMM
7907  */
7908 clblasStatus
7909 clblasZtrmm(
7910     clblasOrder order,
7911     clblasSide side,
7912     clblasUplo uplo,
7913     clblasTranspose transA,
7914     clblasDiag diag,
7915     size_t M,
7916     size_t N,
7917     DoubleComplex alpha,
7918     const cl_mem A,
7919     size_t offA,
7920     size_t lda,
7921     cl_mem B,
7922     size_t offB,
7923     size_t ldb,
7924     cl_uint numCommandQueues,
7925     cl_command_queue *commandQueues,
7926     cl_uint numEventsInWaitList,
7927     const cl_event *eventWaitList,
7928     cl_event *events);
7929 
7930 /*@}*/
7931 
7932 /**
7933  * @defgroup TRSM TRSM - Solving triangular systems of equations
7934  * @ingroup BLAS3
7935  */
7936 /*@{*/
7937 
7938 /**
7939  * @brief Solving triangular systems of equations with multiple right-hand
7940  *        sides and float elements. Extended version.
7941  *
7942  * Solving triangular systems of equations:
7943  *   - \f$ B \leftarrow \alpha A^{-1} B \f$
7944  *   - \f$ B \leftarrow \alpha A^{-T} B \f$
7945  *   - \f$ B \leftarrow \alpha B A^{-1} \f$
7946  *   - \f$ B \leftarrow \alpha B A^{-T} \f$
7947  *
7948  * where \b A is an upper or lower triangular matrix.
7949  *
7950  * @param[in] order     Row/column order.
7951  * @param[in] side      The side of triangular matrix.
7952  * @param[in] uplo      The triangle in matrix being referenced.
7953  * @param[in] transA    How matrix \b A is to be transposed.
7954  * @param[in] diag      Specify whether matrix is unit triangular.
7955  * @param[in] M         Number of rows in matrix \b B.
7956  * @param[in] N         Number of columns in matrix \b B.
7957  * @param[in] alpha     The factor of matrix \b A.
7958  * @param[in] A         Buffer object storing matrix \b A.
7959  * @param[in] offA      Offset of the first element of the matrix \b A in the
7960  *                      buffer object. Counted in elements.
7961  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
7962  *                      than \b M when the \b side parameter is set to
7963  *                      \b clblasLeft,\n or less than \b N
7964  *                      when it is set to \b clblasRight.
7965  * @param[out] B        Buffer object storing matrix \b B.
7966  * @param[in] offB      Offset of the first element of the matrix \b B in the
7967  *                      buffer object. Counted in elements.
7968  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
7969  *                      than \b N when the \b order parameter is set to
7970  *                      \b clblasRowMajor,\n or less than \b M
7971  *                      when it is set to \b clblasColumnMajor.
7972  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
7973  *                                task is to be performed.
7974  * @param[in] commandQueues       OpenCL command queues.
7975  * @param[in] numEventsInWaitList Number of events in the event wait list.
7976  * @param[in] eventWaitList       Event wait list.
7977  * @param[in] events     Event objects per each command queue that identify
7978  *                       a particular kernel execution instance.
7979  *
7980  * @return
7981  *   - \b clblasSuccess on success;
7982  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
7983  *        of the respective buffer object;
7984  *   - the same error codes as clblasStrsm() otherwise.
7985  *
7986  * @ingroup TRSM
7987  */
7988 clblasStatus
7989 clblasStrsm(
7990     clblasOrder order,
7991     clblasSide side,
7992     clblasUplo uplo,
7993     clblasTranspose transA,
7994     clblasDiag diag,
7995     size_t M,
7996     size_t N,
7997     cl_float alpha,
7998     const cl_mem A,
7999     size_t offA,
8000     size_t lda,
8001     cl_mem B,
8002     size_t offB,
8003     size_t ldb,
8004     cl_uint numCommandQueues,
8005     cl_command_queue *commandQueues,
8006     cl_uint numEventsInWaitList,
8007     const cl_event *eventWaitList,
8008     cl_event *events);
8009 
8010 /**
8011  * @example example_strsm.c
8012  * This is an example of how to use the @ref clblasStrsm function.
8013  */
8014 
8015 /**
8016  * @brief Solving triangular systems of equations with multiple right-hand
8017  *        sides and double elements. Extended version.
8018  *
8019  * Solving triangular systems of equations:
8020  *   - \f$ B \leftarrow \alpha A^{-1} B \f$
8021  *   - \f$ B \leftarrow \alpha A^{-T} B \f$
8022  *   - \f$ B \leftarrow \alpha B A^{-1} \f$
8023  *   - \f$ B \leftarrow \alpha B A^{-T} \f$
8024  *
8025  * where \b A is an upper or lower triangular matrix.
8026  *
8027  * @param[in] order     Row/column order.
8028  * @param[in] side      The side of triangular matrix.
8029  * @param[in] uplo      The triangle in matrix being referenced.
8030  * @param[in] transA    How matrix \b A is to be transposed.
8031  * @param[in] diag      Specify whether matrix is unit triangular.
8032  * @param[in] M         Number of rows in matrix \b B.
8033  * @param[in] N         Number of columns in matrix \b B.
8034  * @param[in] alpha     The factor of matrix \b A.
8035  * @param[in] A         Buffer object storing matrix \b A.
8036  * @param[in] offA      Offset of the first element of the matrix \b A in the
8037  *                      buffer object. Counted in elements.
8038  * @param[in] lda       Leading dimension of matrix \b A. For detailed
8039  *                      description, see clblasStrsm().
8040  * @param[out] B        Buffer object storing matrix \b B.
8041  * @param[in] offB      Offset of the first element of the matrix \b B in the
8042  *                      buffer object. Counted in elements.
8043  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
8044  *                      description, see clblasStrsm().
8045  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8046  *                                task is to be performed.
8047  * @param[in] commandQueues       OpenCL command queues.
8048  * @param[in] numEventsInWaitList Number of events in the event wait list.
8049  * @param[in] eventWaitList       Event wait list.
8050  * @param[in] events     Event objects per each command queue that identify
8051  *                       a particular kernel execution instance.
8052  *
8053  * @return
8054  *   - \b clblasSuccess on success;
8055  *   - \b clblasInvalidDevice if a target device does not support floating
8056  *        point arithmetic with double precision;
8057  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
8058  *        of the respective buffer object;
8059  *   - the same error codes as the clblasStrsm() function otherwise.
8060  *
8061  * @ingroup TRSM
8062  */
8063 clblasStatus
8064 clblasDtrsm(
8065     clblasOrder order,
8066     clblasSide side,
8067     clblasUplo uplo,
8068     clblasTranspose transA,
8069     clblasDiag diag,
8070     size_t M,
8071     size_t N,
8072     cl_double alpha,
8073     const cl_mem A,
8074     size_t offA,
8075     size_t lda,
8076     cl_mem B,
8077     size_t offB,
8078     size_t ldb,
8079     cl_uint numCommandQueues,
8080     cl_command_queue *commandQueues,
8081     cl_uint numEventsInWaitList,
8082     const cl_event *eventWaitList,
8083     cl_event *events);
8084 
8085 /**
8086  * @brief Solving triangular systems of equations with multiple right-hand
8087  *        sides and float complex elements. Extended version.
8088  *
8089  * Solving triangular systems of equations:
8090  *   - \f$ B \leftarrow \alpha A^{-1} B \f$
8091  *   - \f$ B \leftarrow \alpha A^{-T} B \f$
8092  *   - \f$ B \leftarrow \alpha B A^{-1} \f$
8093  *   - \f$ B \leftarrow \alpha B A^{-T} \f$
8094  *
8095  * where \b A is an upper or lower triangular matrix.
8096  *
8097  * @param[in] order     Row/column order.
8098  * @param[in] side      The side of triangular matrix.
8099  * @param[in] uplo      The triangle in matrix being referenced.
8100  * @param[in] transA    How matrix \b A is to be transposed.
8101  * @param[in] diag      Specify whether matrix is unit triangular.
8102  * @param[in] M         Number of rows in matrix \b B.
8103  * @param[in] N         Number of columns in matrix \b B.
8104  * @param[in] alpha     The factor of matrix \b A.
8105  * @param[in] A         Buffer object storing matrix \b A.
8106  * @param[in] offA      Offset of the first element of the matrix \b A in the
8107  *                      buffer object. Counted in elements.
8108  * @param[in] lda       Leading dimension of matrix \b A. For detailed
8109  *                      description, see clblasStrsm().
8110  * @param[out] B        Buffer object storing matrix \b B.
8111  * @param[in] offB      Offset of the first element of the matrix \b B in the
8112  *                      buffer object. Counted in elements.
8113  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
8114  *                      description, see clblasStrsm().
8115  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8116  *                                task is to be performed.
8117  * @param[in] commandQueues       OpenCL command queues.
8118  * @param[in] numEventsInWaitList Number of events in the event wait list.
8119  * @param[in] eventWaitList       Event wait list.
8120  * @param[in] events     Event objects per each command queue that identify
8121  *                       a particular kernel execution instance.
8122  *
8123  * @return
8124  *   - \b clblasSuccess on success;
8125  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
8126  *        of the respective buffer object;
8127  *   - the same error codes as clblasStrsm() otherwise.
8128  *
8129  * @ingroup TRSM
8130  */
8131 clblasStatus
8132 clblasCtrsm(
8133     clblasOrder order,
8134     clblasSide side,
8135     clblasUplo uplo,
8136     clblasTranspose transA,
8137     clblasDiag diag,
8138     size_t M,
8139     size_t N,
8140     FloatComplex alpha,
8141     const cl_mem A,
8142     size_t offA,
8143     size_t lda,
8144     cl_mem B,
8145     size_t offB,
8146     size_t ldb,
8147     cl_uint numCommandQueues,
8148     cl_command_queue *commandQueues,
8149     cl_uint numEventsInWaitList,
8150     const cl_event *eventWaitList,
8151     cl_event *events);
8152 
8153 /**
8154  * @brief Solving triangular systems of equations with multiple right-hand
8155  *        sides and double complex elements. Extended version.
8156  *
8157  * Solving triangular systems of equations:
8158  *   - \f$ B \leftarrow \alpha A^{-1} B \f$
8159  *   - \f$ B \leftarrow \alpha A^{-T} B \f$
8160  *   - \f$ B \leftarrow \alpha B A^{-1} \f$
8161  *   - \f$ B \leftarrow \alpha B A^{-T} \f$
8162  *
8163  * where \b T is an upper or lower triangular matrix.
8164  *
8165  * @param[in] order     Row/column order.
8166  * @param[in] side      The side of triangular matrix.
8167  * @param[in] uplo      The triangle in matrix being referenced.
8168  * @param[in] transA    How matrix \b A is to be transposed.
8169  * @param[in] diag      Specify whether matrix is unit triangular.
8170  * @param[in] M         Number of rows in matrix \b B.
8171  * @param[in] N         Number of columns in matrix \b B.
8172  * @param[in] alpha     The factor of matrix \b A.
8173  * @param[in] A         Buffer object storing matrix \b A.
8174  * @param[in] offA      Offset of the first element of the matrix \b A in the
8175  *                      buffer object. Counted in elements.
8176  * @param[in] lda       Leading dimension of matrix \b A. For detailed
8177  *                      description, see clblasStrsm().
8178  * @param[out] B        Buffer object storing matrix \b B.
8179  * @param[in] offB      Offset of the first element of the matrix \b B in the
8180  *                      buffer object. Counted in elements.
8181  * @param[in] ldb       Leading dimension of matrix \b B. For detailed
8182  *                      description, see clblasStrsm().
8183  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8184  *                                task is to be performed.
8185  * @param[in] commandQueues       OpenCL command queues.
8186  * @param[in] numEventsInWaitList Number of events in the event wait list.
8187  * @param[in] eventWaitList       Event wait list.
8188  * @param[in] events     Event objects per each command queue that identify
8189  *                       a particular kernel execution instance.
8190  *
8191  * @return
8192  *   - \b clblasSuccess on success;
8193  *   - \b clblasInvalidDevice if a target device does not support floating
8194  *        point arithmetic with double precision;
8195  *   - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
8196  *        of the respective buffer object;
8197  *   - the same error codes as the clblasStrsm() function otherwise
8198  *
8199  * @ingroup TRSM
8200  */
8201 clblasStatus
8202 clblasZtrsm(
8203     clblasOrder order,
8204     clblasSide side,
8205     clblasUplo uplo,
8206     clblasTranspose transA,
8207     clblasDiag diag,
8208     size_t M,
8209     size_t N,
8210     DoubleComplex alpha,
8211     const cl_mem A,
8212     size_t offA,
8213     size_t lda,
8214     cl_mem B,
8215     size_t offB,
8216     size_t ldb,
8217     cl_uint numCommandQueues,
8218     cl_command_queue *commandQueues,
8219     cl_uint numEventsInWaitList,
8220     const cl_event *eventWaitList,
8221     cl_event *events);
8222 
8223 /*@}*/
8224 
8225 /**
8226  * @defgroup SYRK SYRK - Symmetric rank-k update of a matrix
8227  * @ingroup BLAS3
8228  */
8229 
8230 /*@{*/
8231 
8232 /**
8233  * @brief Rank-k update of a symmetric matrix with float elements.
8234  *        Extended version.
8235  *
8236  * Rank-k updates:
8237  *   - \f$ C \leftarrow \alpha A A^T + \beta C \f$
8238  *   - \f$ C \leftarrow \alpha A^T A + \beta C \f$
8239  *
8240  * where \b C is a symmetric matrix.
8241  *
8242  * @param[in] order      Row/column order.
8243  * @param[in] uplo       The triangle in matrix \b C being referenced.
8244  * @param[in] transA     How matrix \b A is to be transposed.
8245  * @param[in] N          Number of rows and columns in matrix \b C.
8246  * @param[in] K          Number of columns of the matrix \b A if it is not
8247  *                       transposed, and number of rows otherwise.
8248  * @param[in] alpha      The factor of matrix \b A.
8249  * @param[in] A          Buffer object storing the matrix \b A.
8250  * @param[in] offA       Offset of the first element of the matrix \b A in the
8251  *                       buffer object. Counted in elements.
8252  * @param[in] lda        Leading dimension of matrix \b A. It cannot be
8253  *                       less than \b K if \b A is
8254  *                       in the row-major format, and less than \b N
8255  *                       otherwise.
8256  * @param[in] beta       The factor of the matrix \b C.
8257  * @param[out] C         Buffer object storing matrix \b C.
8258  * @param[in] offC       Offset of the first element of the matrix \b C in the
8259  *                       buffer object. Counted in elements.
8260  * @param[in] ldc        Leading dimension of matric \b C. It cannot be less
8261  *                       than \b N.
8262  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8263  *                                task is to be performed.
8264  * @param[in] commandQueues       OpenCL command queues.
8265  * @param[in] numEventsInWaitList Number of events in the event wait list.
8266  * @param[in] eventWaitList       Event wait list.
8267  * @param[in] events     Event objects per each command queue that identify
8268  *                       a particular kernel execution instance.
8269  *
8270  * @return
8271  *   - \b clblasSuccess on success;
8272  *   - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
8273  *        of the respective buffer object;
8274  *   - the same error codes as the clblasSsyrk() function otherwise.
8275  *
8276  * @ingroup SYRK
8277  */
8278 clblasStatus
8279 clblasSsyrk(
8280     clblasOrder order,
8281     clblasUplo uplo,
8282     clblasTranspose transA,
8283     size_t N,
8284     size_t K,
8285     cl_float alpha,
8286     const cl_mem A,
8287     size_t offA,
8288     size_t lda,
8289     cl_float beta,
8290     cl_mem C,
8291     size_t offC,
8292     size_t ldc,
8293     cl_uint numCommandQueues,
8294     cl_command_queue *commandQueues,
8295     cl_uint numEventsInWaitList,
8296     const cl_event *eventWaitList,
8297     cl_event *events);
8298 
8299 /**
8300  * @example example_ssyrk.c
 * This is an example of how to use the @ref clblasSsyrk function.
8302  */
8303 
8304 /**
8305  * @brief Rank-k update of a symmetric matrix with double elements.
8306  *        Extended version.
8307  *
8308  * Rank-k updates:
8309  *   - \f$ C \leftarrow \alpha A A^T + \beta C \f$
8310  *   - \f$ C \leftarrow \alpha A^T A + \beta C \f$
8311  *
8312  * where \b C is a symmetric matrix.
8313  *
8314  * @param[in] order      Row/column order.
8315  * @param[in] uplo       The triangle in matrix \b C being referenced.
8316  * @param[in] transA     How matrix \b A is to be transposed.
8317  * @param[in] N          Number of rows and columns in matrix \b C.
8318  * @param[in] K          Number of columns of the matrix \b A if it is not
8319  *                       transposed, and number of rows otherwise.
8320  * @param[in] alpha      The factor of matrix \b A.
8321  * @param[in] A          Buffer object storing the matrix \b A.
8322  * @param[in] offA       Offset of the first element of the matrix \b A in the
8323  *                       buffer object. Counted in elements.
8324  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8325  *                       description, see clblasSsyrk().
8326  * @param[in] beta       The factor of the matrix \b C.
8327  * @param[out] C         Buffer object storing matrix \b C.
8328  * @param[in] offC       Offset of the first element of the matrix \b C in the
8329  *                       buffer object. Counted in elements.
8330  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8331  *                       than \b N.
8332  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8333  *                                task is to be performed.
8334  * @param[in] commandQueues       OpenCL command queues.
8335  * @param[in] numEventsInWaitList Number of events in the event wait list.
8336  * @param[in] eventWaitList       Event wait list.
8337  * @param[in] events     Event objects per each command queue that identify
8338  *                       a particular kernel execution instance.
8339  *
8340  * @return
8341  *   - \b clblasSuccess on success;
8342  *   - \b clblasInvalidDevice if a target device does not support floating
8343  *        point arithmetic with double precision;
8344  *   - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
8345  *        of the respective buffer object;
8346  *   - the same error codes as the clblasSsyrk() function otherwise.
8347  *
8348  * @ingroup SYRK
8349  */
8350 clblasStatus
8351 clblasDsyrk(
8352     clblasOrder order,
8353     clblasUplo uplo,
8354     clblasTranspose transA,
8355     size_t N,
8356     size_t K,
8357     cl_double alpha,
8358     const cl_mem A,
8359     size_t offA,
8360     size_t lda,
8361     cl_double beta,
8362     cl_mem C,
8363     size_t offC,
8364     size_t ldc,
8365     cl_uint numCommandQueues,
8366     cl_command_queue *commandQueues,
8367     cl_uint numEventsInWaitList,
8368     const cl_event *eventWaitList,
8369     cl_event *events);
8370 
8371 /**
8372  * @brief Rank-k update of a symmetric matrix with complex float elements.
8373  *        Extended version.
8374  *
8375  * Rank-k updates:
8376  *   - \f$ C \leftarrow \alpha A A^T + \beta C \f$
8377  *   - \f$ C \leftarrow \alpha A^T A + \beta C \f$
8378  *
8379  * where \b C is a symmetric matrix.
8380  *
8381  * @param[in] order      Row/column order.
8382  * @param[in] uplo       The triangle in matrix \b C being referenced.
8383  * @param[in] transA     How matrix \b A is to be transposed.
8384  * @param[in] N          Number of rows and columns in matrix \b C.
8385  * @param[in] K          Number of columns of the matrix \b A if it is not
8386  *                       transposed, and number of rows otherwise.
8387  * @param[in] alpha      The factor of matrix \b A.
8388  * @param[in] A          Buffer object storing the matrix \b A.
8389  * @param[in] offA       Offset of the first element of the matrix \b A in the
8390  *                       buffer object. Counted in elements.
8391  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8392  *                       description, see clblasSsyrk().
8393  * @param[in] beta       The factor of the matrix \b C.
8394  * @param[out] C         Buffer object storing matrix \b C.
8395  * @param[in] offC       Offset of the first element of the matrix \b C in the
8396  *                       buffer object. Counted in elements.
8397  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8398  *                       than \b N.
8399  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8400  *                                task is to be performed.
8401  * @param[in] commandQueues       OpenCL command queues.
8402  * @param[in] numEventsInWaitList Number of events in the event wait list.
8403  * @param[in] eventWaitList       Event wait list.
8404  * @param[in] events     Event objects per each command queue that identify
8405  *                       a particular kernel execution instance.
8406  *
8407  * @return
8408  *   - \b clblasSuccess on success;
8409  *   - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
8410  *        of the respective buffer object;
8411  *   - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans.
8412  *   - the same error codes as the clblasSsyrk() function otherwise.
8413  *
8414  * @ingroup SYRK
8415  */
8416 clblasStatus
8417 clblasCsyrk(
8418     clblasOrder order,
8419     clblasUplo uplo,
8420     clblasTranspose transA,
8421     size_t N,
8422     size_t K,
8423     FloatComplex alpha,
8424     const cl_mem A,
8425     size_t offA,
8426     size_t lda,
8427     FloatComplex beta,
8428     cl_mem C,
8429     size_t offC,
8430     size_t ldc,
8431     cl_uint numCommandQueues,
8432     cl_command_queue *commandQueues,
8433     cl_uint numEventsInWaitList,
8434     const cl_event *eventWaitList,
8435     cl_event *events);
8436 
8437 /**
8438  * @brief Rank-k update of a symmetric matrix with complex double elements.
8439  *        Extended version.
8440  *
8441  * Rank-k updates:
8442  *   - \f$ C \leftarrow \alpha A A^T + \beta C \f$
8443  *   - \f$ C \leftarrow \alpha A^T A + \beta C \f$
8444  *
8445  * where \b C is a symmetric matrix.
8446  *
8447  * @param[in] order      Row/column order.
8448  * @param[in] uplo       The triangle in matrix \b C being referenced.
8449  * @param[in] transA     How matrix \b A is to be transposed.
8450  * @param[in] N          Number of rows and columns in matrix \b C.
8451  * @param[in] K          Number of columns of the matrix \b A if it is not
8452  *                       transposed, and number of rows otherwise.
8453  * @param[in] alpha      The factor of matrix \b A.
8454  * @param[in] A          Buffer object storing the matrix \b A.
8455  * @param[in] offA       Offset of the first element of the matrix \b A in the
8456  *                       buffer object. Counted in elements.
8457  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8458  *                       description, see clblasSsyrk().
8459  * @param[in] beta       The factor of the matrix \b C.
8460  * @param[out] C         Buffer object storing matrix \b C.
8461  * @param[in] offC       Offset of the first element of the matrix \b C in the
8462  *                       buffer object. Counted in elements.
8463  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8464  *                       than \b N.
8465  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8466  *                                task is to be performed.
8467  * @param[in] commandQueues       OpenCL command queues.
8468  * @param[in] numEventsInWaitList Number of events in the event wait list.
8469  * @param[in] eventWaitList       Event wait list.
8470  * @param[in] events     Event objects per each command queue that identify
8471  *                       a particular kernel execution instance.
8472  *
8473  * @return
8474  *   - \b clblasSuccess on success;
8475  *   - \b clblasInvalidDevice if a target device does not support floating
8476  *         point arithmetic with double precision;
8477  *   - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
8478  *        of the respective buffer object;
8479  *   - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans.
8480  *   - the same error codes as the clblasSsyrk() function otherwise.
8481  *
8482  * @ingroup SYRK
8483  */
8484 clblasStatus
8485 clblasZsyrk(
8486     clblasOrder order,
8487     clblasUplo uplo,
8488     clblasTranspose transA,
8489     size_t N,
8490     size_t K,
8491     DoubleComplex alpha,
8492     const cl_mem A,
8493     size_t offA,
8494     size_t lda,
8495     DoubleComplex beta,
8496     cl_mem C,
8497     size_t offC,
8498     size_t ldc,
8499     cl_uint numCommandQueues,
8500     cl_command_queue *commandQueues,
8501     cl_uint numEventsInWaitList,
8502     const cl_event *eventWaitList,
8503     cl_event *events);
8504 
8505 /*@}*/
8506 
8507 /**
8508  * @defgroup SYR2K SYR2K - Symmetric rank-2k update to a matrix
8509  * @ingroup BLAS3
8510  */
8511 
8512 /*@{*/
8513 
8514 /**
8515  * @brief Rank-2k update of a symmetric matrix with float elements.
8516  *        Extended version.
8517  *
8518  * Rank-k updates:
8519  *   - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
8520  *   - \f$ C \leftarrow \alpha A^T B + \alpha B^T A \beta C \f$
8521  *
8522  * where \b C is a symmetric matrix.
8523  *
8524  * @param[in] order      Row/column order.
8525  * @param[in] uplo       The triangle in matrix \b C being referenced.
8526  * @param[in] transAB    How matrices \b A and \b B is to be transposed.
8527  * @param[in] N          Number of rows and columns in matrix \b C.
8528  * @param[in] K          Number of columns of the matrices \b A and \b B if they
8529  *                       are not transposed, and number of rows otherwise.
8530  * @param[in] alpha      The factor of matrices \b A and \b B.
8531  * @param[in] A          Buffer object storing matrix \b A.
8532  * @param[in] offA       Offset of the first element of the matrix \b A in the
8533  *                       buffer object. Counted in elements.
8534  * @param[in] lda        Leading dimension of matrix \b A. It cannot be less
8535  *                       than \b K if \b A is
8536  *                       in the row-major format, and less than \b N
8537  *                       otherwise.
8538  * @param[in] B          Buffer object storing matrix \b B.
8539  * @param[in] offB       Offset of the first element of the matrix \b B in the
8540  *                       buffer object. Counted in elements.
8541  * @param[in] ldb        Leading dimension of matrix \b B. It cannot be less
8542  *                       less than \b K if \b B matches to the op(\b B) matrix
8543  *                       in the row-major format, and less than \b N
8544  *                       otherwise.
8545  * @param[in] beta       The factor of matrix \b C.
8546  * @param[out] C         Buffer object storing matrix \b C.
8547  * @param[in] offC       Offset of the first element of the matrix \b C in the
8548  *                       buffer object. Counted in elements.
8549  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8550  *                       than \b N.
8551  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8552  *                                task is to be performed.
8553  * @param[in] commandQueues       OpenCL command queues.
8554  * @param[in] numEventsInWaitList Number of events in the event wait list.
8555  * @param[in] eventWaitList       Event wait list.
8556  * @param[in] events     Event objects per each command queue that identify
8557  *                       a particular kernel execution instance.
8558  *
8559  * @return
8560  *   - \b clblasSuccess on success;
8561  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
8562  *        the size of the respective buffer object;
8563  *   - the same error codes as the clblasSsyr2k() function otherwise.
8564  *
8565  * @ingroup SYR2K
8566  */
8567 clblasStatus
8568 clblasSsyr2k(
8569     clblasOrder order,
8570     clblasUplo uplo,
8571     clblasTranspose transAB,
8572     size_t N,
8573     size_t K,
8574     cl_float alpha,
8575     const cl_mem A,
8576     size_t offA,
8577     size_t lda,
8578     const cl_mem B,
8579     size_t offB,
8580     size_t ldb,
8581     cl_float beta,
8582     cl_mem C,
8583     size_t offC,
8584     size_t ldc,
8585     cl_uint numCommandQueues,
8586     cl_command_queue *commandQueues,
8587     cl_uint numEventsInWaitList,
8588     const cl_event *eventWaitList,
8589     cl_event *events);
8590 
8591 /**
8592  * @example example_ssyr2k.c
 * This is an example of how to use the @ref clblasSsyr2k function.
8594  */
8595 
8596 /**
8597  * @brief Rank-2k update of a symmetric matrix with double elements.
8598  *        Extended version.
8599  *
8600  * Rank-k updates:
8601  *   - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
8602  *   - \f$ C \leftarrow \alpha A^T B + \alpha B^T A \beta C \f$
8603  *
8604  * where \b C is a symmetric matrix.
8605  *
8606  * @param[in] order      Row/column order.
8607  * @param[in] uplo       The triangle in matrix \b C being referenced.
8608  * @param[in] transAB    How matrices \b A and \b B is to be transposed.
8609  * @param[in] N          Number of rows and columns in matrix \b C.
8610  * @param[in] K          Number of columns of the matrices \b A and \b B if they
8611  *                       are not transposed, and number of rows otherwise.
8612  * @param[in] alpha      The factor of matrices \b A and \b B.
8613  * @param[in] A          Buffer object storing matrix \b A.
8614  * @param[in] offA       Offset of the first element of the matrix \b A in the
8615  *                       buffer object. Counted in elements.
8616  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8617  *                       description, see clblasSsyr2k().
8618  * @param[in] B          Buffer object storing matrix \b B.
8619  * @param[in] offB       Offset of the first element of the matrix \b B in the
8620  *                       buffer object. Counted in elements.
8621  * @param[in] ldb        Leading dimension of matrix \b B. For detailed
8622  *                       description, see clblasSsyr2k().
8623  * @param[in] beta       The factor of matrix \b C.
8624  * @param[out] C         Buffer object storing matrix \b C.
8625  * @param[in] offC       Offset of the first element of the matrix \b C in the
8626  *                       buffer object. Counted in elements.
8627  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8628  *                       than \b N.
8629  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8630  *                                task is to be performed.
8631  * @param[in] commandQueues       OpenCL command queues.
8632  * @param[in] numEventsInWaitList Number of events in the event wait list.
8633  * @param[in] eventWaitList       Event wait list.
8634  * @param[in] events     Event objects per each command queue that identify
8635  *                       a particular kernel execution instance.
8636  *
8637  * @return
8638  *   - \b clblasSuccess on success;
8639  *   - \b clblasInvalidDevice if a target device does not support floating
8640  *        point arithmetic with double precision;
8641  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
8642  *        the size of the respective buffer object;
8643  *   - the same error codes as the clblasSsyr2k() function otherwise.
8644  *
8645  * @ingroup SYR2K
8646  */
8647 clblasStatus
8648 clblasDsyr2k(
8649     clblasOrder order,
8650     clblasUplo uplo,
8651     clblasTranspose transAB,
8652     size_t N,
8653     size_t K,
8654     cl_double alpha,
8655     const cl_mem A,
8656     size_t offA,
8657     size_t lda,
8658     const cl_mem B,
8659     size_t offB,
8660     size_t ldb,
8661     cl_double beta,
8662     cl_mem C,
8663     size_t offC,
8664     size_t ldc,
8665     cl_uint numCommandQueues,
8666     cl_command_queue *commandQueues,
8667     cl_uint numEventsInWaitList,
8668     const cl_event *eventWaitList,
8669     cl_event *events);
8670 
8671 /**
8672  * @brief Rank-2k update of a symmetric matrix with complex float elements.
8673  *        Extended version.
8674  *
8675  * Rank-k updates:
8676  *   - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
8677  *   - \f$ C \leftarrow \alpha A^T B + \alpha B^T A \beta C \f$
8678  *
8679  * where \b C is a symmetric matrix.
8680  *
8681  * @param[in] order      Row/column order.
8682  * @param[in] uplo       The triangle in matrix \b C being referenced.
8683  * @param[in] transAB    How matrices \b A and \b B is to be transposed.
8684  * @param[in] N          Number of rows and columns in matrix \b C.
8685  * @param[in] K          Number of columns of the matrices \b A and \b B if they
8686  *                       are not transposed, and number of rows otherwise.
8687  * @param[in] alpha      The factor of matrices \b A and \b B.
8688  * @param[in] A          Buffer object storing matrix \b A.
8689  * @param[in] offA       Offset of the first element of the matrix \b A in the
8690  *                       buffer object. Counted in elements.
8691  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8692  *                       description, see clblasSsyr2k().
8693  * @param[in] B          Buffer object storing matrix \b B.
8694  * @param[in] offB       Offset of the first element of the matrix \b B in the
8695  *                       buffer object. Counted in elements.
8696  * @param[in] ldb        Leading dimension of matrix \b B. For detailed
8697  *                       description, see clblasSsyr2k().
8698  * @param[in] beta       The factor of matrix \b C.
8699  * @param[out] C         Buffer object storing matrix \b C.
8700  * @param[in] offC       Offset of the first element of the matrix \b C in the
8701  *                       buffer object. Counted in elements.
8702  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8703  *                       than \b N.
8704  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8705  *                                task is to be performed.
8706  * @param[in] commandQueues       OpenCL command queues.
8707  * @param[in] numEventsInWaitList Number of events in the event wait list.
8708  * @param[in] eventWaitList       Event wait list.
8709  * @param[in] events     Event objects per each command queue that identify
8710  *                       a particular kernel execution instance.
8711  *
8712  * @return
8713  *   - \b clblasSuccess on success;
8714  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
8715  *        the size of the respective buffer object;
8716  *   - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans.
8717  *   - the same error codes as the clblasSsyr2k() function otherwise.
8718  *
8719  * @ingroup SYR2K
8720  */
8721 clblasStatus
8722 clblasCsyr2k(
8723     clblasOrder order,
8724     clblasUplo uplo,
8725     clblasTranspose transAB,
8726     size_t N,
8727     size_t K,
8728     FloatComplex alpha,
8729     const cl_mem A,
8730     size_t offA,
8731     size_t lda,
8732     const cl_mem B,
8733     size_t offB,
8734     size_t ldb,
8735     FloatComplex beta,
8736     cl_mem C,
8737     size_t offC,
8738     size_t ldc,
8739     cl_uint numCommandQueues,
8740     cl_command_queue *commandQueues,
8741     cl_uint numEventsInWaitList,
8742     const cl_event *eventWaitList,
8743     cl_event *events);
8744 
8745 /**
8746  * @brief Rank-2k update of a symmetric matrix with complex double elements.
8747  *        Extended version.
8748  *
8749  * Rank-k updates:
8750  *   - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
8751  *   - \f$ C \leftarrow \alpha A^T B + \alpha B^T A \beta C \f$
8752  *
8753  * where \b C is a symmetric matrix.
8754  *
8755  * @param[in] order      Row/column order.
8756  * @param[in] uplo       The triangle in matrix \b C being referenced.
8757  * @param[in] transAB    How matrices \b A and \b B is to be transposed.
8758  * @param[in] N          Number of rows and columns in matrix \b C.
8759  * @param[in] K          Number of columns of the matrices \b A and \b B if they
8760  *                       are not transposed, and number of rows otherwise.
8761  * @param[in] alpha      The factor of matrices \b A and \b B.
8762  * @param[in] A          Buffer object storing matrix \b A.
8763  * @param[in] offA       Offset of the first element of the matrix \b A in the
8764  *                       buffer object. Counted in elements.
8765  * @param[in] lda        Leading dimension of matrix \b A. For detailed
8766  *                       description, see clblasSsyr2k().
8767  * @param[in] B          Buffer object storing matrix \b B.
8768  * @param[in] offB       Offset of the first element of the matrix \b B in the
8769  *                       buffer object. Counted in elements.
8770  * @param[in] ldb        Leading dimension of matrix \b B. For detailed
8771  *                       description, see clblasSsyr2k().
8772  * @param[in] beta       The factor of matrix \b C.
8773  * @param[out] C         Buffer object storing matrix \b C.
8774  * @param[in] offC       Offset of the first element of the matrix \b C in the
8775  *                       buffer object. Counted in elements.
8776  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
8777  *                       than \b N.
8778  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8779  *                                task is to be performed.
8780  * @param[in] commandQueues       OpenCL command queues.
8781  * @param[in] numEventsInWaitList Number of events in the event wait list.
8782  * @param[in] eventWaitList       Event wait list.
8783  * @param[in] events     Event objects per each command queue that identify
8784  *                       a particular kernel execution instance.
8785  *
8786  * @return
8787  *   - \b clblasSuccess on success;
8788  *   - \b clblasInvalidDevice if a target device does not support floating
8789  *        point arithmetic with double precision;
8790  *   - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
8791  *        the size of the respective buffer object;
8792  *   - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans.
8793  *   - the same error codes as the clblasSsyr2k() function otherwise.
8794  *
8795  * @ingroup SYR2K
8796  */
8797 clblasStatus
8798 clblasZsyr2k(
8799     clblasOrder order,
8800     clblasUplo uplo,
8801     clblasTranspose transAB,
8802     size_t N,
8803     size_t K,
8804     DoubleComplex alpha,
8805     const cl_mem A,
8806     size_t offA,
8807     size_t lda,
8808     const cl_mem B,
8809     size_t offB,
8810     size_t ldb,
8811     DoubleComplex beta,
8812     cl_mem C,
8813     size_t offC,
8814     size_t ldc,
8815     cl_uint numCommandQueues,
8816     cl_command_queue *commandQueues,
8817     cl_uint numEventsInWaitList,
8818     const cl_event *eventWaitList,
8819     cl_event *events);
8820 /*@}*/
8821 
8822 
8823 /**
8824  * @defgroup SYMM SYMM  - Symmetric matrix-matrix multiply
8825  * @ingroup BLAS3
8826  */
8827 /*@{*/
8828 
8829 /**
8830  * @brief Matrix-matrix product of symmetric rectangular matrices with float
8831  * elements.
8832  *
8833  * Matrix-matrix products:
8834  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
8835  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
8836  *
8837  * @param[in] order     Row/column order.
8838  * @param[in] side		The side of triangular matrix.
8839  * @param[in] uplo		The triangle in matrix being referenced.
8840  * @param[in] M         Number of rows in matrices \b B and \b C.
8841  * @param[in] N         Number of columns in matrices \b B and \b C.
8842  * @param[in] alpha     The factor of matrix \b A.
8843  * @param[in] A         Buffer object storing matrix \b A.
8844  * @param[in] offa      Offset of the first element of the matrix \b A in the
8845  *                      buffer object. Counted in elements.
8846  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
8847  *                      than \b M when the \b side parameter is set to
8848  *                      \b clblasLeft,\n or less than \b N when the
8849  *                      parameter is set to \b clblasRight.
8850  * @param[in] B         Buffer object storing matrix \b B.
8851  * @param[in] offb      Offset of the first element of the matrix \b B in the
8852  *                      buffer object. Counted in elements.
8853  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
8854  *                      than \b N when the \b order parameter is set to
8855  *                      \b clblasRowMajor,\n or less than \b M
8856  *                      when it is set to \b clblasColumnMajor.
8857  * @param[in] beta      The factor of matrix \b C.
8858  * @param[out] C        Buffer object storing matrix \b C.
8859  * @param[in] offc      Offset of the first element of the matrix \b C in the
8860  *                      buffer object. Counted in elements.
8861  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
8862  *                      than \b N when the \b order parameter is set to
8863  *                      \b clblasRowMajor,\n or less than \b M when
8864  *                      it is set to \b clblasColumnMajorOrder.
8865  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8866  *                                task is to be performed.
8867  * @param[in] commandQueues       OpenCL command queues.
8868  * @param[in] numEventsInWaitList Number of events in the event wait list.
8869  * @param[in] eventWaitList       Event wait list.
8870  * @param[in] events			  Event objects per each command queue that identify
8871  *								  a particular kernel execution instance.
8872  *
8873  * @return
8874  *   - \b clblasSuccess on success;
8875  *   - \b clblasNotInitialized if clblasSetup() was not called;
8876  *   - \b clblasInvalidValue if invalid parameters are passed:
8877  *     - \b M or \b N is zero, or
8878  *     - any of the leading dimensions is invalid;
8879  *     - the matrix sizes lead to accessing outsize of any of the buffers;
8880  *   - \b clblasInvalidMemObject if A, B, or C object is invalid,
8881  *     or an image object rather than the buffer one;
8882  *   - \b clblasOutOfResources if you use image-based function implementation
8883  *     and no suitable scratch image available;
8884  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
8885  *     internal structures;
8886  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
8887  *   - \b clblasInvalidContext if a context a passed command queue belongs to
8888  *     was released;
8889  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
8890  *     call has not completed for any of the target devices;
8891  *   - \b clblasCompilerNotAvailable if a compiler is not available;
8892  *   - \b clblasBuildProgramFailure if there is a failure to build a program
8893  *     executable.
8894  *
8895  * @ingroup SYMM
8896  */
8897 clblasStatus
8898 clblasSsymm(
8899     clblasOrder order,
8900     clblasSide side,
8901     clblasUplo uplo,
8902     size_t M,
8903     size_t N,
8904     cl_float alpha,
8905     const cl_mem A,
8906     size_t offa,
8907     size_t lda,
8908     const cl_mem B,
8909     size_t offb,
8910     size_t ldb,
8911     cl_float beta,
8912     cl_mem C,
8913     size_t offc,
8914     size_t ldc,
8915     cl_uint numCommandQueues,
8916     cl_command_queue *commandQueues,
8917     cl_uint numEventsInWaitList,
8918     const cl_event *eventWaitList,
8919     cl_event *events);
8920 /**
8921  * @example example_ssymm.c
8922  * This is an example of how to use the @ref clblasSsymm function.
8923  */
8924 
8925 
8926 /**
8927  * @brief Matrix-matrix product of symmetric rectangular matrices with double
8928  * elements.
8929  *
8930  * Matrix-matrix products:
8931  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
8932  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
8933  *
8934  * @param[in] order     Row/column order.
8935  * @param[in] side		The side of triangular matrix.
8936  * @param[in] uplo		The triangle in matrix being referenced.
8937  * @param[in] M         Number of rows in matrices \b B and \b C.
8938  * @param[in] N         Number of columns in matrices \b B and \b C.
8939  * @param[in] alpha     The factor of matrix \b A.
8940  * @param[in] A         Buffer object storing matrix \b A.
8941  * @param[in] offa      Offset of the first element of the matrix \b A in the
8942  *                      buffer object. Counted in elements.
8943  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
8944  *                      than \b M when the \b side parameter is set to
8945  *                      \b clblasLeft,\n or less than \b N when the
8946  *                      parameter is set to \b clblasRight.
8947  * @param[in] B         Buffer object storing matrix \b B.
8948  * @param[in] offb      Offset of the first element of the matrix \b B in the
8949  *                      buffer object. Counted in elements.
8950  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
8951  *                      than \b N when the \b order parameter is set to
8952  *                      \b clblasRowMajor,\n or less than \b M
8953  *                      when it is set to \b clblasColumnMajor.
8954  * @param[in] beta      The factor of matrix \b C.
8955  * @param[out] C        Buffer object storing matrix \b C.
8956  * @param[in] offc      Offset of the first element of the matrix \b C in the
8957  *                      buffer object. Counted in elements.
8958  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
8959  *                      than \b N when the \b order parameter is set to
8960  *                      \b clblasRowMajor,\n or less than \b M when
8961  *                      it is set to \b clblasColumnMajorOrder.
8962  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
8963  *                                task is to be performed.
8964  * @param[in] commandQueues       OpenCL command queues.
8965  * @param[in] numEventsInWaitList Number of events in the event wait list.
8966  * @param[in] eventWaitList       Event wait list.
8967  * @param[in] events			  Event objects per each command queue that identify
8968  *								  a particular kernel execution instance.
8969  *
8970  * @return
8971  *   - \b clblasSuccess on success;
8972  *   - \b clblasInvalidDevice if a target device does not support floating
8973  *     point arithmetic with double precision;
8974  *   - the same error codes as the clblasSsymm() function otherwise.
8975  *
8976  * @ingroup SYMM
8977  */
8978 clblasStatus
8979 clblasDsymm(
8980     clblasOrder order,
8981     clblasSide side,
8982     clblasUplo uplo,
8983     size_t M,
8984     size_t N,
8985     cl_double alpha,
8986     const cl_mem A,
8987     size_t offa,
8988     size_t lda,
8989     const cl_mem B,
8990     size_t offb,
8991     size_t ldb,
8992     cl_double beta,
8993     cl_mem C,
8994     size_t offc,
8995     size_t ldc,
8996     cl_uint numCommandQueues,
8997     cl_command_queue *commandQueues,
8998     cl_uint numEventsInWaitList,
8999     const cl_event *eventWaitList,
9000     cl_event *events);
9001 
9002 
9003 /**
9004  * @brief Matrix-matrix product of symmetric rectangular matrices with
9005  * float-complex elements.
9006  *
9007  * Matrix-matrix products:
9008  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
9009  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
9010  *
9011  * @param[in] order     Row/column order.
9012  * @param[in] side		The side of triangular matrix.
9013  * @param[in] uplo		The triangle in matrix being referenced.
9014  * @param[in] M         Number of rows in matrices \b B and \b C.
9015  * @param[in] N         Number of columns in matrices \b B and \b C.
9016  * @param[in] alpha     The factor of matrix \b A.
9017  * @param[in] A         Buffer object storing matrix \b A.
9018  * @param[in] offa      Offset of the first element of the matrix \b A in the
9019  *                      buffer object. Counted in elements.
9020  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
9021  *                      than \b M when the \b side parameter is set to
9022  *                      \b clblasLeft,\n or less than \b N when the
9023  *                      parameter is set to \b clblasRight.
9024  * @param[in] B         Buffer object storing matrix \b B.
9025  * @param[in] offb      Offset of the first element of the matrix \b B in the
9026  *                      buffer object. Counted in elements.
9027  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
9028  *                      than \b N when the \b order parameter is set to
9029  *                      \b clblasRowMajor,\n or less than \b M
9030  *                      when it is set to \b clblasColumnMajor.
9031  * @param[in] beta      The factor of matrix \b C.
9032  * @param[out] C        Buffer object storing matrix \b C.
9033  * @param[in] offc      Offset of the first element of the matrix \b C in the
9034  *                      buffer object. Counted in elements.
9035  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
9036  *                      than \b N when the \b order parameter is set to
9037  *                      \b clblasRowMajor,\n or less than \b M when
9038  *                      it is set to \b clblasColumnMajorOrder.
9039  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9040  *                                task is to be performed.
9041  * @param[in] commandQueues       OpenCL command queues.
9042  * @param[in] numEventsInWaitList Number of events in the event wait list.
9043  * @param[in] eventWaitList       Event wait list.
9044  * @param[in] events			  Event objects per each command queue that identify
9045  *								  a particular kernel execution instance.
9046  *
9047  * @return The same result as the clblasSsymm() function.
9048  *
9049  * @ingroup SYMM
9050  */
9051 clblasStatus
9052 clblasCsymm(
9053     clblasOrder order,
9054     clblasSide side,
9055     clblasUplo uplo,
9056     size_t M,
9057     size_t N,
9058     cl_float2 alpha,
9059     const cl_mem A,
9060     size_t offa,
9061     size_t lda,
9062     const cl_mem B,
9063     size_t offb,
9064     size_t ldb,
9065     cl_float2 beta,
9066     cl_mem C,
9067     size_t offc,
9068     size_t ldc,
9069     cl_uint numCommandQueues,
9070     cl_command_queue *commandQueues,
9071     cl_uint numEventsInWaitList,
9072     const cl_event *eventWaitList,
9073     cl_event *events);
9074 
9075 /**
9076  * @brief Matrix-matrix product of symmetric rectangular matrices with
9077  * double-complex elements.
9078  *
9079  * Matrix-matrix products:
9080  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
9081  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
9082  *
9083  * @param[in] order     Row/column order.
9084  * @param[in] side		The side of triangular matrix.
9085  * @param[in] uplo		The triangle in matrix being referenced.
9086  * @param[in] M         Number of rows in matrices \b B and \b C.
9087  * @param[in] N         Number of columns in matrices \b B and \b C.
9088  * @param[in] alpha     The factor of matrix \b A.
9089  * @param[in] A         Buffer object storing matrix \b A.
9090  * @param[in] offa      Offset of the first element of the matrix \b A in the
9091  *                      buffer object. Counted in elements.
9092  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
9093  *                      than \b M when the \b side parameter is set to
9094  *                      \b clblasLeft,\n or less than \b N when the
9095  *                      parameter is set to \b clblasRight.
9096  * @param[in] B         Buffer object storing matrix \b B.
9097  * @param[in] offb      Offset of the first element of the matrix \b B in the
9098  *                      buffer object. Counted in elements.
9099  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
9100  *                      than \b N when the \b order parameter is set to
9101  *                      \b clblasRowMajor,\n or less than \b M
9102  *                      when it is set to \b clblasColumnMajor.
9103  * @param[in] beta      The factor of matrix \b C.
9104  * @param[out] C        Buffer object storing matrix \b C.
9105  * @param[in] offc      Offset of the first element of the matrix \b C in the
9106  *                      buffer object. Counted in elements.
9107  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
9108  *                      than \b N when the \b order parameter is set to
9109  *                      \b clblasRowMajor,\n or less than \b M when
9110  *                      it is set to \b clblasColumnMajorOrder.
9111  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9112  *                                task is to be performed.
9113  * @param[in] commandQueues       OpenCL command queues.
9114  * @param[in] numEventsInWaitList Number of events in the event wait list.
9115  * @param[in] eventWaitList       Event wait list.
9116  * @param[in] events			  Event objects per each command queue that identify
9117  *								  a particular kernel execution instance.
9118  *
9119  * @return The same result as the clblasDsymm() function.
9120  *
9121  * @ingroup SYMM
9122  */
9123 clblasStatus
9124 clblasZsymm(
9125     clblasOrder order,
9126     clblasSide side,
9127     clblasUplo uplo,
9128     size_t M,
9129     size_t N,
9130     cl_double2 alpha,
9131     const cl_mem A,
9132     size_t offa,
9133     size_t lda,
9134     const cl_mem B,
9135     size_t offb,
9136     size_t ldb,
9137     cl_double2 beta,
9138     cl_mem C,
9139     size_t offc,
9140     size_t ldc,
9141     cl_uint numCommandQueues,
9142     cl_command_queue *commandQueues,
9143     cl_uint numEventsInWaitList,
9144     const cl_event *eventWaitList,
9145     cl_event *events);
9146 /*@}*/
9147 
9148 
9149 /**
9150  * @defgroup HEMM HEMM  - Hermitian matrix-matrix multiplication
9151  * @ingroup BLAS3
9152  */
9153 /*@{*/
9154 
9155 /**
9156  * @brief Matrix-matrix product of hermitian rectangular matrices with
9157  * float-complex elements.
9158  *
9159  * Matrix-matrix products:
9160  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
9161  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
9162  *
9163  * @param[in] order     Row/column order.
9164  * @param[in] side		The side of triangular matrix.
9165  * @param[in] uplo		The triangle in matrix being referenced.
9166  * @param[in] M         Number of rows in matrices \b B and \b C.
9167  * @param[in] N         Number of columns in matrices \b B and \b C.
9168  * @param[in] alpha     The factor of matrix \b A.
9169  * @param[in] A         Buffer object storing matrix \b A.
9170  * @param[in] offa      Offset of the first element of the matrix \b A in the
9171  *                      buffer object. Counted in elements.
9172  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
9173  *                      than \b M when the \b side parameter is set to
9174  *                      \b clblasLeft,\n or less than \b N when the
9175  *                      parameter is set to \b clblasRight.
9176  * @param[in] B         Buffer object storing matrix \b B.
9177  * @param[in] offb      Offset of the first element of the matrix \b B in the
9178  *                      buffer object. Counted in elements.
9179  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
9180  *                      than \b N when the \b order parameter is set to
9181  *                      \b clblasRowMajor,\n or less than \b M
9182  *                      when it is set to \b clblasColumnMajor.
9183  * @param[in] beta      The factor of matrix \b C.
9184  * @param[out] C        Buffer object storing matrix \b C.
9185  * @param[in] offc      Offset of the first element of the matrix \b C in the
9186  *                      buffer object. Counted in elements.
9187  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
9188  *                      than \b N when the \b order parameter is set to
9189  *                      \b clblasRowMajor,\n or less than \b M when
9190  *                      it is set to \b clblasColumnMajorOrder.
9191  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9192  *                                task is to be performed.
9193  * @param[in] commandQueues       OpenCL command queues.
9194  * @param[in] numEventsInWaitList Number of events in the event wait list.
9195  * @param[in] eventWaitList       Event wait list.
9196  * @param[in] events     Event objects per each command queue that identify
9197  *                       a particular kernel execution instance.
9198  *
9199  * @return
9200  *   - \b clblasSuccess on success;
9201  *   - \b clblasNotInitialized if clblasSetup() was not called;
9202  *   - \b clblasInvalidValue if invalid parameters are passed:
9203  *     - \b M or \b N is zero, or
9204  *     - any of the leading dimensions is invalid;
9205  *     - the matrix sizes lead to accessing outsize of any of the buffers;
9206  *   - \b clblasInvalidMemObject if A, B, or C object is invalid,
9207  *     or an image object rather than the buffer one;
9208  *   - \b clblasOutOfResources if you use image-based function implementation
9209  *     and no suitable scratch image available;
9210  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
9211  *     internal structures;
9212  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
9213  *   - \b clblasInvalidContext if a context a passed command queue belongs to
9214  *     was released;
9215  *   - \b clblasInvalidOperation if kernel compilation relating to a previous
9216  *     call has not completed for any of the target devices;
9217  *   - \b clblasCompilerNotAvailable if a compiler is not available;
9218  *   - \b clblasBuildProgramFailure if there is a failure to build a program
9219  *     executable.
9220  *
9221  * @ingroup HEMM
9222  */
9223 clblasStatus
9224 clblasChemm(
9225     clblasOrder order,
9226     clblasSide side,
9227     clblasUplo uplo,
9228     size_t M,
9229     size_t N,
9230     cl_float2 alpha,
9231     const cl_mem A,
9232     size_t offa,
9233     size_t lda,
9234     const cl_mem B,
9235     size_t offb,
9236     size_t ldb,
9237     cl_float2 beta,
9238     cl_mem C,
9239     size_t offc,
9240     size_t ldc,
9241     cl_uint numCommandQueues,
9242     cl_command_queue *commandQueues,
9243     cl_uint numEventsInWaitList,
9244     const cl_event *eventWaitList,
9245     cl_event *events);
9246 /**
9247  * @example example_chemm.cpp
9248  * This is an example of how to use the @ref clblasChemm function.
9249  */
9250 
9251 
9252 /**
9253  * @brief Matrix-matrix product of hermitian rectangular matrices with
9254  * double-complex elements.
9255  *
9256  * Matrix-matrix products:
9257  *   - \f$ C \leftarrow \alpha A B + \beta C \f$
9258  *   - \f$ C \leftarrow \alpha B A + \beta C \f$
9259  *
9260  * @param[in] order     Row/column order.
9261  * @param[in] side		The side of triangular matrix.
9262  * @param[in] uplo		The triangle in matrix being referenced.
9263  * @param[in] M         Number of rows in matrices \b B and \b C.
9264  * @param[in] N         Number of columns in matrices \b B and \b C.
9265  * @param[in] alpha     The factor of matrix \b A.
9266  * @param[in] A         Buffer object storing matrix \b A.
9267  * @param[in] offa      Offset of the first element of the matrix \b A in the
9268  *                      buffer object. Counted in elements.
9269  * @param[in] lda       Leading dimension of matrix \b A. It cannot be less
9270  *                      than \b M when the \b side parameter is set to
9271  *                      \b clblasLeft,\n or less than \b N when the
9272  *                      parameter is set to \b clblasRight.
9273  * @param[in] B         Buffer object storing matrix \b B.
9274  * @param[in] offb      Offset of the first element of the matrix \b B in the
9275  *                      buffer object. Counted in elements.
9276  * @param[in] ldb       Leading dimension of matrix \b B. It cannot be less
9277  *                      than \b N when the \b order parameter is set to
9278  *                      \b clblasRowMajor,\n or less than \b M
9279  *                      when it is set to \b clblasColumnMajor.
9280  * @param[in] beta      The factor of matrix \b C.
9281  * @param[out] C        Buffer object storing matrix \b C.
9282  * @param[in] offc      Offset of the first element of the matrix \b C in the
9283  *                      buffer object. Counted in elements.
9284  * @param[in] ldc       Leading dimension of matrix \b C. It cannot be less
9285  *                      than \b N when the \b order parameter is set to
9286  *                      \b clblasRowMajor,\n or less than \b M when
9287  *                      it is set to \b clblasColumnMajorOrder.
9288  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9289  *                                task is to be performed.
9290  * @param[in] commandQueues       OpenCL command queues.
9291  * @param[in] numEventsInWaitList Number of events in the event wait list.
9292  * @param[in] eventWaitList       Event wait list.
9293  * @param[in] events     Event objects per each command queue that identify
9294  *                       a particular kernel execution instance.
9295  *
9296  * @return
9297  *   - \b clblasSuccess on success;
9298  *   - \b clblasInvalidDevice if a target device does not support floating
9299  *     point arithmetic with double precision;
9300  *   - the same error codes as the clblasChemm() function otherwise.
9301  *
9302  * @ingroup HEMM
9303  */
9304 clblasStatus
9305 clblasZhemm(
9306     clblasOrder order,
9307     clblasSide side,
9308     clblasUplo uplo,
9309     size_t M,
9310     size_t N,
9311     cl_double2 alpha,
9312     const cl_mem A,
9313     size_t offa,
9314     size_t lda,
9315     const cl_mem B,
9316     size_t offb,
9317     size_t ldb,
9318     cl_double2 beta,
9319     cl_mem C,
9320     size_t offc,
9321     size_t ldc,
9322     cl_uint numCommandQueues,
9323     cl_command_queue *commandQueues,
9324     cl_uint numEventsInWaitList,
9325     const cl_event *eventWaitList,
9326     cl_event *events);
9327 /*@}*/
9328 
9329 
9330 /**
9331  * @defgroup HERK HERK  - Hermitian rank-k update to a matrix
9332  * @ingroup BLAS3
9333  */
9334 /*@{*/
9335 
9336 /**
9337  * @brief Rank-k update of a hermitian matrix with float-complex elements.
9338  *
9339  * Rank-k updates:
9340  *   - \f$ C \leftarrow \alpha A A^H + \beta C \f$
9341  *   - \f$ C \leftarrow \alpha A^H A + \beta C \f$
9342  *
9343  * where \b C is a hermitian matrix.
9344  *
9345  * @param[in] order      Row/column order.
9346  * @param[in] uplo       The triangle in matrix \b C being referenced.
9347  * @param[in] transA     How matrix \b A is to be transposed.
9348  * @param[in] N          Number of rows and columns in matrix \b C.
9349  * @param[in] K          Number of columns of the matrix \b A if it is not
9350  *                       transposed, and number of rows otherwise.
9351  * @param[in] alpha      The factor of matrix \b A.
9352  * @param[in] A          Buffer object storing the matrix \b A.
9353  * @param[in] offa       Offset in number of elements for the first element in matrix \b A.
9354  * @param[in] lda        Leading dimension of matrix \b A. It cannot be
9355  *                       less than \b K if \b A is
9356  *                       in the row-major format, and less than \b N
9357  *                       otherwise.
9358  * @param[in] beta       The factor of the matrix \b C.
9359  * @param[out] C         Buffer object storing matrix \b C.
9360  * @param[in] offc       Offset in number of elements for the first element in matrix \b C.
9361  * @param[in] ldc        Leading dimension of matric \b C. It cannot be less
9362  *                       than \b N.
9363  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9364  *                                task is to be performed.
9365  * @param[in] commandQueues       OpenCL command queues.
9366  * @param[in] numEventsInWaitList Number of events in the event wait list.
9367  * @param[in] eventWaitList       Event wait list.
9368  * @param[in] events     Event objects per each command queue that identify
9369  *                       a particular kernel execution instance.
9370  *
9371  * @return
9372  *   - \b clblasSuccess on success;
9373  *   - \b clblasNotInitialized if clblasSetup() was not called;
9374  *   - \b clblasInvalidValue if invalid parameters are passed:
9375  *     - either \b N or \b K is zero, or
9376  *     - any of the leading dimensions is invalid;
9377  *     - the matrix sizes lead to accessing outsize of any of the buffers;
9378  *   - \b clblasInvalidMemObject if either \b A or \b C object is
9379  *     invalid, or an image object rather than the buffer one;
9380  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
9381  *     internal structures;
9382  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
9383  *   - \b clblasInvalidContext if a context a passed command queue belongs to
9384  *     was released.
9385  *
9386  * @ingroup HERK
9387  */
9388 clblasStatus
9389 clblasCherk(
9390     clblasOrder order,
9391     clblasUplo uplo,
9392     clblasTranspose transA,
9393     size_t N,
9394     size_t K,
9395     float alpha,
9396     const cl_mem A,
9397     size_t offa,
9398     size_t lda,
9399     float beta,
9400     cl_mem C,
9401     size_t offc,
9402     size_t ldc,
9403     cl_uint numCommandQueues,
9404     cl_command_queue *commandQueues,
9405     cl_uint numEventsInWaitList,
9406     const cl_event *eventWaitList,
9407     cl_event *events);
9408 /**
9409  * @example example_cherk.cpp
9410  * This is an example of how to use the @ref clblasCherk function.
9411  */
9412 
9413 
9414 /**
9415  * @brief Rank-k update of a hermitian matrix with double-complex elements.
9416  *
9417  * Rank-k updates:
9418  *   - \f$ C \leftarrow \alpha A A^H + \beta C \f$
9419  *   - \f$ C \leftarrow \alpha A^H A + \beta C \f$
9420  *
9421  * where \b C is a hermitian matrix.
9422  *
9423  * @param[in] order      Row/column order.
9424  * @param[in] uplo       The triangle in matrix \b C being referenced.
9425  * @param[in] transA     How matrix \b A is to be transposed.
9426  * @param[in] N          Number of rows and columns in matrix \b C.
9427  * @param[in] K          Number of columns of the matrix \b A if it is not
9428  *                       transposed, and number of rows otherwise.
9429  * @param[in] alpha      The factor of matrix \b A.
9430  * @param[in] A          Buffer object storing the matrix \b A.
9431  * @param[in] offa       Offset in number of elements for the first element in matrix \b A.
9432  * @param[in] lda        Leading dimension of matrix \b A. It cannot be
9433  *                       less than \b K if \b A is
9434  *                       in the row-major format, and less than \b N
9435  *                       otherwise.
9436  * @param[in] beta       The factor of the matrix \b C.
9437  * @param[out] C         Buffer object storing matrix \b C.
9438  * @param[in] offc       Offset in number of elements for the first element in matrix \b C.
9439  * @param[in] ldc        Leading dimension of matric \b C. It cannot be less
9440  *                       than \b N.
9441  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9442  *                                task is to be performed.
9443  * @param[in] commandQueues       OpenCL command queues.
9444  * @param[in] numEventsInWaitList Number of events in the event wait list.
9445  * @param[in] eventWaitList       Event wait list.
9446  * @param[in] events     Event objects per each command queue that identify
9447  *                       a particular kernel execution instance.
9448  *
9449  * @return
9450  *   - \b clblasSuccess on success;
9451  *   - \b clblasInvalidDevice if a target device does not support floating
9452  *     point arithmetic with double precision;
9453  *   - the same error codes as the clblasCherk() function otherwise.
9454  *
9455  * @ingroup HERK
9456  */
9457 clblasStatus
9458 clblasZherk(
9459     clblasOrder order,
9460     clblasUplo uplo,
9461     clblasTranspose transA,
9462     size_t N,
9463     size_t K,
9464     double alpha,
9465     const cl_mem A,
9466     size_t offa,
9467     size_t lda,
9468     double beta,
9469     cl_mem C,
9470     size_t offc,
9471     size_t ldc,
9472     cl_uint numCommandQueues,
9473     cl_command_queue *commandQueues,
9474     cl_uint numEventsInWaitList,
9475     const cl_event *eventWaitList,
9476     cl_event *events);
9477 /*@}*/
9478 
9479 
9480 /**
9481  * @defgroup HER2K HER2K  - Hermitian rank-2k update to a matrix
9482  * @ingroup BLAS3
9483  */
9484 /*@{*/
9485 
9486 /**
9487  * @brief Rank-2k update of a hermitian matrix with float-complex elements.
9488  *
 * Rank-2k updates:
9490  *   - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$
9491  *   - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$
9492  *
9493  * where \b C is a hermitian matrix.
9494  *
9495  * @param[in] order      Row/column order.
9496  * @param[in] uplo       The triangle in matrix \b C being referenced.
 * @param[in] trans      How matrices \b A and \b B are to be transposed.
9498  * @param[in] N          Number of rows and columns in matrix \b C.
9499  * @param[in] K          Number of columns of the matrix \b A if it is not
9500  *                       transposed, and number of rows otherwise.
9501  * @param[in] alpha      The factor of matrix \b A.
9502  * @param[in] A          Buffer object storing the matrix \b A.
9503  * @param[in] offa       Offset in number of elements for the first element in matrix \b A.
9504  * @param[in] lda        Leading dimension of matrix \b A. It cannot be
9505  *                       less than \b K if \b A is
9506  *                       in the row-major format, and less than \b N
9507  *                       otherwise. Vice-versa for transpose case.
9508  * @param[in] B          Buffer object storing the matrix \b B.
9509  * @param[in] offb       Offset in number of elements for the first element in matrix \b B.
9510  * @param[in] ldb        Leading dimension of matrix \b B. It cannot be
9511  *                       less than \b K if \b B is
9512  *                       in the row-major format, and less than \b N
9513  *                       otherwise. Vice-versa for transpose case
9514  * @param[in] beta       The factor of the matrix \b C.
9515  * @param[out] C         Buffer object storing matrix \b C.
9516  * @param[in] offc       Offset in number of elements for the first element in matrix \b C.
9517  * @param[in] ldc        Leading dimension of matric \b C. It cannot be less
9518  *                       than \b N.
9519  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9520  *                                task is to be performed.
9521  * @param[in] commandQueues       OpenCL command queues.
9522  * @param[in] numEventsInWaitList Number of events in the event wait list.
9523  * @param[in] eventWaitList       Event wait list.
9524  * @param[in] events     Event objects per each command queue that identify
9525  *                       a particular kernel execution instance.
9526  *
9527  * @return
9528  *   - \b clblasSuccess on success;
9529  *   - \b clblasNotInitialized if clblasSetup() was not called;
9530  *   - \b clblasInvalidValue if invalid parameters are passed:
9531  *     - either \b N or \b K is zero, or
9532  *     - any of the leading dimensions is invalid;
9533  *     - the matrix sizes lead to accessing outsize of any of the buffers;
9534  *   - \b clblasInvalidMemObject if either \b A , \b B or \b C object is
9535  *     invalid, or an image object rather than the buffer one;
9536  *   - \b clblasOutOfHostMemory if the library can't allocate memory for
9537  *     internal structures;
9538  *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
9539  *   - \b clblasInvalidContext if a context a passed command queue belongs to
9540  *     was released.
9541  *
9542  * @ingroup HER2K
9543  */
9544 clblasStatus
9545 clblasCher2k(
9546     clblasOrder order,
9547     clblasUplo uplo,
9548     clblasTranspose trans,
9549     size_t N,
9550     size_t K,
9551     FloatComplex alpha,
9552     const cl_mem A,
9553     size_t offa,
9554     size_t lda,
9555     const cl_mem B,
9556     size_t offb,
9557     size_t ldb,
9558     cl_float beta,
9559     cl_mem C,
9560     size_t offc,
9561     size_t ldc,
9562     cl_uint numCommandQueues,
9563     cl_command_queue *commandQueues,
9564     cl_uint numEventsInWaitList,
9565     const cl_event *eventWaitList,
9566     cl_event *events);
9567 /**
9568  * @example example_cher2k.c
9569  * This is an example of how to use the @ref clblasCher2k function.
9570  */
9571 
9572 
9573 /**
9574  * @brief Rank-2k update of a hermitian matrix with double-complex elements.
9575  *
9576  * Rank-2k updates:
9577  *   - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$
9578  *   - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$
9579  *
9580  * where \b C is a hermitian matrix.
9581  *
9582  * @param[in] order      Row/column order.
9583  * @param[in] uplo       The triangle in matrix \b C being referenced.
9584  * @param[in] trans      How matrices \b A and \b B are to be transposed.
9585  * @param[in] N          Number of rows and columns in matrix \b C.
9586  * @param[in] K          Number of columns of the matrix \b A if it is not
9587  *                       transposed, and number of rows otherwise.
9588  * @param[in] alpha      The factor of matrix \b A.
9589  * @param[in] A          Buffer object storing the matrix \b A.
9590  * @param[in] offa       Offset in number of elements for the first element in matrix \b A.
9591  * @param[in] lda        Leading dimension of matrix \b A. It cannot be
9592  *                       less than \b K if \b A is
9593  *                       in the row-major format, and less than \b N
9594  *                       otherwise. Vice-versa for transpose case.
9595  * @param[in] B          Buffer object storing the matrix \b B.
9596  * @param[in] offb       Offset in number of elements for the first element in matrix \b B.
9597  * @param[in] ldb        Leading dimension of matrix \b B. It cannot be
9598  *                       less than \b K if \b B is
9599  *                       in the row-major format, and less than \b N
9600  *                       otherwise. Vice-versa for transpose case.
9601  * @param[in] beta       The factor of the matrix \b C.
9602  * @param[in,out] C      Buffer object storing matrix \b C.
9603  * @param[in] offc       Offset in number of elements for the first element in matrix \b C.
9604  * @param[in] ldc        Leading dimension of matrix \b C. It cannot be less
9605  *                       than \b N.
9606  * @param[in] numCommandQueues    Number of OpenCL command queues in which the
9607  *                                task is to be performed.
9608  * @param[in] commandQueues       OpenCL command queues.
9609  * @param[in] numEventsInWaitList Number of events in the event wait list.
9610  * @param[in] eventWaitList       Event wait list.
9611  * @param[out] events    Event objects per each command queue that identify
9612  *                       a particular kernel execution instance.
9613  *
9614  * @return
9615  *   - \b clblasSuccess on success;
9616  *   - \b clblasInvalidDevice if a target device does not support floating
9617  *     point arithmetic with double precision;
9618  *   - the same error codes as the clblasCher2k() function otherwise.
9619  *
9620  * @ingroup HER2K
9621  */
9622 clblasStatus
9623 clblasZher2k(
9624     clblasOrder order,
9625     clblasUplo uplo,
9626     clblasTranspose trans,
9627     size_t N,
9628     size_t K,
9629     DoubleComplex alpha,
9630     const cl_mem A,
9631     size_t offa,
9632     size_t lda,
9633     const cl_mem B,
9634     size_t offb,
9635     size_t ldb,
9636     cl_double beta,
9637     cl_mem C,
9638     size_t offc,
9639     size_t ldc,
9640     cl_uint numCommandQueues,
9641     cl_command_queue *commandQueues,
9642     cl_uint numEventsInWaitList,
9643     const cl_event *eventWaitList,
9644     cl_event *events);
9645 /*@}*/
9646 
9647 /**
9648  * @brief Helper function to compute the leading dimension and size of a matrix.
9649  *
9650  * @param[in] order	matrix ordering
9651  * @param[in] rows	number of rows
9652  * @param[in] columns	number of columns
9653  * @param[in] elemsize	element size
9654  * @param[in] padding	additional padding on the leading dimension
9655  * @param[out] ld	if non-NULL *ld is filled with the leading dimension
9656  *			in elements
9657  * @param[out] fullsize	if non-NULL *fullsize is filled with the byte size
9658  *
9659  * @return
9660  *   - \b clblasSuccess for success
9661  *   - \b clblasInvalidValue if:
9662  *	 - \b elemsize is 0
9663  *	 - \b rows and \b columns are both equal to 0
9664  */
9665 clblasStatus clblasMatrixSizeInfo(
9666 	clblasOrder order,
9667 	size_t rows,
9668 	size_t columns,
9669 	size_t elemsize,
9670 	size_t padding,
9671 	size_t * ld,
9672 	size_t * fullsize);
9673 
9674 /**
9675  * @brief Allocates a matrix on the device and computes its ld and size.
9676  *
9677  * @param[in] context	OpenCL context
9678  * @param[in] order	Row/column order.
9679  * @param[in] rows	number of rows
9680  * @param[in] columns	number of columns
9681  * @param[in] elemsize	element size
9682  * @param[in] padding	additional padding on the leading dimension
9683  * @param[out] ld	if non-NULL *ld is filled with the leading dimension
9684  *			in elements
9685  * @param[out] fullsize	if non-NULL *fullsize is filled with the byte size
9686  * @param[out] err	Error code (see \b clCreateBuffer() )
9687  *
9688  * @return
9689  *   - OpenCL memory object of the allocated matrix
9690  */
9691 cl_mem clblasCreateMatrix(
9692 	cl_context context,
9693 	clblasOrder order,
9694 	size_t rows,
9695 	size_t columns,
9696 	size_t elemsize,
9697 	size_t padding,
9698 	size_t * ld,
9699 	size_t * fullsize,
9700 	cl_int * err);
9701 
9702 
9703 /**
9704  * @brief Allocates a matrix on the device with a caller-specified ld and computes its size.
9705  *
9706  * @param[in] context	OpenCL context
9707  * @param[in] order	Row/column order.
9708  * @param[in] rows	number of rows
9709  * @param[in] columns	number of columns
9710  * @param[in] elemsize	element size
9711  * @param[in] ld	the length of the leading dimension. It cannot
9712  *                      be less than \b columns when the \b order parameter is set to
9713  *                      \b clblasRowMajor,\n or less than \b rows when the
9714  *                      parameter is set to \b clblasColumnMajor. Unlike
9715  *                      clblasCreateMatrix(), no \b padding argument is taken.
9716  * @param[out] fullsize	if non-NULL *fullsize is filled with the byte size
9717  * @param[out] err	Error code (see \b clCreateBuffer() )
9718  *
9719  * @return
9720  *   - OpenCL memory object of the allocated matrix
9721  */
9722 cl_mem clblasCreateMatrixWithLd( cl_context context,
9723                                  clblasOrder order,
9724                                  size_t rows,
9725                                  size_t columns,
9726                                  size_t elemsize,
9727                                  size_t ld,
9728                                  size_t * fullsize,
9729                                  cl_int * err) ;
9730 
9731 
9732 /**
9733  * @brief Allocates a matrix on the device and initializes it from an existing
9734  *	  similar matrix on the host. See \b clblasCreateMatrix().
9735  *
9736  * @param[in] ld	leading dimension in elements
9737  * @param[in] host 	base address of host matrix data
9738  * @param[in] off_host 	host matrix offset in elements
9739  * @param[in] ld_host 	leading dimension of host matrix in elements
9740  * @param[in] command_queue 		specifies the OpenCL queue
9741  * @param[in] numEventsInWaitList 	specifies the number of OpenCL events
9742  *	   	    		        to wait for
9743  * @param[in] eventWaitList 		specifies the list of OpenCL events to
9744  *					wait for
9745  *
9746  * @return
9747  *   - OpenCL memory object of the allocated matrix
9748  */
9749 cl_mem clblasCreateMatrixFromHost(
9750 	cl_context context,
9751 	clblasOrder order,
9752 	size_t rows,
9753 	size_t columns,
9754 	size_t elemsize,
9755 	size_t ld,
9756 	void * host,
9757 	size_t off_host,
9758 	size_t ld_host,
9759   cl_command_queue command_queue,
9760 	cl_uint numEventsInWaitList,
9761 	const cl_event *eventWaitList,
9762 	cl_int * err);
9763 
9764 /**
9765  * @brief Synchronously copies a sub-matrix from host (A) to device (B).
9766  *
9767  * @param[in] order			matrix ordering
9768  * @param[in] element_size		element size
9769  * @param[in] A				specifies the source matrix on the host
9770  * @param[in] offA			specifies the offset of matrix A in
9771  *					elements
9772  * @param[in] ldA			specifies the leading dimension of
9773  * 					matrix A in elements
9774  * @param[in] nrA			specifies the number of rows of A
9775  *					in elements
9776  * @param[in] ncA			specifies the number of columns of A
9777  *					in elements
9778  * @param[in] xA			specifies the top-left x position to
9779  * 					copy from A
9780  * @param[in] yA			specifies the top-left y position to
9781  * 					copy from A
9782  * @param[in] B				specifies the destination matrix on the
9783  *					device
9784  * @param[in] offB			specifies the offset of matrix B in
9785  *					elements
9786  * @param[in] ldB 			specifies the leading dimension of
9787  * 					matrix B in bytes
9788  * @param[in] nrB 			specifies the number of rows of B
9789  *					in elements
9790  * @param[in] ncB 			specifies the number of columns of B
9791  *					in elements
9792  * @param[in] xB 			specifies the top-left x position to
9793  *					copy to in B
9794  * @param[in] yB 			specifies the top-left y position to
9795  *					copy to in B
9796  * @param[in] nx 			specifies the number of elements to
9797  *					copy according to the x dimension (rows)
9798  * @param[in] ny 			specifies the number of elements to
9799  *					copy according to the y dimension
9800  *					(columns)
9801  * @param[in] command_queue 		specifies the OpenCL queue
9802  * @param[in] numEventsInWaitList 	specifies the number of OpenCL events
9803  *	   	    		        to wait for
9804  * @param[in] eventWaitList 		specifies the list of OpenCL events to
9805  *					wait for
9806  *
9807  * @return
9808  *   - \b clblasSuccess for success
9809  *   - \b clblasInvalidValue if:
9810  *      - \b xA + \b offA + \b nx is greater than the number of columns of A
9811  *      - \b xB + \b offB + \b nx is greater than the number of columns of B
9812  *      - \b yA + \b ny is greater than the number of rows of A
9813  *      - \b yB + \b ny is greater than the number of rows of B
9814  */
9815 clblasStatus clblasWriteSubMatrix(
9816 	clblasOrder order,
9817 	size_t element_size,
9818 	const void *A, size_t offA, size_t ldA,
9819 	size_t nrA, size_t ncA,
9820 	size_t xA, size_t yA,
9821 	cl_mem B, size_t offB, size_t ldB,
9822 	size_t nrB, size_t ncB,
9823 	size_t xB, size_t yB,
9824 	size_t nx, size_t ny,
9825 	cl_command_queue command_queue,
9826 	cl_uint numEventsInWaitList,
9827 	const cl_event *eventWaitList);
9828 
9829 /**
9830  * @brief Asynchronously copies a sub-matrix from host (A) to device (B).
9831  *	  See \b clblasWriteSubMatrix() for the other parameters.
9832  *
9833  * @param[out] event 	Event object that identifies this particular
9834  *			execution instance on \b command_queue.
9835  */
9836 clblasStatus clblasWriteSubMatrixAsync(
9837 	clblasOrder order,
9838 	size_t element_size,
9839 	const void *A, size_t offA, size_t ldA,
9840 	size_t nrA, size_t ncA,
9841 	size_t xA, size_t yA,
9842 	cl_mem B, size_t offB, size_t ldB,
9843 	size_t nrB, size_t ncB,
9844 	size_t xB, size_t yB,
9845 	size_t nx, size_t ny,
9846 	cl_command_queue command_queue,
9847 	cl_uint numEventsInWaitList,
9848 	const cl_event *eventWaitList,
9849 	cl_event *event);
9850 
9851 /**
9852  * @brief Synchronously copies a sub-matrix from device (A) to host (B).
9853  *	  See \b clblasWriteSubMatrix() for the other parameters.
9854  *
9855  * @param[in] A		specifies the source matrix on the device
9856  * @param[out] B	specifies the destination matrix on the host
9857  *
9858  * @return
9859  *   - see \b clblasWriteSubMatrix()
9860  */
9861 clblasStatus clblasReadSubMatrix(
9862 	clblasOrder order,
9863 	size_t element_size,
9864 	const cl_mem A, size_t offA, size_t ldA,
9865 	size_t nrA, size_t ncA,
9866 	size_t xA, size_t yA,
9867 	void *B, size_t offB, size_t ldB,
9868 	size_t nrB, size_t ncB,
9869 	size_t xB, size_t yB,
9870 	size_t nx, size_t ny,
9871 	cl_command_queue command_queue,
9872 	cl_uint numEventsInWaitList,
9873 	const cl_event *eventWaitList);
9874 
9875 /**
9876  * @brief Asynchronously copies a sub-matrix from device (A) to host (B).
9877  * 	  See \b clblasReadSubMatrix() and \b clblasWriteSubMatrixAsync().
9878  */
9879 clblasStatus clblasReadSubMatrixAsync(
9880 	clblasOrder order,
9881 	size_t element_size,
9882 	const cl_mem A, size_t offA, size_t ldA,
9883 	size_t nrA, size_t ncA,
9884 	size_t xA, size_t yA,
9885 	void *B, size_t offB, size_t ldB,
9886 	size_t nrB, size_t ncB,
9887 	size_t xB, size_t yB,
9888 	size_t nx, size_t ny,
9889 	cl_command_queue command_queue,
9890 	cl_uint numEventsInWaitList,
9891 	const cl_event *eventWaitList,
9892 	cl_event *event);
9893 
9894 /**
9895  * @brief Synchronously copies a sub-matrix from device (A) to device (B).
9896  *	  See \b clblasWriteSubMatrix() for the other parameters.
9897  *
9898  * @param[in] A		specifies the source matrix on the device
9899  * @param[in] B		specifies the destination matrix on the device
9900  *
9901  * @return
9902  *   - see \b clblasWriteSubMatrix()
9903  */
9904 clblasStatus clblasCopySubMatrix(
9905 	clblasOrder order,
9906 	size_t element_size,
9907 	const cl_mem A, size_t offA, size_t ldA,
9908 	size_t nrA, size_t ncA,
9909 	size_t xA, size_t yA,
9910 	cl_mem B, size_t offB, size_t ldB,
9911 	size_t nrB, size_t ncB,
9912 	size_t xB, size_t yB,
9913 	size_t nx, size_t ny,
9914 	cl_command_queue command_queue,
9915 	cl_uint numEventsInWaitList,
9916 	const cl_event *eventWaitList);
9917 
9918 /**
9919  * @brief Asynchronously copies a sub-matrix from device (A) to device (B).
9920  *        See \b clblasCopySubMatrix() and \b clblasWriteSubMatrixAsync().
9921  */
9922 clblasStatus clblasCopySubMatrixAsync(
9923 	clblasOrder order,
9924 	size_t element_size,
9925 	const cl_mem A, size_t offA, size_t ldA,
9926 	size_t nrA, size_t ncA,
9927 	size_t xA, size_t yA,
9928 	cl_mem B, size_t offB, size_t ldB,
9929 	size_t nrB, size_t ncB,
9930 	size_t xB, size_t yB,
9931 	size_t nx, size_t ny,
9932 	cl_command_queue command_queue,
9933 	cl_uint numEventsInWaitList,
9934 	const cl_event *eventWaitList,
9935 	cl_event *event);
9936 
9937 /**
9938  * @brief Synchronously copies a vector from host (A) to device (B).
9939  *	  See \b clblasWriteSubMatrix() for the shared parameters.
9940  *
9941  * @param[in] A		specifies the source vector on the host
9942  * @param[in] B		specifies the destination vector on the device
9943  *
9944  * @return
9945  *   - see \b clblasWriteSubMatrix()
9946  */
9947 clblasStatus clblasWriteVector(
9948 	size_t nb_elem,
9949 	size_t element_size,
9950 	const void *A, size_t offA,
9951 	cl_mem B, size_t offB,
9952 	cl_command_queue command_queue,
9953 	cl_uint numEventsInWaitList,
9954 	const cl_event *eventWaitList);
9955 
9956 /**
9957  * @brief Asynchronously copies a vector from host (A) to device (B).
9958  * 	  See \b clblasWriteVector() and \b clblasWriteSubMatrixAsync().
9959  */
9960 clblasStatus clblasWriteVectorAsync(
9961 	size_t nb_elem,
9962 	size_t element_size,
9963 	const void *A, size_t offA,
9964 	cl_mem B, size_t offB,
9965 	cl_command_queue command_queue,
9966 	cl_uint numEventsInWaitList,
9967 	const cl_event *eventWaitList,
9968 	cl_event *events);
9969 
9970 /**
9971  * @brief Synchronously copies a vector from device (A) to host (B).
9972  *	  See \b clblasReadSubMatrix() for the shared parameters.
9973  *
9974  * @param[in] A		specifies the source vector on the device
9975  * @param[out] B	specifies the destination vector on the host
9976  *
9977  * @return
9978  *   - see \b clblasReadSubMatrix()
9979  */
9980 clblasStatus clblasReadVector(
9981 	size_t nb_elem,
9982 	size_t element_size,
9983 	const cl_mem A, size_t offA,
9984 	void * B, size_t offB,
9985 	cl_command_queue command_queue,
9986 	cl_uint numEventsInWaitList,
9987 	const cl_event *eventWaitList);
9988 
9989 /**
9990  * @brief Asynchronously copies a vector from device (A) to host (B).
9991  * 	  See \b clblasReadVector() and \b clblasWriteSubMatrixAsync().
9992  */
9993 clblasStatus clblasReadVectorAsync(
9994 	size_t nb_elem,
9995 	size_t element_size,
9996 	const cl_mem A, size_t offA,
9997 	void * B, size_t offB,
9998 	cl_command_queue command_queue,
9999 	cl_uint numEventsInWaitList,
10000 	const cl_event *eventWaitList,
10001 	cl_event *events);
10002 
10003 /**
10004  * @brief Synchronously copies a vector from device (A) to device (B).
10005  *	  See \b clblasCopySubMatrix() for the shared parameters.
10006  *
10007  * @param[in] A		specifies the source vector on the device
10008  * @param[in] B		specifies the destination vector on the device
10009  *
10010  * @return
10011  *   - see \b clblasCopySubMatrix()
10012  */
10013 clblasStatus clblasCopyVector(
10014 	size_t nb_elem,
10015 	size_t element_size,
10016 	const cl_mem A, size_t offA,
10017 	cl_mem B, size_t offB,
10018 	cl_command_queue command_queue,
10019 	cl_uint numEventsInWaitList,
10020 	const cl_event *eventWaitList);
10021 
10022 /**
10023  * @brief Asynchronously copies a vector from device (A) to device (B).
10024  * 	  See \b clblasCopyVector() and \b clblasWriteSubMatrixAsync().
10025  */
10026 clblasStatus clblasCopyVectorAsync(
10027 	size_t nb_elem,
10028 	size_t element_size,
10029 	const cl_mem A, size_t offA,
10030 	cl_mem B, size_t offB,
10031 	cl_command_queue command_queue,
10032 	cl_uint numEventsInWaitList,
10033 	const cl_event *eventWaitList,
10034 	cl_event *events);
10035 
10036 /**
10037  * @brief Synchronously copies a whole matrix from host (A) to device (B).
10038  *        See \b clblasWriteSubMatrix() for the shared parameters.
10039  *
10040  * @param[in] A		specifies the source matrix on the host
10041  * @param[in] B		specifies the destination matrix on the device
10042  *
10043  * @return
10044  *   - see \b clblasWriteSubMatrix()
10045  */
10046 clblasStatus clblasWriteMatrix(
10047 	clblasOrder order,
10048 	size_t sx, size_t sy,
10049 	size_t element_size,
10050 	const void *A, size_t offA, size_t ldA,
10051 	cl_mem B, size_t offB, size_t ldB,
10052 	cl_command_queue command_queue,
10053 	cl_uint numEventsInWaitList,
10054 	const cl_event *eventWaitList);
10055 
10056 /**
10057  * @brief Asynchronously copies a whole matrix from host (A) to device (B).
10058  *        See \b clblasWriteMatrix() and \b clblasWriteSubMatrixAsync().
10059  */
10060 clblasStatus clblasWriteMatrixAsync(
10061 	clblasOrder order,
10062 	size_t sx, size_t sy,
10063 	size_t element_size,
10064 	const void *A, size_t offA, size_t ldA,
10065 	cl_mem B, size_t offB, size_t ldB,
10066 	cl_command_queue command_queue,
10067 	cl_uint numEventsInWaitList,
10068 	const cl_event *eventWaitList,
10069 	cl_event *events);
10070 
10071 /**
10072  * @brief Synchronously copies a whole matrix from device (A) to host (B).
10073  *	  See \b clblasReadSubMatrix() for the shared parameters.
10074  *
10075  * @param[in] A		specifies the source matrix on the device
10076  * @param[out] B	specifies the destination matrix on the host
10077  *
10078  * @return
10079  *   - see \b clblasReadSubMatrix()
10080  */
10081 clblasStatus clblasReadMatrix(
10082 	clblasOrder order,
10083 	size_t sx, size_t sy,
10084 	size_t element_size,
10085 	const cl_mem A, size_t offA, size_t ldA,
10086 	void * B, size_t offB, size_t ldB,
10087 	cl_command_queue command_queue,
10088 	cl_uint numEventsInWaitList,
10089 	const cl_event *eventWaitList);
10090 
10091 /**
10092  * @brief Asynchronously copies a whole matrix from device (A) to host (B).
10093  *        See \b clblasReadMatrix() and \b clblasWriteSubMatrixAsync().
10094  */
10095 clblasStatus clblasReadMatrixAsync(
10096 	clblasOrder order,
10097 	size_t sx, size_t sy,
10098 	size_t element_size,
10099 	const cl_mem A, size_t offA, size_t ldA,
10100 	void * B, size_t offB, size_t ldB,
10101 	cl_command_queue command_queue,
10102 	cl_uint numEventsInWaitList,
10103 	const cl_event *eventWaitList,
10104 	cl_event *events);
10105 
10106 /**
10107  * @brief Synchronously copies a whole matrix from device (A) to device (B).
10108  *	  See \b clblasCopySubMatrix() for the shared parameters.
10109  *
10110  * @param[in] A		specifies the source matrix on the device
10111  * @param[in] B		specifies the destination matrix on the device
10112  *
10113  * @return
10114  *   - see \b clblasCopySubMatrix()
10115  */
10116 clblasStatus clblasCopyMatrix(
10117 	clblasOrder order,
10118 	size_t sx, size_t sy,
10119 	size_t element_size,
10120 	const cl_mem A, size_t offA, size_t ldA,
10121 	cl_mem B, size_t offB, size_t ldB,
10122 	cl_command_queue command_queue,
10123 	cl_uint numEventsInWaitList,
10124 	const cl_event *eventWaitList);
10125 
10126 /**
10127  * @brief Asynchronously copies a whole matrix from device (A) to device (B).
10128  *        See \b clblasCopyMatrix() and \b clblasWriteSubMatrixAsync().
10129  */
10130 clblasStatus clblasCopyMatrixAsync(
10131 	clblasOrder order,
10132 	size_t sx, size_t sy,
10133 	size_t element_size,
10134 	const cl_mem A, size_t offA, size_t ldA,
10135 	cl_mem B, size_t offB, size_t ldB,
10136 	cl_command_queue command_queue,
10137 	cl_uint numEventsInWaitList,
10138 	const cl_event *eventWaitList,
10139 	cl_event *events);
10140 
10141 /**
10142  * @brief Synchronously fills a vector with a pattern of size \b element_size bytes.
10143  *
10144  * @param[in] nb_elem             specifies the number of elements in buffer A
10145  * @param[in] element_size        specifies the size of one element of A. Supported sizes correspond
10146  *                                to the element sizes used in clBLAS (1,2,4,8,16)
10147  * @param[in] A		          specifies the vector to fill on the device
10148  * @param[in] offA                specifies the offset of vector A in
10149  *				  elements
10150  * @param[in] host                specifies the host address of the pattern to fill with (element_size bytes)
10151  * @param[in] command_queue 	  specifies the OpenCL queue
10152  * @param[in] numEventsInWaitList specifies the number of OpenCL events
10153  *	   	    		  to wait for
10154  * @param[in] eventWaitList 	  specifies the list of OpenCL events to
10155  *				  wait for
10156  * @return
10157  *   - see \b clblasWriteSubMatrix()
10158  */
10159 clblasStatus clblasFillVector(
10160      size_t nb_elem,
10161      size_t element_size,
10162      cl_mem A, size_t offA,
10163      const void * host,
10164      cl_command_queue command_queue,
10165      cl_uint numEventsInWaitList,
10166      const cl_event *eventWaitList);
10167 
10168 /**
10169  * @brief Asynchronously fills a vector with a pattern of size \b element_size bytes.
10170  *	  See \b clblasFillVector() and \b clblasWriteSubMatrixAsync().
10171  */
10172 clblasStatus clblasFillVectorAsync(
10173      size_t nb_elem,
10174      size_t element_size,
10175      cl_mem A, size_t offA,
10176      const void * pattern,
10177      cl_command_queue command_queue,
10178      cl_uint numEventsInWaitList,
10179      const cl_event *eventWaitList,
10180      cl_event *event);
10181 
10182 /**
10183  * @brief Synchronously fills a matrix with a pattern of size \b element_size bytes.
10184  *
10185  * @param[in] order               specifies the matrix order
10186  * @param[in] element_size        specifies the size of one element of A. Supported sizes correspond
10187  *                                to the element sizes used in clBLAS (1,2,4,8,16)
10188  * @param[in] A		          specifies the matrix to fill on the device
10189  * @param[in] offA                specifies the offset of matrix A in elements
10190  * @param[in] ldA                 specifies the leading dimension of A
10191  * @param[in] nrA                 specifies the number of rows in A
10192  * @param[in] ncA                 specifies the number of columns in A
10193  * @param[in] pattern             specifies the host address of the pattern to fill with (element_size bytes)
10194  * @param[in] command_queue 	  specifies the OpenCL queue
10195  * @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for
10196  * @param[in] eventWaitList 	  specifies the list of OpenCL events to wait for
10197  * @return
10198  *   - see \b clblasWriteSubMatrix()
10199  */
10200 clblasStatus clblasFillMatrix(
10201      clblasOrder order,
10202      size_t element_size,
10203      cl_mem A, size_t offA, size_t ldA,
10204      size_t nrA, size_t ncA,
10205      const void *pattern,
10206      cl_command_queue command_queue,
10207      cl_uint numEventsInWaitList,
10208      const cl_event *eventWaitList);
10209 
10210 
10211 /**
10212  * @brief Fills a sub-matrix of A with a pattern of size \b element_size bytes.
10213  *
10214  * @param[in] order               specifies the matrix order
10215  * @param[in] element_size        specifies the size of one element of A. Supported values
10216  *                                are the element sizes used in clBLAS - that is 1, 2, 4, 8 or 16
10217  * @param[in] A                   specifies the matrix to fill on the device
10218  * @param[in] offA                specifies the offset of matrix A in elements
10219  * @param[in] ldA                 specifies the leading dimension of A in elements
10220  * @param[in] nrA		  specifies the number of rows of A
10221  *				  in elements
10222  * @param[in] ncA		  specifies the number of columns of A
10223  *				  in elements
10224  * @param[in] xA		  specifies the top-left x position of the
10225  * 				  region to fill in A
10226  * @param[in] yA		  specifies the top-left y position of the
10227  * 				  region to fill in A
10228  * @param[in] nx 		  specifies the number of elements to
10229  *				  fill according to the x dimension (rows)
10230  * @param[in] ny 		  specifies the number of elements to
10231  *				  fill according to the y dimension
10232  *				  (columns)
10233  * @param[in] pattern             specifies the host address of the pattern to fill with (element_size bytes)
10234  * @param[in] command_queue 	  specifies the OpenCL queue
10235  * @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for
10236  * @param[in] eventWaitList 	  specifies the list of OpenCL events to wait for
10237  * @return
10238  *   - see \b clblasWriteSubMatrix()
10239  */
10240 
10241 clblasStatus clblasFillSubMatrix(
10242      clblasOrder order,
10243      size_t element_size,
10244      cl_mem A, size_t offA, size_t ldA,
10245      size_t nrA, size_t ncA,
10246      size_t xA, size_t yA,
10247      size_t nx, size_t ny,
10248      const void *pattern,
10249      cl_command_queue command_queue,
10250      cl_uint numEventsInWaitList,
10251      const cl_event *eventWaitList);
10252 
10253 /**
10254  * @brief Asynchronously fills a sub-matrix with a pattern of size \b element_size bytes.
10255  *	  See \b clblasFillSubMatrix(). NOTE(review): sxA/syA/host presumably mirror its nrA/ncA/pattern, and xA/yA are int here, not size_t - confirm.
10256  */
10257 clblasStatus clblasFillSubMatrixAsync(
10258      clblasOrder order,
10259      size_t element_size,
10260      cl_mem A, size_t offA, size_t ldA,
10261      size_t sxA, size_t syA,
10262      int xA, int yA,
10263      size_t nx, size_t ny,
10264      const void *host,
10265      cl_command_queue command_queue,
10266      cl_uint numEventsInWaitList,
10267      const cl_event *eventWaitList,
10268      cl_event *event);
10269 
10270 
10271 
10272 #ifdef __cplusplus
10273 }      /* extern "C" { */
10274 #endif
10275 
10276 #endif /* CLBLAS_H_ */
10277