/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/
#ifndef CLBLAS_H_
#define CLBLAS_H_
/**
* @mainpage OpenCL BLAS
*
* This is an implementation of
*
* Basic Linear Algebra Subprograms, levels 1, 2 and 3 using
* OpenCL and optimized for
* the AMD GPU hardware.
*/
#if defined(__APPLE__) || defined(__MACOSX)
#include
#else
#include
#endif
#include
#ifdef __cplusplus
extern "C" {
#endif
/**
* @defgroup OVERVIEW Overview
*
* This library provides an implementation of the Basic Linear Algebra Subprograms levels 1, 2 and 3,
* using OpenCL and optimized for AMD GPU hardware. It provides BLAS-1 functions
* SWAP, SCAL, COPY, AXPY, DOT, DOTU, DOTC, ROTG, ROTMG, ROT, ROTM, iAMAX, ASUM and NRM2,
* BLAS-2 functions GEMV, SYMV, TRMV, TRSV, HEMV, SYR, SYR2, HER, HER2, GER, GERU, GERC,
* TPMV, SPMV, HPMV, TPSV, SPR, SPR2, HPR, HPR2, GBMV, TBMV, SBMV, HBMV and TBSV
* and BLAS-3 functions GEMM, SYMM, TRMM, TRSM, HEMM, HERK, HER2K, SYRK and SYR2K.
*
* This library’s primary goal is to assist the end user to enqueue OpenCL
* kernels to process BLAS functions in an OpenCL-efficient manner, while
* keeping interfaces familiar to users who know how to use BLAS. All
* functions accept matrices through buffer objects.
*
* This library is entirely thread-safe, with the exception of the following API functions:
* clblasSetup and clblasTeardown.
* Developers using the library can safely call any other BLAS routine from different threads.
*
* @section deprecated
* This library provided support for the creation of scratch images to achieve better performance
* on older AMD APP SDKs.
* However, memory buffers now give the same performance as image objects in the current SDKs.
* Scratch image buffers are being deprecated and users are advised not to use scratch images in
* new applications.
*/
/**
* @defgroup TYPES clblas types
*/
/*@{*/
/** Shows how matrices are placed in memory. */
typedef enum clblasOrder_ {
    clblasRowMajor    = 0, /**< The matrix is stored row after row. */
    clblasColumnMajor = 1  /**< The matrix is stored column after column. */
} clblasOrder;
/** Used to specify whether the matrix is to be transposed or not. */
typedef enum clblasTranspose_ {
    clblasNoTrans   = 0, /**< Use the matrix as it is. */
    clblasTrans     = 1, /**< Use the transpose of the matrix. */
    clblasConjTrans = 2  /**< Use the conjugate transpose of the matrix. */
} clblasTranspose;
/** Used by the Hermitian, symmetric and triangular matrix
* routines to specify whether the upper or lower triangle is being referenced.
*/
typedef enum clblasUplo_ {
    clblasUpper = 0, /**< The upper triangle is referenced. */
    clblasLower = 1  /**< The lower triangle is referenced. */
} clblasUplo;
/** It is used by the triangular matrix routines to specify whether the
* matrix is unit triangular.
*/
typedef enum clblasDiag_ {
    clblasUnit    = 0, /**< The matrix is unit triangular. */
    clblasNonUnit = 1  /**< The matrix is non-unit triangular. */
} clblasDiag;
/** Indicates the side matrix A is located relative to matrix B during multiplication. */
typedef enum clblasSide_ {
    clblasLeft  = 0, /**< The symmetric, Hermitian or triangular matrix
                          multiplies the general matrix on the left. */
    clblasRight = 1  /**< The symmetric, Hermitian or triangular matrix
                          multiplies the general matrix on the right. */
} clblasSide;
/**
* @brief clblas error codes definition, incorporating OpenCL error
* definitions.
*
* This enumeration is a subset of the OpenCL error codes extended with some
* additional extra codes. For example, CL_OUT_OF_HOST_MEMORY, which is
* defined in cl.h is aliased as clblasOutOfHostMemory.
*/
typedef enum clblasStatus_ {
    /* Codes aliased one-to-one from the OpenCL error definitions */
    clblasSuccess              = CL_SUCCESS,
    clblasInvalidValue         = CL_INVALID_VALUE,
    clblasInvalidCommandQueue  = CL_INVALID_COMMAND_QUEUE,
    clblasInvalidContext       = CL_INVALID_CONTEXT,
    clblasInvalidMemObject     = CL_INVALID_MEM_OBJECT,
    clblasInvalidDevice        = CL_INVALID_DEVICE,
    clblasInvalidEventWaitList = CL_INVALID_EVENT_WAIT_LIST,
    clblasOutOfResources       = CL_OUT_OF_RESOURCES,
    clblasOutOfHostMemory      = CL_OUT_OF_HOST_MEMORY,
    clblasInvalidOperation     = CL_INVALID_OPERATION,
    clblasCompilerNotAvailable = CL_COMPILER_NOT_AVAILABLE,
    clblasBuildProgramFailure  = CL_BUILD_PROGRAM_FAILURE,

    /* Extended, clblas-specific codes: consecutive values starting at -1024 */
    clblasNotImplemented       = -1024, /**< The functionality is not implemented */
    clblasNotInitialized       = -1023, /**< The clblas library has not been initialized yet */
    clblasInvalidMatA          = -1022, /**< Matrix A is not a valid memory object */
    clblasInvalidMatB          = -1021, /**< Matrix B is not a valid memory object */
    clblasInvalidMatC          = -1020, /**< Matrix C is not a valid memory object */
    clblasInvalidVecX          = -1019, /**< Vector X is not a valid memory object */
    clblasInvalidVecY          = -1018, /**< Vector Y is not a valid memory object */
    clblasInvalidDim           = -1017, /**< An input dimension (M,N,K) is invalid */
    clblasInvalidLeadDimA      = -1016, /**< Leading dimension A must not be less than the size of the first dimension */
    clblasInvalidLeadDimB      = -1015, /**< Leading dimension B must not be less than the size of the second dimension */
    clblasInvalidLeadDimC      = -1014, /**< Leading dimension C must not be less than the size of the third dimension */
    clblasInvalidIncX          = -1013, /**< The increment for vector X must not be 0 */
    clblasInvalidIncY          = -1012, /**< The increment for vector Y must not be 0 */
    clblasInsufficientMemMatA  = -1011, /**< The memory object for Matrix A is too small */
    clblasInsufficientMemMatB  = -1010, /**< The memory object for Matrix B is too small */
    clblasInsufficientMemMatC  = -1009, /**< The memory object for Matrix C is too small */
    clblasInsufficientMemVecX  = -1008, /**< The memory object for Vector X is too small */
    clblasInsufficientMemVecY  = -1007  /**< The memory object for Vector Y is too small */
} clblasStatus;
/*@}*/
/**
* @defgroup VERSION Version information
*/
/*@{*/
/**
* @brief Get the clblas library version info.
*
* @param[out] major Location to store library's major version.
* @param[out] minor Location to store library's minor version.
* @param[out] patch Location to store library's patch version.
*
* @returns always \b clblasSuccess.
*
* @ingroup VERSION
*/
clblasStatus clblasGetVersion(cl_uint *major,  /* out: major version */
                              cl_uint *minor,  /* out: minor version */
                              cl_uint *patch); /* out: patch version */
/*@}*/
/**
* @defgroup INIT Initialize library
*/
/*@{*/
/**
* @brief Initialize the clblas library.
*
* Must be called before any other clblas API function is invoked.
* @note This function is not thread-safe.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasOutOfHostMemory if there is not enough memory to allocate
* library's internal structures;
* - \b clblasOutOfResources in case of requested resources scarcity.
*
* @ingroup INIT
*/
clblasStatus clblasSetup(void); /* not thread-safe; call before any other clblas function */
/**
* @brief Finalize the usage of the clblas library.
*
* Frees all memory allocated for the computational kernels and other
* internal data.
* @note This function is not thread-safe.
*
* @ingroup INIT
*/
void clblasTeardown(void); /* not thread-safe; frees the library's internal data */
/*@}*/
/**
* @defgroup BLAS1 BLAS-1 functions
*
* The Level 1 Basic Linear Algebra Subprograms are functions that perform
* vector-vector operations.
*/
/*@{*/
/*@}*/
/**
* @defgroup SWAP SWAP - Swap elements from 2 vectors
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief interchanges two vectors of float.
*
*
* @param[in] N Number of elements in vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b X, or \b Y object is
* Invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SWAP
*/
clblasStatus clblasSswap(
    size_t N,                           /* number of elements */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,    /* vector Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_sswap.c
* Example of how to use the @ref clblasSswap function.
*/
/**
* @brief interchanges two vectors of double.
*
*
* @param[in] N Number of elements in vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support the
* floating point arithmetic with double precision;
* - the same error codes as the clblasSswap() function otherwise.
*
* @ingroup SWAP
*/
clblasStatus clblasDswap(
    size_t N,                           /* number of elements */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,    /* vector Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief interchanges two vectors of complex-float elements.
*
*
* @param[in] N Number of elements in vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSswap() function otherwise.
*
* @ingroup SWAP
*/
clblasStatus clblasCswap(
    size_t N,                           /* number of elements */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,    /* vector Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief interchanges two vectors of double-complex elements.
*
*
* @param[in] N Number of elements in vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasDswap() function otherwise.
*
* @ingroup SWAP
*/
clblasStatus clblasZswap(
    size_t N,                           /* number of elements */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,    /* vector Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup SCAL SCAL - Scales a vector by a constant
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Scales a float vector by a float constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the vector size along with the increment leads to
* accessing outside of the buffer;
* - \b clblasInvalidMemObject if the \b X object is
* invalid, or an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SCAL
*/
clblasStatus clblasSscal(
    size_t N,                           /* number of elements */
    cl_float alpha,                     /* scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_sscal.c
* Example of how to use the @ref clblasSscal function.
*/
/**
* @brief Scales a double vector by a double constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support the
* floating point arithmetic with double precision;
* - the same error codes as the clblasSscal() function otherwise.
*
* @ingroup SCAL
*/
clblasStatus clblasDscal(
    size_t N,                           /* number of elements */
    cl_double alpha,                    /* scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Scales a complex-float vector by a complex-float constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSscal() function otherwise.
*
* @ingroup SCAL
*/
clblasStatus clblasCscal(
    size_t N,                           /* number of elements */
    cl_float2 alpha,                    /* complex scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Scales a complex-double vector by a complex-double constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasDscal() function otherwise.
*
* @ingroup SCAL
*/
clblasStatus clblasZscal(
    size_t N,                           /* number of elements */
    cl_double2 alpha,                   /* complex scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup SSCAL SSCAL - Scales a complex vector by a real constant
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Scales a complex-float vector by a float constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the vector size along with the increment leads to
* accessing outside of the buffer;
* - \b clblasInvalidMemObject if the \b X object is
* invalid, or an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SSCAL
*/
clblasStatus clblasCsscal(
    size_t N,                           /* number of elements */
    cl_float alpha,                     /* real scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_csscal.c
* Example of how to use the @ref clblasCsscal function.
*/
/**
* @brief Scales a complex-double vector by a double constant
*
* - \f$ X \leftarrow \alpha X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support the
* floating point arithmetic with double precision;
* - the same error codes as the clblasCsscal() function otherwise.
*
* @ingroup SSCAL
*/
clblasStatus clblasZdscal(
    size_t N,                           /* number of elements */
    cl_double alpha,                    /* real scaling factor */
    cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup COPY COPY - Copies elements from vector X to vector Y
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Copies float elements from vector X to vector Y
*
* - \f$ Y \leftarrow X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b X, or \b Y object is
* Invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup COPY
*/
clblasStatus clblasScopy(
    size_t N,                                 /* number of elements */
    const cl_mem X, size_t offx, int incx,    /* source X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,          /* destination Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_scopy.c
* Example of how to use the @ref clblasScopy function.
*/
/**
* @brief Copies double elements from vector X to vector Y
*
* - \f$ Y \leftarrow X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support the
* floating point arithmetic with double precision;
* - the same error codes as the clblasScopy() function otherwise.
*
* @ingroup COPY
*/
clblasStatus clblasDcopy(
    size_t N,                                 /* number of elements */
    const cl_mem X, size_t offx, int incx,    /* source X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,          /* destination Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Copies complex-float elements from vector X to vector Y
*
* - \f$ Y \leftarrow X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasScopy() function otherwise.
*
* @ingroup COPY
*/
clblasStatus clblasCcopy(
    size_t N,                                 /* number of elements */
    const cl_mem X, size_t offx, int incx,    /* source X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,          /* destination Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Copies complex-double elements from vector X to vector Y
*
* - \f$ Y \leftarrow X \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasDcopy() function otherwise.
*
* @ingroup COPY
*/
clblasStatus clblasZcopy(
    size_t N,                                 /* number of elements */
    const cl_mem X, size_t offx, int incx,    /* source X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,          /* destination Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup AXPY AXPY - Scale X and add to Y
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Scale vector X of float elements and add to Y
*
* - \f$ Y \leftarrow \alpha X + Y \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b X, or \b Y object is
* Invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup AXPY
*/
clblasStatus clblasSaxpy(
    size_t N,                                 /* number of elements */
    cl_float alpha,                           /* factor applied to X */
    const cl_mem X, size_t offx, int incx,    /* vector X: buffer, element offset, increment */
    cl_mem Y, size_t offy, int incy,          /* vector Y: buffer, element offset, increment */
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_saxpy.c
* Example of how to use the @ref clblasSaxpy function.
*/
/**
* @brief Scale vector X of double elements and add to Y.
*
* - \f$ Y \leftarrow \alpha X + Y \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y. It is read as an
* operand and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSaxpy() function otherwise.
*
* @ingroup AXPY
*/
clblasStatus
clblasDaxpy(
size_t N,
cl_double alpha,
const cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Scale vector X of complex-float elements and add to Y.
*
* - \f$ Y \leftarrow \alpha X + Y \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y. It is read as an
* operand and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSaxpy() function otherwise.
*
* @ingroup AXPY
*/
clblasStatus
clblasCaxpy(
size_t N,
cl_float2 alpha,
const cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Scale vector X of double-complex elements and add to Y.
*
* - \f$ Y \leftarrow \alpha X + Y \f$
*
* @param[in] N Number of elements in vector \b X.
* @param[in] alpha The constant factor for vector \b X.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] Y Buffer object storing the vector \b Y. It is read as an
* operand and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasDaxpy() function otherwise.
*
* @ingroup AXPY
*/
clblasStatus
clblasZaxpy(
size_t N,
cl_double2 alpha,
const cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup DOT DOT - Dot product of two vectors
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Dot product of two vectors containing float elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if any of the \b X, \b Y or \b dotProduct
* objects is invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup DOT
*/
clblasStatus
clblasSdot(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sdot.c
* Example of how to use the @ref clblasSdot function.
*/
/**
* @brief Dot product of two vectors containing double elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSdot() function otherwise.
*
* @ingroup DOT
*/
clblasStatus
clblasDdot(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Dot product of two vectors containing float-complex elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSdot() function otherwise.
*
* @ingroup DOT
*/
clblasStatus
clblasCdotu(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Dot product of two vectors containing double-complex elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSdot() function otherwise.
*
* @ingroup DOT
*/
clblasStatus
clblasZdotu(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Dot product of two vectors containing float-complex elements,
* conjugating the first vector.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSdot() function otherwise.
*
* @ingroup DOT
*/
clblasStatus
clblasCdotc(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Dot product of two vectors containing double-complex elements,
* conjugating the first vector.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] dotProduct Buffer object that will contain the dot-product value.
* @param[in] offDP Offset to dot-product in \b dotProduct buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSdot() function otherwise.
*
* @ingroup DOT
*/
clblasStatus
clblasZdotc(
size_t N,
cl_mem dotProduct,
size_t offDP,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup ROTG ROTG - Constructs Givens plane rotation
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Construct Givens plane rotation on float elements.
*
* @param[out] SA Buffer object that contains SA.
* @param[in] offSA Offset to SA in \b SA buffer object.
* Counted in elements.
* @param[out] SB Buffer object that contains SB.
* @param[in] offSB Offset to SB in \b SB buffer object.
* Counted in elements.
* @param[out] C Buffer object that contains C.
* @param[in] offC Offset to C in \b C buffer object.
* Counted in elements.
* @param[out] S Buffer object that contains S.
* @param[in] offS Offset to S in \b S buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidMemObject if any of the \b SA, \b SB, \b C or \b S
* objects is invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup ROTG
*/
clblasStatus
clblasSrotg(
cl_mem SA,
size_t offSA,
cl_mem SB,
size_t offSB,
cl_mem C,
size_t offC,
cl_mem S,
size_t offS,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_srotg.c
* Example of how to use the @ref clblasSrotg function.
*/
/**
* @brief Construct Givens plane rotation on double elements.
*
* @param[out] DA Buffer object that contains DA.
* @param[in] offDA Offset to DA in \b DA buffer object.
* Counted in elements.
* @param[out] DB Buffer object that contains DB.
* @param[in] offDB Offset to DB in \b DB buffer object.
* Counted in elements.
* @param[out] C Buffer object that contains C.
* @param[in] offC Offset to C in \b C buffer object.
* Counted in elements.
* @param[out] S Buffer object that contains S.
* @param[in] offS Offset to S in \b S buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSrotg() function otherwise.
*
* @ingroup ROTG
*/
clblasStatus
clblasDrotg(
cl_mem DA,
size_t offDA,
cl_mem DB,
size_t offDB,
cl_mem C,
size_t offC,
cl_mem S,
size_t offS,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Construct Givens plane rotation on float-complex elements.
*
* @param[out] CA Buffer object that contains CA.
* @param[in] offCA Offset to CA in \b CA buffer object.
* Counted in elements.
* @param[out] CB Buffer object that contains CB.
* @param[in] offCB Offset to CB in \b CB buffer object.
* Counted in elements.
* @param[out] C Buffer object that contains C. C is real.
* @param[in] offC Offset to C in \b C buffer object.
* Counted in elements.
* @param[out] S Buffer object that contains S.
* @param[in] offS Offset to S in \b S buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSrotg() function otherwise.
*
* @ingroup ROTG
*/
clblasStatus
clblasCrotg(
cl_mem CA,
size_t offCA,
cl_mem CB,
size_t offCB,
cl_mem C,
size_t offC,
cl_mem S,
size_t offS,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Construct Givens plane rotation on double-complex elements.
*
* @param[out] CA Buffer object that contains CA.
* @param[in] offCA Offset to CA in \b CA buffer object.
* Counted in elements.
* @param[out] CB Buffer object that contains CB.
* @param[in] offCB Offset to CB in \b CB buffer object.
* Counted in elements.
* @param[out] C Buffer object that contains C. C is real.
* @param[in] offC Offset to C in \b C buffer object.
* Counted in elements.
* @param[out] S Buffer object that contains S.
* @param[in] offS Offset to S in \b S buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasDrotg() function otherwise.
*
* @ingroup ROTG
*/
clblasStatus
clblasZrotg(
cl_mem CA,
size_t offCA,
cl_mem CB,
size_t offCB,
cl_mem C,
size_t offC,
cl_mem S,
size_t offS,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup ROTMG ROTMG - Constructs the modified Givens rotation
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Construct the modified Givens rotation on float elements.
*
* @param[out] SD1 Buffer object that contains SD1.
* @param[in] offSD1 Offset to SD1 in \b SD1 buffer object.
* Counted in elements.
* @param[out] SD2 Buffer object that contains SD2.
* @param[in] offSD2 Offset to SD2 in \b SD2 buffer object.
* Counted in elements.
* @param[out] SX1 Buffer object that contains SX1.
* @param[in] offSX1 Offset to SX1 in \b SX1 buffer object.
* Counted in elements.
* @param[in] SY1 Buffer object that contains SY1.
* @param[in] offSY1 Offset to SY1 in \b SY1 buffer object.
* Counted in elements.
* @param[out] SPARAM Buffer object that contains the SPARAM array of minimum length 5:
* - SPARAM(0) = SFLAG
* - SPARAM(1) = SH11
* - SPARAM(2) = SH21
* - SPARAM(3) = SH12
* - SPARAM(4) = SH22
* @param[in] offSparam Offset to SPARAM in \b SPARAM buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidMemObject if any of the \b SX1, \b SY1, \b SD1, \b SD2
* or \b SPARAM objects is invalid, or is an image object rather than a
* buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup ROTMG
*/
clblasStatus
clblasSrotmg(
cl_mem SD1,
size_t offSD1,
cl_mem SD2,
size_t offSD2,
cl_mem SX1,
size_t offSX1,
const cl_mem SY1,
size_t offSY1,
cl_mem SPARAM,
size_t offSparam,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_srotmg.c
* Example of how to use the @ref clblasSrotmg function.
*/
/**
* @brief Construct the modified Givens rotation on double elements.
*
* @param[out] DD1 Buffer object that contains DD1.
* @param[in] offDD1 Offset to DD1 in \b DD1 buffer object.
* Counted in elements.
* @param[out] DD2 Buffer object that contains DD2.
* @param[in] offDD2 Offset to DD2 in \b DD2 buffer object.
* Counted in elements.
* @param[out] DX1 Buffer object that contains DX1.
* @param[in] offDX1 Offset to DX1 in \b DX1 buffer object.
* Counted in elements.
* @param[in] DY1 Buffer object that contains DY1.
* @param[in] offDY1 Offset to DY1 in \b DY1 buffer object.
* Counted in elements.
* @param[out] DPARAM Buffer object that contains the DPARAM array of minimum length 5:
* - DPARAM(0) = DFLAG
* - DPARAM(1) = DH11
* - DPARAM(2) = DH21
* - DPARAM(3) = DH12
* - DPARAM(4) = DH22
* @param[in] offDparam Offset to DPARAM in \b DPARAM buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSrotmg() function otherwise.
*
* @ingroup ROTMG
*/
clblasStatus
clblasDrotmg(
cl_mem DD1,
size_t offDD1,
cl_mem DD2,
size_t offDD2,
cl_mem DX1,
size_t offDX1,
const cl_mem DY1,
size_t offDY1,
cl_mem DPARAM,
size_t offDparam,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup ROT ROT - Apply Givens rotation
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Applies a plane rotation for float elements.
*
* @param[in] N Number of elements in vector \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] C C specifies the cosine, cos.
* @param[in] S S specifies the sine, sin.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either the \b X or the \b Y object is
* invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup ROT
*/
clblasStatus
clblasSrot(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_float C,
cl_float S,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_srot.c
* Example of how to use the @ref clblasSrot function.
*/
/**
* @brief Applies a plane rotation for double elements.
*
* @param[in] N Number of elements in vector \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] C C specifies the cosine, cos.
* @param[in] S S specifies the sine, sin.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* double-precision floating point arithmetic;
* - the same error codes as the clblasSrot() function otherwise.
*
* @ingroup ROT
*/
clblasStatus
clblasDrot(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_double C,
cl_double S,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Applies a plane rotation for float-complex elements.
*
* @param[in] N Number of elements in vector \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing the vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] C C specifies the cosine, cos. This number is real.
* @param[in] S S specifies the sine, sin. This number is real.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects for each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSrot() function otherwise.
*
* @ingroup ROT
*/
clblasStatus
clblasCsrot(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_float C,
cl_float S,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Applies a plane rotation to vectors of double-complex elements.
*
* @param[in] N Number of elements in vectors \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing vector \b Y.
* @param[in] offy Offset of the first element of vector \b Y in the buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] C Specifies the cosine, cos. This number is real
* (passed as \c cl_double).
* @param[in] S Specifies the sine, sin. This number is real
* (passed as \c cl_double).
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSrot() function otherwise.
*
* @ingroup ROT
*/
clblasStatus
clblasZdrot(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
cl_double C,
cl_double S,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup ROTM ROTM - Apply modified Givens rotation for points in the plane
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Applies a modified Givens rotation to vectors of float elements.
*
* @param[in] N Number of elements in vectors \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing vector \b Y.
* @param[in] offy Offset of the first element of vector \b Y in the buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] SPARAM Buffer object that contains the SPARAM array of minimum length 5:
* SPARAM(1)=SFLAG
* SPARAM(2)=SH11
* SPARAM(3)=SH21
* SPARAM(4)=SH12
* SPARAM(5)=SH22
* @param[in] offSparam Offset of the first element of array \b SPARAM in the buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if any of the \b X, \b Y or \b SPARAM objects is
* invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup ROTM
*/
clblasStatus
clblasSrotm(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
const cl_mem SPARAM,
size_t offSparam,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_srotm.c
* Example of how to use the @ref clblasSrotm function.
*/
/**
* @brief Applies a modified Givens rotation to vectors of double elements.
*
* @param[in] N Number of elements in vectors \b X and \b Y.
* @param[out] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] Y Buffer object storing vector \b Y.
* @param[in] offy Offset of the first element of vector \b Y in the buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in] DPARAM Buffer object that contains the DPARAM array of minimum length 5:
* DPARAM(1)=DFLAG
* DPARAM(2)=DH11
* DPARAM(3)=DH21
* DPARAM(4)=DH12
* DPARAM(5)=DH22
* @param[in] offDparam Offset of the first element of array \b DPARAM in the buffer object.
* Counted in elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSrotm() function otherwise.
*
* @ingroup ROTM
*/
clblasStatus
clblasDrotm(
size_t N,
cl_mem X,
size_t offx,
int incx,
cl_mem Y,
size_t offy,
int incy,
const cl_mem DPARAM,
size_t offDparam,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup NRM2 NRM2 - Euclidean norm of a vector
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Computes the Euclidean norm of a vector containing float elements.
*
* NRM2 = sqrt( X' * X )
*
* @param[in] N Number of elements in vector \b X.
* @param[out] NRM2 Buffer object that will contain the NRM2 value.
* @param[in] offNRM2 Offset to the NRM2 value in the \b NRM2 buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold
* a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if any of the \b X, \b NRM2 or \b scratchBuff objects is
* invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup NRM2
*/
clblasStatus
clblasSnrm2(
size_t N,
cl_mem NRM2,
size_t offNRM2,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_snrm2.c
* Example of how to use the @ref clblasSnrm2 function.
*/
/**
* @brief Computes the Euclidean norm of a vector containing double elements.
*
* NRM2 = sqrt( X' * X )
*
* @param[in] N Number of elements in vector \b X.
* @param[out] NRM2 Buffer object that will contain the NRM2 value.
* @param[in] offNRM2 Offset to the NRM2 value in the \b NRM2 buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold
* a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSnrm2() function otherwise.
*
* @ingroup NRM2
*/
clblasStatus
clblasDnrm2(
size_t N,
cl_mem NRM2,
size_t offNRM2,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Computes the Euclidean norm of a vector containing float-complex elements.
*
* NRM2 = sqrt( X**H * X )
*
* @param[in] N Number of elements in vector \b X.
* @param[out] NRM2 Buffer object that will contain the NRM2 value.
* Note that the answer of Scnrm2 is a real value.
* @param[in] offNRM2 Offset to the NRM2 value in the \b NRM2 buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold
* a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSnrm2() function otherwise.
*
* @ingroup NRM2
*/
clblasStatus
clblasScnrm2(
size_t N,
cl_mem NRM2,
size_t offNRM2,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Computes the Euclidean norm of a vector containing double-complex elements.
*
* NRM2 = sqrt( X**H * X )
*
* @param[in] N Number of elements in vector \b X.
* @param[out] NRM2 Buffer object that will contain the NRM2 value.
* Note that the answer of Dznrm2 is a real value.
* @param[in] offNRM2 Offset to the NRM2 value in the \b NRM2 buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold
* a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSnrm2() function otherwise.
*
* @ingroup NRM2
*/
clblasStatus
clblasDznrm2(
size_t N,
cl_mem NRM2,
size_t offNRM2,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup iAMAX iAMAX - Index of max absolute value
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Index of the max absolute value in a float array.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] iMax Buffer object storing the index of the first absolute max.
* The index will be of type unsigned int.
* @param[in] offiMax Offset for storing the index in the buffer \b iMax.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem object to store intermediate results.
* It should be able to hold a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if any of the \b iMax, \b X or \b scratchBuff objects is
* invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that the passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup iAMAX
*/
clblasStatus
clblasiSamax(
size_t N,
cl_mem iMax,
size_t offiMax,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_isamax.c
* Example of how to use the @ref clblasiSamax function.
*/
/**
* @brief Index of the max absolute value in a double array.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] iMax Buffer object storing the index of the first absolute max.
* The index will be of type unsigned int.
* @param[in] offiMax Offset for storing the index in the buffer \b iMax.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem object to store intermediate results.
* It should be able to hold a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasiSamax() function otherwise.
*
* @ingroup iAMAX
*/
clblasStatus
clblasiDamax(
size_t N,
cl_mem iMax,
size_t offiMax,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Index of the max absolute value in a complex float array.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] iMax Buffer object storing the index of the first absolute max.
* The index will be of type unsigned int.
* @param[in] offiMax Offset for storing the index in the buffer \b iMax.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem object to store intermediate results.
* It should be able to hold a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasiSamax() function otherwise.
*
* @ingroup iAMAX
*/
clblasStatus
clblasiCamax(
size_t N,
cl_mem iMax,
size_t offiMax,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Index of the max absolute value in a complex double array.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] iMax Buffer object storing the index of the first absolute max.
* The index will be of type unsigned int.
* @param[in] offiMax Offset for storing the index in the buffer \b iMax.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem object to store intermediate results.
* It should be able to hold a minimum of (2*N) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasiSamax() function otherwise.
*
* @ingroup iAMAX
*/
clblasStatus
clblasiZamax(
size_t N,
cl_mem iMax,
size_t offiMax,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup ASUM ASUM - Sum of absolute values
* @ingroup BLAS1
*/
/*@{*/
/**
* @brief Absolute sum of the values of a vector containing float elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] asum Buffer object that will contain the absolute sum value.
* @param[in] offAsum Offset to the absolute sum in the \b asum buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if any of the \b X, \b asum or \b scratchBuff objects is
* invalid, or is an image object rather than a buffer object;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup ASUM
*/
clblasStatus
clblasSasum(
size_t N,
cl_mem asum,
size_t offAsum,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sasum.c
* Example of how to use the @ref clblasSasum function.
*/
/**
* @brief Absolute sum of the values of a vector containing double elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] asum Buffer object that will contain the absolute sum value.
* @param[in] offAsum Offset to the absolute sum in the \b asum buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSasum() function otherwise.
*
* @ingroup ASUM
*/
clblasStatus
clblasDasum(
size_t N,
cl_mem asum,
size_t offAsum,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Absolute sum of the values of a vector containing float-complex elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] asum Buffer object that will contain the absolute sum value.
* @param[in] offAsum Offset to the absolute sum in the \b asum buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - the same error codes as the clblasSasum() function otherwise.
*
* @ingroup ASUM
*/
clblasStatus
clblasScasum(
size_t N,
cl_mem asum,
size_t offAsum,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Absolute sum of the values of a vector containing double-complex elements.
*
* @param[in] N Number of elements in vector \b X.
* @param[out] asum Buffer object that will contain the absolute sum value.
* @param[in] offAsum Offset to the absolute sum in the \b asum buffer object.
* Counted in elements.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of the first element of vector \b X in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - the same error codes as the clblasSasum() function otherwise.
*
* @ingroup ASUM
*/
clblasStatus
clblasDzasum(
size_t N,
cl_mem asum,
size_t offAsum,
const cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup BLAS2 BLAS-2 functions
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* matrix-vector operations.
*/
/*@{*/
/*@}*/
/**
* @defgroup GEMV GEMV - General matrix-Vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a general rectangular matrix and
* float elements. Extended version.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
* - \f$ y \leftarrow \alpha A^T x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of the first element of vector \b x in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b x. It cannot be zero.
* @param[in] beta The factor of the vector \b y.
* @param[in,out] y Buffer object storing the vector \b y. It is scaled by
* \b beta on input and receives the result on output,
* as in the formulas above.
* @param[in] offy Offset of the first element of vector \b y in the buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSgemv() function otherwise.
*
* @todo The last item of the return list refers back to clblasSgemv()
* itself; the remaining error codes should be enumerated here explicitly.
*
* @ingroup GEMV
*/
clblasStatus
clblasSgemv(
clblasOrder order,
clblasTranspose transA,
size_t M,
size_t N,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
cl_float beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sgemv.c
* Example of how to use the @ref clblasSgemv function.
*/
/**
* @brief Matrix-vector product with a general rectangular matrix and
* double elements. Extended version.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
* - \f$ y \leftarrow \alpha A^T x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For a detailed description,
* see clblasSgemv().
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of the first element of vector \b x in the buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b x. It cannot be zero.
* @param[in] beta The factor of the vector \b y.
* @param[in,out] y Buffer object storing the vector \b y. It is scaled by
* \b beta on input and receives the result on output,
* as in the formulas above.
* @param[in] offy Offset of the first element of vector \b y in the buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects, one per command queue, that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support
* floating point arithmetic with double precision;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSgemv() function otherwise.
*
* @ingroup GEMV
*/
clblasStatus
clblasDgemv(
clblasOrder order,
clblasTranspose transA,
size_t M,
size_t N,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
cl_double beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a general rectangular matrix and
* float complex elements. Extended version.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
* - \f$ y \leftarrow \alpha A^T x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For a detailed description,
* see clblasSgemv().
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of first element of vector \b x in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b x. It cannot be zero.
* @param[in] beta The factor of the vector \b y.
* @param[in,out] y Buffer object storing the vector \b y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSgemv() function otherwise.
*
* @ingroup GEMV
*/
clblasStatus
clblasCgemv(
clblasOrder order,
clblasTranspose transA,
size_t M,
size_t N,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
FloatComplex beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a general rectangular matrix and
* double complex elements. Extended version.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
* - \f$ y \leftarrow \alpha A^T x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For a detailed description,
* see clblasSgemv().
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of first element of vector \b x in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of \b x. It cannot be zero.
* @param[in] beta The factor of the vector \b y.
* @param[in,out] y Buffer object storing the vector \b y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support the
* floating point arithmetic with double precision;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSgemv() function otherwise.
*
* @ingroup GEMV
*/
clblasStatus
clblasZgemv(
clblasOrder order,
clblasTranspose transA,
size_t M,
size_t N,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
DoubleComplex beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SYMV SYMV - Symmetric matrix-Vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a symmetric matrix and float elements.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of first element of vector \b x in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b x. It cannot be zero.
* @param[in] beta The factor of vector \b y.
* @param[in,out] y Buffer object storing vector \b y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSgemv() function otherwise.
*
* @ingroup SYMV
*/
clblasStatus
clblasSsymv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
cl_float beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_ssymv.c
* This is an example of how to use the @ref clblasSsymv function.
*/
/**
* @brief Matrix-vector product with a symmetric matrix and double elements.
*
* Matrix-vector products:
* - \f$ y \leftarrow \alpha A x + \beta y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in
* the buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] x Buffer object storing vector \b x.
* @param[in] offx Offset of first element of vector \b x in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b x. It cannot be zero.
* @param[in] beta The factor of vector \b y.
* @param[in,out] y Buffer object storing vector \b y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer
* object;
* - the same error codes as the clblasSsymv() function otherwise.
*
* @ingroup SYMV
*/
clblasStatus
clblasDsymv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem x,
size_t offx,
int incx,
cl_double beta,
cl_mem y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HEMV HEMV - Hermitian matrix-vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a Hermitian matrix and float-complex elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
*   - \b N is zero, or
*   - either \b incx or \b incy is zero, or
*   - any of the leading dimensions is invalid, or
*   - the matrix sizes or the vector sizes along with the increments lead to
*     accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HEMV
*/
clblasStatus
clblasChemv(
clblasOrder order,
clblasUplo uplo,
size_t N,
FloatComplex alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
FloatComplex beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a Hermitian matrix and double-complex elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasChemv() function otherwise.
*
* @ingroup HEMV
*/
clblasStatus
clblasZhemv(
clblasOrder order,
clblasUplo uplo,
size_t N,
DoubleComplex alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
DoubleComplex beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_zhemv.cpp
* Example of how to use the @ref clblasZhemv function.
*/
/*@}*/
/**
* @defgroup TRMV TRMV - Triangular matrix vector multiply
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a triangular matrix and
* float elements.
*
* Matrix-vector products:
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X. It is overwritten
* with the product.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
*   - either \b N or \b incx is zero, or
*   - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TRMV
*/
clblasStatus
clblasStrmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_strmv.c
* Example of how to use the @ref clblasStrmv function.
*/
/**
* @brief Matrix-vector product with a triangular matrix and
* double elements.
*
* Matrix-vector products:
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X. It is overwritten
* with the product.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasStrmv() function otherwise.
*
* @ingroup TRMV
*/
clblasStatus
clblasDtrmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a triangular matrix and
* float complex elements.
*
* Matrix-vector products:
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X. It is overwritten
* with the product.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasStrmv() function.
* @ingroup TRMV
*/
clblasStatus
clblasCtrmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a triangular matrix and
* double complex elements.
*
* Matrix-vector products:
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X. It is overwritten
* with the product.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDtrmv() function.
* @ingroup TRMV
*/
clblasStatus
clblasZtrmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup TRSV TRSV - Triangular matrix vector Solve
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Solves a triangular system of equations with float elements.
*
* Triangular solves (\b X is overwritten with the solution):
* - \f$ X \leftarrow A^{-1} X \f$
* - \f$ X \leftarrow A^{-T} X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
*   - either \b N or \b incx is zero, or
*   - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TRSV
*/
clblasStatus
clblasStrsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_strsv.c
* Example of how to use the @ref clblasStrsv function.
*/
/**
* @brief Solves a triangular system of equations with double elements.
*
* Triangular solves (\b X is overwritten with the solution):
* - \f$ X \leftarrow A^{-1} X \f$
* - \f$ X \leftarrow A^{-T} X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasStrsv() function otherwise.
*
* @ingroup TRSV
*/
clblasStatus
clblasDtrsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solves a triangular system of equations with float-complex elements.
*
* Triangular solves (\b X is overwritten with the solution):
* - \f$ X \leftarrow A^{-1} X \f$
* - \f$ X \leftarrow A^{-T} X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasStrsv() function.
*
* @ingroup TRSV
*/
clblasStatus
clblasCtrsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solves a triangular system of equations with double-complex elements.
*
* Triangular solves (\b X is overwritten with the solution):
* - \f$ X \leftarrow A^{-1} X \f$
* - \f$ X \leftarrow A^{-T} X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDtrsv() function.
*
* @ingroup TRSV
*/
clblasStatus
clblasZtrsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup GER GER - General matrix rank 1 operation
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Rank 1 update of a general matrix with float elements.
*
* Rank 1 operation:
* - \f$ A \leftarrow \alpha X Y^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor of the product \f$ X Y^T \f$.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[in,out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
*   - \b M or \b N is zero, or
*   - either \b incx or \b incy is zero, or
*   - a leading dimension is invalid;
* - \b clblasInvalidMemObject if A, X, or Y object is invalid,
* or an image object rather than the buffer one;
* - \b clblasOutOfResources if you use image-based function implementation
* and no suitable scratch image available;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if the context that a passed command queue
* belongs to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup GER
*/
clblasStatus
clblasSger(
clblasOrder order,
size_t M,
size_t N,
cl_float alpha,
const cl_mem X,
size_t offx,
int incx,
const cl_mem Y,
size_t offy,
int incy,
cl_mem A,
size_t offa,
size_t lda,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sger.c
* Example of how to use the @ref clblasSger function.
*/
/**
* @brief Vector-vector product with double elements that
* performs a rank 1 update of matrix \b A.
*
* Vector-vector products:
* - \f$ A \leftarrow \alpha X Y^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha specifies the scalar alpha.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSger() function otherwise.
*
* @ingroup GER
*/
clblasStatus clblasDger(
    clblasOrder order, size_t M, size_t N,
    cl_double alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup GERU GERU - General matrix rank 1 operation
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Vector-vector product with float complex elements that
* performs a rank 1 update of matrix \b A using the unconjugated transpose of \b Y.
*
* Vector-vector products:
* - \f$ A \leftarrow \alpha X Y^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha specifies the scalar alpha.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b M or \b N is zero, or
* - either \b incx or \b incy is zero, or
* - a leading dimension is invalid;
* - \b clblasInvalidMemObject if A, X, or Y object is invalid,
* or an image object rather than the buffer one;
* - \b clblasOutOfResources if you use image-based function implementation
* and no suitable scratch image available;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup GERU
*/
clblasStatus clblasCgeru(
    clblasOrder order, size_t M, size_t N,
    cl_float2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Vector-vector product with double complex elements that
* performs a rank 1 update of matrix \b A using the unconjugated transpose of \b Y.
*
* Vector-vector products:
* - \f$ A \leftarrow \alpha X Y^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha specifies the scalar alpha.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasCgeru() function otherwise.
*
* @ingroup GERU
*/
clblasStatus clblasZgeru(
    clblasOrder order, size_t M, size_t N,
    cl_double2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup GERC GERC - General matrix rank 1 operation
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Vector-vector product with float complex elements that
* performs a rank 1 update of matrix \b A using the conjugate transpose of \b Y.
*
* Vector-vector products:
* - \f$ A \leftarrow \alpha X Y^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha specifies the scalar alpha.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b M or \b N is zero, or
* - either \b incx or \b incy is zero, or
* - a leading dimension is invalid;
* - \b clblasInvalidMemObject if A, X, or Y object is invalid,
* or an image object rather than the buffer one;
* - \b clblasOutOfResources if you use image-based function implementation
* and no suitable scratch image available;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup GERC
*/
clblasStatus clblasCgerc(
    clblasOrder order, size_t M, size_t N,
    cl_float2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Vector-vector product with double complex elements that
* performs a rank 1 update of matrix \b A using the conjugate transpose of \b Y.
*
* Vector-vector products:
* - \f$ A \leftarrow \alpha X Y^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha specifies the scalar alpha.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A. On exit, A is
* overwritten by the updated matrix.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasCgerc() function otherwise.
*
* @ingroup GERC
*/
clblasStatus clblasZgerc(
    clblasOrder order, size_t M, size_t N,
    cl_double2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup SYR SYR - Symmetric rank 1 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* symmetric rank 1 update operations.
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Symmetric rank 1 operation with a general triangular matrix and
* float elements.
*
* Symmetric rank 1 operation:
* - \f$ A \leftarrow \alpha x x^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 1 update term (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset of first element of matrix \b A in buffer object.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either the \b A or \b X object is
* invalid, or is an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SYR
*/
clblasStatus clblasSsyr(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_float alpha,
    const cl_mem X, size_t offx, int incx,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Symmetric rank 1 operation with a general triangular matrix and
* double elements.
*
* Symmetric rank 1 operation:
* - \f$ A \leftarrow \alpha x x^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 1 update term (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset of first element of matrix \b A in buffer object.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSsyr() function otherwise.
*
* @ingroup SYR
*/
clblasStatus clblasDsyr(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_double alpha,
    const cl_mem X, size_t offx, int incx,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup HER HER - Hermitian rank 1 operation
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* Hermitian rank 1 operations.
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Hermitian rank 1 operation with a general triangular matrix and
* float-complex elements.
*
* Hermitian rank 1 operation:
* - \f$ A \leftarrow \alpha X X^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 1 update term (a real float value).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either the \b A or \b X object is
* invalid, or is an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HER
*/
clblasStatus clblasCher(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_float alpha,
    const cl_mem X, size_t offx, int incx,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_cher.c
* Example of how to use the @ref clblasCher function.
*/
/**
* @brief Hermitian rank 1 operation with a general triangular matrix and
* double-complex elements.
*
* Hermitian rank 1 operation:
* - \f$ A \leftarrow \alpha X X^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 1 update term (a real double value).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasCher() function otherwise.
*
* @ingroup HER
*/
clblasStatus clblasZher(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_double alpha,
    const cl_mem X, size_t offx, int incx,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup SYR2 SYR2 - Symmetric rank 2 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* symmetric rank 2 update operations.
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Symmetric rank 2 operation with a general triangular matrix and
* float elements.
*
* Symmetric rank 2 operation:
* - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 2 update terms (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset of first element of matrix \b A in buffer object.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is
* invalid, or is an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SYR2
*/
clblasStatus clblasSsyr2(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_float alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Symmetric rank 2 operation with a general triangular matrix and
* double elements.
*
* Symmetric rank 2 operation:
* - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 2 update terms (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset of first element of matrix \b A in buffer object.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is
* invalid, or is an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SYR2
*/
clblasStatus clblasDsyr2(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_double alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup HER2 HER2 - Hermitian rank 2 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* hermitian rank 2 update operations.
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Hermitian rank 2 operation with a general triangular matrix and
* float-complex elements.
*
* Hermitian rank 2 operation:
* - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 2 update terms (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is
* invalid, or is an image object rather than a buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HER2
*/
clblasStatus clblasCher2(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_float2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Hermitian rank 2 operation with a general triangular matrix and
* double-complex elements.
*
* Hermitian rank 2 operation:
* - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar factor applied to the rank 2 update terms (not to \b A itself).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for the first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] Y Buffer object storing vector \b Y.
* @param[in] offy Offset in number of elements for the first element in vector \b Y.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
* @param[out] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasCher2() function otherwise.
*
* @ingroup HER2
*/
clblasStatus clblasZher2(
    clblasOrder order, clblasUplo uplo, size_t N,
    cl_double2 alpha,
    const cl_mem X, size_t offx, int incx,
    const cl_mem Y, size_t offy, int incy,
    cl_mem A, size_t offa, size_t lda,
    cl_uint numCommandQueues, cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList, const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_zher2.c
* Example of how to use the @ref clblasZher2 function.
*/
/*@}*/
/**
* @defgroup TPMV TPMV - Triangular packed matrix-vector multiply
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a packed triangular matrix and
* float elements.
*
* Matrix-vector products (\b A denotes the triangular matrix that is stored
* in packed format in \b AP):
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b AP is to be transposed.
* @param[in] diag Specify whether matrix \b AP is unit triangular.
* @param[in] N Number of rows/columns in matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in,out] X Buffer object storing vector \b X. It is both the operand
* and the destination of the product (see the formulas above).
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero;
* - \b clblasInvalidMemObject if either \b AP or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TPMV
*/
clblasStatus
clblasStpmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem AP,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_stpmv.c
* Example of how to use the @ref clblasStpmv function.
*/
/**
* @brief Matrix-vector product with a packed triangular matrix and
* double elements.
*
* Matrix-vector products (\b A denotes the triangular matrix that is stored
* in packed format in \b AP):
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b AP is to be transposed.
* @param[in] diag Specify whether matrix \b AP is unit triangular.
* @param[in] N Number of rows/columns in matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in,out] X Buffer object storing vector \b X. It is both the operand
* and the destination of the product (see the formulas above).
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasStpmv() function otherwise.
*
* @ingroup TPMV
*/
clblasStatus
clblasDtpmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem AP,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a packed triangular matrix and
* float-complex elements.
*
* Matrix-vector products (\b A denotes the triangular matrix that is stored
* in packed format in \b AP):
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b AP is to be transposed.
* @param[in] diag Specify whether matrix \b AP is unit triangular.
* @param[in] N Number of rows/columns in matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in,out] X Buffer object storing vector \b X. It is both the operand
* and the destination of the product (see the formulas above).
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasStpmv() function.
* @ingroup TPMV
*/
clblasStatus
clblasCtpmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem AP,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-vector product with a packed triangular matrix and
* double-complex elements.
*
* Matrix-vector products (\b A denotes the triangular matrix that is stored
* in packed format in \b AP):
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b AP is to be transposed.
* @param[in] diag Specify whether matrix \b AP is unit triangular.
* @param[in] N Number of rows/columns in matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in,out] X Buffer object storing vector \b X. It is both the operand
* and the destination of the product (see the formulas above).
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDtpmv() function.
* @ingroup TPMV
*/
clblasStatus
clblasZtpmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem AP,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup TPSV TPSV - Triangular packed matrix vector solve
* @ingroup BLAS2
*/
/*@{*/
/*
* NOTE(review): the return list below mentions an invalid leading dimension,
* but this prototype takes no leading-dimension argument. Confirm whether
* that condition can occur for the packed format.
*/
/**
* @brief Solve a triangular packed matrix problem with float elements.
*
* Triangular systems being solved:
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in,out] X Buffer object storing vector \b X. It appears on both
* sides of the formulas above.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TPSV
*/
clblasStatus
clblasStpsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_stpsv.c
* Example of how to use the @ref clblasStpsv function.
*/
/*
* NOTE(review): the return list below mentions an invalid leading dimension,
* but this prototype takes no leading-dimension argument. It also lists no
* \b clblasInvalidDevice case for devices without double precision support.
* Confirm both against the implementation.
*/
/**
* @brief Solve a triangular packed matrix problem with double elements.
*
* Triangular systems being solved:
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in,out] X Buffer object storing vector \b X. It appears on both
* sides of the formulas above.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TPSV
*/
clblasStatus
clblasDtpsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*
* NOTE(review): the return list below mentions an invalid leading dimension,
* but this prototype takes no leading-dimension argument. Confirm whether
* that condition can occur for the packed format.
*/
/**
* @brief Solve a triangular packed matrix problem with float-complex elements.
*
* Triangular systems being solved:
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in,out] X Buffer object storing vector \b X. It appears on both
* sides of the formulas above.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TPSV
*/
clblasStatus
clblasCtpsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*
* NOTE(review): the return list below mentions an invalid leading dimension,
* but this prototype takes no leading-dimension argument. It also lists no
* \b clblasInvalidDevice case for devices without double precision support.
* Confirm both against the implementation.
*/
/**
* @brief Solve a triangular packed matrix problem with double-complex elements.
*
* Triangular systems being solved:
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in matrix \b A.
* @param[in] A Buffer object storing matrix \b A in packed format.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in,out] X Buffer object storing vector \b X. It appears on both
* sides of the formulas above.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TPSV
*/
clblasStatus
clblasZtpsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
const cl_mem A,
size_t offa,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SPMV SPMV - Symmetric packed matrix vector multiply
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a symmetric packed-matrix and float elements.
*
* Matrix-vector products (\b A denotes the matrix stored in \b AP):
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b AP.
* @param[in] alpha The factor of matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is both an operand
* and the destination of the product (see the formula above).
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the matrix sizes or the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SPMV
*/
clblasStatus
clblasSspmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_float alpha,
const cl_mem AP,
size_t offa,
const cl_mem X,
size_t offx,
int incx,
cl_float beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sspmv.c
* This is an example of how to use the @ref clblasSspmv function.
*/
/**
* @brief Matrix-vector product with a symmetric packed-matrix and double elements.
*
* Matrix-vector products (\b A denotes the matrix stored in \b AP):
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b AP.
* @param[in] alpha The factor of matrix \b AP.
* @param[in] AP Buffer object storing matrix \b AP.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is both an operand
* and the destination of the product (see the formula above).
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSspmv() function otherwise.
*
* @ingroup SPMV
*/
clblasStatus
clblasDspmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_double alpha,
const cl_mem AP,
size_t offa,
const cl_mem X,
size_t offx,
int incx,
cl_double beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HPMV HPMV - Hermitian packed matrix-vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a packed Hermitian matrix and float-complex elements.
*
* Matrix-vector products (\b A denotes the matrix stored in \b AP):
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b AP.
* @param[in] alpha The factor of matrix \b AP.
* @param[in] AP Buffer object storing packed matrix \b AP.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is both an operand
* and the destination of the product (see the formula above).
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - either \b incx or \b incy is zero, or
* - the matrix sizes or the vector sizes along with the increments lead to
* accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HPMV
*/
clblasStatus
clblasChpmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_float2 alpha,
const cl_mem AP,
size_t offa,
const cl_mem X,
size_t offx,
int incx,
cl_float2 beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_chpmv.c
* This is an example of how to use the @ref clblasChpmv function.
*/
/**
* @brief Matrix-vector product with a packed Hermitian matrix and double-complex elements.
*
* Matrix-vector products (\b A denotes the matrix stored in \b AP):
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of rows and columns in matrix \b AP.
* @param[in] alpha The factor of matrix \b AP.
* @param[in] AP Buffer object storing packed matrix \b AP.
* @param[in] offa Offset in number of elements for first element in matrix \b AP.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is both an operand
* and the destination of the product (see the formula above).
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasChpmv() function otherwise.
*
* @ingroup HPMV
*/
clblasStatus
clblasZhpmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_double2 alpha,
const cl_mem AP,
size_t offa,
const cl_mem X,
size_t offx,
int incx,
cl_double2 beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SPR SPR - Symmetric packed matrix rank 1 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* symmetric rank 1 update operations on a packed matrix.
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Symmetric rank 1 operation with a general triangular packed-matrix and
* float elements.
*
* Symmetric rank 1 operation (\b A denotes the matrix stored in packed form
* in \b AP):
* - \f$ A \leftarrow \alpha X X^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar \f$ \alpha \f$ applied to the rank 1 update
* term in the formula above.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] AP Buffer object storing packed-matrix \b AP. It is both an
* operand and the destination of the update (see the
* formula above).
* @param[in] offa Offset of first element of matrix \b AP in buffer object.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b N is zero, or
* - \b incx is zero;
* - \b clblasInvalidMemObject if either \b AP or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SPR
*/
clblasStatus
clblasSspr(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_float alpha,
const cl_mem X,
size_t offx,
int incx,
cl_mem AP,
size_t offa,
cl_uint numCommandQueues,
cl_command_queue* commandQueues,
cl_uint numEventsInWaitList,
const cl_event* eventWaitList,
cl_event* events);
/**
* @example example_sspr.c
* Example of how to use the @ref clblasSspr function.
*/
/**
* @brief Symmetric rank 1 operation with a general triangular packed-matrix and
* double elements.
*
* Symmetric rank 1 operation (\b A denotes the matrix stored in packed form
* in \b AP):
* - \f$ A \leftarrow \alpha X X^T + A \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle of the matrix being referenced.
* @param[in] N Number of columns in matrix \b A.
* @param[in] alpha The scalar \f$ \alpha \f$ applied to the rank 1 update
* term in the formula above.
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in,out] AP Buffer object storing packed-matrix \b AP. It is both an
* operand and the destination of the update (see the
* formula above).
* @param[in] offa Offset of first element of matrix \b AP in buffer object.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSspr() function otherwise.
*
* @ingroup SPR
*/
clblasStatus
clblasDspr(
clblasOrder order,
clblasUplo uplo,
size_t N,
cl_double alpha,
const cl_mem X,
size_t offx,
int incx,
cl_mem AP,
size_t offa,
cl_uint numCommandQueues,
cl_command_queue* commandQueues,
cl_uint numEventsInWaitList,
const cl_event* eventWaitList,
cl_event* events);
/*@}*/
/**
* @defgroup HPR HPR - Hermitian packed matrix rank 1 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* Hermitian rank 1 update operations on packed matrices
* @ingroup BLAS2
*/
/*@{*/
/**
 * @brief Hermitian rank 1 update of a triangular packed matrix with
 *        float-complex elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X X^H + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X X^H \f$ term (a scalar float value).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for the first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing matrix \b AP.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in matrix \b AP.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - \b N is zero, or
 *     - \b incx is zero;
 *   - \b clblasInvalidMemObject if either the \b AP or \b X object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup HPR
 */
clblasStatus
clblasChpr(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_float alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_chpr.c
* Example of how to use the @ref clblasChpr function.
*/
/**
 * @brief Hermitian rank 1 update of a triangular packed matrix with
 *        double-complex elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X X^H + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X X^H \f$ term (a scalar double value).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for the first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing matrix \b AP.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in matrix \b AP.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasChpr() function otherwise.
 *
 * @ingroup HPR
 */
clblasStatus
clblasZhpr(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_double alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup SPR2 SPR2 - Symmetric packed matrix rank 2 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* symmetric rank 2 update operations on packed matrices
* @ingroup BLAS2
*/
/*@{*/
/**
 * @brief Symmetric rank 2 update of a triangular packed matrix with
 *        float elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X Y^T \f$ and \f$ Y X^T \f$ terms.
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] Y                    Buffer object storing vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing packed-matrix \b AP.
 * @param[in] offa                 Offset of first element of matrix \b AP in
 *                                 buffer object.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - \b N is zero, or
 *     - either \b incx or \b incy is zero;
 *   - \b clblasInvalidMemObject if either the \b AP, \b X, or \b Y object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup SPR2
 */
clblasStatus
clblasSspr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_float alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_sspr2.c
* Example of how to use the @ref clblasSspr2 function.
*/
/**
 * @brief Symmetric rank 2 update of a triangular packed matrix with
 *        double elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X Y^T \f$ and \f$ Y X^T \f$ terms.
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] Y                    Buffer object storing vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing packed-matrix \b AP.
 * @param[in] offa                 Offset of first element of matrix \b AP in
 *                                 buffer object.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasSspr2() function otherwise.
 *
 * @ingroup SPR2
 */
clblasStatus
clblasDspr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_double alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup HPR2 HPR2 - Hermitian packed matrix rank 2 update
*
* The Level 2 Basic Linear Algebra Subprograms are functions that perform
* Hermitian rank 2 update operations on packed matrices
* @ingroup BLAS2
*/
/*@{*/
/**
 * @brief Hermitian rank 2 update of a triangular packed matrix with
 *        float-complex elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X Y^H \f$ term; its conjugate scales
 *                                 the \f$ Y X^H \f$ term.
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for the first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] Y                    Buffer object storing vector \b Y.
 * @param[in] offy                 Offset in number of elements for the first
 *                                 element in vector \b Y.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing packed-matrix \b AP.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in matrix \b AP.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - \b N is zero, or
 *     - either \b incx or \b incy is zero;
 *   - \b clblasInvalidMemObject if either the \b AP, \b X, or \b Y object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup HPR2
 */
clblasStatus
clblasChpr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_float2 alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @brief Hermitian rank 2 update of a triangular packed matrix with
 *        double-complex elements.
 *
 * Operation performed:
 *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] N                    Number of columns in matrix \b A.
 * @param[in] alpha                Scalar \f$ \alpha \f$ that scales the
 *                                 \f$ X Y^H \f$ term; its conjugate scales
 *                                 the \f$ Y X^H \f$ term.
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for the first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] Y                    Buffer object storing vector \b Y.
 * @param[in] offy                 Offset in number of elements for the first
 *                                 element in vector \b Y.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in,out] AP               Buffer object storing packed-matrix \b AP.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in matrix \b AP.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasChpr2() function otherwise.
 *
 * @ingroup HPR2
 */
clblasStatus
clblasZhpr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_double2 alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_zhpr2.c
* Example of how to use the @ref clblasZhpr2 function.
*/
/*@}*/
/**
* @defgroup GBMV GBMV - General banded matrix-vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
 * @brief Product of a general rectangular banded matrix and a vector,
 *        float elements.
 *
 * Operations performed:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] M                    Number of rows in banded matrix \b A.
 * @param[in] N                    Number of columns in banded matrix \b A.
 * @param[in] KL                   Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU                   Number of super-diagonals in banded matrix
 *                                 \b A.
 * @param[in] alpha                The factor of banded matrix \b A.
 * @param[in] A                    Buffer object storing banded matrix \b A.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in banded matrix \b A.
 * @param[in] lda                  Leading dimension of banded matrix \b A.
 *                                 It cannot be less than
 *                                 ( \b KL + \b KU + 1 ).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object. Counted in elements.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] beta                 The factor of the vector \b Y.
 * @param[in,out] Y                Buffer object storing the vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object. Counted in elements.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b M or \b N is zero, or
 *     - \b KL is greater than \b M - 1, or
 *     - \b KU is greater than \b N - 1, or
 *     - either \b incx or \b incy is zero, or
 *     - any of the leading dimensions is invalid, or
 *     - the matrix size or the vector sizes along with the increments lead to
 *       accessing outside of any of the buffers;
 *   - \b clblasInvalidMemObject if either the \b A, \b X, or \b Y object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasSgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_float alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_float beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_sgbmv.c
* Example of how to use the @ref clblasSgbmv function.
*/
/**
 * @brief Product of a general rectangular banded matrix and a vector,
 *        double elements.
 *
 * Operations performed:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] M                    Number of rows in banded matrix \b A.
 * @param[in] N                    Number of columns in banded matrix \b A.
 * @param[in] KL                   Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU                   Number of super-diagonals in banded matrix
 *                                 \b A.
 * @param[in] alpha                The factor of banded matrix \b A.
 * @param[in] A                    Buffer object storing banded matrix \b A.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in banded matrix \b A.
 * @param[in] lda                  Leading dimension of banded matrix \b A.
 *                                 It cannot be less than
 *                                 ( \b KL + \b KU + 1 ).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object. Counted in elements.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] beta                 The factor of the vector \b Y.
 * @param[in,out] Y                Buffer object storing the vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object. Counted in elements.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasSgbmv() function otherwise.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasDgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_double alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_double beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @brief Product of a general rectangular banded matrix and a vector,
 *        float-complex elements.
 *
 * Operations performed:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] M                    Number of rows in banded matrix \b A.
 * @param[in] N                    Number of columns in banded matrix \b A.
 * @param[in] KL                   Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU                   Number of super-diagonals in banded matrix
 *                                 \b A.
 * @param[in] alpha                The factor of banded matrix \b A.
 * @param[in] A                    Buffer object storing banded matrix \b A.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in banded matrix \b A.
 * @param[in] lda                  Leading dimension of banded matrix \b A.
 *                                 It cannot be less than
 *                                 ( \b KL + \b KU + 1 ).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object. Counted in elements.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] beta                 The factor of the vector \b Y.
 * @param[in,out] Y                Buffer object storing the vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object. Counted in elements.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return The same result as the clblasSgbmv() function.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasCgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_float2 alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_float2 beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @brief Product of a general rectangular banded matrix and a vector,
 *        double-complex elements.
 *
 * Operations performed:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] M                    Number of rows in banded matrix \b A.
 * @param[in] N                    Number of columns in banded matrix \b A.
 * @param[in] KL                   Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU                   Number of super-diagonals in banded matrix
 *                                 \b A.
 * @param[in] alpha                The factor of banded matrix \b A.
 * @param[in] A                    Buffer object storing banded matrix \b A.
 * @param[in] offa                 Offset in number of elements for the first
 *                                 element in banded matrix \b A.
 * @param[in] lda                  Leading dimension of banded matrix \b A.
 *                                 It cannot be less than
 *                                 ( \b KL + \b KU + 1 ).
 * @param[in] X                    Buffer object storing vector \b X.
 * @param[in] offx                 Offset of first element of vector \b X in
 *                                 buffer object. Counted in elements.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] beta                 The factor of the vector \b Y.
 * @param[in,out] Y                Buffer object storing the vector \b Y.
 * @param[in] offy                 Offset of first element of vector \b Y in
 *                                 buffer object. Counted in elements.
 * @param[in] incy                 Increment for the elements of \b Y.
 *                                 Must not be zero.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return The same result as the clblasDgbmv() function.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasZgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_double2 alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_double2 beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/
/**
* @defgroup TBMV TBMV - Triangular banded matrix vector multiply
* @ingroup BLAS2
*/
/*@{*/
/**
 * @brief Product of a triangular banded matrix and a vector, float elements.
 *
 * Operations performed:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] diag                 Specify whether matrix \b A is unit
 *                                 triangular.
 * @param[in] N                    Number of rows/columns in banded matrix \b A.
 * @param[in] K                    Number of sub-diagonals/super-diagonals in
 *                                 triangular banded matrix \b A.
 * @param[in] A                    Buffer object storing matrix \b A.
 * @param[in] offa                 Offset in number of elements for first
 *                                 element in matrix \b A.
 * @param[in] lda                  Leading dimension of matrix \b A. It cannot
 *                                 be less than ( \b K + 1 ).
 * @param[in,out] X                Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] scratchBuff          Temporary cl_mem scratch buffer object which
 *                                 can hold a minimum of
 *                                 (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b N or \b incx is zero, or
 *     - \b K is greater than \b N - 1, or
 *     - the leading dimension is invalid;
 *   - \b clblasInvalidMemObject if either the \b A or \b X object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasStbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @example example_stbmv.c
* Example of how to use the @ref clblasStbmv function.
*/
/**
 * @brief Product of a triangular banded matrix and a vector, double elements.
 *
 * Operations performed:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] diag                 Specify whether matrix \b A is unit
 *                                 triangular.
 * @param[in] N                    Number of rows/columns in banded matrix \b A.
 * @param[in] K                    Number of sub-diagonals/super-diagonals in
 *                                 triangular banded matrix \b A.
 * @param[in] A                    Buffer object storing matrix \b A.
 * @param[in] offa                 Offset in number of elements for first
 *                                 element in matrix \b A.
 * @param[in] lda                  Leading dimension of matrix \b A. It cannot
 *                                 be less than ( \b K + 1 ).
 * @param[in,out] X                Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] scratchBuff          Temporary cl_mem scratch buffer object which
 *                                 can hold a minimum of
 *                                 (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasStbmv() function otherwise.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasDtbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @brief Product of a triangular banded matrix and a vector,
 *        float-complex elements.
 *
 * Operations performed:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order                Row/column order.
 * @param[in] uplo                 The triangle in matrix being referenced.
 * @param[in] trans                How matrix \b A is to be transposed.
 * @param[in] diag                 Specify whether matrix \b A is unit
 *                                 triangular.
 * @param[in] N                    Number of rows/columns in banded matrix \b A.
 * @param[in] K                    Number of sub-diagonals/super-diagonals in
 *                                 triangular banded matrix \b A.
 * @param[in] A                    Buffer object storing matrix \b A.
 * @param[in] offa                 Offset in number of elements for first
 *                                 element in matrix \b A.
 * @param[in] lda                  Leading dimension of matrix \b A. It cannot
 *                                 be less than ( \b K + 1 ).
 * @param[in,out] X                Buffer object storing vector \b X.
 * @param[in] offx                 Offset in number of elements for first
 *                                 element in vector \b X.
 * @param[in] incx                 Increment for the elements of \b X.
 *                                 Must not be zero.
 * @param[in] scratchBuff          Temporary cl_mem scratch buffer object which
 *                                 can hold a minimum of
 *                                 (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues     Number of OpenCL command queues in which the
 *                                 task is to be performed.
 * @param[in] commandQueues        OpenCL command queues.
 * @param[in] numEventsInWaitList  Number of events in the event wait list.
 * @param[in] eventWaitList        Event wait list.
 * @param[in] events               Event objects per each command queue that
 *                                 identify a particular kernel execution
 *                                 instance.
 *
 * @return The same result as the clblasStbmv() function.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasCtbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
* @brief Matrix-vector product with a triangular banded matrix and
* double-complex elements.
*
* Matrix-vector products:
* - \f$ X \leftarrow A X \f$
* - \f$ X \leftarrow A^T X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in,out] X Buffer object storing vector \b X. It supplies the vector
* to be multiplied and is overwritten with the product.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
* minimum of (1 + (N-1)*abs(incx)) elements.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDtbmv() function.
*
* @ingroup TBMV
*/
clblasStatus
clblasZtbmv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
size_t K,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_mem scratchBuff,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SBMV SBMV - Symmetric banded matrix-vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a symmetric banded matrix and float elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - \b K is greater than \b N - 1, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SBMV
*/
clblasStatus
clblasSsbmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
size_t K,
cl_float alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
cl_float beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_ssbmv.c
* This is an example of how to use the @ref clblasSsbmv function.
*/
/**
* @brief Matrix-vector product with a symmetric banded matrix and double elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSsbmv() function otherwise.
*
* @ingroup SBMV
*/
clblasStatus
clblasDsbmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
size_t K,
cl_double alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
cl_double beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HBMV HBMV - Hermitian banded matrix-vector multiplication
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Matrix-vector product with a Hermitian banded matrix and
* float-complex elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - \b K is greater than \b N - 1, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HBMV
*/
clblasStatus
clblasChbmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
size_t K,
cl_float2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
cl_float2 beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_chbmv.c
* This is an example of how to use the @ref clblasChbmv function.
*/
/**
* @brief Matrix-vector product with a Hermitian banded matrix and
* double-complex elements.
*
* Matrix-vector products:
* - \f$ Y \leftarrow \alpha A X + \beta Y \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in] X Buffer object storing vector \b X.
* @param[in] offx Offset of first element of vector \b X in buffer object.
* Counted in elements.
* @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
* @param[in] beta The factor of vector \b Y.
* @param[in,out] Y Buffer object storing vector \b Y. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offy Offset of first element of vector \b Y in buffer object.
* Counted in elements.
* @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasChbmv() function otherwise.
*
* @ingroup HBMV
*/
clblasStatus
clblasZhbmv(
clblasOrder order,
clblasUplo uplo,
size_t N,
size_t K,
cl_double2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem X,
size_t offx,
int incx,
cl_double2 beta,
cl_mem Y,
size_t offy,
int incy,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup TBSV TBSV - Solving triangular banded matrix
* @ingroup BLAS2
*/
/*@{*/
/**
* @brief Solving triangular banded matrix problems with float elements.
*
* Problems solved (vector \b X appears on both sides, so it is both read
* and overwritten):
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b incx is zero, or
* - \b K is greater than \b N - 1, or
* - the leading dimension is invalid;
* - \b clblasInvalidMemObject if either \b A or \b X object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs
* to was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup TBSV
*/
clblasStatus
clblasStbsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
size_t K,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_stbsv.c
* This is an example of how to use the @ref clblasStbsv function.
*/
/**
* @brief Solving triangular banded matrix problems with double elements.
*
* Problems solved (vector \b X appears on both sides, so it is both read
* and overwritten):
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasStbsv() function otherwise.
*
* @ingroup TBSV
*/
clblasStatus
clblasDtbsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
size_t K,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solving triangular banded matrix problems with float-complex elements.
*
* Problems solved (vector \b X appears on both sides, so it is both read
* and overwritten):
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasStbsv() function.
*
* @ingroup TBSV
*/
clblasStatus
clblasCtbsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
size_t K,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solving triangular banded matrix problems with double-complex elements.
*
* Problems solved (vector \b X appears on both sides, so it is both read
* and overwritten):
* - \f$ A X \leftarrow X \f$
* - \f$ A^T X \leftarrow X \f$
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] trans How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix \b A is unit triangular.
* @param[in] N Number of rows/columns in banded matrix \b A.
* @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than ( \b K + 1 ).
* @param[in,out] X Buffer object storing vector \b X.
* @param[in] offx Offset in number of elements for first element in vector \b X.
* @param[in] incx Increment for the elements of \b X. Must not be zero.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDtbsv() function.
*
* @ingroup TBSV
*/
clblasStatus
clblasZtbsv(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
clblasDiag diag,
size_t N,
size_t K,
const cl_mem A,
size_t offa,
size_t lda,
cl_mem X,
size_t offx,
int incx,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup BLAS3 BLAS-3 functions
*
* The Level 3 Basic Linear Algebra Subprograms are functions that perform
* matrix-matrix operations.
*/
/*@{*/
/*@}*/
/**
* @defgroup GEMM GEMM - General matrix-matrix multiplication
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Matrix-matrix product of general rectangular matrices with float
* elements. Extended version.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \beta C \f$
* - \f$ C \leftarrow \alpha A B^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] transB How matrix \b B is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b B.
* @param[in] K Number of columns in matrix \b A and rows in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b K when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when the
* parameter is set to \b clblasColumnMajor.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b K
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as clblasSgemm() otherwise.
*
* @todo The last item of the return list refers back to this very function;
* the remaining error codes should be enumerated explicitly here.
*
* @ingroup GEMM
*/
clblasStatus
clblasSgemm(
clblasOrder order,
clblasTranspose transA,
clblasTranspose transB,
size_t M,
size_t N,
size_t K,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
cl_float beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_sgemm.c
* This is an example of how to use the @ref clblasSgemm function.
*/
/**
* @brief Matrix-matrix product of general rectangular matrices with double
* elements. Extended version.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \beta C \f$
* - \f$ C \leftarrow \alpha A B^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] transB How matrix \b B is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b B.
* @param[in] K Number of columns in matrix \b A and rows in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed description,
* see clblasSgemm().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed description,
* see clblasSgemm().
* @param[in] beta The factor of matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. For detailed description,
* see clblasSgemm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as the clblasSgemm() function otherwise.
*
* @ingroup GEMM
*/
clblasStatus
clblasDgemm(
clblasOrder order,
clblasTranspose transA,
clblasTranspose transB,
size_t M,
size_t N,
size_t K,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
cl_double beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-matrix product of general rectangular matrices with
* float-complex elements. Extended version.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \beta C \f$
* - \f$ C \leftarrow \alpha A B^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] transB How matrix \b B is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b B.
* @param[in] K Number of columns in matrix \b A and rows in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed description,
* see clblasSgemm().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed description,
* see clblasSgemm().
* @param[in] beta The factor of matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. For detailed description,
* see clblasSgemm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as the clblasSgemm() function otherwise.
*
* @ingroup GEMM
*/
clblasStatus
clblasCgemm(
clblasOrder order,
clblasTranspose transA,
clblasTranspose transB,
size_t M,
size_t N,
size_t K,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
FloatComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-matrix product of general rectangular matrices with
* double-complex elements. Extended version.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \beta C \f$
* - \f$ C \leftarrow \alpha A B^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] transB How matrix \b B is to be transposed.
* @param[in] M Number of rows in matrix \b A.
* @param[in] N Number of columns in matrix \b B.
* @param[in] K Number of columns in matrix \b A and rows in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed description,
* see clblasSgemm().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed description,
* see clblasSgemm().
* @param[in] beta The factor of matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C. It is scaled by
* \b beta and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. For detailed description,
* see clblasSgemm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as the clblasSgemm() function otherwise.
*
* @ingroup GEMM
*/
clblasStatus
clblasZgemm(
clblasOrder order,
clblasTranspose transA,
clblasTranspose transB,
size_t M,
size_t N,
size_t K,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
DoubleComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup TRMM TRMM - Triangular matrix-matrix multiplication
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Multiplying a matrix by a triangular matrix with float elements.
* Extended version.
*
* Matrix-triangular matrix products:
* - \f$ B \leftarrow \alpha A B \f$
* - \f$ B \leftarrow \alpha A^T B \f$
* - \f$ B \leftarrow \alpha B A \f$
* - \f$ B \leftarrow \alpha B A^T \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when it is set
* to \b clblasRight.
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as clblasStrmm() otherwise.
*
* @ingroup TRMM
*/
clblasStatus
clblasStrmm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_strmm.c
* This is an example of how to use the @ref clblasStrmm function.
*/
/**
* @brief Multiplying a matrix by a triangular matrix with double elements.
* Extended version.
*
* Matrix-triangular matrix products:
* - \f$ B \leftarrow \alpha A B \f$
* - \f$ B \leftarrow \alpha A^T B \f$
* - \f$ B \leftarrow \alpha B A \f$
* - \f$ B \leftarrow \alpha B A^T \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrmm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrmm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasStrmm() function otherwise.
*
* @ingroup TRMM
*/
clblasStatus
clblasDtrmm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Multiplying a matrix by a triangular matrix with float complex
* elements. Extended version.
*
* Matrix-triangular matrix products:
* - \f$ B \leftarrow \alpha A B \f$
* - \f$ B \leftarrow \alpha A^T B \f$
* - \f$ B \leftarrow \alpha B A \f$
* - \f$ B \leftarrow \alpha B A^T \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrmm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrmm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as clblasStrmm() otherwise.
*
* @ingroup TRMM
*/
clblasStatus
clblasCtrmm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Multiplying a matrix by a triangular matrix with double complex
* elements. Extended version.
*
* Matrix-triangular matrix products:
* - \f$ B \leftarrow \alpha A B \f$
* - \f$ B \leftarrow \alpha A^T B \f$
* - \f$ B \leftarrow \alpha B A \f$
* - \f$ B \leftarrow \alpha B A^T \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrmm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrmm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasStrmm() function otherwise.
*
* @ingroup TRMM
*/
clblasStatus
clblasZtrmm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup TRSM TRSM - Solving triangular systems of equations
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Solving triangular systems of equations with multiple right-hand
* sides and float elements. Extended version.
*
* Solving triangular systems of equations:
* - \f$ B \leftarrow \alpha A^{-1} B \f$
* - \f$ B \leftarrow \alpha A^{-T} B \f$
* - \f$ B \leftarrow \alpha B A^{-1} \f$
* - \f$ B \leftarrow \alpha B A^{-T} \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N
* when it is set to \b clblasRight.
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as clblasStrsm() otherwise.
*
* @ingroup TRSM
*/
clblasStatus
clblasStrsm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_strsm.c
* This is an example of how to use the @ref clblasStrsm function.
*/
/**
* @brief Solving triangular systems of equations with multiple right-hand
* sides and double elements. Extended version.
*
* Solving triangular systems of equations:
* - \f$ B \leftarrow \alpha A^{-1} B \f$
* - \f$ B \leftarrow \alpha A^{-T} B \f$
* - \f$ B \leftarrow \alpha B A^{-1} \f$
* - \f$ B \leftarrow \alpha B A^{-T} \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrsm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrsm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasStrsm() function otherwise.
*
* @ingroup TRSM
*/
clblasStatus
clblasDtrsm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solving triangular systems of equations with multiple right-hand
* sides and float complex elements. Extended version.
*
* Solving triangular systems of equations:
* - \f$ B \leftarrow \alpha A^{-1} B \f$
* - \f$ B \leftarrow \alpha A^{-T} B \f$
* - \f$ B \leftarrow \alpha B A^{-1} \f$
* - \f$ B \leftarrow \alpha B A^{-T} \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrsm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrsm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as clblasStrsm() otherwise.
*
* @ingroup TRSM
*/
clblasStatus
clblasCtrsm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Solving triangular systems of equations with multiple right-hand
* sides and double complex elements. Extended version.
*
* Solving triangular systems of equations:
* - \f$ B \leftarrow \alpha A^{-1} B \f$
* - \f$ B \leftarrow \alpha A^{-T} B \f$
* - \f$ B \leftarrow \alpha B A^{-1} \f$
* - \f$ B \leftarrow \alpha B A^{-T} \f$
*
* where \b A is an upper or lower triangular matrix.
*
* @param[in] order Row/column order.
* @param[in] side The side of triangular matrix.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] diag Specify whether matrix is unit triangular.
* @param[in] M Number of rows in matrix \b B.
* @param[in] N Number of columns in matrix \b B.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasStrsm().
* @param[in,out] B Buffer object storing matrix \b B; it is read as an
* operand and overwritten with the result.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasStrsm().
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offB exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasStrsm() function otherwise.
*
* @ingroup TRSM
*/
clblasStatus
clblasZtrsm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
clblasTranspose transA,
clblasDiag diag,
size_t M,
size_t N,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_mem B,
size_t offB,
size_t ldb,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SYRK SYRK - Symmetric rank-k update of a matrix
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Rank-k update of a symmetric matrix with float elements.
* Extended version.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be
* less than \b K if \b A is
* in the row-major format, and less than \b N
* otherwise.
* @param[in] beta The factor of the matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C; it is read as an
* operand and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasSsyrk() function otherwise.
*
* @ingroup SYRK
*/
clblasStatus
clblasSsyrk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_float beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_ssyrk.c
* This is an example of how to use the @ref clblasSsyrk function.
*/
/**
* @brief Rank-k update of a symmetric matrix with double elements.
* Extended version.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyrk().
* @param[in] beta The factor of the matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C; it is read as an
* operand and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
* of the respective buffer object;
* - the same error codes as the clblasSsyrk() function otherwise.
*
* @ingroup SYRK
*/
clblasStatus
clblasDsyrk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
cl_double beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Rank-k update of a symmetric matrix with complex float elements.
* Extended version.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyrk().
* @param[in] beta The factor of the matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C; it is read as an
* operand and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
* of the respective buffer object;
* - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans;
* - the same error codes as the clblasSsyrk() function otherwise.
*
* @ingroup SYRK
*/
clblasStatus
clblasCsyrk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
FloatComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Rank-k update of a symmetric matrix with complex double elements.
* Extended version.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyrk().
* @param[in] beta The factor of the matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C; it is read as an
* operand and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA or \b offC exceeds the size
* of the respective buffer object;
* - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans;
* - the same error codes as the clblasSsyrk() function otherwise.
*
* @ingroup SYRK
*/
clblasStatus
clblasZsyrk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
DoubleComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SYR2K SYR2K - Symmetric rank-2k update to a matrix
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Rank-2k update of a symmetric matrix with float elements.
* Extended version.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transAB How matrices \b A and \b B are to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrices \b A and \b B if they
* are not transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrices \b A and \b B.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b K if \b A is
* in the row-major format, and less than \b N
* otherwise.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b K if \b B matches to the op(\b B) matrix
* in the row-major format, and less than \b N
* otherwise.
* @param[in] beta The factor of matrix \b C.
* @param[in,out] C Buffer object storing matrix \b C; it is read as an
* operand and overwritten with the result.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as the clblasSsyr2k() function otherwise.
*
* @ingroup SYR2K
*/
clblasStatus
clblasSsyr2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transAB,
size_t N,
size_t K,
cl_float alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
cl_float beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_ssyr2k.c
* This is an example of how to use the @ref clblasSsyr2kEx function.
*/
/**
* @brief Rank-2k update of a symmetric matrix with double elements.
* Extended version.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transAB How matrices \b A and \b B are to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrices \b A and \b B if they
* are not transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrices \b A and \b B.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyr2k().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasSsyr2k().
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - the same error codes as the clblasSsyr2k() function otherwise.
*
* @ingroup SYR2K
*/
clblasStatus
clblasDsyr2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transAB,
size_t N,
size_t K,
cl_double alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
cl_double beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Rank-2k update of a symmetric matrix with complex float elements.
* Extended version.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transAB How matrices \b A and \b B are to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrices \b A and \b B if they
* are not transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrices \b A and \b B.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyr2k().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasSsyr2k().
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans;
* - the same error codes as the clblasSsyr2k() function otherwise.
*
* @ingroup SYR2K
*/
clblasStatus
clblasCsyr2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transAB,
size_t N,
size_t K,
FloatComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
FloatComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Rank-2k update of a symmetric matrix with complex double elements.
* Extended version.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$
* - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$
*
* where \b C is a symmetric matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transAB How matrices \b A and \b B are to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrices \b A and \b B if they
* are not transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrices \b A and \b B.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offA Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. For detailed
* description, see clblasSsyr2k().
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offB Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. For detailed
* description, see clblasSsyr2k().
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offC Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds
* the size of the respective buffer object;
* - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans;
* - the same error codes as the clblasSsyr2k() function otherwise.
*
* @ingroup SYR2K
*/
clblasStatus
clblasZsyr2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transAB,
size_t N,
size_t K,
DoubleComplex alpha,
const cl_mem A,
size_t offA,
size_t lda,
const cl_mem B,
size_t offB,
size_t ldb,
DoubleComplex beta,
cl_mem C,
size_t offC,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup SYMM SYMM - Symmetric matrix-matrix multiply
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Matrix-matrix product of symmetric rectangular matrices with float
* elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b M or \b N is zero, or
* - any of the leading dimensions is invalid, or
* - the matrix sizes lead to accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if A, B, or C object is invalid,
* or an image object rather than the buffer one;
* - \b clblasOutOfResources if you use image-based function implementation
* and no suitable scratch image available;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup SYMM
*/
clblasStatus
clblasSsymm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_float alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_float beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_ssymm.c
* This is an example of how to use the @ref clblasSsymm function.
*/
/**
* @brief Matrix-matrix product of symmetric rectangular matrices with double
* elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasSsymm() function otherwise.
*
* @ingroup SYMM
*/
clblasStatus
clblasDsymm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_double alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_double beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-matrix product of symmetric rectangular matrices with
* float-complex elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasSsymm() function.
*
* @ingroup SYMM
*/
clblasStatus
clblasCsymm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_float2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_float2 beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Matrix-matrix product of symmetric rectangular matrices with
* double-complex elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return The same result as the clblasDsymm() function.
*
* @ingroup SYMM
*/
clblasStatus
clblasZsymm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_double2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_double2 beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HEMM HEMM - Hermitian matrix-matrix multiplication
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Matrix-matrix product of hermitian rectangular matrices with
* float-complex elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - \b M or \b N is zero, or
* - any of the leading dimensions is invalid, or
* - the matrix sizes lead to accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if A, B, or C object is invalid,
* or an image object rather than the buffer one;
* - \b clblasOutOfResources if you use image-based function implementation
* and no suitable scratch image available;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released;
* - \b clblasInvalidOperation if kernel compilation relating to a previous
* call has not completed for any of the target devices;
* - \b clblasCompilerNotAvailable if a compiler is not available;
* - \b clblasBuildProgramFailure if there is a failure to build a program
* executable.
*
* @ingroup HEMM
*/
clblasStatus
clblasChemm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_float2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_float2 beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_chemm.cpp
* This is an example of how to use the @ref clblasChemm function.
*/
/**
* @brief Matrix-matrix product of hermitian rectangular matrices with
* double-complex elements.
*
* Matrix-matrix products:
* - \f$ C \leftarrow \alpha A B + \beta C \f$
* - \f$ C \leftarrow \alpha B A + \beta C \f$
*
* @param[in] order Row/column order.
* @param[in] side The side on which matrix \b A is applied:
* \b clblasLeft for \f$ A B \f$, \b clblasRight
* for \f$ B A \f$.
* @param[in] uplo The triangle in matrix being referenced.
* @param[in] M Number of rows in matrices \b B and \b C.
* @param[in] N Number of columns in matrices \b B and \b C.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset of the first element of the matrix \b A in the
* buffer object. Counted in elements.
* @param[in] lda Leading dimension of matrix \b A. It cannot be less
* than \b M when the \b side parameter is set to
* \b clblasLeft,\n or less than \b N when the
* parameter is set to \b clblasRight.
* @param[in] B Buffer object storing matrix \b B.
* @param[in] offb Offset of the first element of the matrix \b B in the
* buffer object. Counted in elements.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M
* when it is set to \b clblasColumnMajor.
* @param[in] beta The factor of matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset of the first element of the matrix \b C in the
* buffer object. Counted in elements.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N when the \b order parameter is set to
* \b clblasRowMajor,\n or less than \b M when
* it is set to \b clblasColumnMajor.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasChemm() function otherwise.
*
* @ingroup HEMM
*/
clblasStatus
clblasZhemm(
clblasOrder order,
clblasSide side,
clblasUplo uplo,
size_t M,
size_t N,
cl_double2 alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_double2 beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HERK HERK - Hermitian rank-k update to a matrix
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Rank-k update of a hermitian matrix with float-complex elements.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^H + \beta C \f$
* - \f$ C \leftarrow \alpha A^H A + \beta C \f$
*
* where \b C is a hermitian matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be
* less than \b K if \b A is
* in the row-major format, and less than \b N
* otherwise.
* @param[in] beta The factor of the matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset in number of elements for the first element in matrix \b C.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
* - either \b N or \b K is zero, or
* - any of the leading dimensions is invalid, or
* - the matrix sizes lead to accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b A or \b C object is
* invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
* internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
* was released.
*
* @ingroup HERK
*/
clblasStatus
clblasCherk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
float alpha,
const cl_mem A,
size_t offa,
size_t lda,
float beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_cherk.cpp
* This is an example of how to use the @ref clblasCherk function.
*/
/**
* @brief Rank-k update of a hermitian matrix with double-complex elements.
*
* Rank-k updates:
* - \f$ C \leftarrow \alpha A A^H + \beta C \f$
* - \f$ C \leftarrow \alpha A^H A + \beta C \f$
*
* where \b C is a hermitian matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] transA How matrix \b A is to be transposed.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
* transposed, and number of rows otherwise.
* @param[in] alpha The factor of matrix \b A.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be
* less than \b K if \b A is
* in the row-major format, and less than \b N
* otherwise.
* @param[in] beta The factor of the matrix \b C.
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset in number of elements for the first element in matrix \b C.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
* than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
* task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[in] events Event objects per each command queue that identify
* a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
* point arithmetic with double precision;
* - the same error codes as the clblasCherk() function otherwise.
*
* @ingroup HERK
*/
clblasStatus
clblasZherk(
clblasOrder order,
clblasUplo uplo,
clblasTranspose transA,
size_t N,
size_t K,
double alpha,
const cl_mem A,
size_t offa,
size_t lda,
double beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @defgroup HER2K HER2K - Hermitian rank-2k update to a matrix
* @ingroup BLAS3
*/
/*@{*/
/**
* @brief Rank-2k update of a hermitian matrix with float-complex elements.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$
* - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$
*
* where \b C is a hermitian matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] trans How matrix \b A is to be transposed. It selects which
*                  of the two update forms above is computed, so it
*                  applies to matrix \b B as well.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
*              transposed, and number of rows otherwise.
* @param[in] alpha The complex factor \f$ \alpha \f$ in the formulas above.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be
*                less than \b K if \b A is
*                in the row-major format, or less than \b N
*                otherwise. The two bounds are swapped in the
*                transposed case.
* @param[in] B Buffer object storing the matrix \b B.
* @param[in] offb Offset in number of elements for the first element in matrix \b B.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be
*                less than \b K if \b B is
*                in the row-major format, or less than \b N
*                otherwise. The two bounds are swapped in the
*                transposed case.
* @param[in] beta The factor of the matrix \b C (a real \c cl_float).
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset in number of elements for the first element in matrix \b C.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
*                than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
*                             task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
*                    a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasNotInitialized if clblasSetup() was not called;
* - \b clblasInvalidValue if invalid parameters are passed:
*   - either \b N or \b K is zero, or
*   - any of the leading dimensions is invalid, or
*   - the matrix sizes lead to accessing outside of any of the buffers;
* - \b clblasInvalidMemObject if either \b A , \b B or \b C object is
*   invalid, or an image object rather than the buffer one;
* - \b clblasOutOfHostMemory if the library can't allocate memory for
*   internal structures;
* - \b clblasInvalidCommandQueue if the passed command queue is invalid;
* - \b clblasInvalidContext if a context a passed command queue belongs to
*   was released.
*
* @ingroup HER2K
*/
clblasStatus
clblasCher2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
size_t N,
size_t K,
FloatComplex alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_float beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @example example_cher2k.c
* This is an example of how to use the @ref clblasCher2k function.
*/
/**
* @brief Rank-2k update of a hermitian matrix with double-complex elements.
*
* Rank-2k updates:
* - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$
* - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$
*
* where \b C is a hermitian matrix.
*
* @param[in] order Row/column order.
* @param[in] uplo The triangle in matrix \b C being referenced.
* @param[in] trans How matrix \b A is to be transposed. It selects which
*                  of the two update forms above is computed, so it
*                  applies to matrix \b B as well.
* @param[in] N Number of rows and columns in matrix \b C.
* @param[in] K Number of columns of the matrix \b A if it is not
*              transposed, and number of rows otherwise.
* @param[in] alpha The complex factor \f$ \alpha \f$ in the formulas above.
* @param[in] A Buffer object storing the matrix \b A.
* @param[in] offa Offset in number of elements for the first element in matrix \b A.
* @param[in] lda Leading dimension of matrix \b A. It cannot be
*                less than \b K if \b A is
*                in the row-major format, or less than \b N
*                otherwise. The two bounds are swapped in the
*                transposed case.
* @param[in] B Buffer object storing the matrix \b B.
* @param[in] offb Offset in number of elements for the first element in matrix \b B.
* @param[in] ldb Leading dimension of matrix \b B. It cannot be
*                less than \b K if \b B is
*                in the row-major format, or less than \b N
*                otherwise. The two bounds are swapped in the
*                transposed case.
* @param[in] beta The factor of the matrix \b C (a real \c cl_double).
* @param[out] C Buffer object storing matrix \b C.
* @param[in] offc Offset in number of elements for the first element in matrix \b C.
* @param[in] ldc Leading dimension of matrix \b C. It cannot be less
*                than \b N.
* @param[in] numCommandQueues Number of OpenCL command queues in which the
*                             task is to be performed.
* @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
* @param[in] eventWaitList Event wait list.
* @param[out] events Event objects per each command queue that identify
*                    a particular kernel execution instance.
*
* @return
* - \b clblasSuccess on success;
* - \b clblasInvalidDevice if a target device does not support floating
*   point arithmetic with double precision;
* - the same error codes as the clblasCher2k() function otherwise.
*
* @ingroup HER2K
*/
clblasStatus
clblasZher2k(
clblasOrder order,
clblasUplo uplo,
clblasTranspose trans,
size_t N,
size_t K,
DoubleComplex alpha,
const cl_mem A,
size_t offa,
size_t lda,
const cl_mem B,
size_t offb,
size_t ldb,
cl_double beta,
cl_mem C,
size_t offc,
size_t ldc,
cl_uint numCommandQueues,
cl_command_queue *commandQueues,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/*@}*/
/**
* @brief Helper function to compute leading dimension and size of a matrix
*
* @param[in] order matrix ordering
* @param[in] rows number of rows
* @param[in] columns number of columns
* @param[in] elemsize element size
* @param[in] padding additional padding on the leading dimension
* @param[out] ld if non-NULL *ld is filled with the leading dimension
*                in elements
* @param[out] fullsize if non-NULL *fullsize is filled with the byte size
*
* @return
* - \b clblasSuccess for success
* - \b clblasInvalidValue if:
*   - \b elemsize is 0
*   - \b rows and \b columns are both equal to 0
*/
clblasStatus clblasMatrixSizeInfo(
clblasOrder order,
size_t rows,
size_t columns,
size_t elemsize,
size_t padding,
size_t * ld,
size_t * fullsize);
/**
* @brief Allocates matrix on device and computes ld and size
*
* @param[in] context OpenCL context
* @param[in] order Row/column order.
* @param[in] rows number of rows
* @param[in] columns number of columns
* @param[in] elemsize element size
* @param[in] padding additional padding on the leading dimension
* @param[out] ld if non-NULL *ld is filled with the leading dimension
*                in elements
* @param[out] fullsize if non-NULL *fullsize is filled with the byte size
* @param[out] err Error code (see \b clCreateBuffer() )
*
* @return
* - OpenCL memory object of the allocated matrix
*/
cl_mem clblasCreateMatrix(
cl_context context,
clblasOrder order,
size_t rows,
size_t columns,
size_t elemsize,
size_t padding,
size_t * ld,
size_t * fullsize,
cl_int * err);
/**
* @brief Allocates matrix on device with specified size and ld and computes its size
*
* Unlike clblasCreateMatrix(), this function takes no \b padding argument:
* the caller supplies the leading dimension \b ld directly.
*
* @param[in] context OpenCL context
* @param[in] order Row/column order.
* @param[in] rows number of rows
* @param[in] columns number of columns
* @param[in] elemsize element size
* @param[in] ld the length of the leading dimension. It cannot
*               be less than \b columns when the \b order parameter is set to
*               \b clblasRowMajor,\n or less than \b rows when the
*               parameter is set to \b clblasColumnMajor.
* @param[out] fullsize if non-NULL *fullsize is filled with the byte size
* @param[out] err Error code (see \b clCreateBuffer() )
*
* @return
* - OpenCL memory object of the allocated matrix
*/
cl_mem clblasCreateMatrixWithLd( cl_context context,
clblasOrder order,
size_t rows,
size_t columns,
size_t elemsize,
size_t ld,
size_t * fullsize,
cl_int * err) ;
/**
* @brief Allocates matrix on device and initialize from existing similar matrix
* on host. See \b clblasCreateMatrixBuffer().
*
* @param[in] context OpenCL context
* @param[in] order Row/column order.
* @param[in] rows number of rows
* @param[in] columns number of columns
* @param[in] elemsize element size
* @param[in] ld leading dimension in elements
* @param[in] host base address of host matrix data
* @param[in] off_host host matrix offset in elements
* @param[in] ld_host leading dimension of host matrix in elements
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
* @param[out] err Error code; presumably reported as for
*                 clblasCreateMatrix() (see \b clCreateBuffer() ) -
*                 to be confirmed against the implementation
*
* @return
* - OpenCL memory object of the allocated matrix
*/
cl_mem clblasCreateMatrixFromHost(
cl_context context,
clblasOrder order,
size_t rows,
size_t columns,
size_t elemsize,
size_t ld,
void * host,
size_t off_host,
size_t ld_host,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_int * err);
/**
* @brief Copies synchronously a sub-matrix from host (A) to device (B).
*
* @param[in] order matrix ordering
* @param[in] element_size element size
* @param[in] A specifies the source matrix on the host
* @param[in] offA specifies the offset of matrix A in
*                 elements
* @param[in] ldA specifies the leading dimension of
*                matrix A in elements
* @param[in] nrA specifies the number of rows of A
*                in elements
* @param[in] ncA specifies the number of columns of A
*                in elements
* @param[in] xA specifies the top-left x position to
*               copy from A
* @param[in] yA specifies the top-left y position to
*               copy from A
* @param[out] B specifies the destination matrix on the
*               device
* @param[in] offB specifies the offset of matrix B in
*                 elements
* @param[in] ldB specifies the leading dimension of
*                matrix B in bytes
* @param[in] nrB specifies the number of rows of B
*                in elements
* @param[in] ncB specifies the number of columns of B
*                in elements
* @param[in] xB specifies the top-left x position to
*               copy to in B
* @param[in] yB specifies the top-left y position to
*               copy to in B
* @param[in] nx specifies the number of elements to
*               copy according to the x dimension (rows)
* @param[in] ny specifies the number of elements to
*               copy according to the y dimension
*               (columns)
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
*
* @return
* - \b clblasSuccess for success
* - \b clblasInvalidValue if:
*   - \b xA + \b offA + \b nx exceeds the number of columns of A
*   - \b xB + \b offB + \b nx exceeds the number of columns of B
*   - \b yA + \b ny exceeds the number of rows of A
*   - \b yB + \b ny exceeds the number of rows of B
*/
clblasStatus clblasWriteSubMatrix(
clblasOrder order,
size_t element_size,
const void *A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
cl_mem B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a sub-matrix from host (A) to device (B).
* See \b clblasWriteSubMatrix() for the parameters shared with the
* synchronous variant.
*
* @param[out] event Event object that identifies this particular
*                   execution instance (this function takes a single
*                   command queue).
*/
clblasStatus clblasWriteSubMatrixAsync(
clblasOrder order,
size_t element_size,
const void *A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
cl_mem B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
/**
* @brief Copies a sub-matrix from device (A) to host (B).
* See \b clblasWriteSubMatrix() for the remaining parameters; here the
* roles are reversed: \b A is the device buffer and \b B is host memory.
*
* @param[in] A specifies the source matrix on the device
* @param[out] B specifies the destination matrix on the host
*
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasReadSubMatrix(
clblasOrder order,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
void *B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a sub-matrix from device (A) to host (B).
* See \b clblasReadSubMatrix() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the \b event parameter.
*/
clblasStatus clblasReadSubMatrixAsync(
clblasOrder order,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
void *B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
/**
* @brief Copies a sub-matrix from device (A) to device (B).
* See \b clblasWriteSubMatrix() for the remaining parameters; here both
* \b A and \b B are device buffers.
*
* @param[in] A specifies the source matrix on the device
* @param[out] B specifies the destination matrix on the device
*
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasCopySubMatrix(
clblasOrder order,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
cl_mem B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a sub-matrix from device (A) to device (B).
* See \b clblasCopySubMatrix() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the \b event parameter.
*/
clblasStatus clblasCopySubMatrixAsync(
clblasOrder order,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
cl_mem B, size_t offB, size_t ldB,
size_t nrB, size_t ncB,
size_t xB, size_t yB,
size_t nx, size_t ny,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
/**
* @brief Copies synchronously a vector from host (A) to device (B).
* See \b clblasWriteSubMatrix().
*
* @param[in] nb_elem presumably the number of elements to copy
*                    (not stated in this header - to be confirmed)
* @param[in] element_size element size
* @param[in] A specifies the source vector on the host
* @param[in] offA offset of vector A; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[out] B specifies the destination vector on the device
* @param[in] offB offset of vector B; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
*
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasWriteVector(
size_t nb_elem,
size_t element_size,
const void *A, size_t offA,
cl_mem B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a vector from host (A) to device (B).
* See \b clblasWriteVector() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasWriteVectorAsync(
size_t nb_elem,
size_t element_size,
const void *A, size_t offA,
cl_mem B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Copies synchronously a vector from device (A) to host (B).
* See \b clblasReadSubMatrix().
*
* @param[in] nb_elem presumably the number of elements to copy
*                    (not stated in this header - to be confirmed)
* @param[in] element_size element size
* @param[in] A specifies the source vector on the device
* @param[in] offA offset of vector A; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[out] B specifies the destination vector on the host
* @param[in] offB offset of vector B; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
*
* @return
* - see \b clblasReadSubMatrix()
*/
clblasStatus clblasReadVector(
size_t nb_elem,
size_t element_size,
const cl_mem A, size_t offA,
void * B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a vector from device (A) to host (B).
* See \b clblasReadVector() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasReadVectorAsync(
size_t nb_elem,
size_t element_size,
const cl_mem A, size_t offA,
void * B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Copies synchronously a vector from device (A) to device (B).
* See \b clblasCopySubMatrix().
*
* @param[in] nb_elem presumably the number of elements to copy
*                    (not stated in this header - to be confirmed)
* @param[in] element_size element size
* @param[in] A specifies the source vector on the device
* @param[in] offA offset of vector A; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[out] B specifies the destination vector on the device
* @param[in] offB offset of vector B; assumed to be in elements as
*                 for clblasWriteSubMatrix() - to be confirmed
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
*
* @return
* - see \b clblasCopySubMatrix()
*/
clblasStatus clblasCopyVector(
size_t nb_elem,
size_t element_size,
const cl_mem A, size_t offA,
cl_mem B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a vector from device (A) to device (B).
* See \b clblasCopyVector() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasCopyVectorAsync(
size_t nb_elem,
size_t element_size,
const cl_mem A, size_t offA,
cl_mem B, size_t offB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Copies synchronously a whole matrix from host (A) to device (B).
* See \b clblasWriteSubMatrix().
*
* @param[in] order matrix ordering
* @param[in] sx extent of the matrix along the x dimension; presumably
*               the number of rows, following the x = rows convention of
*               clblasWriteSubMatrix() - to be confirmed
* @param[in] sy extent of the matrix along the y dimension; presumably
*               the number of columns - to be confirmed
* @param[in] element_size element size
* @param[in] A specifies the source matrix on the host
* @param[out] B specifies the destination matrix on the device
*
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasWriteMatrix(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const void *A, size_t offA, size_t ldA,
cl_mem B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a whole matrix from host (A) to device (B).
* See \b clblasWriteMatrix() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasWriteMatrixAsync(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const void *A, size_t offA, size_t ldA,
cl_mem B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Copies synchronously a whole matrix from device (A) to host (B).
* See \b clblasReadSubMatrix().
*
* @param[in] order matrix ordering
* @param[in] sx extent of the matrix along the x dimension; presumably
*               the number of rows, following the x = rows convention of
*               clblasWriteSubMatrix() - to be confirmed
* @param[in] sy extent of the matrix along the y dimension; presumably
*               the number of columns - to be confirmed
* @param[in] element_size element size
* @param[in] A specifies the source matrix on the device
* @param[out] B specifies the destination matrix on the host
*
* @return
* - see \b clblasReadSubMatrix()
*/
clblasStatus clblasReadMatrix(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
void * B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a whole matrix from device (A) to host (B).
* See \b clblasReadMatrix() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasReadMatrixAsync(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
void * B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Copies synchronously a whole matrix from device (A) to device (B).
* See \b clblasCopySubMatrix().
*
* @param[in] order matrix ordering
* @param[in] sx extent of the matrix along the x dimension; presumably
*               the number of rows, following the x = rows convention of
*               clblasWriteSubMatrix() - to be confirmed
* @param[in] sy extent of the matrix along the y dimension; presumably
*               the number of columns - to be confirmed
* @param[in] element_size element size
* @param[in] A specifies the source matrix on the device
* @param[out] B specifies the destination matrix on the device
*
* @return
* - see \b clblasCopySubMatrix()
*/
clblasStatus clblasCopyMatrix(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
cl_mem B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Copies asynchronously a whole matrix from device (A) to device (B).
* See \b clblasCopyMatrix() for the copy parameters and
* \b clblasWriteSubMatrixAsync() for the event output; the \b events
* argument here presumably corresponds to the \b event parameter
* documented there.
*/
clblasStatus clblasCopyMatrixAsync(
clblasOrder order,
size_t sx, size_t sy,
size_t element_size,
const cl_mem A, size_t offA, size_t ldA,
cl_mem B, size_t offB, size_t ldB,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *events);
/**
* @brief Fill synchronously a vector with a pattern of size \b element_size bytes
*
* @param[in] nb_elem specifies the number of elements in buffer A
* @param[in] element_size specifies the size of one element of A. Supported sizes correspond
*                         to the element sizes used in clBLAS (1, 2, 4, 8, 16)
* @param[out] A specifies the vector on the device to be filled
* @param[in] offA specifies the offset of vector A in
*                 elements
* @param[in] host specifies the host address of the pattern to fill with
*                 (\b element_size bytes); the same argument is named
*                 \b pattern in clblasFillVectorAsync()
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events
*                                to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to
*                          wait for
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasFillVector(
size_t nb_elem,
size_t element_size,
cl_mem A, size_t offA,
const void * host,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Fill asynchronously a vector with a pattern of size \b element_size bytes.
* See \b clblasFillVector(); the \b pattern argument here occupies the
* position of the \b host argument of that prototype.
*
* @param[out] event presumably the event object identifying this
*                   execution instance, as for
*                   clblasWriteSubMatrixAsync() - to be confirmed
*/
clblasStatus clblasFillVectorAsync(
size_t nb_elem,
size_t element_size,
cl_mem A, size_t offA,
const void * pattern,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
/**
* @brief Fill synchronously a matrix with a pattern of size \b element_size bytes
*
* @param[in] order specifies the matrix order
* @param[in] element_size specifies the size of one element of A. Supported sizes correspond
*                         to the element sizes used in clBLAS (1, 2, 4, 8, 16)
* @param[out] A specifies the matrix on the device to be filled
* @param[in] offA specifies the offset of matrix A; presumably in elements,
*                 as documented for clblasFillSubMatrix() - to be confirmed
* @param[in] ldA specifies the leading dimension of A
* @param[in] nrA specifies the number of rows in A
* @param[in] ncA specifies the number of columns in A
* @param[in] pattern specifies the host address of the pattern to fill with (\b element_size bytes)
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to wait for
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasFillMatrix(
clblasOrder order,
size_t element_size,
cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
const void *pattern,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Fill a sub-matrix of A (i.e. partially fill A) with a pattern of
* size \b element_size bytes
*
* @param[in] order specifies the matrix order
* @param[in] element_size specifies the size of one element of A. Supported values
*                         are the element sizes used in clBLAS - that is 1, 2, 4, 8 or 16
* @param[out] A specifies the matrix on the device to be partially filled
* @param[in] offA specifies the offset of matrix A in elements
* @param[in] ldA specifies the leading dimension of A in elements
* @param[in] nrA specifies the number of rows of A
*                in elements
* @param[in] ncA specifies the number of columns of A
*                in elements
* @param[in] xA specifies the top-left x position of the
*               region to fill in A
* @param[in] yA specifies the top-left y position of the
*               region to fill in A
* @param[in] nx specifies the number of elements to
*               fill according to the x dimension (rows)
* @param[in] ny specifies the number of elements to
*               fill according to the y dimension
*               (columns)
* @param[in] pattern specifies the host address of the pattern to fill with (\b element_size bytes)
* @param[in] command_queue specifies the OpenCL queue
* @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for
* @param[in] eventWaitList specifies the list of OpenCL events to wait for
* @return
* - see \b clblasWriteSubMatrix()
*/
clblasStatus clblasFillSubMatrix(
clblasOrder order,
size_t element_size,
cl_mem A, size_t offA, size_t ldA,
size_t nrA, size_t ncA,
size_t xA, size_t yA,
size_t nx, size_t ny,
const void *pattern,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList);
/**
* @brief Asynchronously fill a sub-matrix with a pattern of size \b element_size bytes.
* See \b clblasFillSubMatrix().
*
* The parameter names in this prototype differ from clblasFillSubMatrix():
* \b sxA / \b syA occupy the positions of \b nrA / \b ncA, and \b host
* occupies the position of \b pattern. Note also that \b xA and \b yA are
* declared as \c int here, whereas clblasFillSubMatrix() declares them as
* \c size_t.
*
* @param[out] event presumably the event object identifying this
*                   execution instance, as for
*                   clblasWriteSubMatrixAsync() - to be confirmed
*/
clblasStatus clblasFillSubMatrixAsync(
clblasOrder order,
size_t element_size,
cl_mem A, size_t offA, size_t ldA,
size_t sxA, size_t syA,
int xA, int yA,
size_t nx, size_t ny,
const void *host,
cl_command_queue command_queue,
cl_uint numEventsInWaitList,
const cl_event *eventWaitList,
cl_event *event);
#ifdef __cplusplus
} /* extern "C" { */
#endif
#endif /* CLBLAS_H_ */