1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 19 #ifndef CLBLAS_H_ 20 #define CLBLAS_H_ 21 22 /** 23 * @mainpage OpenCL BLAS 24 * 25 * This is an implementation of 26 * <A HREF="http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms"> 27 * Basic Linear Algebra Subprograms</A>, levels 1, 2 and 3 using 28 * <A HREF="http://www.khronos.org/opencl/">OpenCL</A> and optimized for 29 * the AMD GPU hardware. 30 */ 31 32 #if defined(__APPLE__) || defined(__MACOSX) 33 #include <OpenCL/cl.h> 34 #else 35 #include <CL/cl.h> 36 #endif 37 38 #include <clBLAS-complex.h> 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 /** 45 * @defgroup OVERVIEW Overview 46 * 47 * This library provides an implementation of the Basic Linear Algebra Subprograms levels 1, 2 and 3, 48 * using OpenCL and optimized for AMD GPU hardware. It provides BLAS-1 functions 49 * SWAP, SCAL, COPY, AXPY, DOT, DOTU, DOTC, ROTG, ROTMG, ROT, ROTM, iAMAX, ASUM and NRM2, 50 * BLAS-2 functions GEMV, SYMV, TRMV, TRSV, HEMV, SYR, SYR2, HER, HER2, GER, GERU, GERC, 51 * TPMV, SPMV, HPMV, TPSV, SPR, SPR2, HPR, HPR2, GBMV, TBMV, SBMV, HBMV and TBSV 52 * and BLAS-3 functions GEMM, SYMM, TRMM, TRSM, HEMM, HERK, HER2K, SYRK and SYR2K.
53 * 54 * This library's primary goal is to assist the end user to enqueue OpenCL 55 * kernels to process BLAS functions in an OpenCL-efficient manner, while 56 * keeping interfaces familiar to users who know how to use BLAS. All 57 * functions accept matrices through buffer objects. 58 * 59 * This library is entirely thread-safe with the exception of the following API functions: 60 * clblasSetup and clblasTeardown. 61 * Developers using the library can safely call any BLAS routine from different threads. 62 * 63 * @section deprecated 64 * This library provided support for the creation of scratch images to achieve better performance 65 * on older <a href="http://developer.amd.com/gpu/AMDAPPSDK/Pages/default.aspx">AMD APP SDKs</a>. 66 * However, memory buffers now give the same performance as image objects in the current SDKs. 67 * Scratch image buffers are being deprecated and users are advised not to use scratch images in 68 * new applications. 69 */ 70 71 /** 72 * @defgroup TYPES clblas types 73 */ 74 /*@{*/ 75 76 77 /** Shows how matrices are placed in memory. */ 78 typedef enum clblasOrder_ { 79 clblasRowMajor, /**< Every row is placed sequentially */ 80 clblasColumnMajor /**< Every column is placed sequentially */ 81 } clblasOrder; 82 83 /** Used to specify whether the matrix is to be transposed or not. */ 84 typedef enum clblasTranspose_ { 85 clblasNoTrans, /**< Operate with the matrix. */ 86 clblasTrans, /**< Operate with the transpose of the matrix. */ 87 clblasConjTrans /**< Operate with the conjugate transpose of 88 the matrix. */ 89 } clblasTranspose; 90 91 /** Used by the Hermitian, symmetric and triangular matrix 92 * routines to specify whether the upper or lower triangle is being referenced. 93 */ 94 typedef enum clblasUplo_ { 95 clblasUpper, /**< Upper triangle. */ 96 clblasLower /**< Lower triangle. */ 97 } clblasUplo; 98 99 /** It is used by the triangular matrix routines to specify whether the 100 * matrix is unit triangular.
101 */ 102 typedef enum clblasDiag_ { 103 clblasUnit, /**< Unit triangular. */ 104 clblasNonUnit /**< Non-unit triangular. */ 105 } clblasDiag; 106 107 /** Indicates the side matrix A is located relative to matrix B during multiplication. */ 108 typedef enum clblasSide_ { 109 clblasLeft, /**< Multiply general matrix by symmetric, 110 Hermitian or triangular matrix on the left. */ 111 clblasRight /**< Multiply general matrix by symmetric, 112 Hermitian or triangular matrix on the right. */ 113 } clblasSide; 114 115 /** 116 * @brief clblas error codes definition, incorporating OpenCL error 117 * definitions. 118 * 119 * This enumeration is a subset of the OpenCL error codes extended with some 120 * additional codes. For example, CL_OUT_OF_HOST_MEMORY, which is 121 * defined in cl.h, is aliased as clblasOutOfHostMemory. 122 */ 123 typedef enum clblasStatus_ { 124 clblasSuccess = CL_SUCCESS, 125 clblasInvalidValue = CL_INVALID_VALUE, 126 clblasInvalidCommandQueue = CL_INVALID_COMMAND_QUEUE, 127 clblasInvalidContext = CL_INVALID_CONTEXT, 128 clblasInvalidMemObject = CL_INVALID_MEM_OBJECT, 129 clblasInvalidDevice = CL_INVALID_DEVICE, 130 clblasInvalidEventWaitList = CL_INVALID_EVENT_WAIT_LIST, 131 clblasOutOfResources = CL_OUT_OF_RESOURCES, 132 clblasOutOfHostMemory = CL_OUT_OF_HOST_MEMORY, 133 clblasInvalidOperation = CL_INVALID_OPERATION, 134 clblasCompilerNotAvailable = CL_COMPILER_NOT_AVAILABLE, 135 clblasBuildProgramFailure = CL_BUILD_PROGRAM_FAILURE, 136 /* Extended error codes: each enumerator below is one greater than the previous, starting at -1024 */ 137 clblasNotImplemented = -1024, /**< Functionality is not implemented */ 138 clblasNotInitialized, /**< clblas library is not initialized yet */ 139 clblasInvalidMatA, /**< Matrix A is not a valid memory object */ 140 clblasInvalidMatB, /**< Matrix B is not a valid memory object */ 141 clblasInvalidMatC, /**< Matrix C is not a valid memory object */ 142 clblasInvalidVecX, /**< Vector X is not a valid memory object */ 143 clblasInvalidVecY, /**< Vector Y is not a valid memory object
*/ 144 clblasInvalidDim, /**< An input dimension (M,N,K) is invalid */ 145 clblasInvalidLeadDimA, /**< Leading dimension A must not be less than the size of the first dimension */ 146 clblasInvalidLeadDimB, /**< Leading dimension B must not be less than the size of the second dimension */ 147 clblasInvalidLeadDimC, /**< Leading dimension C must not be less than the size of the third dimension */ 148 clblasInvalidIncX, /**< The increment for a vector X must not be 0 */ 149 clblasInvalidIncY, /**< The increment for a vector Y must not be 0 */ 150 clblasInsufficientMemMatA, /**< The memory object for Matrix A is too small */ 151 clblasInsufficientMemMatB, /**< The memory object for Matrix B is too small */ 152 clblasInsufficientMemMatC, /**< The memory object for Matrix C is too small */ 153 clblasInsufficientMemVecX, /**< The memory object for Vector X is too small */ 154 clblasInsufficientMemVecY /**< The memory object for Vector Y is too small */ 155 } clblasStatus; 156 157 158 /*@}*/ 159 160 /** 161 * @defgroup VERSION Version information 162 */ 163 /*@{*/ 164 165 /** 166 * @brief Get the clblas library version info. 167 * 168 * @param[out] major Location to store library's major version. 169 * @param[out] minor Location to store library's minor version. 170 * @param[out] patch Location to store library's patch version. 171 * 172 * @returns always \b clblasSuccess. 173 * 174 * @ingroup VERSION 175 */ 176 clblasStatus 177 clblasGetVersion(cl_uint* major, cl_uint* minor, cl_uint* patch); 178 179 /*@}*/ 180 181 /** 182 * @defgroup INIT Initialize library 183 */ 184 /*@{*/ 185 186 /** 187 * @brief Initialize the clblas library. 188 * 189 * Must be called before any other clblas API function is invoked. 190 * @note This function is not thread-safe.
191 * 192 * @return 193 * - \b clblasSuccess on success; 194 * - \b clblasOutOfHostMemory if there is not enough memory to allocate 195 * library's internal structures; 196 * - \b clblasOutOfResources in case of requested resources scarcity. 197 * 198 * @ingroup INIT 199 */ 200 clblasStatus 201 clblasSetup(void); 202 203 /** 204 * @brief Finalize the usage of the clblas library. 205 * 206 * Frees all memory allocated for different computational kernels and other 207 * internal data. 208 * @note This function is not thread-safe. 209 * 210 * @ingroup INIT 211 */ 212 void 213 clblasTeardown(void); 214 215 /*@}*/ 216 217 /** 218 * @defgroup BLAS1 BLAS-1 functions 219 * 220 * The Level 1 Basic Linear Algebra Subprograms are functions that perform 221 * vector-vector operations. 222 */ 223 /*@{*/ 224 /*@}*/ 225 226 /** 227 * @defgroup SWAP SWAP - Swap elements from 2 vectors 228 * @ingroup BLAS1 229 */ 230 /*@{*/ 231 232 /** 233 * @brief interchanges two vectors of float. 234 * 235 * 236 * @param[in] N Number of elements in vector \b X. 237 * @param[out] X Buffer object storing vector \b X. 238 * @param[in] offx Offset of first element of vector \b X in buffer object. 239 * Counted in elements. 240 * @param[in] incx Increment for the elements of \b X. Must not be zero. 241 * @param[out] Y Buffer object storing the vector \b Y. 242 * @param[in] offy Offset of first element of vector \b Y in buffer object. 243 * Counted in elements. 244 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 245 * @param[in] numCommandQueues Number of OpenCL command queues in which the 246 * task is to be performed. 247 * @param[in] commandQueues OpenCL command queues. 248 * @param[in] numEventsInWaitList Number of events in the event wait list. 249 * @param[in] eventWaitList Event wait list. 250 * @param[in] events Event objects per each command queue that identify 251 * a particular kernel execution instance.
252 * 253 * @return 254 * - \b clblasSuccess on success; 255 * - \b clblasNotInitialized if clblasSetup() was not called; 256 * - \b clblasInvalidValue if invalid parameters are passed: 257 * - \b N is zero, or 258 * - either \b incx or \b incy is zero, or 259 * - the vector sizes along with the increments lead to 260 * accessing outside of any of the buffers; 261 * - \b clblasInvalidMemObject if either \b X, or \b Y object is 262 * Invalid, or an image object rather than the buffer one; 263 * - \b clblasOutOfHostMemory if the library can't allocate memory for 264 * internal structures; 265 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 266 * - \b clblasInvalidContext if a context a passed command queue belongs 267 * to was released; 268 * - \b clblasInvalidOperation if kernel compilation relating to a previous 269 * call has not completed for any of the target devices; 270 * - \b clblasCompilerNotAvailable if a compiler is not available; 271 * - \b clblasBuildProgramFailure if there is a failure to build a program 272 * executable. 273 * 274 * @ingroup SWAP 275 */ 276 clblasStatus 277 clblasSswap( 278 size_t N, 279 cl_mem X, 280 size_t offx, 281 int incx, 282 cl_mem Y, 283 size_t offy, 284 int incy, 285 cl_uint numCommandQueues, 286 cl_command_queue *commandQueues, 287 cl_uint numEventsInWaitList, 288 const cl_event *eventWaitList, 289 cl_event *events); 290 291 /** 292 * @example example_sswap.c 293 * Example of how to use the @ref clblasSswap function. 294 */ 295 296 /** 297 * @brief interchanges two vectors of double. 298 * 299 * 300 * @param[in] N Number of elements in vector \b X. 301 * @param[out] X Buffer object storing vector \b X. 302 * @param[in] offx Offset of first element of vector \b X in buffer object. 303 * Counted in elements. 304 * @param[in] incx Increment for the elements of \b X. Must not be zero. 305 * @param[out] Y Buffer object storing the vector \b Y.
306 * @param[in] offy Offset of first element of vector \b Y in buffer object. 307 * Counted in elements. 308 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 309 * @param[in] numCommandQueues Number of OpenCL command queues in which the 310 * task is to be performed. 311 * @param[in] commandQueues OpenCL command queues. 312 * @param[in] numEventsInWaitList Number of events in the event wait list. 313 * @param[in] eventWaitList Event wait list. 314 * @param[in] events Event objects per each command queue that identify 315 * a particular kernel execution instance. 316 * 317 * @return 318 * - \b clblasSuccess on success; 319 * - \b clblasInvalidDevice if a target device does not support the 320 * floating point arithmetic with double precision; 321 * - the same error codes as the clblasSswap() function otherwise. 322 * 323 * @ingroup SWAP 324 */ 325 clblasStatus 326 clblasDswap( 327 size_t N, 328 cl_mem X, 329 size_t offx, 330 int incx, 331 cl_mem Y, 332 size_t offy, 333 int incy, 334 cl_uint numCommandQueues, 335 cl_command_queue *commandQueues, 336 cl_uint numEventsInWaitList, 337 const cl_event *eventWaitList, 338 cl_event *events); 339 340 /** 341 * @brief interchanges two vectors of complex-float elements. 342 * 343 * 344 * @param[in] N Number of elements in vector \b X. 345 * @param[out] X Buffer object storing vector \b X. 346 * @param[in] offx Offset of first element of vector \b X in buffer object. 347 * Counted in elements. 348 * @param[in] incx Increment for the elements of \b X. Must not be zero. 349 * @param[out] Y Buffer object storing the vector \b Y. 350 * @param[in] offy Offset of first element of vector \b Y in buffer object. 351 * Counted in elements. 352 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 353 * @param[in] numCommandQueues Number of OpenCL command queues in which the 354 * task is to be performed. 355 * @param[in] commandQueues OpenCL command queues.
356 * @param[in] numEventsInWaitList Number of events in the event wait list. 357 * @param[in] eventWaitList Event wait list. 358 * @param[in] events Event objects per each command queue that identify 359 * a particular kernel execution instance. 360 * 361 * @return 362 * - \b clblasSuccess on success; 363 * - the same error codes as the clblasSswap() function otherwise. 364 * 365 * @ingroup SWAP 366 */ 367 clblasStatus 368 clblasCswap( 369 size_t N, 370 cl_mem X, 371 size_t offx, 372 int incx, 373 cl_mem Y, 374 size_t offy, 375 int incy, 376 cl_uint numCommandQueues, 377 cl_command_queue *commandQueues, 378 cl_uint numEventsInWaitList, 379 const cl_event *eventWaitList, 380 cl_event *events); 381 382 /** 383 * @brief interchanges two vectors of double-complex elements. 384 * 385 * 386 * @param[in] N Number of elements in vector \b X. 387 * @param[out] X Buffer object storing vector \b X. 388 * @param[in] offx Offset of first element of vector \b X in buffer object. 389 * Counted in elements. 390 * @param[in] incx Increment for the elements of \b X. Must not be zero. 391 * @param[out] Y Buffer object storing the vector \b Y. 392 * @param[in] offy Offset of first element of vector \b Y in buffer object. 393 * Counted in elements. 394 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 395 * @param[in] numCommandQueues Number of OpenCL command queues in which the 396 * task is to be performed. 397 * @param[in] commandQueues OpenCL command queues. 398 * @param[in] numEventsInWaitList Number of events in the event wait list. 399 * @param[in] eventWaitList Event wait list. 400 * @param[in] events Event objects per each command queue that identify 401 * a particular kernel execution instance. 402 * 403 * @return 404 * - \b clblasSuccess on success; 405 * - the same error codes as the clblasDswap() function otherwise.
406 * 407 * @ingroup SWAP 408 */ 409 clblasStatus 410 clblasZswap( 411 size_t N, 412 cl_mem X, 413 size_t offx, 414 int incx, 415 cl_mem Y, 416 size_t offy, 417 int incy, 418 cl_uint numCommandQueues, 419 cl_command_queue *commandQueues, 420 cl_uint numEventsInWaitList, 421 const cl_event *eventWaitList, 422 cl_event *events); 423 424 /*@}*/ 425 426 427 /** 428 * @defgroup SCAL SCAL - Scales a vector by a constant 429 * @ingroup BLAS1 430 */ 431 /*@{*/ 432 433 /** 434 * @brief Scales a float vector by a float constant 435 * 436 * - \f$ X \leftarrow \alpha X \f$ 437 * 438 * @param[in] N Number of elements in vector \b X. 439 * @param[in] alpha The constant factor for vector \b X. 440 * @param[out] X Buffer object storing vector \b X. 441 * @param[in] offx Offset of first element of vector \b X in buffer object. 442 * Counted in elements. 443 * @param[in] incx Increment for the elements of \b X. Must not be zero. 444 * @param[in] numCommandQueues Number of OpenCL command queues in which the 445 * task is to be performed. 446 * @param[in] commandQueues OpenCL command queues. 447 * @param[in] numEventsInWaitList Number of events in the event wait list. 448 * @param[in] eventWaitList Event wait list. 449 * @param[in] events Event objects per each command queue that identify 450 * a particular kernel execution instance.
451 * 452 * @return 453 * - \b clblasSuccess on success; 454 * - \b clblasNotInitialized if clblasSetup() was not called; 455 * - \b clblasInvalidValue if invalid parameters are passed: 456 * - \b N is zero, or 457 * - \b incx is zero, or 458 * - the vector sizes along with the increments lead to 459 * accessing outside of any of the buffers; 460 * - \b clblasInvalidMemObject if the \b X object is 461 * invalid, or an image object rather than the buffer one; 462 * - \b clblasOutOfHostMemory if the library can't allocate memory for 463 * internal structures; 464 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 465 * - \b clblasInvalidContext if a context a passed command queue belongs 466 * to was released; 467 * - \b clblasInvalidOperation if kernel compilation relating to a previous 468 * call has not completed for any of the target devices; 469 * - \b clblasCompilerNotAvailable if a compiler is not available; 470 * - \b clblasBuildProgramFailure if there is a failure to build a program 471 * executable. 472 * 473 * @ingroup SCAL 474 */ 475 clblasStatus 476 clblasSscal( 477 size_t N, 478 cl_float alpha, 479 cl_mem X, 480 size_t offx, 481 int incx, 482 cl_uint numCommandQueues, 483 cl_command_queue *commandQueues, 484 cl_uint numEventsInWaitList, 485 const cl_event *eventWaitList, 486 cl_event *events); 487 /** 488 * @example example_sscal.c 489 * Example of how to use the @ref clblasSscal function. 490 */ 491 492 /** 493 * @brief Scales a double vector by a double constant 494 * 495 * - \f$ X \leftarrow \alpha X \f$ 496 * 497 * @param[in] N Number of elements in vector \b X. 498 * @param[in] alpha The constant factor for vector \b X. 499 * @param[out] X Buffer object storing vector \b X. 500 * @param[in] offx Offset of first element of vector \b X in buffer object. 501 * Counted in elements. 502 * @param[in] incx Increment for the elements of \b X. Must not be zero.
503 * @param[in] numCommandQueues Number of OpenCL command queues in which the 504 * task is to be performed. 505 * @param[in] commandQueues OpenCL command queues. 506 * @param[in] numEventsInWaitList Number of events in the event wait list. 507 * @param[in] eventWaitList Event wait list. 508 * @param[in] events Event objects per each command queue that identify 509 * a particular kernel execution instance. 510 * 511 * @return 512 * - \b clblasSuccess on success; 513 * - \b clblasInvalidDevice if a target device does not support the 514 * floating point arithmetic with double precision; 515 * - the same error codes as the clblasSscal() function otherwise. 516 * 517 * @ingroup SCAL 518 */ 519 clblasStatus 520 clblasDscal( 521 size_t N, 522 cl_double alpha, 523 cl_mem X, 524 size_t offx, 525 int incx, 526 cl_uint numCommandQueues, 527 cl_command_queue *commandQueues, 528 cl_uint numEventsInWaitList, 529 const cl_event *eventWaitList, 530 cl_event *events); 531 532 /** 533 * @brief Scales a complex-float vector by a complex-float constant 534 * 535 * - \f$ X \leftarrow \alpha X \f$ 536 * 537 * @param[in] N Number of elements in vector \b X. 538 * @param[in] alpha The constant factor for vector \b X. 539 * @param[out] X Buffer object storing vector \b X. 540 * @param[in] offx Offset of first element of vector \b X in buffer object. 541 * Counted in elements. 542 * @param[in] incx Increment for the elements of \b X. Must not be zero. 543 * @param[in] numCommandQueues Number of OpenCL command queues in which the 544 * task is to be performed. 545 * @param[in] commandQueues OpenCL command queues. 546 * @param[in] numEventsInWaitList Number of events in the event wait list. 547 * @param[in] eventWaitList Event wait list. 548 * @param[in] events Event objects per each command queue that identify 549 * a particular kernel execution instance. 550 * 551 * @return 552 * - \b clblasSuccess on success; 553 * - the same error codes as the clblasSscal() function otherwise.
554 * 555 * @ingroup SCAL 556 */ 557 clblasStatus 558 clblasCscal( 559 size_t N, 560 cl_float2 alpha, 561 cl_mem X, 562 size_t offx, 563 int incx, 564 cl_uint numCommandQueues, 565 cl_command_queue *commandQueues, 566 cl_uint numEventsInWaitList, 567 const cl_event *eventWaitList, 568 cl_event *events); 569 570 /** 571 * @brief Scales a complex-double vector by a complex-double constant 572 * 573 * - \f$ X \leftarrow \alpha X \f$ 574 * 575 * @param[in] N Number of elements in vector \b X. 576 * @param[in] alpha The constant factor for vector \b X. 577 * @param[out] X Buffer object storing vector \b X. 578 * @param[in] offx Offset of first element of vector \b X in buffer object. 579 * Counted in elements. 580 * @param[in] incx Increment for the elements of \b X. Must not be zero. 581 * @param[in] numCommandQueues Number of OpenCL command queues in which the 582 * task is to be performed. 583 * @param[in] commandQueues OpenCL command queues. 584 * @param[in] numEventsInWaitList Number of events in the event wait list. 585 * @param[in] eventWaitList Event wait list. 586 * @param[in] events Event objects per each command queue that identify 587 * a particular kernel execution instance. 588 * 589 * @return 590 * - \b clblasSuccess on success; 591 * - the same error codes as the clblasDscal() function otherwise. 592 * 593 * @ingroup SCAL 594 */ 595 clblasStatus 596 clblasZscal( 597 size_t N, 598 cl_double2 alpha, 599 cl_mem X, 600 size_t offx, 601 int incx, 602 cl_uint numCommandQueues, 603 cl_command_queue *commandQueues, 604 cl_uint numEventsInWaitList, 605 const cl_event *eventWaitList, 606 cl_event *events); 607 608 /*@}*/ 609 610 /** 611 * @defgroup SSCAL SSCAL - Scales a complex vector by a real constant 612 * @ingroup BLAS1 613 */ 614 /*@{*/ 615 616 /** 617 * @brief Scales a complex-float vector by a float constant 618 * 619 * - \f$ X \leftarrow \alpha X \f$ 620 * 621 * @param[in] N Number of elements in vector \b X.
622 * @param[in] alpha The constant factor for vector \b X. 623 * @param[out] X Buffer object storing vector \b X. 624 * @param[in] offx Offset of first element of vector \b X in buffer object. 625 * Counted in elements. 626 * @param[in] incx Increment for the elements of \b X. Must not be zero. 627 * @param[in] numCommandQueues Number of OpenCL command queues in which the 628 * task is to be performed. 629 * @param[in] commandQueues OpenCL command queues. 630 * @param[in] numEventsInWaitList Number of events in the event wait list. 631 * @param[in] eventWaitList Event wait list. 632 * @param[in] events Event objects per each command queue that identify 633 * a particular kernel execution instance. 634 * 635 * @return 636 * - \b clblasSuccess on success; 637 * - \b clblasNotInitialized if clblasSetup() was not called; 638 * - \b clblasInvalidValue if invalid parameters are passed: 639 * - \b N is zero, or 640 * - \b incx is zero, or 641 * - the vector sizes along with the increments lead to 642 * accessing outside of any of the buffers; 643 * - \b clblasInvalidMemObject if the \b X object is 644 * invalid, or an image object rather than the buffer one; 645 * - \b clblasOutOfHostMemory if the library can't allocate memory for 646 * internal structures; 647 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 648 * - \b clblasInvalidContext if a context a passed command queue belongs 649 * to was released; 650 * - \b clblasInvalidOperation if kernel compilation relating to a previous 651 * call has not completed for any of the target devices; 652 * - \b clblasCompilerNotAvailable if a compiler is not available; 653 * - \b clblasBuildProgramFailure if there is a failure to build a program 654 * executable.
655 * 656 * @ingroup SSCAL 657 */ 658 clblasStatus 659 clblasCsscal( 660 size_t N, 661 cl_float alpha, 662 cl_mem X, 663 size_t offx, 664 int incx, 665 cl_uint numCommandQueues, 666 cl_command_queue *commandQueues, 667 cl_uint numEventsInWaitList, 668 const cl_event *eventWaitList, 669 cl_event *events); 670 /** 671 * @example example_csscal.c 672 * Example of how to use the @ref clblasCsscal function. 673 */ 674 675 /** 676 * @brief Scales a complex-double vector by a double constant 677 * 678 * - \f$ X \leftarrow \alpha X \f$ 679 * 680 * @param[in] N Number of elements in vector \b X. 681 * @param[in] alpha The constant factor for vector \b X. 682 * @param[out] X Buffer object storing vector \b X. 683 * @param[in] offx Offset of first element of vector \b X in buffer object. 684 * Counted in elements. 685 * @param[in] incx Increment for the elements of \b X. Must not be zero. 686 * @param[in] numCommandQueues Number of OpenCL command queues in which the 687 * task is to be performed. 688 * @param[in] commandQueues OpenCL command queues. 689 * @param[in] numEventsInWaitList Number of events in the event wait list. 690 * @param[in] eventWaitList Event wait list. 691 * @param[in] events Event objects per each command queue that identify 692 * a particular kernel execution instance. 693 * 694 * @return 695 * - \b clblasSuccess on success; 696 * - \b clblasInvalidDevice if a target device does not support the 697 * floating point arithmetic with double precision; 698 * - the same error codes as the clblasCsscal() function otherwise.
699 * 700 * @ingroup SSCAL 701 */ 702 clblasStatus 703 clblasZdscal( 704 size_t N, 705 cl_double alpha, 706 cl_mem X, 707 size_t offx, 708 int incx, 709 cl_uint numCommandQueues, 710 cl_command_queue *commandQueues, 711 cl_uint numEventsInWaitList, 712 const cl_event *eventWaitList, 713 cl_event *events); 714 715 /*@}*/ 716 717 718 /** 719 * @defgroup COPY COPY - Copies elements from vector X to vector Y 720 * @ingroup BLAS1 721 */ 722 /*@{*/ 723 724 /** 725 * @brief Copies float elements from vector X to vector Y 726 * 727 * - \f$ Y \leftarrow X \f$ 728 * 729 * @param[in] N Number of elements in vector \b X. 730 * @param[in] X Buffer object storing vector \b X. 731 * @param[in] offx Offset of first element of vector \b X in buffer object. 732 * Counted in elements. 733 * @param[in] incx Increment for the elements of \b X. Must not be zero. 734 * @param[out] Y Buffer object storing the vector \b Y. 735 * @param[in] offy Offset of first element of vector \b Y in buffer object. 736 * Counted in elements. 737 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 738 * @param[in] numCommandQueues Number of OpenCL command queues in which the 739 * task is to be performed. 740 * @param[in] commandQueues OpenCL command queues. 741 * @param[in] numEventsInWaitList Number of events in the event wait list. 742 * @param[in] eventWaitList Event wait list. 743 * @param[in] events Event objects per each command queue that identify 744 * a particular kernel execution instance. 
745 * 746 * @return 747 * - \b clblasSuccess on success; 748 * - \b clblasNotInitialized if clblasSetup() was not called; 749 * - \b clblasInvalidValue if invalid parameters are passed: 750 * - \b N is zero, or 751 * - either \b incx or \b incy is zero, or 752 * - the vector sizes along with the increments lead to 753 * accessing outside of any of the buffers; 754 * - \b clblasInvalidMemObject if either \b X, or \b Y object is 755 * Invalid, or an image object rather than the buffer one; 756 * - \b clblasOutOfHostMemory if the library can't allocate memory for 757 * internal structures; 758 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 759 * - \b clblasInvalidContext if a context a passed command queue belongs 760 * to was released; 761 * - \b clblasInvalidOperation if kernel compilation relating to a previous 762 * call has not completed for any of the target devices; 763 * - \b clblasCompilerNotAvailable if a compiler is not available; 764 * - \b clblasBuildProgramFailure if there is a failure to build a program 765 * executable. 766 * 767 * @ingroup COPY 768 */ 769 clblasStatus 770 clblasScopy( 771 size_t N, 772 const cl_mem X, 773 size_t offx, 774 int incx, 775 cl_mem Y, 776 size_t offy, 777 int incy, 778 cl_uint numCommandQueues, 779 cl_command_queue *commandQueues, 780 cl_uint numEventsInWaitList, 781 const cl_event *eventWaitList, 782 cl_event *events); 783 784 /** 785 * @example example_scopy.c 786 * Example of how to use the @ref clblasScopy function. 787 */ 788 789 /** 790 * @brief Copies double elements from vector X to vector Y 791 * 792 * - \f$ Y \leftarrow X \f$ 793 * 794 * @param[in] N Number of elements in vector \b X. 795 * @param[in] X Buffer object storing vector \b X. 796 * @param[in] offx Offset of first element of vector \b X in buffer object. 797 * Counted in elements. 798 * @param[in] incx Increment for the elements of \b X. Must not be zero. 799 * @param[out] Y Buffer object storing the vector \b Y. 
800 * @param[in] offy Offset of first element of vector \b Y in buffer object. 801 * Counted in elements. 802 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 803 * @param[in] numCommandQueues Number of OpenCL command queues in which the 804 * task is to be performed. 805 * @param[in] commandQueues OpenCL command queues. 806 * @param[in] numEventsInWaitList Number of events in the event wait list. 807 * @param[in] eventWaitList Event wait list. 808 * @param[in] events Event objects per each command queue that identify 809 * a particular kernel execution instance. 810 * 811 * @return 812 * - \b clblasSuccess on success; 813 * - \b clblasInvalidDevice if a target device does not support the 814 * floating point arithmetic with double precision; 815 * - the same error codes as the clblasScopy() function otherwise. 816 * 817 * @ingroup COPY 818 */ 819 clblasStatus 820 clblasDcopy( 821 size_t N, 822 const cl_mem X, 823 size_t offx, 824 int incx, 825 cl_mem Y, 826 size_t offy, 827 int incy, 828 cl_uint numCommandQueues, 829 cl_command_queue *commandQueues, 830 cl_uint numEventsInWaitList, 831 const cl_event *eventWaitList, 832 cl_event *events); 833 834 /** 835 * @brief Copies complex-float elements from vector X to vector Y 836 * 837 * - \f$ Y \leftarrow X \f$ 838 * 839 * @param[in] N Number of elements in vector \b X. 840 * @param[in] X Buffer object storing vector \b X. 841 * @param[in] offx Offset of first element of vector \b X in buffer object. 842 * Counted in elements. 843 * @param[in] incx Increment for the elements of \b X. Must not be zero. 844 * @param[out] Y Buffer object storing the vector \b Y. 845 * @param[in] offy Offset of first element of vector \b Y in buffer object. 846 * Counted in elements. 847 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 848 * @param[in] numCommandQueues Number of OpenCL command queues in which the 849 * task is to be performed. 850 * @param[in] commandQueues OpenCL command queues. 
851 * @param[in] numEventsInWaitList Number of events in the event wait list. 852 * @param[in] eventWaitList Event wait list. 853 * @param[in] events Event objects per each command queue that identify 854 * a particular kernel execution instance. 855 * 856 * @return 857 * - \b clblasSuccess on success; 858 * - the same error codes as the clblasScopy() function otherwise. 859 * 860 * @ingroup COPY 861 */ 862 clblasStatus 863 clblasCcopy( 864 size_t N, 865 const cl_mem X, 866 size_t offx, 867 int incx, 868 cl_mem Y, 869 size_t offy, 870 int incy, 871 cl_uint numCommandQueues, 872 cl_command_queue *commandQueues, 873 cl_uint numEventsInWaitList, 874 const cl_event *eventWaitList, 875 cl_event *events); 876 877 /** 878 * @brief Copies complex-double elements from vector X to vector Y 879 * 880 * - \f$ Y \leftarrow X \f$ 881 * 882 * @param[in] N Number of elements in vector \b X. 883 * @param[in] X Buffer object storing vector \b X. 884 * @param[in] offx Offset of first element of vector \b X in buffer object. 885 * Counted in elements. 886 * @param[in] incx Increment for the elements of \b X. Must not be zero. 887 * @param[out] Y Buffer object storing the vector \b Y. 888 * @param[in] offy Offset of first element of vector \b Y in buffer object. 889 * Counted in elements. 890 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 891 * @param[in] numCommandQueues Number of OpenCL command queues in which the 892 * task is to be performed. 893 * @param[in] commandQueues OpenCL command queues. 894 * @param[in] numEventsInWaitList Number of events in the event wait list. 895 * @param[in] eventWaitList Event wait list. 896 * @param[in] events Event objects per each command queue that identify 897 * a particular kernel execution instance. 898 * 899 * @return 900 * - \b clblasSuccess on success; 901 * - the same error codes as the clblasDcopy() function otherwise. 
902 * 903 * @ingroup COPY 904 */ 905 clblasStatus 906 clblasZcopy( 907 size_t N, 908 const cl_mem X, 909 size_t offx, 910 int incx, 911 cl_mem Y, 912 size_t offy, 913 int incy, 914 cl_uint numCommandQueues, 915 cl_command_queue *commandQueues, 916 cl_uint numEventsInWaitList, 917 const cl_event *eventWaitList, 918 cl_event *events); 919 920 /*@}*/ 921 922 /** 923 * @defgroup AXPY AXPY - Scale X and add to Y 924 * @ingroup BLAS1 925 */ 926 /*@{*/ 927 928 /** 929 * @brief Scale vector X of float elements and add to Y 930 * 931 * - \f$ Y \leftarrow \alpha X + Y \f$ 932 * 933 * @param[in] N Number of elements in vector \b X. 934 * @param[in] alpha The constant factor for vector \b X. 935 * @param[in] X Buffer object storing vector \b X. 936 * @param[in] offx Offset of first element of vector \b X in buffer object. 937 * Counted in elements. 938 * @param[in] incx Increment for the elements of \b X. Must not be zero. 939 * @param[in,out] Y Buffer object storing the vector \b Y. 940 * @param[in] offy Offset of first element of vector \b Y in buffer object. 941 * Counted in elements. 942 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 943 * @param[in] numCommandQueues Number of OpenCL command queues in which the 944 * task is to be performed. 945 * @param[in] commandQueues OpenCL command queues. 946 * @param[in] numEventsInWaitList Number of events in the event wait list. 947 * @param[in] eventWaitList Event wait list. 948 * @param[out] events Event objects, one per command queue, that identify 949 * a particular kernel execution instance.
950 * 951 * @return 952 * - \b clblasSuccess on success; 953 * - \b clblasNotInitialized if clblasSetup() was not called; 954 * - \b clblasInvalidValue if invalid parameters are passed: 955 * - \b N is zero, or 956 * - either \b incx or \b incy is zero, or 957 * - the vector sizes along with the increments lead to 958 * accessing outside of any of the buffers; 959 * - \b clblasInvalidMemObject if either \b X or \b Y object is 960 * invalid, or an image object rather than a buffer object; 961 * - \b clblasOutOfHostMemory if the library can't allocate memory for 962 * internal structures; 963 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 964 * - \b clblasInvalidContext if the context that a passed command queue belongs 965 * to was released; 966 * - \b clblasInvalidOperation if kernel compilation relating to a previous 967 * call has not completed for any of the target devices; 968 * - \b clblasCompilerNotAvailable if a compiler is not available; 969 * - \b clblasBuildProgramFailure if there is a failure to build a program 970 * executable. 971 * 972 * @ingroup AXPY 973 */ 974 clblasStatus 975 clblasSaxpy( 976 size_t N, 977 cl_float alpha, 978 const cl_mem X, 979 size_t offx, 980 int incx, 981 cl_mem Y, 982 size_t offy, 983 int incy, 984 cl_uint numCommandQueues, 985 cl_command_queue *commandQueues, 986 cl_uint numEventsInWaitList, 987 const cl_event *eventWaitList, 988 cl_event *events); 989 990 /** 991 * @example example_saxpy.c 992 * Example of how to use the @ref clblasSaxpy function. 993 */ 994 995 /** 996 * @brief Scale vector X of double elements and add to Y 997 * 998 * - \f$ Y \leftarrow \alpha X + Y \f$ 999 * 1000 * @param[in] N Number of elements in vector \b X. 1001 * @param[in] alpha The constant factor for vector \b X. 1002 * @param[in] X Buffer object storing vector \b X. 1003 * @param[in] offx Offset of first element of vector \b X in buffer object. 1004 * Counted in elements.
1005 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1006 * @param[in,out] Y Buffer object storing the vector \b Y. 1007 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1008 * Counted in elements. 1009 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1010 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1011 * task is to be performed. 1012 * @param[in] commandQueues OpenCL command queues. 1013 * @param[in] numEventsInWaitList Number of events in the event wait list. 1014 * @param[in] eventWaitList Event wait list. 1015 * @param[out] events Event objects, one per command queue, that identify 1016 * a particular kernel execution instance. 1017 * 1018 * @return 1019 * - \b clblasSuccess on success; 1020 * - \b clblasInvalidDevice if a target device does not support the 1021 * floating point arithmetic with double precision; 1022 * - the same error codes as the clblasSaxpy() function otherwise. 1023 * 1024 * @ingroup AXPY 1025 */ 1026 clblasStatus 1027 clblasDaxpy( 1028 size_t N, 1029 cl_double alpha, 1030 const cl_mem X, 1031 size_t offx, 1032 int incx, 1033 cl_mem Y, 1034 size_t offy, 1035 int incy, 1036 cl_uint numCommandQueues, 1037 cl_command_queue *commandQueues, 1038 cl_uint numEventsInWaitList, 1039 const cl_event *eventWaitList, 1040 cl_event *events); 1041 1042 /** 1043 * @brief Scale vector X of complex-float elements and add to Y 1044 * 1045 * - \f$ Y \leftarrow \alpha X + Y \f$ 1046 * 1047 * @param[in] N Number of elements in vector \b X. 1048 * @param[in] alpha The constant factor for vector \b X. 1049 * @param[in] X Buffer object storing vector \b X. 1050 * @param[in] offx Offset of first element of vector \b X in buffer object. 1051 * Counted in elements. 1052 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1053 * @param[in,out] Y Buffer object storing the vector \b Y.
1054 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1055 * Counted in elements. 1056 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1057 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1058 * task is to be performed. 1059 * @param[in] commandQueues OpenCL command queues. 1060 * @param[in] numEventsInWaitList Number of events in the event wait list. 1061 * @param[in] eventWaitList Event wait list. 1062 * @param[out] events Event objects, one per command queue, that identify 1063 * a particular kernel execution instance. 1064 * 1065 * @return 1066 * - \b clblasSuccess on success; 1067 * - the same error codes as the clblasSaxpy() function otherwise. 1068 * 1069 * @ingroup AXPY 1070 */ 1071 clblasStatus 1072 clblasCaxpy( 1073 size_t N, 1074 cl_float2 alpha, 1075 const cl_mem X, 1076 size_t offx, 1077 int incx, 1078 cl_mem Y, 1079 size_t offy, 1080 int incy, 1081 cl_uint numCommandQueues, 1082 cl_command_queue *commandQueues, 1083 cl_uint numEventsInWaitList, 1084 const cl_event *eventWaitList, 1085 cl_event *events); 1086 1087 /** 1088 * @brief Scale vector X of double-complex elements and add to Y 1089 * 1090 * - \f$ Y \leftarrow \alpha X + Y \f$ 1091 * 1092 * @param[in] N Number of elements in vector \b X. 1093 * @param[in] alpha The constant factor for vector \b X. 1094 * @param[in] X Buffer object storing vector \b X. 1095 * @param[in] offx Offset of first element of vector \b X in buffer object. 1096 * Counted in elements. 1097 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1098 * @param[in,out] Y Buffer object storing the vector \b Y. 1099 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1100 * Counted in elements. 1101 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1102 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1103 * task is to be performed.
1104 * @param[in] commandQueues OpenCL command queues. 1105 * @param[in] numEventsInWaitList Number of events in the event wait list. 1106 * @param[in] eventWaitList Event wait list. 1107 * @param[out] events Event objects, one per command queue, that identify 1108 * a particular kernel execution instance. 1109 * 1110 * @return 1111 * - \b clblasSuccess on success; 1112 * - the same error codes as the clblasDaxpy() function otherwise. 1113 * 1114 * @ingroup AXPY 1115 */ 1116 clblasStatus 1117 clblasZaxpy( 1118 size_t N, 1119 cl_double2 alpha, 1120 const cl_mem X, 1121 size_t offx, 1122 int incx, 1123 cl_mem Y, 1124 size_t offy, 1125 int incy, 1126 cl_uint numCommandQueues, 1127 cl_command_queue *commandQueues, 1128 cl_uint numEventsInWaitList, 1129 const cl_event *eventWaitList, 1130 cl_event *events); 1131 1132 /*@}*/ 1133 1134 1135 /** 1136 * @defgroup DOT DOT - Dot product of two vectors 1137 * @ingroup BLAS1 1138 */ 1139 /*@{*/ 1140 1141 /** 1142 * @brief Dot product of two vectors containing float elements 1143 * 1144 * @param[in] N Number of elements in vector \b X. 1145 * @param[out] dotProduct Buffer object that will contain the dot-product value 1146 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1147 * Counted in elements. 1148 * @param[in] X Buffer object storing vector \b X. 1149 * @param[in] offx Offset of first element of vector \b X in buffer object. 1150 * Counted in elements. 1151 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1152 * @param[in] Y Buffer object storing the vector \b Y. 1153 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1154 * Counted in elements. 1155 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1156 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1157 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1158 * task is to be performed.
1159 * @param[in] commandQueues OpenCL command queues. 1160 * @param[in] numEventsInWaitList Number of events in the event wait list. 1161 * @param[in] eventWaitList Event wait list. 1162 * @param[out] events Event objects, one per command queue, that identify 1163 * a particular kernel execution instance. 1164 * 1165 * @return 1166 * - \b clblasSuccess on success; 1167 * - \b clblasNotInitialized if clblasSetup() was not called; 1168 * - \b clblasInvalidValue if invalid parameters are passed: 1169 * - \b N is zero, or 1170 * - either \b incx or \b incy is zero, or 1171 * - the vector sizes along with the increments lead to 1172 * accessing outside of any of the buffers; 1173 * - \b clblasInvalidMemObject if either \b X, \b Y or \b dotProduct object is 1174 * invalid, or an image object rather than a buffer object; 1175 * - \b clblasOutOfHostMemory if the library can't allocate memory for 1176 * internal structures; 1177 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 1178 * - \b clblasInvalidContext if the context that a passed command queue belongs 1179 * to was released; 1180 * - \b clblasInvalidOperation if kernel compilation relating to a previous 1181 * call has not completed for any of the target devices; 1182 * - \b clblasCompilerNotAvailable if a compiler is not available; 1183 * - \b clblasBuildProgramFailure if there is a failure to build a program 1184 * executable. 1185 * 1186 * @ingroup DOT 1187 */ 1188 clblasStatus 1189 clblasSdot( 1190 size_t N, 1191 cl_mem dotProduct, 1192 size_t offDP, 1193 const cl_mem X, 1194 size_t offx, 1195 int incx, 1196 const cl_mem Y, 1197 size_t offy, 1198 int incy, 1199 cl_mem scratchBuff, 1200 cl_uint numCommandQueues, 1201 cl_command_queue *commandQueues, 1202 cl_uint numEventsInWaitList, 1203 const cl_event *eventWaitList, 1204 cl_event *events); 1205 1206 /** 1207 * @example example_sdot.c 1208 * Example of how to use the @ref clblasSdot function.
1209 */ 1210 1211 /** 1212 * @brief Dot product of two vectors containing double elements 1213 * 1214 * @param[in] N Number of elements in vector \b X. 1215 * @param[out] dotProduct Buffer object that will contain the dot-product value 1216 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1217 * Counted in elements. 1218 * @param[in] X Buffer object storing vector \b X. 1219 * @param[in] offx Offset of first element of vector \b X in buffer object. 1220 * Counted in elements. 1221 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1222 * @param[in] Y Buffer object storing the vector \b Y. 1223 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1224 * Counted in elements. 1225 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1226 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1227 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1228 * task is to be performed. 1229 * @param[in] commandQueues OpenCL command queues. 1230 * @param[in] numEventsInWaitList Number of events in the event wait list. 1231 * @param[in] eventWaitList Event wait list. 1232 * @param[out] events Event objects, one per command queue, that identify 1233 * a particular kernel execution instance. 1234 * 1235 * @return 1236 * - \b clblasSuccess on success; 1237 * - \b clblasInvalidDevice if a target device does not support the 1238 * floating point arithmetic with double precision; 1239 * - the same error codes as the clblasSdot() function otherwise.
1240 * 1241 * @ingroup DOT 1242 */ 1243 clblasStatus 1244 clblasDdot( 1245 size_t N, 1246 cl_mem dotProduct, 1247 size_t offDP, 1248 const cl_mem X, 1249 size_t offx, 1250 int incx, 1251 const cl_mem Y, 1252 size_t offy, 1253 int incy, 1254 cl_mem scratchBuff, 1255 cl_uint numCommandQueues, 1256 cl_command_queue *commandQueues, 1257 cl_uint numEventsInWaitList, 1258 const cl_event *eventWaitList, 1259 cl_event *events); 1260 1261 1262 /** 1263 * @brief Dot product of two vectors containing float-complex elements 1264 * 1265 * @param[in] N Number of elements in vector \b X. 1266 * @param[out] dotProduct Buffer object that will contain the dot-product value 1267 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1268 * Counted in elements. 1269 * @param[in] X Buffer object storing vector \b X. 1270 * @param[in] offx Offset of first element of vector \b X in buffer object. 1271 * Counted in elements. 1272 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1273 * @param[in] Y Buffer object storing the vector \b Y. 1274 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1275 * Counted in elements. 1276 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1277 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1278 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1279 * task is to be performed. 1280 * @param[in] commandQueues OpenCL command queues. 1281 * @param[in] numEventsInWaitList Number of events in the event wait list. 1282 * @param[in] eventWaitList Event wait list. 1283 * @param[out] events Event objects, one per command queue, that identify 1284 * a particular kernel execution instance. 1285 * 1286 * @return 1287 * - \b clblasSuccess on success; 1288 * - the same error codes as the clblasSdot() function otherwise.
1289 * 1290 * @ingroup DOT 1291 */ 1292 1293 clblasStatus 1294 clblasCdotu( 1295 size_t N, 1296 cl_mem dotProduct, 1297 size_t offDP, 1298 const cl_mem X, 1299 size_t offx, 1300 int incx, 1301 const cl_mem Y, 1302 size_t offy, 1303 int incy, 1304 cl_mem scratchBuff, 1305 cl_uint numCommandQueues, 1306 cl_command_queue *commandQueues, 1307 cl_uint numEventsInWaitList, 1308 const cl_event *eventWaitList, 1309 cl_event *events); 1310 1311 1312 /** 1313 * @brief Dot product of two vectors containing double-complex elements 1314 * 1315 * @param[in] N Number of elements in vector \b X. 1316 * @param[out] dotProduct Buffer object that will contain the dot-product value 1317 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1318 * Counted in elements. 1319 * @param[in] X Buffer object storing vector \b X. 1320 * @param[in] offx Offset of first element of vector \b X in buffer object. 1321 * Counted in elements. 1322 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1323 * @param[in] Y Buffer object storing the vector \b Y. 1324 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1325 * Counted in elements. 1326 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1327 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1328 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1329 * task is to be performed. 1330 * @param[in] commandQueues OpenCL command queues. 1331 * @param[in] numEventsInWaitList Number of events in the event wait list. 1332 * @param[in] eventWaitList Event wait list. 1333 * @param[out] events Event objects, one per command queue, that identify 1334 * a particular kernel execution instance.
1335 * 1336 * @return 1337 * - \b clblasSuccess on success; 1338 * - \b clblasInvalidDevice if a target device does not support the 1339 * floating point arithmetic with double precision; 1340 * - the same error codes as the clblasSdot() function otherwise. 1341 * 1342 * @ingroup DOT 1343 */ 1344 1345 clblasStatus 1346 clblasZdotu( 1347 size_t N, 1348 cl_mem dotProduct, 1349 size_t offDP, 1350 const cl_mem X, 1351 size_t offx, 1352 int incx, 1353 const cl_mem Y, 1354 size_t offy, 1355 int incy, 1356 cl_mem scratchBuff, 1357 cl_uint numCommandQueues, 1358 cl_command_queue *commandQueues, 1359 cl_uint numEventsInWaitList, 1360 const cl_event *eventWaitList, 1361 cl_event *events); 1362 1363 1364 /** 1365 * @brief Dot product of two vectors containing float-complex elements, conjugating the first vector 1366 * 1367 * @param[in] N Number of elements in vector \b X. 1368 * @param[out] dotProduct Buffer object that will contain the dot-product value 1369 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1370 * Counted in elements. 1371 * @param[in] X Buffer object storing vector \b X. 1372 * @param[in] offx Offset of first element of vector \b X in buffer object. 1373 * Counted in elements. 1374 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1375 * @param[in] Y Buffer object storing the vector \b Y. 1376 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1377 * Counted in elements. 1378 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1379 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1380 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1381 * task is to be performed. 1382 * @param[in] commandQueues OpenCL command queues. 1383 * @param[in] numEventsInWaitList Number of events in the event wait list. 1384 * @param[in] eventWaitList Event wait list.
1385 * @param[out] events Event objects, one per command queue, that identify 1386 * a particular kernel execution instance. 1387 * 1388 * @return 1389 * - \b clblasSuccess on success; 1390 * - the same error codes as the clblasSdot() function otherwise. 1391 * 1392 * @ingroup DOT 1393 */ 1394 1395 clblasStatus 1396 clblasCdotc( 1397 size_t N, 1398 cl_mem dotProduct, 1399 size_t offDP, 1400 const cl_mem X, 1401 size_t offx, 1402 int incx, 1403 const cl_mem Y, 1404 size_t offy, 1405 int incy, 1406 cl_mem scratchBuff, 1407 cl_uint numCommandQueues, 1408 cl_command_queue *commandQueues, 1409 cl_uint numEventsInWaitList, 1410 const cl_event *eventWaitList, 1411 cl_event *events); 1412 1413 1414 /** 1415 * @brief Dot product of two vectors containing double-complex elements, conjugating the first vector 1416 * 1417 * @param[in] N Number of elements in vector \b X. 1418 * @param[out] dotProduct Buffer object that will contain the dot-product value 1419 * @param[in] offDP Offset to dot-product in \b dotProduct buffer object. 1420 * Counted in elements. 1421 * @param[in] X Buffer object storing vector \b X. 1422 * @param[in] offx Offset of first element of vector \b X in buffer object. 1423 * Counted in elements. 1424 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1425 * @param[in] Y Buffer object storing the vector \b Y. 1426 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1427 * Counted in elements. 1428 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1429 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 1430 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1431 * task is to be performed. 1432 * @param[in] commandQueues OpenCL command queues. 1433 * @param[in] numEventsInWaitList Number of events in the event wait list. 1434 * @param[in] eventWaitList Event wait list.
1435 * @param[out] events Event objects, one per command queue, that identify 1436 * a particular kernel execution instance. 1437 * 1438 * @return 1439 * - \b clblasSuccess on success; 1440 * - \b clblasInvalidDevice if a target device does not support the 1441 * floating point arithmetic with double precision; 1442 * - the same error codes as the clblasSdot() function otherwise. 1443 * 1444 * @ingroup DOT 1445 */ 1446 1447 clblasStatus 1448 clblasZdotc( 1449 size_t N, 1450 cl_mem dotProduct, 1451 size_t offDP, 1452 const cl_mem X, 1453 size_t offx, 1454 int incx, 1455 const cl_mem Y, 1456 size_t offy, 1457 int incy, 1458 cl_mem scratchBuff, 1459 cl_uint numCommandQueues, 1460 cl_command_queue *commandQueues, 1461 cl_uint numEventsInWaitList, 1462 const cl_event *eventWaitList, 1463 cl_event *events); 1464 1465 /*@}*/ 1466 1467 1468 /** 1469 * @defgroup ROTG ROTG - Constructs Givens plane rotation 1470 * @ingroup BLAS1 1471 */ 1472 /*@{*/ 1473 1474 /** 1475 * @brief Construct Givens plane rotation on float elements 1476 * 1477 * @param[in,out] SA Buffer object that contains SA 1478 * @param[in] offSA Offset to SA in \b SA buffer object. 1479 * Counted in elements. 1480 * @param[in,out] SB Buffer object that contains SB 1481 * @param[in] offSB Offset to SB in \b SB buffer object. 1482 * Counted in elements. 1483 * @param[out] C Buffer object that contains C 1484 * @param[in] offC Offset to C in \b C buffer object. 1485 * Counted in elements. 1486 * @param[out] S Buffer object that contains S 1487 * @param[in] offS Offset to S in \b S buffer object. 1488 * Counted in elements. 1489 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1490 * task is to be performed. 1491 * @param[in] commandQueues OpenCL command queues. 1492 * @param[in] numEventsInWaitList Number of events in the event wait list. 1493 * @param[in] eventWaitList Event wait list.
1494 * @param[out] events Event objects, one per command queue, that identify 1495 * a particular kernel execution instance. 1496 * 1497 * @return 1498 * - \b clblasSuccess on success; 1499 * - \b clblasNotInitialized if clblasSetup() was not called; 1500 * - \b clblasInvalidMemObject if either \b SA, \b SB, \b C or \b S object is 1501 * invalid, or an image object rather than a buffer object; 1502 * - \b clblasOutOfHostMemory if the library can't allocate memory for 1503 * internal structures; 1504 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 1505 * - \b clblasInvalidContext if the context that a passed command queue belongs 1506 * to was released; 1507 * - \b clblasInvalidOperation if kernel compilation relating to a previous 1508 * call has not completed for any of the target devices; 1509 * - \b clblasCompilerNotAvailable if a compiler is not available; 1510 * - \b clblasBuildProgramFailure if there is a failure to build a program 1511 * executable. 1512 * 1513 * @ingroup ROTG 1514 */ 1515 clblasStatus 1516 clblasSrotg( 1517 cl_mem SA, 1518 size_t offSA, 1519 cl_mem SB, 1520 size_t offSB, 1521 cl_mem C, 1522 size_t offC, 1523 cl_mem S, 1524 size_t offS, 1525 cl_uint numCommandQueues, 1526 cl_command_queue *commandQueues, 1527 cl_uint numEventsInWaitList, 1528 const cl_event *eventWaitList, 1529 cl_event *events); 1530 1531 /** 1532 * @example example_srotg.c 1533 * Example of how to use the @ref clblasSrotg function. 1534 */ 1535 1536 /** 1537 * @brief Construct Givens plane rotation on double elements 1538 * 1539 * @param[in,out] DA Buffer object that contains DA 1540 * @param[in] offDA Offset to DA in \b DA buffer object. 1541 * Counted in elements. 1542 * @param[in,out] DB Buffer object that contains DB 1543 * @param[in] offDB Offset to DB in \b DB buffer object. 1544 * Counted in elements. 1545 * @param[out] C Buffer object that contains C 1546 * @param[in] offC Offset to C in \b C buffer object. 1547 * Counted in elements.
1548 * @param[out] S Buffer object that contains S 1549 * @param[in] offS Offset to S in \b S buffer object. 1550 * Counted in elements. 1551 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1552 * task is to be performed. 1553 * @param[in] commandQueues OpenCL command queues. 1554 * @param[in] numEventsInWaitList Number of events in the event wait list. 1555 * @param[in] eventWaitList Event wait list. 1556 * @param[out] events Event objects, one per command queue, that identify 1557 * a particular kernel execution instance. 1558 * 1559 * @return 1560 * - \b clblasSuccess on success; 1561 * - \b clblasInvalidDevice if a target device does not support the 1562 * floating point arithmetic with double precision; 1563 * - the same error codes as the clblasSrotg() function otherwise. 1564 * 1565 * @ingroup ROTG 1566 */ 1567 clblasStatus 1568 clblasDrotg( 1569 cl_mem DA, 1570 size_t offDA, 1571 cl_mem DB, 1572 size_t offDB, 1573 cl_mem C, 1574 size_t offC, 1575 cl_mem S, 1576 size_t offS, 1577 cl_uint numCommandQueues, 1578 cl_command_queue *commandQueues, 1579 cl_uint numEventsInWaitList, 1580 const cl_event *eventWaitList, 1581 cl_event *events); 1582 1583 /** 1584 * @brief Construct Givens plane rotation on float-complex elements 1585 * 1586 * @param[in,out] CA Buffer object that contains CA 1587 * @param[in] offCA Offset to CA in \b CA buffer object. 1588 * Counted in elements. 1589 * @param[in,out] CB Buffer object that contains CB 1590 * @param[in] offCB Offset to CB in \b CB buffer object. 1591 * Counted in elements. 1592 * @param[out] C Buffer object that contains C. C is real. 1593 * @param[in] offC Offset to C in \b C buffer object. 1594 * Counted in elements. 1595 * @param[out] S Buffer object that contains S 1596 * @param[in] offS Offset to S in \b S buffer object. 1597 * Counted in elements. 1598 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1599 * task is to be performed.
1600 * @param[in] commandQueues OpenCL command queues. 1601 * @param[in] numEventsInWaitList Number of events in the event wait list. 1602 * @param[in] eventWaitList Event wait list. 1603 * @param[out] events Event objects, one per command queue, that identify 1604 * a particular kernel execution instance. 1605 * 1606 * @return 1607 * - \b clblasSuccess on success; 1608 * - the same error codes as the clblasSrotg() function otherwise. 1609 * 1610 * @ingroup ROTG 1611 */ 1612 clblasStatus 1613 clblasCrotg( 1614 cl_mem CA, 1615 size_t offCA, 1616 cl_mem CB, 1617 size_t offCB, 1618 cl_mem C, 1619 size_t offC, 1620 cl_mem S, 1621 size_t offS, 1622 cl_uint numCommandQueues, 1623 cl_command_queue *commandQueues, 1624 cl_uint numEventsInWaitList, 1625 const cl_event *eventWaitList, 1626 cl_event *events); 1627 1628 /** 1629 * @brief Construct Givens plane rotation on double-complex elements 1630 * 1631 * @param[in,out] CA Buffer object that contains CA 1632 * @param[in] offCA Offset to CA in \b CA buffer object. 1633 * Counted in elements. 1634 * @param[in,out] CB Buffer object that contains CB 1635 * @param[in] offCB Offset to CB in \b CB buffer object. 1636 * Counted in elements. 1637 * @param[out] C Buffer object that contains C. C is real. 1638 * @param[in] offC Offset to C in \b C buffer object. 1639 * Counted in elements. 1640 * @param[out] S Buffer object that contains S 1641 * @param[in] offS Offset to S in \b S buffer object. 1642 * Counted in elements. 1643 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1644 * task is to be performed. 1645 * @param[in] commandQueues OpenCL command queues. 1646 * @param[in] numEventsInWaitList Number of events in the event wait list. 1647 * @param[in] eventWaitList Event wait list. 1648 * @param[out] events Event objects, one per command queue, that identify 1649 * a particular kernel execution instance.
1650 * 1651 * @return 1652 * - \b clblasSuccess on success; 1653 * - the same error codes as the clblasDrotg() function otherwise. 1654 * 1655 * @ingroup ROTG 1656 */ 1657 clblasStatus 1658 clblasZrotg( 1659 cl_mem CA, 1660 size_t offCA, 1661 cl_mem CB, 1662 size_t offCB, 1663 cl_mem C, 1664 size_t offC, 1665 cl_mem S, 1666 size_t offS, 1667 cl_uint numCommandQueues, 1668 cl_command_queue *commandQueues, 1669 cl_uint numEventsInWaitList, 1670 const cl_event *eventWaitList, 1671 cl_event *events); 1672 1673 /*@}*/ 1674 1675 /** 1676 * @defgroup ROTMG ROTMG - Constructs the modified Givens rotation 1677 * @ingroup BLAS1 1678 */ 1679 /*@{*/ 1680 1681 /** 1682 * @brief Construct the modified Givens rotation on float elements 1683 * 1684 * @param[in,out] SD1 Buffer object that contains SD1 1685 * @param[in] offSD1 Offset to SD1 in \b SD1 buffer object. 1686 * Counted in elements. 1687 * @param[in,out] SD2 Buffer object that contains SD2 1688 * @param[in] offSD2 Offset to SD2 in \b SD2 buffer object. 1689 * Counted in elements. 1690 * @param[in,out] SX1 Buffer object that contains SX1 1691 * @param[in] offSX1 Offset to SX1 in \b SX1 buffer object. 1692 * Counted in elements. 1693 * @param[in] SY1 Buffer object that contains SY1 1694 * @param[in] offSY1 Offset to SY1 in \b SY1 buffer object. 1695 * Counted in elements. 1696 * @param[out] SPARAM Buffer object that contains SPARAM array of minimum length 5 1697 SPARAM(0) = SFLAG 1698 SPARAM(1) = SH11 1699 SPARAM(2) = SH21 1700 SPARAM(3) = SH12 1701 SPARAM(4) = SH22 1702 1703 * @param[in] offSparam Offset to SPARAM in \b SPARAM buffer object. 1704 * Counted in elements. 1705 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1706 * task is to be performed. 1707 * @param[in] commandQueues OpenCL command queues. 1708 * @param[in] numEventsInWaitList Number of events in the event wait list. 1709 * @param[in] eventWaitList Event wait list.
1710 * @param[out] events Event objects, one per command queue, that identify 1711 * a particular kernel execution instance. 1712 * 1713 * @return 1714 * - \b clblasSuccess on success; 1715 * - \b clblasNotInitialized if clblasSetup() was not called; 1716 * - \b clblasInvalidMemObject if either \b SX1, \b SY1, \b SD1, \b SD2 or \b SPARAM object is 1717 * invalid, or an image object rather than a buffer object; 1718 * - \b clblasOutOfHostMemory if the library can't allocate memory for 1719 * internal structures; 1720 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 1721 * - \b clblasInvalidContext if the context that a passed command queue belongs 1722 * to was released; 1723 * - \b clblasInvalidOperation if kernel compilation relating to a previous 1724 * call has not completed for any of the target devices; 1725 * - \b clblasCompilerNotAvailable if a compiler is not available; 1726 * - \b clblasBuildProgramFailure if there is a failure to build a program 1727 * executable. 1728 * 1729 * @ingroup ROTMG 1730 */ 1731 clblasStatus 1732 clblasSrotmg( 1733 cl_mem SD1, 1734 size_t offSD1, 1735 cl_mem SD2, 1736 size_t offSD2, 1737 cl_mem SX1, 1738 size_t offSX1, 1739 const cl_mem SY1, 1740 size_t offSY1, 1741 cl_mem SPARAM, 1742 size_t offSparam, 1743 cl_uint numCommandQueues, 1744 cl_command_queue *commandQueues, 1745 cl_uint numEventsInWaitList, 1746 const cl_event *eventWaitList, 1747 cl_event *events); 1748 1749 /** 1750 * @example example_srotmg.c 1751 * Example of how to use the @ref clblasSrotmg function. 1752 */ 1753 1754 /** 1755 * @brief Construct the modified Givens rotation on double elements 1756 * 1757 * @param[in,out] DD1 Buffer object that contains DD1 1758 * @param[in] offDD1 Offset to DD1 in \b DD1 buffer object. 1759 * Counted in elements. 1760 * @param[in,out] DD2 Buffer object that contains DD2 1761 * @param[in] offDD2 Offset to DD2 in \b DD2 buffer object. 1762 * Counted in elements.
1763 * @param[out] DX1 Buffer object that contains DX1 1764 * @param[in] offDX1 Offset to DX1 in \b DX1 buffer object. 1765 * Counted in elements. 1766 * @param[in] DY1 Buffer object that contains DY1 1767 * @param[in] offDY1 Offset to DY1 in \b DY1 buffer object. 1768 * Counted in elements. 1769 * @param[out] DPARAM Buffer object that contains DPARAM array of minimum length 5 1770 DPARAM(0) = DFLAG 1771 DPARAM(1) = DH11 1772 DPARAM(2) = DH21 1773 DPARAM(3) = DH12 1774 DPARAM(4) = DH22 1775 1776 * @param[in] offDparam Offset to DPARAM in \b DPARAM buffer object. 1777 * Counted in elements. 1778 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1779 * task is to be performed. 1780 * @param[in] commandQueues OpenCL command queues. 1781 * @param[in] numEventsInWaitList Number of events in the event wait list. 1782 * @param[in] eventWaitList Event wait list. 1783 * @param[in] events Event objects per each command queue that identify 1784 * a particular kernel execution instance. 1785 * 1786 * @return 1787 * - \b clblasSuccess on success; 1788 * - \b clblasInvalidDevice if a target device does not support the 1789 * floating point arithmetic with double precision; 1790 * - the same error codes as the clblasSrotmg() function otherwise. 1791 * 1792 * @ingroup ROTMG 1793 */ 1794 clblasStatus 1795 clblasDrotmg( 1796 cl_mem DD1, 1797 size_t offDD1, 1798 cl_mem DD2, 1799 size_t offDD2, 1800 cl_mem DX1, 1801 size_t offDX1, 1802 const cl_mem DY1, 1803 size_t offDY1, 1804 cl_mem DPARAM, 1805 size_t offDparam, 1806 cl_uint numCommandQueues, 1807 cl_command_queue *commandQueues, 1808 cl_uint numEventsInWaitList, 1809 const cl_event *eventWaitList, 1810 cl_event *events); 1811 1812 /*@}*/ 1813 1814 1815 /** 1816 * @defgroup ROT ROT - Apply givens rotation 1817 * @ingroup BLAS1 1818 */ 1819 /*@{*/ 1820 1821 /** 1822 * @brief applies a plane rotation for float elements 1823 * 1824 * @param[in] N Number of elements in vector \b X and \b Y. 
1825 * @param[out] X Buffer object storing vector \b X. 1826 * @param[in] offx Offset of first element of vector \b X in buffer object. 1827 * Counted in elements. 1828 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1829 * @param[out] Y Buffer object storing vector \b Y. 1830 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1831 * Counted in elements. 1832 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1833 * @param[in] C C specifies the cosine, cos. 1834 * @param[in] S S specifies the sine, sin. 1835 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1836 * task is to be performed. 1837 * @param[in] commandQueues OpenCL command queues. 1838 * @param[in] numEventsInWaitList Number of events in the event wait list. 1839 * @param[in] eventWaitList Event wait list. 1840 * @param[in] events Event objects per each command queue that identify 1841 * a particular kernel execution instance. 1842 * 1843 * @return 1844 * - \b clblasSuccess on success; 1845 * - \b clblasNotInitialized if clblasSetup() was not called; 1846 * - \b clblasInvalidValue if invalid parameters are passed: 1847 * - \b N is zero, or 1848 * - either \b incx or \b incy is zero, or 1849 * - the vector sizes along with the increments lead to 1850 * accessing outside of any of the buffers; 1851 * - \b clblasInvalidMemObject if either the \b X or the \b Y object is 1852 * invalid, or an image object rather than a buffer object; 1853 * - \b clblasOutOfHostMemory if the library can't allocate memory for 1854 * internal structures; 1855 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 1856 * - \b clblasInvalidContext if the context that a passed command queue belongs 1857 * to was released; 1858 * - \b clblasInvalidOperation if kernel compilation relating to a previous 1859 * call has not completed for any of the target devices; 1860 * - \b clblasCompilerNotAvailable if a compiler is not available;
1861 * - \b clblasBuildProgramFailure if there is a failure to build a program 1862 * executable. 1863 * 1864 * @ingroup ROT 1865 */ 1866 clblasStatus 1867 clblasSrot( 1868 size_t N, 1869 cl_mem X, 1870 size_t offx, 1871 int incx, 1872 cl_mem Y, 1873 size_t offy, 1874 int incy, 1875 cl_float C, 1876 cl_float S, 1877 cl_uint numCommandQueues, 1878 cl_command_queue *commandQueues, 1879 cl_uint numEventsInWaitList, 1880 const cl_event *eventWaitList, 1881 cl_event *events); 1882 1883 /** 1884 * @example example_srot.c 1885 * Example of how to use the @ref clblasSrot function. 1886 */ 1887 1888 /** 1889 * @brief Applies a plane rotation for double elements. 1890 * 1891 * @param[in] N Number of elements in vectors \b X and \b Y. 1892 * @param[out] X Buffer object storing vector \b X. 1893 * @param[in] offx Offset of first element of vector \b X in buffer object. 1894 * Counted in elements. 1895 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1896 * @param[out] Y Buffer object storing vector \b Y. 1897 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1898 * Counted in elements. 1899 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1900 * @param[in] C C specifies the cosine, cos. 1901 * @param[in] S S specifies the sine, sin. 1902 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1903 * task is to be performed. 1904 * @param[in] commandQueues OpenCL command queues. 1905 * @param[in] numEventsInWaitList Number of events in the event wait list. 1906 * @param[in] eventWaitList Event wait list. 1907 * @param[in] events Event objects per each command queue that identify 1908 * a particular kernel execution instance. 1909 * 1910 * @return 1911 * - \b clblasSuccess on success; 1912 * - \b clblasInvalidDevice if a target device does not support 1913 * floating point arithmetic with double precision; 1914 * - the same error codes as the clblasSrot() function otherwise.
1915 * 1916 * @ingroup ROT 1917 */ 1918 clblasStatus 1919 clblasDrot( 1920 size_t N, 1921 cl_mem X, 1922 size_t offx, 1923 int incx, 1924 cl_mem Y, 1925 size_t offy, 1926 int incy, 1927 cl_double C, 1928 cl_double S, 1929 cl_uint numCommandQueues, 1930 cl_command_queue *commandQueues, 1931 cl_uint numEventsInWaitList, 1932 const cl_event *eventWaitList, 1933 cl_event *events); 1934 1935 /** 1936 * @brief Applies a plane rotation for float-complex elements. 1937 * 1938 * @param[in] N Number of elements in vectors \b X and \b Y. 1939 * @param[out] X Buffer object storing vector \b X. 1940 * @param[in] offx Offset of first element of vector \b X in buffer object. 1941 * Counted in elements. 1942 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1943 * @param[out] Y Buffer object storing vector \b Y. 1944 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1945 * Counted in elements. 1946 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1947 * @param[in] C C specifies the cosine, cos. This number is real (passed as cl_float). 1948 * @param[in] S S specifies the sine, sin. This number is real (passed as cl_float). 1949 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1950 * task is to be performed. 1951 * @param[in] commandQueues OpenCL command queues. 1952 * @param[in] numEventsInWaitList Number of events in the event wait list. 1953 * @param[in] eventWaitList Event wait list. 1954 * @param[in] events Event objects per each command queue that identify 1955 * a particular kernel execution instance. 1956 * 1957 * @return 1958 * - \b clblasSuccess on success; 1959 * - the same error codes as the clblasSrot() function otherwise.
1960 * 1961 * @ingroup ROT 1962 */ 1963 clblasStatus 1964 clblasCsrot( 1965 size_t N, 1966 cl_mem X, 1967 size_t offx, 1968 int incx, 1969 cl_mem Y, 1970 size_t offy, 1971 int incy, 1972 cl_float C, 1973 cl_float S, 1974 cl_uint numCommandQueues, 1975 cl_command_queue *commandQueues, 1976 cl_uint numEventsInWaitList, 1977 const cl_event *eventWaitList, 1978 cl_event *events); 1979 1980 /** 1981 * @brief Applies a plane rotation for double-complex elements. 1982 * 1983 * @param[in] N Number of elements in vectors \b X and \b Y. 1984 * @param[out] X Buffer object storing vector \b X. 1985 * @param[in] offx Offset of first element of vector \b X in buffer object. 1986 * Counted in elements. 1987 * @param[in] incx Increment for the elements of \b X. Must not be zero. 1988 * @param[out] Y Buffer object storing vector \b Y. 1989 * @param[in] offy Offset of first element of vector \b Y in buffer object. 1990 * Counted in elements. 1991 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 1992 * @param[in] C C specifies the cosine, cos. This number is real (passed as cl_double). 1993 * @param[in] S S specifies the sine, sin. This number is real (passed as cl_double). 1994 * @param[in] numCommandQueues Number of OpenCL command queues in which the 1995 * task is to be performed. 1996 * @param[in] commandQueues OpenCL command queues. 1997 * @param[in] numEventsInWaitList Number of events in the event wait list. 1998 * @param[in] eventWaitList Event wait list. 1999 * @param[in] events Event objects per each command queue that identify 2000 * a particular kernel execution instance. 2001 * 2002 * @return 2003 * - \b clblasSuccess on success; 2004 * - \b clblasInvalidDevice if a target device does not support 2005 * floating point arithmetic with double precision; 2006 * - the same error codes as the clblasSrot() function otherwise.
2007 * 2008 * @ingroup ROT 2009 */ 2010 clblasStatus 2011 clblasZdrot( 2012 size_t N, 2013 cl_mem X, 2014 size_t offx, 2015 int incx, 2016 cl_mem Y, 2017 size_t offy, 2018 int incy, 2019 cl_double C, 2020 cl_double S, 2021 cl_uint numCommandQueues, 2022 cl_command_queue *commandQueues, 2023 cl_uint numEventsInWaitList, 2024 const cl_event *eventWaitList, 2025 cl_event *events); 2026 2027 /*@}*/ 2028 2029 /** 2030 * @defgroup ROTM ROTM - Apply modified Givens rotation for points in the plane 2031 * @ingroup BLAS1 2032 */ 2033 /*@{*/ 2034 2035 /** 2036 * @brief Applies the modified Givens rotation for float elements. 2037 * 2038 * @param[in] N Number of elements in vectors \b X and \b Y. 2039 * @param[out] X Buffer object storing vector \b X. 2040 * @param[in] offx Offset of first element of vector \b X in buffer object. 2041 * Counted in elements. 2042 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2043 * @param[out] Y Buffer object storing vector \b Y. 2044 * @param[in] offy Offset of first element of vector \b Y in buffer object. 2045 * Counted in elements. 2046 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 2047 * @param[in] SPARAM Buffer object that contains the SPARAM array of minimum length 5 2048 * (indexed from 0, as in the ROTMG documentation): SPARAM(0)=SFLAG, 2049 * SPARAM(1)=SH11, 2050 * SPARAM(2)=SH21, 2051 * SPARAM(3)=SH12, 2052 * SPARAM(4)=SH22. 2053 * @param[in] offSparam Offset of first element of array \b SPARAM in buffer object. 2054 * Counted in elements. 2055 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2056 * task is to be performed. 2057 * @param[in] commandQueues OpenCL command queues. 2058 * @param[in] numEventsInWaitList Number of events in the event wait list. 2059 * @param[in] eventWaitList Event wait list. 2060 * @param[in] events Event objects per each command queue that identify 2061 * a particular kernel execution instance.
2062 * 2063 * @return 2064 * - \b clblasSuccess on success; 2065 * - \b clblasNotInitialized if clblasSetup() was not called; 2066 * - \b clblasInvalidValue if invalid parameters are passed: 2067 * - \b N is zero, or 2068 * - either \b incx or \b incy is zero, or 2069 * - the vector sizes along with the increments lead to 2070 * accessing outside of any of the buffers; 2071 * - \b clblasInvalidMemObject if any of the \b X, \b Y or \b SPARAM objects is 2072 * invalid, or an image object rather than a buffer object; 2073 * - \b clblasOutOfHostMemory if the library can't allocate memory for 2074 * internal structures; 2075 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 2076 * - \b clblasInvalidContext if the context that a passed command queue belongs 2077 * to was released; 2078 * - \b clblasInvalidOperation if kernel compilation relating to a previous 2079 * call has not completed for any of the target devices; 2080 * - \b clblasCompilerNotAvailable if a compiler is not available; 2081 * - \b clblasBuildProgramFailure if there is a failure to build a program 2082 * executable. 2083 * 2084 * @ingroup ROTM 2085 */ 2086 clblasStatus 2087 clblasSrotm( 2088 size_t N, 2089 cl_mem X, 2090 size_t offx, 2091 int incx, 2092 cl_mem Y, 2093 size_t offy, 2094 int incy, 2095 const cl_mem SPARAM, 2096 size_t offSparam, 2097 cl_uint numCommandQueues, 2098 cl_command_queue *commandQueues, 2099 cl_uint numEventsInWaitList, 2100 const cl_event *eventWaitList, 2101 cl_event *events); 2102 2103 /** 2104 * @example example_srotm.c 2105 * Example of how to use the @ref clblasSrotm function. 2106 */ 2107 2108 /** 2109 * @brief Applies the modified Givens rotation for double elements. 2110 * 2111 * @param[in] N Number of elements in vectors \b X and \b Y. 2112 * @param[out] X Buffer object storing vector \b X. 2113 * @param[in] offx Offset of first element of vector \b X in buffer object. 2114 * Counted in elements. 2115 * @param[in] incx Increment for the elements of \b X. Must not be zero.
2116 * @param[out] Y Buffer object storing vector \b Y. 2117 * @param[in] offy Offset of first element of vector \b Y in buffer object. 2118 * Counted in elements. 2119 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 2120 * @param[in] DPARAM Buffer object that contains the DPARAM array of minimum length 5 2121 * (indexed from 0, as in the ROTMG documentation): DPARAM(0)=DFLAG, 2122 * DPARAM(1)=DH11, 2123 * DPARAM(2)=DH21, 2124 * DPARAM(3)=DH12, 2125 * DPARAM(4)=DH22. 2126 * @param[in] offDparam Offset of first element of array \b DPARAM in buffer object. 2127 * Counted in elements. 2128 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2129 * task is to be performed. 2130 * @param[in] commandQueues OpenCL command queues. 2131 * @param[in] numEventsInWaitList Number of events in the event wait list. 2132 * @param[in] eventWaitList Event wait list. 2133 * @param[in] events Event objects per each command queue that identify 2134 * a particular kernel execution instance. 2135 * 2136 * @return 2137 * - \b clblasSuccess on success; 2138 * - \b clblasInvalidDevice if a target device does not support 2139 * floating point arithmetic with double precision; 2140 * - the same error codes as the clblasSrotm() function otherwise. 2141 * 2142 * @ingroup ROTM 2143 */ 2144 clblasStatus 2145 clblasDrotm( 2146 size_t N, 2147 cl_mem X, 2148 size_t offx, 2149 int incx, 2150 cl_mem Y, 2151 size_t offy, 2152 int incy, 2153 const cl_mem DPARAM, 2154 size_t offDparam, 2155 cl_uint numCommandQueues, 2156 cl_command_queue *commandQueues, 2157 cl_uint numEventsInWaitList, 2158 const cl_event *eventWaitList, 2159 cl_event *events); 2160 2161 /*@}*/ 2162 2163 /** 2164 * @defgroup NRM2 NRM2 - Euclidean norm of a vector 2165 * @ingroup BLAS1 2166 */ 2167 /*@{*/ 2168 2169 /** 2170 * @brief computes the euclidean norm of vector containing float elements 2171 * 2172 * NRM2 = sqrt( X' * X ) 2173 * 2174 * @param[in] N Number of elements in vector \b X.
2175 * @param[out] NRM2 Buffer object that will contain the NRM2 value. 2176 * @param[in] offNRM2 Offset to NRM2 value in \b NRM2 buffer object. 2177 * Counted in elements. 2178 * @param[in] X Buffer object storing vector \b X. 2179 * @param[in] offx Offset of first element of vector \b X in buffer object. 2180 * Counted in elements. 2181 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2182 * @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold a minimum of (2*N) elements. 2183 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2184 * task is to be performed. 2185 * @param[in] commandQueues OpenCL command queues. 2186 * @param[in] numEventsInWaitList Number of events in the event wait list. 2187 * @param[in] eventWaitList Event wait list. 2188 * @param[in] events Event objects per each command queue that identify 2189 * a particular kernel execution instance. 2190 * 2191 * @return 2192 * - \b clblasSuccess on success; 2193 * - \b clblasNotInitialized if clblasSetup() was not called; 2194 * - \b clblasInvalidValue if invalid parameters are passed: 2195 * - \b N is zero, or 2196 * - \b incx is zero, or 2197 * - the vector sizes along with the increments lead to 2198 * accessing outside of any of the buffers; 2199 * - \b clblasInvalidMemObject if any of the \b X, \b NRM2 or \b scratchBuff objects is 2200 * invalid, or an image object rather than a buffer object; 2201 * - \b clblasOutOfHostMemory if the library can't allocate memory for 2202 * internal structures; 2203 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 2204 * - \b clblasInvalidContext if the context that a passed command queue belongs 2205 * to was released; 2206 * - \b clblasInvalidOperation if kernel compilation relating to a previous 2207 * call has not completed for any of the target devices; 2208 * - \b clblasCompilerNotAvailable if a compiler is not available; 2209 * - \b clblasBuildProgramFailure if there is a
failure to build a program 2210 * executable. 2211 * 2212 * @ingroup NRM2 2213 */ 2214 clblasStatus 2215 clblasSnrm2( 2216 size_t N, 2217 cl_mem NRM2, 2218 size_t offNRM2, 2219 const cl_mem X, 2220 size_t offx, 2221 int incx, 2222 cl_mem scratchBuff, 2223 cl_uint numCommandQueues, 2224 cl_command_queue *commandQueues, 2225 cl_uint numEventsInWaitList, 2226 const cl_event *eventWaitList, 2227 cl_event *events); 2228 2229 /** 2230 * @example example_snrm2.c 2231 * Example of how to use the @ref clblasSnrm2 function. 2232 */ 2233 2234 /** 2235 * @brief Computes the Euclidean norm of a vector containing double elements. 2236 * 2237 * NRM2 = sqrt( X' * X ) 2238 * 2239 * @param[in] N Number of elements in vector \b X. 2240 * @param[out] NRM2 Buffer object that will contain the NRM2 value. 2241 * @param[in] offNRM2 Offset to NRM2 value in \b NRM2 buffer object. 2242 * Counted in elements. 2243 * @param[in] X Buffer object storing vector \b X. 2244 * @param[in] offx Offset of first element of vector \b X in buffer object. 2245 * Counted in elements. 2246 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2247 * @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold a minimum of (2*N) elements. 2248 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2249 * task is to be performed. 2250 * @param[in] commandQueues OpenCL command queues. 2251 * @param[in] numEventsInWaitList Number of events in the event wait list. 2252 * @param[in] eventWaitList Event wait list. 2253 * @param[in] events Event objects per each command queue that identify 2254 * a particular kernel execution instance. 2255 * 2256 * @return 2257 * - \b clblasSuccess on success; 2258 * - \b clblasInvalidDevice if a target device does not support 2259 * floating point arithmetic with double precision; 2260 * - the same error codes as the clblasSnrm2() function otherwise.
2261 * 2262 * @ingroup NRM2 2263 */ 2264 clblasStatus 2265 clblasDnrm2( 2266 size_t N, 2267 cl_mem NRM2, 2268 size_t offNRM2, 2269 const cl_mem X, 2270 size_t offx, 2271 int incx, 2272 cl_mem scratchBuff, 2273 cl_uint numCommandQueues, 2274 cl_command_queue *commandQueues, 2275 cl_uint numEventsInWaitList, 2276 const cl_event *eventWaitList, 2277 cl_event *events); 2278 2279 /** 2280 * @brief computes the euclidean norm of vector containing float-complex elements 2281 * 2282 * NRM2 = sqrt( X**H * X ) 2283 * 2284 * @param[in] N Number of elements in vector \b X. 2285 * @param[out] NRM2 Buffer object that will contain the NRM2 value. 2286 * Note that the answer of Scnrm2 is a real value. 2287 * @param[in] offNRM2 Offset to NRM2 value in \b NRM2 buffer object. 2288 * Counted in elements. 2289 * @param[in] X Buffer object storing vector \b X. 2290 * @param[in] offx Offset of first element of vector \b X in buffer object. 2291 * Counted in elements. 2292 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2293 * @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold minimum of (2*N) elements 2294 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2295 * task is to be performed. 2296 * @param[in] commandQueues OpenCL command queues. 2297 * @param[in] numEventsInWaitList Number of events in the event wait list. 2298 * @param[in] eventWaitList Event wait list. 2299 * @param[in] events Event objects per each command queue that identify 2300 * a particular kernel execution instance. 2301 * 2302 * @return 2303 * - \b clblasSuccess on success; 2304 * - the same error codes as the clblasSnrm2() function otherwise.
2305 * 2306 * @ingroup NRM2 2307 */ 2308 clblasStatus 2309 clblasScnrm2( 2310 size_t N, 2311 cl_mem NRM2, 2312 size_t offNRM2, 2313 const cl_mem X, 2314 size_t offx, 2315 int incx, 2316 cl_mem scratchBuff, 2317 cl_uint numCommandQueues, 2318 cl_command_queue *commandQueues, 2319 cl_uint numEventsInWaitList, 2320 const cl_event *eventWaitList, 2321 cl_event *events); 2322 2323 /** 2324 * @brief computes the euclidean norm of vector containing double-complex elements 2325 * 2326 * NRM2 = sqrt( X**H * X ) 2327 * 2328 * @param[in] N Number of elements in vector \b X. 2329 * @param[out] NRM2 Buffer object that will contain the NRM2 value. 2330 * Note that the answer of Dznrm2 is a real value. 2331 * @param[in] offNRM2 Offset to NRM2 value in \b NRM2 buffer object. 2332 * Counted in elements. 2333 * @param[in] X Buffer object storing vector \b X. 2334 * @param[in] offx Offset of first element of vector \b X in buffer object. 2335 * Counted in elements. 2336 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2337 * @param[in] scratchBuff Temporary cl_mem scratch buffer object that can hold minimum of (2*N) elements 2338 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2339 * task is to be performed. 2340 * @param[in] commandQueues OpenCL command queues. 2341 * @param[in] numEventsInWaitList Number of events in the event wait list. 2342 * @param[in] eventWaitList Event wait list. 2343 * @param[in] events Event objects per each command queue that identify 2344 * a particular kernel execution instance. 2345 * 2346 * @return 2347 * - \b clblasSuccess on success; 2348 * - \b clblasInvalidDevice if a target device does not support the 2349 * floating point arithmetic with double precision; 2350 * - the same error codes as the clblasSnrm2() function otherwise. 2351 * executable. 
2352 * 2353 * @ingroup NRM2 2354 */ 2355 clblasStatus 2356 clblasDznrm2( 2357 size_t N, 2358 cl_mem NRM2, 2359 size_t offNRM2, 2360 const cl_mem X, 2361 size_t offx, 2362 int incx, 2363 cl_mem scratchBuff, 2364 cl_uint numCommandQueues, 2365 cl_command_queue *commandQueues, 2366 cl_uint numEventsInWaitList, 2367 const cl_event *eventWaitList, 2368 cl_event *events); 2369 2370 /*@}*/ 2371 2372 /** 2373 * @defgroup iAMAX iAMAX - Index of max absolute value 2374 * @ingroup BLAS1 2375 */ 2376 /*@{*/ 2377 2378 /** 2379 * @brief index of max absolute value in a float array 2380 * 2381 * @param[in] N Number of elements in vector \b X. 2382 * @param[out] iMax Buffer object storing the index of first absolute max. 2383 * The index will be of type unsigned int 2384 * @param[in] offiMax Offset for storing index in the buffer iMax 2385 * Counted in elements. 2386 * @param[in] X Buffer object storing vector \b X. 2387 * @param[in] offx Offset of first element of vector \b X in buffer object. 2388 * Counted in elements. 2389 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2390 * @param[in] scratchBuff Temprory cl_mem object to store intermediate results 2391 It should be able to hold minimum of (2*N) elements 2392 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2393 * task is to be performed. 2394 * @param[in] commandQueues OpenCL command queues. 2395 * @param[in] numEventsInWaitList Number of events in the event wait list. 2396 * @param[in] eventWaitList Event wait list. 2397 * @param[in] events Event objects per each command queue that identify 2398 * a particular kernel execution instance. 
2399 * 2400 * @return 2401 * - \b clblasSuccess on success; 2402 * - \b clblasNotInitialized if clblasSetup() was not called; 2403 * - \b clblasInvalidValue if invalid parameters are passed: 2404 * - \b N is zero, or 2405 * - either \b incx is zero, or 2406 * - the vector sizes along with the increments lead to 2407 * accessing outside of any of the buffers; 2408 * - \b clblasInvalidMemObject if any of \b iMax \b X or \b scratchBuff object is 2409 * Invalid, or an image object rather than the buffer one; 2410 * - \b clblasOutOfHostMemory if the library can't allocate memory for 2411 * internal structures; 2412 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 2413 * - \b clblasInvalidContext if the context, the passed command queue belongs 2414 * to was released; 2415 * - \b clblasInvalidOperation if kernel compilation relating to a previous 2416 * call has not completed for any of the target devices; 2417 * - \b clblasCompilerNotAvailable if a compiler is not available; 2418 * - \b clblasBuildProgramFailure if there is a failure to build a program 2419 * executable. 2420 * 2421 * @ingroup iAMAX 2422 */ 2423 clblasStatus 2424 clblasiSamax( 2425 size_t N, 2426 cl_mem iMax, 2427 size_t offiMax, 2428 const cl_mem X, 2429 size_t offx, 2430 int incx, 2431 cl_mem scratchBuff, 2432 cl_uint numCommandQueues, 2433 cl_command_queue *commandQueues, 2434 cl_uint numEventsInWaitList, 2435 const cl_event *eventWaitList, 2436 cl_event *events); 2437 /** 2438 * @example example_isamax.c 2439 * Example of how to use the @ref clblasiSamax function. 2440 */ 2441 2442 2443 /** 2444 * @brief index of max absolute value in a double array 2445 * 2446 * @param[in] N Number of elements in vector \b X. 2447 * @param[out] iMax Buffer object storing the index of first absolute max. 2448 * The index will be of type unsigned int 2449 * @param[in] offiMax Offset for storing index in the buffer iMax 2450 * Counted in elements. 
2451 * @param[in] X Buffer object storing vector \b X. 2452 * @param[in] offx Offset of first element of vector \b X in buffer object. 2453 * Counted in elements. 2454 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2455 * @param[in] scratchBuff Temprory cl_mem object to store intermediate results 2456 It should be able to hold minimum of (2*N) elements 2457 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2458 * task is to be performed. 2459 * @param[in] commandQueues OpenCL command queues. 2460 * @param[in] numEventsInWaitList Number of events in the event wait list. 2461 * @param[in] eventWaitList Event wait list. 2462 * @param[in] events Event objects per each command queue that identify 2463 * a particular kernel execution instance. 2464 * 2465 * @return 2466 * - \b clblasSuccess on success; 2467 * - \b clblasInvalidDevice if a target device does not support the 2468 * floating point arithmetic with double precision; 2469 * - the same error codes as the clblasiSamax() function otherwise. 2470 * 2471 * @ingroup iAMAX 2472 */ 2473 clblasStatus 2474 clblasiDamax( 2475 size_t N, 2476 cl_mem iMax, 2477 size_t offiMax, 2478 const cl_mem X, 2479 size_t offx, 2480 int incx, 2481 cl_mem scratchBuff, 2482 cl_uint numCommandQueues, 2483 cl_command_queue *commandQueues, 2484 cl_uint numEventsInWaitList, 2485 const cl_event *eventWaitList, 2486 cl_event *events); 2487 2488 /** 2489 * @brief index of max absolute value in a complex float array 2490 * 2491 * @param[in] N Number of elements in vector \b X. 2492 * @param[out] iMax Buffer object storing the index of first absolute max. 2493 * The index will be of type unsigned int 2494 * @param[in] offiMax Offset for storing index in the buffer iMax 2495 * Counted in elements. 2496 * @param[in] X Buffer object storing vector \b X. 2497 * @param[in] offx Offset of first element of vector \b X in buffer object. 2498 * Counted in elements. 
2499 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2500 * @param[in] scratchBuff Temprory cl_mem object to store intermediate results 2501 It should be able to hold minimum of (2*N) elements 2502 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2503 * task is to be performed. 2504 * @param[in] commandQueues OpenCL command queues. 2505 * @param[in] numEventsInWaitList Number of events in the event wait list. 2506 * @param[in] eventWaitList Event wait list. 2507 * @param[in] events Event objects per each command queue that identify 2508 * a particular kernel execution instance. 2509 * 2510 * @return 2511 * - \b clblasSuccess on success; 2512 * - the same error codes as the clblasiSamax() function otherwise. 2513 * 2514 * @ingroup iAMAX 2515 */ 2516 clblasStatus 2517 clblasiCamax( 2518 size_t N, 2519 cl_mem iMax, 2520 size_t offiMax, 2521 const cl_mem X, 2522 size_t offx, 2523 int incx, 2524 cl_mem scratchBuff, 2525 cl_uint numCommandQueues, 2526 cl_command_queue *commandQueues, 2527 cl_uint numEventsInWaitList, 2528 const cl_event *eventWaitList, 2529 cl_event *events); 2530 2531 /** 2532 * @brief index of max absolute value in a complex double array 2533 * 2534 * @param[in] N Number of elements in vector \b X. 2535 * @param[out] iMax Buffer object storing the index of first absolute max. 2536 * The index will be of type unsigned int 2537 * @param[in] offiMax Offset for storing index in the buffer iMax 2538 * Counted in elements. 2539 * @param[in] X Buffer object storing vector \b X. 2540 * @param[in] offx Offset of first element of vector \b X in buffer object. 2541 * Counted in elements. 2542 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2543 * @param[in] scratchBuff Temprory cl_mem object to store intermediate results 2544 It should be able to hold minimum of (2*N) elements 2545 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2546 * task is to be performed. 
2547 * @param[in] commandQueues OpenCL command queues. 2548 * @param[in] numEventsInWaitList Number of events in the event wait list. 2549 * @param[in] eventWaitList Event wait list. 2550 * @param[in] events Event objects per each command queue that identify 2551 * a particular kernel execution instance. 2552 * 2553 * @return 2554 * @return 2555 * - \b clblasSuccess on success; 2556 * - \b clblasInvalidDevice if a target device does not support the 2557 * floating point arithmetic with double precision; 2558 * - the same error codes as the clblasiSamax() function otherwise. 2559 * 2560 * @ingroup iAMAX 2561 */ 2562 clblasStatus 2563 clblasiZamax( 2564 size_t N, 2565 cl_mem iMax, 2566 size_t offiMax, 2567 const cl_mem X, 2568 size_t offx, 2569 int incx, 2570 cl_mem scratchBuff, 2571 cl_uint numCommandQueues, 2572 cl_command_queue *commandQueues, 2573 cl_uint numEventsInWaitList, 2574 const cl_event *eventWaitList, 2575 cl_event *events); 2576 2577 /*@}*/ 2578 2579 /** 2580 * @defgroup ASUM ASUM - Sum of absolute values 2581 * @ingroup BLAS1 2582 */ 2583 /*@{*/ 2584 2585 /** 2586 * @brief absolute sum of values of a vector containing float elements 2587 * 2588 * @param[in] N Number of elements in vector \b X. 2589 * @param[out] asum Buffer object that will contain the absoule sum value 2590 * @param[in] offAsum Offset to absolute sum in \b asum buffer object. 2591 * Counted in elements. 2592 * @param[in] X Buffer object storing vector \b X. 2593 * @param[in] offx Offset of first element of vector \b X in buffer object. 2594 * Counted in elements. 2595 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2596 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 2597 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2598 * task is to be performed. 2599 * @param[in] commandQueues OpenCL command queues. 2600 * @param[in] numEventsInWaitList Number of events in the event wait list. 
2601 * @param[in] eventWaitList Event wait list. 2602 * @param[in] events Event objects per each command queue that identify 2603 * a particular kernel execution instance. 2604 * 2605 * @return 2606 * - \b clblasSuccess on success; 2607 * - \b clblasNotInitialized if clblasSetup() was not called; 2608 * - \b clblasInvalidValue if invalid parameters are passed: 2609 * - \b N is zero, or 2610 * - \b incx is zero, or 2611 * - the vector sizes along with the increments lead to 2612 * accessing outside of any of the buffers; 2613 * - \b clblasInvalidMemObject if any of \b X or \b asum or \b scratchBuff object is 2614 * invalid, or an image object rather than the buffer one; 2615 * - \b clblasOutOfHostMemory if the library can't allocate memory for 2616 * internal structures; 2617 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 2618 * - \b clblasInvalidContext if a context a passed command queue belongs 2619 * to was released; 2620 * - \b clblasInvalidOperation if kernel compilation relating to a previous 2621 * call has not completed for any of the target devices; 2622 * - \b clblasCompilerNotAvailable if a compiler is not available; 2623 * - \b clblasBuildProgramFailure if there is a failure to build a program 2624 * executable. 2625 * 2626 * @ingroup ASUM 2627 */ 2628 2629 clblasStatus 2630 clblasSasum( 2631 size_t N, 2632 cl_mem asum, 2633 size_t offAsum, 2634 const cl_mem X, 2635 size_t offx, 2636 int incx, 2637 cl_mem scratchBuff, 2638 cl_uint numCommandQueues, 2639 cl_command_queue *commandQueues, 2640 cl_uint numEventsInWaitList, 2641 const cl_event *eventWaitList, 2642 cl_event *events); 2643 2644 /** 2645 * @example example_sasum.c 2646 * Example of how to use the @ref clblasSasum function. 2647 */ 2648 2649 /** 2650 * @brief Absolute sum of values of a vector containing double elements 2651 * 2652 * @param[in] N Number of elements in vector \b X.
2653 * @param[out] asum Buffer object that will contain the absolute sum value 2654 * @param[in] offAsum Offset to absolute sum in \b asum buffer object. 2655 * Counted in elements. 2656 * @param[in] X Buffer object storing vector \b X. 2657 * @param[in] offx Offset of first element of vector \b X in buffer object. 2658 * Counted in elements. 2659 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2660 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 2661 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2662 * task is to be performed. 2663 * @param[in] commandQueues OpenCL command queues. 2664 * @param[in] numEventsInWaitList Number of events in the event wait list. 2665 * @param[in] eventWaitList Event wait list. 2666 * @param[in] events Event objects per each command queue that identify 2667 * a particular kernel execution instance. 2668 * 2669 * @return 2670 * - \b clblasSuccess on success; 2671 * - \b clblasInvalidDevice if a target device does not support the 2672 * floating point arithmetic with double precision; 2673 * - the same error codes as the clblasSasum() function otherwise. 2674 * 2675 * @ingroup ASUM 2676 */ 2677 2678 clblasStatus 2679 clblasDasum( 2680 size_t N, 2681 cl_mem asum, 2682 size_t offAsum, 2683 const cl_mem X, 2684 size_t offx, 2685 int incx, 2686 cl_mem scratchBuff, 2687 cl_uint numCommandQueues, 2688 cl_command_queue *commandQueues, 2689 cl_uint numEventsInWaitList, 2690 const cl_event *eventWaitList, 2691 cl_event *events); 2692 2693 2694 /** 2695 * @brief Absolute sum of values of a vector containing float-complex elements 2696 * 2697 * @param[in] N Number of elements in vector \b X. 2698 * @param[out] asum Buffer object that will contain the absolute sum value 2699 * @param[in] offAsum Offset to absolute sum in \b asum buffer object. 2700 * Counted in elements. 2701 * @param[in] X Buffer object storing vector \b X.
2702 * @param[in] offx Offset of first element of vector \b X in buffer object. 2703 * Counted in elements. 2704 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2705 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 2706 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2707 * task is to be performed. 2708 * @param[in] commandQueues OpenCL command queues. 2709 * @param[in] numEventsInWaitList Number of events in the event wait list. 2710 * @param[in] eventWaitList Event wait list. 2711 * @param[in] events Event objects per each command queue that identify 2712 * a particular kernel execution instance. 2713 * 2714 * @return 2715 * - \b clblasSuccess on success; 2716 * - the same error codes as the clblasSasum() function otherwise. 2717 * 2718 * @ingroup ASUM 2719 */ 2720 2721 clblasStatus 2722 clblasScasum( 2723 size_t N, 2724 cl_mem asum, 2725 size_t offAsum, 2726 const cl_mem X, 2727 size_t offx, 2728 int incx, 2729 cl_mem scratchBuff, 2730 cl_uint numCommandQueues, 2731 cl_command_queue *commandQueues, 2732 cl_uint numEventsInWaitList, 2733 const cl_event *eventWaitList, 2734 cl_event *events); 2735 2736 2737 /** 2738 * @brief Absolute sum of values of a vector containing double-complex elements 2739 * 2740 * @param[in] N Number of elements in vector \b X. 2741 * @param[out] asum Buffer object that will contain the absolute sum value 2742 * @param[in] offAsum Offset to absolute sum in \b asum buffer object. 2743 * Counted in elements. 2744 * @param[in] X Buffer object storing vector \b X. 2745 * @param[in] offx Offset of first element of vector \b X in buffer object. 2746 * Counted in elements. 2747 * @param[in] incx Increment for the elements of \b X. Must not be zero. 2748 * @param[in] scratchBuff Temporary cl_mem scratch buffer object of minimum size N 2749 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2750 * task is to be performed.
2751 * @param[in] commandQueues OpenCL command queues. 2752 * @param[in] numEventsInWaitList Number of events in the event wait list. 2753 * @param[in] eventWaitList Event wait list. 2754 * @param[in] events Event objects per each command queue that identify 2755 * a particular kernel execution instance. 2756 * 2757 * @return 2758 * - \b clblasSuccess on success; 2759 * - \b clblasInvalidDevice if a target device does not support the 2760 * floating point arithmetic with double precision; 2761 * - the same error codes as the clblasSasum() function otherwise. 2762 * 2763 * @ingroup ASUM 2764 */ 2765 2766 clblasStatus 2767 clblasDzasum( 2768 size_t N, 2769 cl_mem asum, 2770 size_t offAsum, 2771 const cl_mem X, 2772 size_t offx, 2773 int incx, 2774 cl_mem scratchBuff, 2775 cl_uint numCommandQueues, 2776 cl_command_queue *commandQueues, 2777 cl_uint numEventsInWaitList, 2778 const cl_event *eventWaitList, 2779 cl_event *events); 2780 2781 /*@}*/ 2782 2783 /** 2784 * @defgroup BLAS2 BLAS-2 functions 2785 * 2786 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 2787 * matrix-vector operations. 2788 */ 2789 /*@{*/ 2790 /*@}*/ 2791 2792 2793 /** 2794 * @defgroup GEMV GEMV - General matrix-Vector multiplication 2795 * @ingroup BLAS2 2796 */ 2797 /*@{*/ 2798 2799 /** 2800 * @brief Matrix-vector product with a general rectangular matrix and 2801 * float elements. Extended version. 2802 * 2803 * Matrix-vector products: 2804 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 2805 * - \f$ y \leftarrow \alpha A^T x + \beta y \f$ 2806 * 2807 * @param[in] order Row/column order. 2808 * @param[in] transA How matrix \b A is to be transposed. 2809 * @param[in] M Number of rows in matrix \b A. 2810 * @param[in] N Number of columns in matrix \b A. 2811 * @param[in] alpha The factor of matrix \b A. 2812 * @param[in] A Buffer object storing matrix \b A. 2813 * @param[in] offA Offset of the first element of the matrix \b A in 2814 * the buffer object.
Counted in elements. 2815 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 2816 * than \b N when the \b order parameter is set to 2817 * \b clblasRowMajor,\n or less than \b M when the 2818 * parameter is set to \b clblasColumnMajor. 2819 * @param[in] x Buffer object storing vector \b x. 2820 * @param[in] offx Offset of first element of vector \b x in buffer object. 2821 * Counted in elements. 2822 * @param[in] incx Increment for the elements of \b x. It cannot be zero. 2823 * @param[in] beta The factor of the vector \b y. 2824 * @param[out] y Buffer object storing the vector \b y. 2825 * @param[in] offy Offset of first element of vector \b y in buffer object. 2826 * Counted in elements. 2827 * @param[in] incy Increment for the elements of \b y. It cannot be zero. 2828 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2829 * task is to be performed. 2830 * @param[in] commandQueues OpenCL command queues. 2831 * @param[in] numEventsInWaitList Number of events in the event wait list. 2832 * @param[in] eventWaitList Event wait list. 2833 * @param[in] events Event objects per each command queue that identify 2834 * a particular kernel execution instance. 2835 * 2836 * @return 2837 * - \b clblasSuccess on success; 2838 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 2839 * object; 2840 * - the same error codes as the clblasSgemv() function otherwise. 
2841 * 2842 * @ingroup GEMV 2843 */ 2844 clblasStatus 2845 clblasSgemv( 2846 clblasOrder order, 2847 clblasTranspose transA, 2848 size_t M, 2849 size_t N, 2850 cl_float alpha, 2851 const cl_mem A, 2852 size_t offA, 2853 size_t lda, 2854 const cl_mem x, 2855 size_t offx, 2856 int incx, 2857 cl_float beta, 2858 cl_mem y, 2859 size_t offy, 2860 int incy, 2861 cl_uint numCommandQueues, 2862 cl_command_queue *commandQueues, 2863 cl_uint numEventsInWaitList, 2864 const cl_event *eventWaitList, 2865 cl_event *events); 2866 2867 /** 2868 * @example example_sgemv.c 2869 * This is an example of how to use the @ref clblasSgemv function. 2870 */ 2871 2872 /** 2873 * @brief Matrix-vector product with a general rectangular matrix and 2874 * double elements. Extended version. 2875 * 2876 * Matrix-vector products: 2877 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 2878 * - \f$ y \leftarrow \alpha A^T x + \beta y \f$ 2879 * 2880 * @param[in] order Row/column order. 2881 * @param[in] transA How matrix \b A is to be transposed. 2882 * @param[in] M Number of rows in matrix \b A. 2883 * @param[in] N Number of columns in matrix \b A. 2884 * @param[in] alpha The factor of matrix \b A. 2885 * @param[in] A Buffer object storing matrix \b A. 2886 * @param[in] offA Offset of the first element of \b A in the buffer 2887 * object. Counted in elements. 2888 * @param[in] lda Leading dimension of matrix \b A. For a detailed description, 2889 * see clblasSgemv(). 2890 * @param[in] x Buffer object storing vector \b x. 2891 * @param[in] offx Offset of first element of vector \b x in buffer object. 2892 * Counted in elements. 2893 * @param[in] incx Increment for the elements of \b x. It cannot be zero. 2894 * @param[in] beta The factor of the vector \b y. 2895 * @param[out] y Buffer object storing the vector \b y. 2896 * @param[in] offy Offset of first element of vector \b y in buffer object. 2897 * Counted in elements. 2898 * @param[in] incy Increment for the elements of \b y. It cannot be zero.
2899 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2900 * task is to be performed. 2901 * @param[in] commandQueues OpenCL command queues. 2902 * @param[in] numEventsInWaitList Number of events in the event wait list. 2903 * @param[in] eventWaitList Event wait list. 2904 * @param[in] events Event objects per each command queue that identify 2905 * a particular kernel execution instance. 2906 * 2907 * @return 2908 * - \b clblasSuccess on success; 2909 * - \b clblasInvalidDevice if a target device does not support the 2910 * floating point arithmetic with double precision; 2911 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 2912 * object; 2913 * - the same error codes as the clblasSgemv() function otherwise. 2914 * 2915 * @ingroup GEMV 2916 */ 2917 clblasStatus 2918 clblasDgemv( 2919 clblasOrder order, 2920 clblasTranspose transA, 2921 size_t M, 2922 size_t N, 2923 cl_double alpha, 2924 const cl_mem A, 2925 size_t offA, 2926 size_t lda, 2927 const cl_mem x, 2928 size_t offx, 2929 int incx, 2930 cl_double beta, 2931 cl_mem y, 2932 size_t offy, 2933 int incy, 2934 cl_uint numCommandQueues, 2935 cl_command_queue *commandQueues, 2936 cl_uint numEventsInWaitList, 2937 const cl_event *eventWaitList, 2938 cl_event *events); 2939 2940 /** 2941 * @brief Matrix-vector product with a general rectangular matrix and 2942 * float complex elements. Extended version. 2943 * 2944 * Matrix-vector products: 2945 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 2946 * - \f$ y \leftarrow \alpha A^T x + \beta y \f$ 2947 * 2948 * @param[in] order Row/column order. 2949 * @param[in] transA How matrix \b A is to be transposed. 2950 * @param[in] M Number of rows in matrix \b A. 2951 * @param[in] N Number of columns in matrix \b A. 2952 * @param[in] alpha The factor of matrix \b A. 2953 * @param[in] A Buffer object storing matrix \b A. 2954 * @param[in] offA Offset of the first element of the matrix \b A in 2955 * the buffer object.
Counted in elements. 2956 * @param[in] lda Leading dimension of matrix \b A. For a detailed description, 2957 * see clblasSgemv(). 2958 * @param[in] x Buffer object storing vector \b x. 2959 * @param[in] offx Offset of first element of vector \b x in buffer object. 2960 * Counted in elements. 2961 * @param[in] incx Increment for the elements of \b x. It cannot be zero. 2962 * @param[in] beta The factor of the vector \b y. 2963 * @param[out] y Buffer object storing the vector \b y. 2964 * @param[in] offy Offset of first element of vector \b y in buffer object. 2965 * Counted in elements. 2966 * @param[in] incy Increment for the elements of \b y. It cannot be zero. 2967 * @param[in] numCommandQueues Number of OpenCL command queues in which the 2968 * task is to be performed. 2969 * @param[in] commandQueues OpenCL command queues. 2970 * @param[in] numEventsInWaitList Number of events in the event wait list. 2971 * @param[in] eventWaitList Event wait list. 2972 * @param[in] events Event objects per each command queue that identify 2973 * a particular kernel execution instance. 2974 * 2975 * @return 2976 * - \b clblasSuccess on success; 2977 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 2978 * object; 2979 * - the same error codes as the clblasSgemv() function otherwise. 2980 * 2981 * @ingroup GEMV 2982 */ 2983 clblasStatus 2984 clblasCgemv( 2985 clblasOrder order, 2986 clblasTranspose transA, 2987 size_t M, 2988 size_t N, 2989 FloatComplex alpha, 2990 const cl_mem A, 2991 size_t offA, 2992 size_t lda, 2993 const cl_mem x, 2994 size_t offx, 2995 int incx, 2996 FloatComplex beta, 2997 cl_mem y, 2998 size_t offy, 2999 int incy, 3000 cl_uint numCommandQueues, 3001 cl_command_queue *commandQueues, 3002 cl_uint numEventsInWaitList, 3003 const cl_event *eventWaitList, 3004 cl_event *events); 3005 3006 /** 3007 * @brief Matrix-vector product with a general rectangular matrix and 3008 * double complex elements. Extended version.
3009 * 3010 * Matrix-vector products: 3011 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 3012 * - \f$ y \leftarrow \alpha A^T x + \beta y \f$ 3013 * 3014 * @param[in] order Row/column order. 3015 * @param[in] transA How matrix \b A is to be transposed. 3016 * @param[in] M Number of rows in matrix \b A. 3017 * @param[in] N Number of columns in matrix \b A. 3018 * @param[in] alpha The factor of matrix \b A. 3019 * @param[in] A Buffer object storing matrix \b A. 3020 * @param[in] offA Offset of the first element of the matrix \b A in 3021 * the buffer object. Counted in elements. 3022 * @param[in] lda Leading dimension of matrix \b A. For a detailed description, 3023 * see clblasSgemv(). 3024 * @param[in] x Buffer object storing vector \b x. 3025 * @param[in] offx Offset of first element of vector \b x in buffer object. 3026 * Counted in elements. 3027 * @param[in] incx Increment for the elements of \b x. It cannot be zero. 3028 * @param[in] beta The factor of the vector \b y. 3029 * @param[out] y Buffer object storing the vector \b y. 3030 * @param[in] offy Offset of first element of vector \b y in buffer object. 3031 * Counted in elements. 3032 * @param[in] incy Increment for the elements of \b y. It cannot be zero. 3033 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3034 * task is to be performed. 3035 * @param[in] commandQueues OpenCL command queues. 3036 * @param[in] numEventsInWaitList Number of events in the event wait list. 3037 * @param[in] eventWaitList Event wait list. 3038 * @param[in] events Event objects per each command queue that identify 3039 * a particular kernel execution instance. 
3040 * 3041 * @return 3042 * - \b clblasSuccess on success; 3043 * - \b clblasInvalidDevice if a target device does not support the 3044 * floating point arithmetic with double precision; 3045 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 3046 * object; 3047 * - the same error codes as the clblasSgemv() function otherwise. 3048 * 3049 * @ingroup GEMV 3050 */ 3051 clblasStatus 3052 clblasZgemv( 3053 clblasOrder order, 3054 clblasTranspose transA, 3055 size_t M, 3056 size_t N, 3057 DoubleComplex alpha, 3058 const cl_mem A, 3059 size_t offA, 3060 size_t lda, 3061 const cl_mem x, 3062 size_t offx, 3063 int incx, 3064 DoubleComplex beta, 3065 cl_mem y, 3066 size_t offy, 3067 int incy, 3068 cl_uint numCommandQueues, 3069 cl_command_queue *commandQueues, 3070 cl_uint numEventsInWaitList, 3071 const cl_event *eventWaitList, 3072 cl_event *events); 3073 3074 /*@}*/ 3075 3076 /** 3077 * @defgroup SYMV SYMV - Symmetric matrix-Vector multiplication 3078 * @ingroup BLAS2 3079 */ 3080 3081 /*@{*/ 3082 3083 /** 3084 * @brief Matrix-vector product with a symmetric matrix and float elements. 3085 * 3086 * 3087 * Matrix-vector products: 3088 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 3089 * 3090 * @param[in] order Row/column order. 3091 * @param[in] uplo The triangle in matrix being referenced. 3092 * @param[in] N Number of rows and columns in matrix \b A. 3093 * @param[in] alpha The factor of matrix \b A. 3094 * @param[in] A Buffer object storing matrix \b A. 3095 * @param[in] offA Offset of the first element of the matrix \b A in 3096 * the buffer object. Counted in elements. 3097 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3098 * than \b N. 3099 * @param[in] x Buffer object storing vector \b x. 3100 * @param[in] offx Offset of first element of vector \b x in buffer object. 3101 * Counted in elements. 3102 * @param[in] incx Increment for the elements of vector \b x. It cannot be zero.
3103 * @param[in] beta The factor of vector \b y. 3104 * @param[out] y Buffer object storing vector \b y. 3105 * @param[in] offy Offset of first element of vector \b y in buffer object. 3106 * Counted in elements. 3107 * @param[in] incy Increment for the elements of vector \b y. It cannot be zero. 3108 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3109 * task is to be performed. 3110 * @param[in] commandQueues OpenCL command queues. 3111 * @param[in] numEventsInWaitList Number of events in the event wait list. 3112 * @param[in] eventWaitList Event wait list. 3113 * @param[in] events Event objects per each command queue that identify 3114 * a particular kernel execution instance. 3115 * 3116 * @return 3117 * - \b clblasSuccess on success; 3118 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 3119 * object; 3120 * - the same error codes as the clblasSgemv() function otherwise. 3121 * 3122 * @ingroup SYMV 3123 */ 3124 clblasStatus 3125 clblasSsymv( 3126 clblasOrder order, 3127 clblasUplo uplo, 3128 size_t N, 3129 cl_float alpha, 3130 const cl_mem A, 3131 size_t offA, 3132 size_t lda, 3133 const cl_mem x, 3134 size_t offx, 3135 int incx, 3136 cl_float beta, 3137 cl_mem y, 3138 size_t offy, 3139 int incy, 3140 cl_uint numCommandQueues, 3141 cl_command_queue *commandQueues, 3142 cl_uint numEventsInWaitList, 3143 const cl_event *eventWaitList, 3144 cl_event *events); 3145 3146 /** 3147 * @example example_ssymv.c 3148 * This is an example of how to use the @ref clblasSsymv function. 3149 */ 3150 3151 /** 3152 * @brief Matrix-vector product with a symmetric matrix and double elements. 3153 * 3154 * 3155 * Matrix-vector products: 3156 * - \f$ y \leftarrow \alpha A x + \beta y \f$ 3157 * 3158 * @param[in] order Row/column order. 3159 * @param[in] uplo The triangle in matrix being referenced. 3160 * @param[in] N Number of rows and columns in matrix \b A. 3161 * @param[in] alpha The factor of matrix \b A.
3162 * @param[in] A Buffer object storing matrix \b A. 3163 * @param[in] offA Offset of the first element of the matrix \b A in 3164 * the buffer object. Counted in elements. 3165 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3166 * than \b N. 3167 * @param[in] x Buffer object storing vector \b x. 3168 * @param[in] offx Offset of first element of vector \b x in buffer object. 3169 * Counted in elements. 3170 * @param[in] incx Increment for the elements of vector \b x. It cannot be zero. 3171 * @param[in] beta The factor of vector \b y. 3172 * @param[out] y Buffer object storing vector \b y. 3173 * @param[in] offy Offset of first element of vector \b y in buffer object. 3174 * Counted in elements. 3175 * @param[in] incy Increment for the elements of vector \b y. It cannot be zero. 3176 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3177 * task is to be performed. 3178 * @param[in] commandQueues OpenCL command queues. 3179 * @param[in] numEventsInWaitList Number of events in the event wait list. 3180 * @param[in] eventWaitList Event wait list. 3181 * @param[in] events Event objects per each command queue that identify 3182 * a particular kernel execution instance. 3183 * 3184 * @return 3185 * - \b clblasSuccess on success; 3186 * - \b clblasInvalidDevice if a target device does not support floating 3187 * point arithmetic with double precision; 3188 * - \b clblasInvalidValue if \b offA exceeds the size of \b A buffer 3189 * object; 3190 * - the same error codes as the clblasSsymv() function otherwise.
3191 * 3192 * @ingroup SYMV 3193 */ 3194 clblasStatus 3195 clblasDsymv( 3196 clblasOrder order, 3197 clblasUplo uplo, 3198 size_t N, 3199 cl_double alpha, 3200 const cl_mem A, 3201 size_t offA, 3202 size_t lda, 3203 const cl_mem x, 3204 size_t offx, 3205 int incx, 3206 cl_double beta, 3207 cl_mem y, 3208 size_t offy, 3209 int incy, 3210 cl_uint numCommandQueues, 3211 cl_command_queue *commandQueues, 3212 cl_uint numEventsInWaitList, 3213 const cl_event *eventWaitList, 3214 cl_event *events); 3215 3216 /*@}*/ 3217 3218 3219 /** 3220 * @defgroup HEMV HEMV - Hermitian matrix-vector multiplication 3221 * @ingroup BLAS2 3222 */ 3223 /*@{*/ 3224 3225 /** 3226 * @brief Matrix-vector product with a Hermitian matrix and float-complex elements. 3227 * 3228 * Matrix-vector products: 3229 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 3230 * 3231 * @param[in] order Row/column order. 3232 * @param[in] uplo The triangle in matrix being referenced. 3233 * @param[in] N Number of rows and columns in matrix \b A. 3234 * @param[in] alpha The factor of matrix \b A. 3235 * @param[in] A Buffer object storing matrix \b A. 3236 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3237 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3238 * than \b N. 3239 * @param[in] X Buffer object storing vector \b X. 3240 * @param[in] offx Offset of first element of vector \b X in buffer object. 3241 * Counted in elements. 3242 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 3243 * @param[in] beta The factor of vector \b Y. 3244 * @param[out] Y Buffer object storing vector \b Y. 3245 * @param[in] offy Offset of first element of vector \b Y in buffer object. 3246 * Counted in elements. 3247 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 3248 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3249 * task is to be performed.
3250 * @param[in] commandQueues OpenCL command queues. 3251 * @param[in] numEventsInWaitList Number of events in the event wait list. 3252 * @param[in] eventWaitList Event wait list. 3253 * @param[in] events Event objects per each command queue that identify 3254 * a particular kernel execution instance. 3255 * 3256 * @return 3257 * - \b clblasSuccess on success; 3258 * - \b clblasNotInitialized if clblasSetup() was not called; 3259 * - \b clblasInvalidValue if invalid parameters are passed: 3260 * - \b N is zero, or 3261 * - either \b incx or \b incy is zero, or 3262 * - any of the leading dimensions is invalid, or 3263 * - the matrix sizes or the vector sizes along with the increments lead to 3264 * accessing outside of any of the buffers; 3265 * - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is 3266 * invalid, or an image object rather than the buffer one; 3267 * - \b clblasOutOfHostMemory if the library can't allocate memory for 3268 * internal structures; 3269 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 3270 * - \b clblasInvalidContext if a context a passed command queue belongs to 3271 * was released; 3272 * - \b clblasInvalidOperation if kernel compilation relating to a previous 3273 * call has not completed for any of the target devices; 3274 * - \b clblasCompilerNotAvailable if a compiler is not available; 3275 * - \b clblasBuildProgramFailure if there is a failure to build a program 3276 * executable.
3277 * 3278 * @ingroup HEMV 3279 */ 3280 clblasStatus 3281 clblasChemv( 3282 clblasOrder order, 3283 clblasUplo uplo, 3284 size_t N, 3285 FloatComplex alpha, 3286 const cl_mem A, 3287 size_t offa, 3288 size_t lda, 3289 const cl_mem X, 3290 size_t offx, 3291 int incx, 3292 FloatComplex beta, 3293 cl_mem Y, 3294 size_t offy, 3295 int incy, 3296 cl_uint numCommandQueues, 3297 cl_command_queue *commandQueues, 3298 cl_uint numEventsInWaitList, 3299 const cl_event *eventWaitList, 3300 cl_event *events); 3301 3302 /** 3303 * @brief Matrix-vector product with a Hermitian matrix and double-complex elements. 3304 * 3305 * Matrix-vector products: 3306 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 3307 * 3308 * @param[in] order Row/column order. 3309 * @param[in] uplo The triangle in matrix being referenced. 3310 * @param[in] N Number of rows and columns in matrix \b A. 3311 * @param[in] alpha The factor of matrix \b A. 3312 * @param[in] A Buffer object storing matrix \b A. 3313 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3314 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3315 * than \b N. 3316 * @param[in] X Buffer object storing vector \b X. 3317 * @param[in] offx Offset of first element of vector \b X in buffer object. 3318 * Counted in elements. 3319 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 3320 * @param[in] beta The factor of vector \b Y. 3321 * @param[out] Y Buffer object storing vector \b Y. 3322 * @param[in] offy Offset of first element of vector \b Y in buffer object. 3323 * Counted in elements. 3324 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 3325 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3326 * task is to be performed. 3327 * @param[in] commandQueues OpenCL command queues. 3328 * @param[in] numEventsInWaitList Number of events in the event wait list. 3329 * @param[in] eventWaitList Event wait list.
3330 * @param[in] events Event objects per each command queue that identify 3331 * a particular kernel execution instance. 3332 * 3333 * @return 3334 * - \b clblasSuccess on success; 3335 * - \b clblasInvalidDevice if a target device does not support floating 3336 * point arithmetic with double precision; 3337 * - the same error codes as the clblasChemv() function otherwise. 3338 * 3339 * @ingroup HEMV 3340 */ 3341 clblasStatus 3342 clblasZhemv( 3343 clblasOrder order, 3344 clblasUplo uplo, 3345 size_t N, 3346 DoubleComplex alpha, 3347 const cl_mem A, 3348 size_t offa, 3349 size_t lda, 3350 const cl_mem X, 3351 size_t offx, 3352 int incx, 3353 DoubleComplex beta, 3354 cl_mem Y, 3355 size_t offy, 3356 int incy, 3357 cl_uint numCommandQueues, 3358 cl_command_queue *commandQueues, 3359 cl_uint numEventsInWaitList, 3360 const cl_event *eventWaitList, 3361 cl_event *events); 3362 /** 3363 * @example example_zhemv.cpp 3364 * Example of how to use the @ref clblasZhemv function. 3365 */ 3366 /*@}*/ 3367 3368 3369 3370 /** 3371 * @defgroup TRMV TRMV - Triangular matrix vector multiply 3372 * @ingroup BLAS2 3373 */ 3374 /*@{*/ 3375 3376 /** 3377 * @brief Matrix-vector product with a triangular matrix and 3378 * float elements. 3379 * 3380 * Matrix-vector products: 3381 * - \f$ X \leftarrow A X \f$ 3382 * - \f$ X \leftarrow A^T X \f$ 3383 * 3384 * @param[in] order Row/column order. 3385 * @param[in] uplo The triangle in matrix being referenced. 3386 * @param[in] trans How matrix \b A is to be transposed. 3387 * @param[in] diag Specify whether matrix \b A is unit triangular. 3388 * @param[in] N Number of rows/columns in matrix \b A. 3389 * @param[in] A Buffer object storing matrix \b A. 3390 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3391 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3392 * than \b N. 3393 * @param[in,out] X Buffer object storing vector \b X.
3394 * @param[in] offx Offset in number of elements for first element in vector \b X. 3395 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3396 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 3397 * minimum of (1 + (N-1)*abs(incx)) elements. 3398 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3399 * task is to be performed. 3400 * @param[in] commandQueues OpenCL command queues. 3401 * @param[in] numEventsInWaitList Number of events in the event wait list. 3402 * @param[in] eventWaitList Event wait list. 3403 * @param[in] events Event objects per each command queue that identify 3404 * a particular kernel execution instance. 3405 * 3406 * @return 3407 * - \b clblasSuccess on success; 3408 * - \b clblasNotInitialized if clblasSetup() was not called; 3409 * - \b clblasInvalidValue if invalid parameters are passed: 3410 * - either \b N or \b incx is zero, or 3411 * - the leading dimension is invalid; 3412 * - \b clblasInvalidMemObject if either \b A or \b X object is 3413 * invalid, or an image object rather than the buffer one; 3414 * - \b clblasOutOfHostMemory if the library can't allocate memory for 3415 * internal structures; 3416 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 3417 * - \b clblasInvalidContext if a context a passed command queue belongs 3418 * to was released; 3419 * - \b clblasInvalidOperation if kernel compilation relating to a previous 3420 * call has not completed for any of the target devices; 3421 * - \b clblasCompilerNotAvailable if a compiler is not available; 3422 * - \b clblasBuildProgramFailure if there is a failure to build a program 3423 * executable.
3424 * 3425 * @ingroup TRMV 3426 */ 3427 clblasStatus 3428 clblasStrmv( 3429 clblasOrder order, 3430 clblasUplo uplo, 3431 clblasTranspose trans, 3432 clblasDiag diag, 3433 size_t N, 3434 const cl_mem A, 3435 size_t offa, 3436 size_t lda, 3437 cl_mem X, 3438 size_t offx, 3439 int incx, 3440 cl_mem scratchBuff, 3441 cl_uint numCommandQueues, 3442 cl_command_queue *commandQueues, 3443 cl_uint numEventsInWaitList, 3444 const cl_event *eventWaitList, 3445 cl_event *events); 3446 3447 /** 3448 * @example example_strmv.c 3449 * Example of how to use the @ref clblasStrmv function. 3450 */ 3451 3452 /** 3453 * @brief Matrix-vector product with a triangular matrix and 3454 * double elements. 3455 * 3456 * Matrix-vector products: 3457 * - \f$ X \leftarrow A X \f$ 3458 * - \f$ X \leftarrow A^T X \f$ 3459 * 3460 * @param[in] order Row/column order. 3461 * @param[in] uplo The triangle in matrix being referenced. 3462 * @param[in] trans How matrix \b A is to be transposed. 3463 * @param[in] diag Specify whether matrix \b A is unit triangular. 3464 * @param[in] N Number of rows/columns in matrix \b A. 3465 * @param[in] A Buffer object storing matrix \b A. 3466 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3467 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3468 * than \b N. 3469 * @param[in,out] X Buffer object storing vector \b X. 3470 * @param[in] offx Offset in number of elements for first element in vector \b X. 3471 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3472 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 3473 * minimum of (1 + (N-1)*abs(incx)) elements. 3474 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3475 * task is to be performed. 3476 * @param[in] commandQueues OpenCL command queues. 3477 * @param[in] numEventsInWaitList Number of events in the event wait list. 3478 * @param[in] eventWaitList Event wait list.
3479 * @param[in] events Event objects per each command queue that identify 3480 * a particular kernel execution instance. 3481 * 3482 * @return 3483 * - \b clblasSuccess on success; 3484 * - \b clblasInvalidDevice if a target device does not support floating 3485 * point arithmetic with double precision; 3486 * - the same error codes as the clblasStrmv() function otherwise. 3487 * 3488 * @ingroup TRMV 3489 */ 3490 clblasStatus 3491 clblasDtrmv( 3492 clblasOrder order, 3493 clblasUplo uplo, 3494 clblasTranspose trans, 3495 clblasDiag diag, 3496 size_t N, 3497 const cl_mem A, 3498 size_t offa, 3499 size_t lda, 3500 cl_mem X, 3501 size_t offx, 3502 int incx, 3503 cl_mem scratchBuff, 3504 cl_uint numCommandQueues, 3505 cl_command_queue *commandQueues, 3506 cl_uint numEventsInWaitList, 3507 const cl_event *eventWaitList, 3508 cl_event *events); 3509 3510 /** 3511 * @brief Matrix-vector product with a triangular matrix and 3512 * float complex elements. 3513 * 3514 * Matrix-vector products: 3515 * - \f$ X \leftarrow A X \f$ 3516 * - \f$ X \leftarrow A^T X \f$ 3517 * 3518 * @param[in] order Row/column order. 3519 * @param[in] uplo The triangle in matrix being referenced. 3520 * @param[in] trans How matrix \b A is to be transposed. 3521 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3522 * @param[in] N Number of rows/columns in matrix \b A. 3523 * @param[in] A Buffer object storing matrix \b A. 3524 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3525 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3526 * than \b N. 3527 * @param[in,out] X Buffer object storing vector \b X. On exit, it is overwritten with the product. 3528 * @param[in] offx Offset in number of elements for first element in vector \b X. 3529 * @param[in] incx Increment for the elements of \b X. Must not be zero.
3530 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 3531 * minimum of (1 + (N-1)*abs(incx)) elements 3532 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3533 * task is to be performed. 3534 * @param[in] commandQueues OpenCL command queues. 3535 * @param[in] numEventsInWaitList Number of events in the event wait list. 3536 * @param[in] eventWaitList Event wait list. 3537 * @param[in] events Event objects per each command queue that identify 3538 * a particular kernel execution instance. 3539 * 3540 * @return The same result as the clblasStrmv() function. 3541 * @ingroup TRMV 3542 */ 3543 clblasStatus 3544 clblasCtrmv( 3545 clblasOrder order, 3546 clblasUplo uplo, 3547 clblasTranspose trans, 3548 clblasDiag diag, 3549 size_t N, 3550 const cl_mem A, 3551 size_t offa, 3552 size_t lda, 3553 cl_mem X, 3554 size_t offx, 3555 int incx, 3556 cl_mem scratchBuff, 3557 cl_uint numCommandQueues, 3558 cl_command_queue *commandQueues, 3559 cl_uint numEventsInWaitList, 3560 const cl_event *eventWaitList, 3561 cl_event *events); 3562 3563 /** 3564 * @brief Matrix-vector product with a triangular matrix and 3565 * double complex elements. 3566 * 3567 * Matrix-vector products: 3568 * - \f$ X \leftarrow A X \f$ 3569 * - \f$ X \leftarrow A^T X \f$ 3570 * 3571 * @param[in] order Row/column order. 3572 * @param[in] uplo The triangle in matrix being referenced. 3573 * @param[in] trans How matrix \b A is to be transposed. 3574 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3575 * @param[in] N Number of rows/columns in matrix \b A. 3576 * @param[in] A Buffer object storing matrix \b A. 3577 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3578 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3579 * than \b N. 3580 * @param[in,out] X Buffer object storing vector \b X. On exit, it is overwritten with the product. 3581 * @param[in] offx Offset in number of elements for first element in vector \b X.
3582 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3583 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 3584 * minimum of (1 + (N-1)*abs(incx)) elements 3585 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3586 * task is to be performed. 3587 * @param[in] commandQueues OpenCL command queues. 3588 * @param[in] numEventsInWaitList Number of events in the event wait list. 3589 * @param[in] eventWaitList Event wait list. 3590 * @param[in] events Event objects per each command queue that identify 3591 * a particular kernel execution instance. 3592 * 3593 * @return The same result as the clblasDtrmv() function. 3594 * @ingroup TRMV 3595 */ 3596 clblasStatus 3597 clblasZtrmv( 3598 clblasOrder order, 3599 clblasUplo uplo, 3600 clblasTranspose trans, 3601 clblasDiag diag, 3602 size_t N, 3603 const cl_mem A, 3604 size_t offa, 3605 size_t lda, 3606 cl_mem X, 3607 size_t offx, 3608 int incx, 3609 cl_mem scratchBuff, 3610 cl_uint numCommandQueues, 3611 cl_command_queue *commandQueues, 3612 cl_uint numEventsInWaitList, 3613 const cl_event *eventWaitList, 3614 cl_event *events); 3615 3616 3617 /*@}*/ 3618 3619 /** 3620 * @defgroup TRSV TRSV - Triangular matrix vector Solve 3621 * @ingroup BLAS2 3622 */ 3623 /*@{*/ 3624 3625 /** 3626 * @brief Solves triangular matrix problems with float elements. 3627 * 3628 * Triangular systems solved: 3629 * - \f$ A X \leftarrow X \f$ 3630 * - \f$ A^T X \leftarrow X \f$ 3631 * 3632 * @param[in] order Row/column order. 3633 * @param[in] uplo The triangle in matrix being referenced. 3634 * @param[in] trans How matrix \b A is to be transposed. 3635 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3636 * @param[in] N Number of rows/columns in matrix \b A. 3637 * @param[in] A Buffer object storing matrix \b A. 3638 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3639 * @param[in] lda Leading dimension of matrix \b A.
It cannot be less 3640 * than \b N. 3641 * @param[out] X Buffer object storing vector \b X. 3642 * @param[in] offx Offset in number of elements for first element in vector \b X. 3643 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3644 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3645 * task is to be performed. 3646 * @param[in] commandQueues OpenCL command queues. 3647 * @param[in] numEventsInWaitList Number of events in the event wait list. 3648 * @param[in] eventWaitList Event wait list. 3649 * @param[in] events Event objects per each command queue that identify 3650 * a particular kernel execution instance. 3651 * 3652 * @return 3653 * - \b clblasSuccess on success; 3654 * - \b clblasNotInitialized if clblasSetup() was not called; 3655 * - \b clblasInvalidValue if invalid parameters are passed: 3656 * - either \b N or \b incx is zero, or 3657 * - the leading dimension is invalid; 3658 * - \b clblasInvalidMemObject if either the \b A or \b X object is 3659 * invalid, or is an image object rather than a buffer object; 3660 * - \b clblasOutOfHostMemory if the library can't allocate memory for 3661 * internal structures; 3662 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 3663 * - \b clblasInvalidContext if the context that a passed command queue belongs 3664 * to was released; 3665 * - \b clblasInvalidOperation if kernel compilation relating to a previous 3666 * call has not completed for any of the target devices; 3667 * - \b clblasCompilerNotAvailable if a compiler is not available; 3668 * - \b clblasBuildProgramFailure if there is a failure to build a program 3669 * executable.
3670 * 3671 * @ingroup TRSV 3672 */ 3673 clblasStatus 3674 clblasStrsv( 3675 clblasOrder order, 3676 clblasUplo uplo, 3677 clblasTranspose trans, 3678 clblasDiag diag, 3679 size_t N, 3680 const cl_mem A, 3681 size_t offa, 3682 size_t lda, 3683 cl_mem X, 3684 size_t offx, 3685 int incx, 3686 cl_uint numCommandQueues, 3687 cl_command_queue *commandQueues, 3688 cl_uint numEventsInWaitList, 3689 const cl_event *eventWaitList, 3690 cl_event *events); 3691 3692 /** 3693 * @example example_strsv.c 3694 * Example of how to use the @ref clblasStrsv function. 3695 */ 3696 3697 3698 /** 3699 * @brief Solves triangular matrix problems with double elements. 3700 * 3701 * Triangular systems solved: 3702 * - \f$ A X \leftarrow X \f$ 3703 * - \f$ A^T X \leftarrow X \f$ 3704 * 3705 * @param[in] order Row/column order. 3706 * @param[in] uplo The triangle in matrix being referenced. 3707 * @param[in] trans How matrix \b A is to be transposed. 3708 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3709 * @param[in] N Number of rows/columns in matrix \b A. 3710 * @param[in] A Buffer object storing matrix \b A. 3711 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3712 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3713 * than \b N. 3714 * @param[out] X Buffer object storing vector \b X. 3715 * @param[in] offx Offset in number of elements for first element in vector \b X. 3716 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3717 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3718 * task is to be performed. 3719 * @param[in] commandQueues OpenCL command queues. 3720 * @param[in] numEventsInWaitList Number of events in the event wait list. 3721 * @param[in] eventWaitList Event wait list. 3722 * @param[in] events Event objects per each command queue that identify 3723 * a particular kernel execution instance.
3724 * 3725 * @return 3726 * - \b clblasSuccess on success; 3727 * - \b clblasInvalidDevice if a target device does not support floating 3728 * point arithmetic with double precision; 3729 * - the same error codes as the clblasStrsv() function otherwise. 3730 * 3731 * @ingroup TRSV 3732 */ 3733 clblasStatus 3734 clblasDtrsv( 3735 clblasOrder order, 3736 clblasUplo uplo, 3737 clblasTranspose trans, 3738 clblasDiag diag, 3739 size_t N, 3740 const cl_mem A, 3741 size_t offa, 3742 size_t lda, 3743 cl_mem X, 3744 size_t offx, 3745 int incx, 3746 cl_uint numCommandQueues, 3747 cl_command_queue *commandQueues, 3748 cl_uint numEventsInWaitList, 3749 const cl_event *eventWaitList, 3750 cl_event *events); 3751 3752 3753 /** 3754 * @brief Solves triangular matrix problems with float-complex elements. 3755 * 3756 * Triangular systems solved: 3757 * - \f$ A X \leftarrow X \f$ 3758 * - \f$ A^T X \leftarrow X \f$ 3759 * 3760 * @param[in] order Row/column order. 3761 * @param[in] uplo The triangle in matrix being referenced. 3762 * @param[in] trans How matrix \b A is to be transposed. 3763 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3764 * @param[in] N Number of rows/columns in matrix \b A. 3765 * @param[in] A Buffer object storing matrix \b A. 3766 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3767 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3768 * than \b N. 3769 * @param[out] X Buffer object storing vector \b X. 3770 * @param[in] offx Offset in number of elements for first element in vector \b X. 3771 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3772 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3773 * task is to be performed. 3774 * @param[in] commandQueues OpenCL command queues. 3775 * @param[in] numEventsInWaitList Number of events in the event wait list. 3776 * @param[in] eventWaitList Event wait list.
3777 * @param[in] events Event objects per each command queue that identify 3778 * a particular kernel execution instance. 3779 * 3780 * @return The same result as the clblasStrsv() function. 3781 * 3782 * @ingroup TRSV 3783 */ 3784 clblasStatus 3785 clblasCtrsv( 3786 clblasOrder order, 3787 clblasUplo uplo, 3788 clblasTranspose trans, 3789 clblasDiag diag, 3790 size_t N, 3791 const cl_mem A, 3792 size_t offa, 3793 size_t lda, 3794 cl_mem X, 3795 size_t offx, 3796 int incx, 3797 cl_uint numCommandQueues, 3798 cl_command_queue *commandQueues, 3799 cl_uint numEventsInWaitList, 3800 const cl_event *eventWaitList, 3801 cl_event *events); 3802 3803 3804 /** 3805 * @brief Solves triangular matrix problems with double-complex elements. 3806 * 3807 * Triangular systems solved: 3808 * - \f$ A X \leftarrow X \f$ 3809 * - \f$ A^T X \leftarrow X \f$ 3810 * 3811 * @param[in] order Row/column order. 3812 * @param[in] uplo The triangle in matrix being referenced. 3813 * @param[in] trans How matrix \b A is to be transposed. 3814 * @param[in] diag Specifies whether matrix \b A is unit triangular. 3815 * @param[in] N Number of rows/columns in matrix \b A. 3816 * @param[in] A Buffer object storing matrix \b A. 3817 * @param[in] offa Offset in number of elements for first element in matrix \b A. 3818 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3819 * than \b N. 3820 * @param[out] X Buffer object storing vector \b X. 3821 * @param[in] offx Offset in number of elements for first element in vector \b X. 3822 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3823 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3824 * task is to be performed. 3825 * @param[in] commandQueues OpenCL command queues. 3826 * @param[in] numEventsInWaitList Number of events in the event wait list. 3827 * @param[in] eventWaitList Event wait list.
3828 * @param[in] events Event objects per each command queue that identify 3829 * a particular kernel execution instance. 3830 * 3831 * @return The same result as the clblasDtrsv() function. 3832 * 3833 * @ingroup TRSV 3834 */ 3835 clblasStatus 3836 clblasZtrsv( 3837 clblasOrder order, 3838 clblasUplo uplo, 3839 clblasTranspose trans, 3840 clblasDiag diag, 3841 size_t N, 3842 const cl_mem A, 3843 size_t offa, 3844 size_t lda, 3845 cl_mem X, 3846 size_t offx, 3847 int incx, 3848 cl_uint numCommandQueues, 3849 cl_command_queue *commandQueues, 3850 cl_uint numEventsInWaitList, 3851 const cl_event *eventWaitList, 3852 cl_event *events); 3853 3854 /*@}*/ 3855 3856 /** 3857 * @defgroup GER GER - General matrix rank 1 operation 3858 * @ingroup BLAS2 3859 */ 3860 /*@{*/ 3861 3862 /** 3863 * @brief Vector-vector product with float elements; 3864 * performs a rank 1 update of matrix \b A. 3865 * 3866 * Vector-vector products: 3867 * - \f$ A \leftarrow \alpha X Y^T + A \f$ 3868 * 3869 * @param[in] order Row/column order. 3870 * @param[in] M Number of rows in matrix \b A. 3871 * @param[in] N Number of columns in matrix \b A. 3872 * @param[in] alpha Specifies the scalar alpha. 3873 * @param[in] X Buffer object storing vector \b X. 3874 * @param[in] offx Offset in number of elements for the first element in vector \b X. 3875 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3876 * @param[in] Y Buffer object storing vector \b Y. 3877 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 3878 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 3879 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 3880 * overwritten by the updated matrix. 3881 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 3882 * @param[in] lda Leading dimension of matrix \b A.
It cannot be less 3883 * than \b N when the \b order parameter is set to 3884 * \b clblasRowMajor,\n or less than \b M when the 3885 * parameter is set to \b clblasColumnMajor. 3886 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3887 * task is to be performed. 3888 * @param[in] commandQueues OpenCL command queues. 3889 * @param[in] numEventsInWaitList Number of events in the event wait list. 3890 * @param[in] eventWaitList Event wait list. 3891 * @param[in] events Event objects per each command queue that identify 3892 * a particular kernel execution instance. 3893 * 3894 * @return 3895 * - \b clblasSuccess on success; 3896 * - \b clblasNotInitialized if clblasSetup() was not called; 3897 * - \b clblasInvalidValue if invalid parameters are passed: 3898 * - either \b M or \b N is zero, or 3899 * - either \b incx or \b incy is zero, or 3900 * - a leading dimension is invalid; 3901 * - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid, 3902 * or is an image object rather than a buffer object; 3903 * - \b clblasOutOfResources if you use image-based function implementation 3904 * and no suitable scratch image is available; 3905 * - \b clblasOutOfHostMemory if the library can't allocate memory for 3906 * internal structures; 3907 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 3908 * - \b clblasInvalidContext if the context that a passed command queue belongs to 3909 * was released; 3910 * - \b clblasInvalidOperation if kernel compilation relating to a previous 3911 * call has not completed for any of the target devices; 3912 * - \b clblasCompilerNotAvailable if a compiler is not available; 3913 * - \b clblasBuildProgramFailure if there is a failure to build a program 3914 * executable.
3915 * 3916 * @ingroup GER 3917 */ 3918 clblasStatus 3919 clblasSger( 3920 clblasOrder order, 3921 size_t M, 3922 size_t N, 3923 cl_float alpha, 3924 const cl_mem X, 3925 size_t offx, 3926 int incx, 3927 const cl_mem Y, 3928 size_t offy, 3929 int incy, 3930 cl_mem A, 3931 size_t offa, 3932 size_t lda, 3933 cl_uint numCommandQueues, 3934 cl_command_queue *commandQueues, 3935 cl_uint numEventsInWaitList, 3936 const cl_event *eventWaitList, 3937 cl_event *events); 3938 3939 /** 3940 * @example example_sger.c 3941 * Example of how to use the @ref clblasSger function. 3942 */ 3943 3944 3945 /** 3946 * @brief Vector-vector product with double elements; 3947 * performs a rank 1 update of matrix \b A. 3948 * 3949 * Vector-vector products: 3950 * - \f$ A \leftarrow \alpha X Y^T + A \f$ 3951 * 3952 * @param[in] order Row/column order. 3953 * @param[in] M Number of rows in matrix \b A. 3954 * @param[in] N Number of columns in matrix \b A. 3955 * @param[in] alpha Specifies the scalar alpha. 3956 * @param[in] X Buffer object storing vector \b X. 3957 * @param[in] offx Offset in number of elements for the first element in vector \b X. 3958 * @param[in] incx Increment for the elements of \b X. Must not be zero. 3959 * @param[in] Y Buffer object storing vector \b Y. 3960 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 3961 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 3962 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 3963 * overwritten by the updated matrix. 3964 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 3965 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 3966 * than \b N when the \b order parameter is set to 3967 * \b clblasRowMajor,\n or less than \b M when the 3968 * parameter is set to \b clblasColumnMajor. 3969 * @param[in] numCommandQueues Number of OpenCL command queues in which the 3970 * task is to be performed.
3971 * @param[in] commandQueues OpenCL command queues. 3972 * @param[in] numEventsInWaitList Number of events in the event wait list. 3973 * @param[in] eventWaitList Event wait list. 3974 * @param[in] events Event objects per each command queue that identify 3975 * a particular kernel execution instance. 3976 * 3977 * @return 3978 * - \b clblasSuccess on success; 3979 * - \b clblasInvalidDevice if a target device does not support floating 3980 * point arithmetic with double precision; 3981 * - the same error codes as the clblasSger() function otherwise. 3982 * 3983 * @ingroup GER 3984 */ 3985 clblasStatus 3986 clblasDger( 3987 clblasOrder order, 3988 size_t M, 3989 size_t N, 3990 cl_double alpha, 3991 const cl_mem X, 3992 size_t offx, 3993 int incx, 3994 const cl_mem Y, 3995 size_t offy, 3996 int incy, 3997 cl_mem A, 3998 size_t offa, 3999 size_t lda, 4000 cl_uint numCommandQueues, 4001 cl_command_queue *commandQueues, 4002 cl_uint numEventsInWaitList, 4003 const cl_event *eventWaitList, 4004 cl_event *events); 4005 /*@}*/ 4006 4007 /** 4008 * @defgroup GERU GERU - General matrix rank 1 operation 4009 * @ingroup BLAS2 4010 */ 4011 /*@{*/ 4012 4013 /** 4014 * @brief Vector-vector product with float complex elements; 4015 * performs a rank 1 update of matrix \b A. 4016 * 4017 * Vector-vector products: 4018 * - \f$ A \leftarrow \alpha X Y^T + A \f$ 4019 * 4020 * @param[in] order Row/column order. 4021 * @param[in] M Number of rows in matrix \b A. 4022 * @param[in] N Number of columns in matrix \b A. 4023 * @param[in] alpha Specifies the scalar alpha. 4024 * @param[in] X Buffer object storing vector \b X. 4025 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4026 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4027 * @param[in] Y Buffer object storing vector \b Y. 4028 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4029 * @param[in] incy Increment for the elements of \b Y.
Must not be zero. 4030 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 4031 * overwritten by the updated matrix. 4032 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4033 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4034 * than \b N when the \b order parameter is set to 4035 * \b clblasRowMajor,\n or less than \b M when the 4036 * parameter is set to \b clblasColumnMajor. 4037 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4038 * task is to be performed. 4039 * @param[in] commandQueues OpenCL command queues. 4040 * @param[in] numEventsInWaitList Number of events in the event wait list. 4041 * @param[in] eventWaitList Event wait list. 4042 * @param[in] events Event objects per each command queue that identify 4043 * a particular kernel execution instance. 4044 * 4045 * @return 4046 * - \b clblasSuccess on success; 4047 * - \b clblasNotInitialized if clblasSetup() was not called; 4048 * - \b clblasInvalidValue if invalid parameters are passed: 4049 * - either \b M or \b N is zero, or 4050 * - either \b incx or \b incy is zero, or 4051 * - a leading dimension is invalid; 4052 * - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid, 4053 * or is an image object rather than a buffer object; 4054 * - \b clblasOutOfResources if you use image-based function implementation 4055 * and no suitable scratch image is available; 4056 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4057 * internal structures; 4058 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4059 * - \b clblasInvalidContext if the context that a passed command queue belongs to 4060 * was released; 4061 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4062 * call has not completed for any of the target devices; 4063 * - \b clblasCompilerNotAvailable if a compiler is not available; 4064 * - \b clblasBuildProgramFailure if there is a failure to build a program
4065 * executable. 4066 * 4067 * @ingroup GERU 4068 */ 4069 clblasStatus 4070 clblasCgeru( 4071 clblasOrder order, 4072 size_t M, 4073 size_t N, 4074 cl_float2 alpha, 4075 const cl_mem X, 4076 size_t offx, 4077 int incx, 4078 const cl_mem Y, 4079 size_t offy, 4080 int incy, 4081 cl_mem A , 4082 size_t offa, 4083 size_t lda, 4084 cl_uint numCommandQueues, 4085 cl_command_queue *commandQueues, 4086 cl_uint numEventsInWaitList, 4087 const cl_event *eventWaitList, 4088 cl_event *events); 4089 4090 /** 4091 * @brief Vector-vector product with double complex elements; 4092 * performs a rank 1 update of matrix \b A. 4093 * 4094 * Vector-vector products: 4095 * - \f$ A \leftarrow \alpha X Y^T + A \f$ 4096 * 4097 * @param[in] order Row/column order. 4098 * @param[in] M Number of rows in matrix \b A. 4099 * @param[in] N Number of columns in matrix \b A. 4100 * @param[in] alpha Specifies the scalar alpha. 4101 * @param[in] X Buffer object storing vector \b X. 4102 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4103 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4104 * @param[in] Y Buffer object storing vector \b Y. 4105 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4106 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4107 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 4108 * overwritten by the updated matrix. 4109 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4110 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4111 * than \b N when the \b order parameter is set to 4112 * \b clblasRowMajor,\n or less than \b M when the 4113 * parameter is set to \b clblasColumnMajor. 4114 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4115 * task is to be performed. 4116 * @param[in] commandQueues OpenCL command queues.
4117 * @param[in] numEventsInWaitList Number of events in the event wait list. 4118 * @param[in] eventWaitList Event wait list. 4119 * @param[in] events Event objects per each command queue that identify 4120 * a particular kernel execution instance. 4121 * 4122 * @return 4123 * - \b clblasSuccess on success; 4124 * - \b clblasInvalidDevice if a target device does not support floating 4125 * point arithmetic with double precision; 4126 * - the same error codes as the clblasCgeru() function otherwise. 4127 * 4128 * @ingroup GERU 4129 */ 4130 clblasStatus 4131 clblasZgeru( 4132 clblasOrder order, 4133 size_t M, 4134 size_t N, 4135 cl_double2 alpha, 4136 const cl_mem X, 4137 size_t offx, 4138 int incx, 4139 const cl_mem Y, 4140 size_t offy, 4141 int incy, 4142 cl_mem A, 4143 size_t offa, 4144 size_t lda, 4145 cl_uint numCommandQueues, 4146 cl_command_queue *commandQueues, 4147 cl_uint numEventsInWaitList, 4148 const cl_event *eventWaitList, 4149 cl_event *events); 4150 /*@}*/ 4151 4152 /** 4153 * @defgroup GERC GERC - General matrix rank 1 operation 4154 * @ingroup BLAS2 4155 */ 4156 /*@{*/ 4157 4158 /** 4159 * @brief Vector-vector product with float complex elements; 4160 * performs a rank 1 update of matrix \b A. 4161 * 4162 * Vector-vector products: 4163 * - \f$ A \leftarrow \alpha X Y^H + A \f$ 4164 * 4165 * @param[in] order Row/column order. 4166 * @param[in] M Number of rows in matrix \b A. 4167 * @param[in] N Number of columns in matrix \b A. 4168 * @param[in] alpha Specifies the scalar alpha. 4169 * @param[in] X Buffer object storing vector \b X. 4170 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4171 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4172 * @param[in] Y Buffer object storing vector \b Y. 4173 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4174 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
4175 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 4176 * overwritten by the updated matrix. 4177 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4178 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4179 * than \b N when the \b order parameter is set to 4180 * \b clblasRowMajor,\n or less than \b M when the 4181 * parameter is set to \b clblasColumnMajor. 4182 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4183 * task is to be performed. 4184 * @param[in] commandQueues OpenCL command queues. 4185 * @param[in] numEventsInWaitList Number of events in the event wait list. 4186 * @param[in] eventWaitList Event wait list. 4187 * @param[in] events Event objects per each command queue that identify 4188 * a particular kernel execution instance. 4189 * 4190 * @return 4191 * - \b clblasSuccess on success; 4192 * - \b clblasNotInitialized if clblasSetup() was not called; 4193 * - \b clblasInvalidValue if invalid parameters are passed: 4194 * - either \b M or \b N is zero, or 4195 * - either \b incx or \b incy is zero, or 4196 * - a leading dimension is invalid; 4197 * - \b clblasInvalidMemObject if the \b A, \b X, or \b Y object is invalid, 4198 * or is an image object rather than a buffer object; 4199 * - \b clblasOutOfResources if you use image-based function implementation 4200 * and no suitable scratch image is available; 4201 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4202 * internal structures; 4203 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4204 * - \b clblasInvalidContext if the context that a passed command queue belongs to 4205 * was released; 4206 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4207 * call has not completed for any of the target devices; 4208 * - \b clblasCompilerNotAvailable if a compiler is not available; 4209 * - \b clblasBuildProgramFailure if there is a failure to build a program 4210 * executable.
4211 * 4212 * @ingroup GERC 4213 */ 4214 4215 clblasStatus 4216 clblasCgerc( 4217 clblasOrder order, 4218 size_t M, 4219 size_t N, 4220 cl_float2 alpha, 4221 const cl_mem X, 4222 size_t offx, 4223 int incx, 4224 const cl_mem Y, 4225 size_t offy, 4226 int incy, 4227 cl_mem A , 4228 size_t offa, 4229 size_t lda, 4230 cl_uint numCommandQueues, 4231 cl_command_queue *commandQueues, 4232 cl_uint numEventsInWaitList, 4233 const cl_event *eventWaitList, 4234 cl_event *events); 4235 4236 /** 4237 * @brief Vector-vector product with double complex elements; 4238 * performs a rank 1 update of matrix \b A. 4239 * 4240 * Vector-vector products: 4241 * - \f$ A \leftarrow \alpha X Y^H + A \f$ 4242 * 4243 * @param[in] order Row/column order. 4244 * @param[in] M Number of rows in matrix \b A. 4245 * @param[in] N Number of columns in matrix \b A. 4246 * @param[in] alpha Specifies the scalar alpha. 4247 * @param[in] X Buffer object storing vector \b X. 4248 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4249 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4250 * @param[in] Y Buffer object storing vector \b Y. 4251 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4252 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4253 * @param[in,out] A Buffer object storing matrix \b A. On exit, A is 4254 * overwritten by the updated matrix. 4255 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4256 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4257 * than \b N when the \b order parameter is set to 4258 * \b clblasRowMajor,\n or less than \b M when the 4259 * parameter is set to \b clblasColumnMajor. 4260 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4261 * task is to be performed. 4262 * @param[in] commandQueues OpenCL command queues.
4263 * @param[in] numEventsInWaitList Number of events in the event wait list. 4264 * @param[in] eventWaitList Event wait list. 4265 * @param[in] events Event objects per each command queue that identify 4266 * a particular kernel execution instance. 4267 * 4268 * @return 4269 * - \b clblasSuccess on success; 4270 * - \b clblasInvalidDevice if a target device does not support floating 4271 * point arithmetic with double precision; 4272 * - the same error codes as the clblasCgerc() function otherwise. 4273 * 4274 * @ingroup GERC 4275 */ 4276 clblasStatus 4277 clblasZgerc( 4278 clblasOrder order, 4279 size_t M, 4280 size_t N, 4281 cl_double2 alpha, 4282 const cl_mem X, 4283 size_t offx, 4284 int incx, 4285 const cl_mem Y, 4286 size_t offy, 4287 int incy, 4288 cl_mem A, 4289 size_t offa, 4290 size_t lda, 4291 cl_uint numCommandQueues, 4292 cl_command_queue *commandQueues, 4293 cl_uint numEventsInWaitList, 4294 const cl_event *eventWaitList, 4295 cl_event *events); 4296 4297 4298 /*@}*/ 4299 4300 /** 4301 * @defgroup SYR SYR - Symmetric rank 1 update 4302 * 4303 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 4304 * symmetric rank 1 update operations. 4305 * @ingroup BLAS2 4306 */ 4307 4308 /*@{*/ 4309 /** 4310 * @brief Symmetric rank 1 operation with a general triangular matrix and 4311 * float elements. 4312 * 4313 * Symmetric rank 1 operation: 4314 * - \f$ A \leftarrow \alpha x x^T + A \f$ 4315 * 4316 * @param[in] order Row/column order. 4317 * @param[in] uplo The triangle in matrix being referenced. 4318 * @param[in] N Number of columns in matrix \b A. 4319 * @param[in] alpha The scalar factor applied to \f$ x x^T \f$. 4320 * @param[in] X Buffer object storing vector \b X. 4321 * @param[in] offx Offset of first element of vector \b X in buffer object. 4322 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4323 * @param[in,out] A Buffer object storing matrix \b A; updated in place.
4324 * @param[in] offa Offset of first element of matrix \b A in buffer object. 4325 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4326 * than \b N. 4327 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4328 * task is to be performed. 4329 * @param[in] commandQueues OpenCL command queues. 4330 * @param[in] numEventsInWaitList Number of events in the event wait list. 4331 * @param[in] eventWaitList Event wait list. 4332 * @param[in] events Event objects per each command queue that identify 4333 * a particular kernel execution instance. 4334 * 4335 * @return 4336 * - \b clblasSuccess on success; 4337 * - \b clblasNotInitialized if clblasSetup() was not called; 4338 * - \b clblasInvalidValue if invalid parameters are passed: 4339 * - \b N is zero, or 4340 * - either \b incx is zero, or 4341 * - the leading dimension is invalid; 4342 * - \b clblasInvalidMemObject if either \b A, \b X object is 4343 * Invalid, or an image object rather than the buffer one; 4344 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4345 * internal structures; 4346 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4347 * - \b clblasInvalidContext if a context a passed command queue belongs 4348 * to was released; 4349 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4350 * call has not completed for any of the target devices; 4351 * - \b clblasCompilerNotAvailable if a compiler is not available; 4352 * - \b clblasBuildProgramFailure if there is a failure to build a program 4353 * executable. 
4354 * 4355 * @ingroup SYR 4356 */ 4357 clblasStatus 4358 clblasSsyr( 4359 clblasOrder order, 4360 clblasUplo uplo, 4361 size_t N, 4362 cl_float alpha, 4363 const cl_mem X, 4364 size_t offx, 4365 int incx, 4366 cl_mem A, 4367 size_t offa, 4368 size_t lda, 4369 cl_uint numCommandQueues, 4370 cl_command_queue* commandQueues, 4371 cl_uint numEventsInWaitList, 4372 const cl_event* eventWaitList, 4373 cl_event* events); 4374 4375 /** 4376 * @brief Symmetric rank 1 operation with a general triangular matrix and 4377 * double elements. 4378 * 4379 * Symmetric rank 1 operation: 4380 * - \f$ A \leftarrow \alpha x x^T + A \f$ 4381 * 4382 * @param[in] order Row/column order. 4383 * @param[in] uplo The triangle in matrix being referenced. 4384 * @param[in] N Number of columns in matrix \b A. 4385 * @param[in] alpha The factor of matrix \b A. 4386 * @param[in] X Buffer object storing vector \b X. 4387 * @param[in] offx Offset of first element of vector \b X in buffer object. 4388 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4389 * @param[out] A Buffer object storing matrix \b A. 4390 * @param[in] offa Offset of first element of matrix \b A in buffer object. 4391 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4392 * than \b N. 4393 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4394 * task is to be performed. 4395 * @param[in] commandQueues OpenCL command queues. 4396 * @param[in] numEventsInWaitList Number of events in the event wait list. 4397 * @param[in] eventWaitList Event wait list. 4398 * @param[in] events Event objects per each command queue that identify 4399 * a particular kernel execution instance. 4400 * 4401 * @return 4402 * - \b clblasSuccess on success; 4403 * - \b clblasInvalidDevice if a target device does not support floating 4404 * point arithmetic with double precision; 4405 * - the same error codes as the clblasSsyr() function otherwise. 
4406 * 4407 * @ingroup SYR 4408 */ 4409 4410 clblasStatus 4411 clblasDsyr( 4412 clblasOrder order, 4413 clblasUplo uplo, 4414 size_t N, 4415 cl_double alpha, 4416 const cl_mem X, 4417 size_t offx, 4418 int incx, 4419 cl_mem A, 4420 size_t offa, 4421 size_t lda, 4422 cl_uint numCommandQueues, 4423 cl_command_queue* commandQueues, 4424 cl_uint numEventsInWaitList, 4425 const cl_event* eventWaitList, 4426 cl_event* events); 4427 /*@}*/ 4428 4429 4430 /** 4431 * @defgroup HER HER - Hermitian rank 1 operation 4432 * 4433 * The Level 2 Basic Linear Algebra Subprogram functions that perform 4434 * hermitian rank 1 operations. 4435 * @ingroup BLAS2 4436 */ 4437 4438 /*@{*/ 4439 /** 4440 * @brief hermitian rank 1 operation with a general triangular matrix and 4441 * float-complex elements. 4442 * 4443 * hermitian rank 1 operation: 4444 * - \f$ A \leftarrow \alpha X X^H + A \f$ 4445 * 4446 * @param[in] order Row/column order. 4447 * @param[in] uplo The triangle in matrix being referenced. 4448 * @param[in] N Number of columns in matrix \b A. 4449 * @param[in] alpha The factor of matrix \b A (a scalar float value) 4450 * @param[in] X Buffer object storing vector \b X. 4451 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4452 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4453 * @param[out] A Buffer object storing matrix \b A. 4454 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4455 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4456 * than \b N. 4457 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4458 * task is to be performed. 4459 * @param[in] commandQueues OpenCL command queues. 4460 * @param[in] numEventsInWaitList Number of events in the event wait list. 4461 * @param[in] eventWaitList Event wait list. 4462 * @param[in] events Event objects per each command queue that identify 4463 * a particular kernel execution instance. 
4464 * 4465 * @return 4466 * - \b clblasSuccess on success; 4467 * - \b clblasNotInitialized if clblasSetup() was not called; 4468 * - \b clblasInvalidValue if invalid parameters are passed: 4469 * - \b N is zero, or 4470 * - either \b incx is zero, or 4471 * - the leading dimension is invalid; 4472 * - \b clblasInvalidMemObject if either \b A, \b X object is 4473 * Invalid, or an image object rather than the buffer one; 4474 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4475 * internal structures; 4476 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4477 * - \b clblasInvalidContext if a context a passed command queue belongs 4478 * to was released; 4479 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4480 * call has not completed for any of the target devices; 4481 * - \b clblasCompilerNotAvailable if a compiler is not available; 4482 * - \b clblasBuildProgramFailure if there is a failure to build a program 4483 * executable. 4484 * 4485 * @ingroup HER 4486 */ 4487 clblasStatus 4488 clblasCher( 4489 clblasOrder order, 4490 clblasUplo uplo, 4491 size_t N, 4492 cl_float alpha, 4493 const cl_mem X, 4494 size_t offx, 4495 int incx, 4496 cl_mem A, 4497 size_t offa, 4498 size_t lda, 4499 cl_uint numCommandQueues, 4500 cl_command_queue* commandQueues, 4501 cl_uint numEventsInWaitList, 4502 const cl_event* eventWaitList, 4503 cl_event* events); 4504 /** 4505 * @example example_cher.c 4506 * Example of how to use the @ref clblasCher function. 4507 */ 4508 4509 /** 4510 * @brief hermitian rank 1 operation with a general triangular matrix and 4511 * double-complex elements. 4512 * 4513 * hermitian rank 1 operation: 4514 * - \f$ A \leftarrow \alpha X X^H + A \f$ 4515 * 4516 * @param[in] order Row/column order. 4517 * @param[in] uplo The triangle in matrix being referenced. 4518 * @param[in] N Number of columns in matrix \b A. 
4519 * @param[in] alpha The factor of matrix \b A (a scalar double value) 4520 * @param[in] X Buffer object storing vector \b X. 4521 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4522 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4523 * @param[out] A Buffer object storing matrix \b A. 4524 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4525 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4526 * than \b N. 4527 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4528 * task is to be performed. 4529 * @param[in] commandQueues OpenCL command queues. 4530 * @param[in] numEventsInWaitList Number of events in the event wait list. 4531 * @param[in] eventWaitList Event wait list. 4532 * @param[in] events Event objects per each command queue that identify 4533 * a particular kernel execution instance. 4534 * 4535 * @return 4536 * - \b clblasSuccess on success; 4537 * - \b clblasInvalidDevice if a target device does not support floating 4538 * point arithmetic with double precision; 4539 * - the same error codes as the clblasCher() function otherwise. 4540 * 4541 * @ingroup HER 4542 */ 4543 clblasStatus 4544 clblasZher( 4545 clblasOrder order, 4546 clblasUplo uplo, 4547 size_t N, 4548 cl_double alpha, 4549 const cl_mem X, 4550 size_t offx, 4551 int incx, 4552 cl_mem A, 4553 size_t offa, 4554 size_t lda, 4555 cl_uint numCommandQueues, 4556 cl_command_queue* commandQueues, 4557 cl_uint numEventsInWaitList, 4558 const cl_event* eventWaitList, 4559 cl_event* events); 4560 /*@}*/ 4561 4562 /** 4563 * @defgroup SYR2 SYR2 - Symmetric rank 2 update 4564 * 4565 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 4566 * symmetric rank 2 update operations. 4567 * @ingroup BLAS2 4568 */ 4569 4570 /*@{*/ 4571 /** 4572 * @brief Symmetric rank 2 operation with a general triangular matrix and 4573 * float elements. 
4574 * 4575 * Symmetric rank 2 operation: 4576 * - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$ 4577 * 4578 * @param[in] order Row/column order. 4579 * @param[in] uplo The triangle in matrix being referenced. 4580 * @param[in] N Number of columns in matrix \b A. 4581 * @param[in] alpha The factor of matrix \b A. 4582 * @param[in] X Buffer object storing vector \b X. 4583 * @param[in] offx Offset of first element of vector \b X in buffer object. 4584 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4585 * @param[in] Y Buffer object storing vector \b Y. 4586 * @param[in] offy Offset of first element of vector \b Y in buffer object. 4587 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4588 * @param[out] A Buffer object storing matrix \b A. 4589 * @param[in] offa Offset of first element of matrix \b A in buffer object. 4590 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4591 * than \b N. 4592 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4593 * task is to be performed. 4594 * @param[in] commandQueues OpenCL command queues. 4595 * @param[in] numEventsInWaitList Number of events in the event wait list. 4596 * @param[in] eventWaitList Event wait list. 4597 * @param[in] events Event objects per each command queue that identify 4598 * a particular kernel execution instance. 
4599 * 4600 * @return 4601 * - \b clblasSuccess on success; 4602 * - \b clblasNotInitialized if clblasSetup() was not called; 4603 * - \b clblasInvalidValue if invalid parameters are passed: 4604 * - either \b N is zero, or 4605 * - either \b incx or \b incy is zero, or 4606 * - the leading dimension is invalid; 4607 * - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is 4608 * Invalid, or an image object rather than the buffer one; 4609 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4610 * internal structures; 4611 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4612 * - \b clblasInvalidContext if a context a passed command queue belongs 4613 * to was released; 4614 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4615 * call has not completed for any of the target devices; 4616 * - \b clblasCompilerNotAvailable if a compiler is not available; 4617 * - \b clblasBuildProgramFailure if there is a failure to build a program 4618 * executable. 4619 * 4620 * @ingroup SYR2 4621 */ 4622 4623 clblasStatus 4624 clblasSsyr2( 4625 clblasOrder order, 4626 clblasUplo uplo, 4627 size_t N, 4628 cl_float alpha, 4629 const cl_mem X, 4630 size_t offx, 4631 int incx, 4632 const cl_mem Y, 4633 size_t offy, 4634 int incy, 4635 cl_mem A, 4636 size_t offa, 4637 size_t lda, 4638 cl_uint numCommandQueues, 4639 cl_command_queue* commandQueues, 4640 cl_uint numEventsInWaitList, 4641 const cl_event* eventWaitList, 4642 cl_event* events); 4643 4644 /** 4645 * @brief Symmetric rank 2 operation with a general triangular matrix and 4646 * double elements. 4647 * 4648 * Symmetric rank 2 operation: 4649 * - \f$ A \leftarrow \alpha x y^T + \alpha y x^T + A \f$ 4650 * 4651 * @param[in] order Row/column order. 4652 * @param[in] uplo The triangle in matrix being referenced. 4653 * @param[in] N Number of columns in matrix \b A. 4654 * @param[in] alpha The factor of matrix \b A. 
4655 * @param[in] X Buffer object storing vector \b X. 4656 * @param[in] offx Offset of first element of vector \b X in buffer object. 4657 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4658 * @param[in] Y Buffer object storing vector \b Y. 4659 * @param[in] offy Offset of first element of vector \b Y in buffer object. 4660 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4661 * @param[out] A Buffer object storing matrix \b A. 4662 * @param[in] offa Offset of first element of matrix \b A in buffer object. 4663 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4664 * than \b N. 4665 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4666 * task is to be performed. 4667 * @param[in] commandQueues OpenCL command queues. 4668 * @param[in] numEventsInWaitList Number of events in the event wait list. 4669 * @param[in] eventWaitList Event wait list. 4670 * @param[in] events Event objects per each command queue that identify 4671 * a particular kernel execution instance. 
4672 * 4673 * @return 4674 * - \b clblasSuccess on success; 4675 * - \b clblasNotInitialized if clblasSetup() was not called; 4676 * - \b clblasInvalidValue if invalid parameters are passed: 4677 * - either \b N is zero, or 4678 * - either \b incx or \b incy is zero, or 4679 * - the leading dimension is invalid; 4680 * - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is 4681 * Invalid, or an image object rather than the buffer one; 4682 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4683 * internal structures; 4684 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4685 * - \b clblasInvalidContext if a context a passed command queue belongs 4686 * to was released; 4687 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4688 * call has not completed for any of the target devices; 4689 * - \b clblasCompilerNotAvailable if a compiler is not available; 4690 * - \b clblasBuildProgramFailure if there is a failure to build a program 4691 * executable. 4692 * 4693 * @ingroup SYR2 4694 */ 4695 4696 clblasStatus 4697 clblasDsyr2( 4698 clblasOrder order, 4699 clblasUplo uplo, 4700 size_t N, 4701 cl_double alpha, 4702 const cl_mem X, 4703 size_t offx, 4704 int incx, 4705 const cl_mem Y, 4706 size_t offy, 4707 int incy, 4708 cl_mem A, 4709 size_t offa, 4710 size_t lda, 4711 cl_uint numCommandQueues, 4712 cl_command_queue* commandQueues, 4713 cl_uint numEventsInWaitList, 4714 const cl_event* eventWaitList, 4715 cl_event* events); 4716 4717 /*@}*/ 4718 4719 /** 4720 * @defgroup HER2 HER2 - Hermitian rank 2 update 4721 * 4722 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 4723 * hermitian rank 2 update operations. 4724 * @ingroup BLAS2 4725 */ 4726 4727 /*@{*/ 4728 /** 4729 * @brief Hermitian rank 2 operation with a general triangular matrix and 4730 * float-compelx elements. 
4731 * 4732 * Hermitian rank 2 operation: 4733 * - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$ 4734 * 4735 * @param[in] order Row/column order. 4736 * @param[in] uplo The triangle in matrix being referenced. 4737 * @param[in] N Number of columns in matrix \b A. 4738 * @param[in] alpha The factor of matrix \b A. 4739 * @param[in] X Buffer object storing vector \b X. 4740 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4741 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4742 * @param[in] Y Buffer object storing vector \b Y. 4743 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4744 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4745 * @param[out] A Buffer object storing matrix \b A. 4746 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4747 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4748 * than \b N. 4749 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4750 * task is to be performed. 4751 * @param[in] commandQueues OpenCL command queues. 4752 * @param[in] numEventsInWaitList Number of events in the event wait list. 4753 * @param[in] eventWaitList Event wait list. 4754 * @param[in] events Event objects per each command queue that identify 4755 * a particular kernel execution instance. 
4756 * 4757 * @return 4758 * - \b clblasSuccess on success; 4759 * - \b clblasNotInitialized if clblasSetup() was not called; 4760 * - \b clblasInvalidValue if invalid parameters are passed: 4761 * - either \b N is zero, or 4762 * - either \b incx or \b incy is zero, or 4763 * - the leading dimension is invalid; 4764 * - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is 4765 * Invalid, or an image object rather than the buffer one; 4766 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4767 * internal structures; 4768 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4769 * - \b clblasInvalidContext if a context a passed command queue belongs 4770 * to was released; 4771 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4772 * call has not completed for any of the target devices; 4773 * - \b clblasCompilerNotAvailable if a compiler is not available; 4774 * - \b clblasBuildProgramFailure if there is a failure to build a program 4775 * executable. 4776 * 4777 * @ingroup HER2 4778 */ 4779 clblasStatus 4780 clblasCher2( 4781 clblasOrder order, 4782 clblasUplo uplo, 4783 size_t N, 4784 cl_float2 alpha, 4785 const cl_mem X, 4786 size_t offx, 4787 int incx, 4788 const cl_mem Y, 4789 size_t offy, 4790 int incy, 4791 cl_mem A, 4792 size_t offa, 4793 size_t lda, 4794 cl_uint numCommandQueues, 4795 cl_command_queue* commandQueues, 4796 cl_uint numEventsInWaitList, 4797 const cl_event* eventWaitList, 4798 cl_event* events); 4799 4800 4801 /** 4802 * @brief Hermitian rank 2 operation with a general triangular matrix and 4803 * double-compelx elements. 4804 * 4805 * Hermitian rank 2 operation: 4806 * - \f$ A \leftarrow \alpha X Y^H + \overline{ \alpha } Y X^H + A \f$ 4807 * 4808 * @param[in] order Row/column order. 4809 * @param[in] uplo The triangle in matrix being referenced. 4810 * @param[in] N Number of columns in matrix \b A. 4811 * @param[in] alpha The factor of matrix \b A. 
4812 * @param[in] X Buffer object storing vector \b X. 4813 * @param[in] offx Offset in number of elements for the first element in vector \b X. 4814 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4815 * @param[in] Y Buffer object storing vector \b Y. 4816 * @param[in] offy Offset in number of elements for the first element in vector \b Y. 4817 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 4818 * @param[out] A Buffer object storing matrix \b A. 4819 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 4820 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 4821 * than \b N. 4822 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4823 * task is to be performed. 4824 * @param[in] commandQueues OpenCL command queues. 4825 * @param[in] numEventsInWaitList Number of events in the event wait list. 4826 * @param[in] eventWaitList Event wait list. 4827 * @param[in] events Event objects per each command queue that identify 4828 * a particular kernel execution instance. 4829 * 4830 * @return 4831 * - \b clblasSuccess on success; 4832 * - \b clblasInvalidDevice if a target device does not support floating 4833 * point arithmetic with double precision; 4834 * - the same error codes as the clblasCher2() function otherwise. 4835 * 4836 * @ingroup HER2 4837 */ 4838 clblasStatus 4839 clblasZher2( 4840 clblasOrder order, 4841 clblasUplo uplo, 4842 size_t N, 4843 cl_double2 alpha, 4844 const cl_mem X, 4845 size_t offx, 4846 int incx, 4847 const cl_mem Y, 4848 size_t offy, 4849 int incy, 4850 cl_mem A, 4851 size_t offa, 4852 size_t lda, 4853 cl_uint numCommandQueues, 4854 cl_command_queue* commandQueues, 4855 cl_uint numEventsInWaitList, 4856 const cl_event* eventWaitList, 4857 cl_event* events); 4858 4859 /** 4860 * @example example_zher2.c 4861 * Example of how to use the @ref clblasZher2 function. 
4862 */ 4863 4864 /*@}*/ 4865 4866 /** 4867 * @defgroup TPMV TPMV - Triangular packed matrix-vector multiply 4868 * @ingroup BLAS2 4869 */ 4870 /*@{*/ 4871 4872 /** 4873 * @brief Matrix-vector product with a packed triangular matrix and 4874 * float elements. 4875 * 4876 * Matrix-vector products: 4877 * - \f$ X \leftarrow A X \f$ 4878 * - \f$ X \leftarrow A^T X \f$ 4879 * 4880 * @param[in] order Row/column order. 4881 * @param[in] uplo The triangle in matrix being referenced. 4882 * @param[in] trans How matrix \b AP is to be transposed. 4883 * @param[in] diag Specify whether matrix \b AP is unit triangular. 4884 * @param[in] N Number of rows/columns in matrix \b A. 4885 * @param[in] AP Buffer object storing matrix \b AP in packed format. 4886 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 4887 * @param[out] X Buffer object storing vector \b X. 4888 * @param[in] offx Offset in number of elements for first element in vector \b X. 4889 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4890 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 4891 * minimum of (1 + (N-1)*abs(incx)) elements 4892 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4893 * task is to be performed. 4894 * @param[in] commandQueues OpenCL command queues. 4895 * @param[in] numEventsInWaitList Number of events in the event wait list. 4896 * @param[in] eventWaitList Event wait list. 4897 * @param[in] events Event objects per each command queue that identify 4898 * a particular kernel execution instance. 
4899 * 4900 * @return 4901 * - \b clblasSuccess on success; 4902 * - \b clblasNotInitialized if clblasSetup() was not called; 4903 * - \b clblasInvalidValue if invalid parameters are passed: 4904 * - either \b N or \b incx is zero 4905 * - \b clblasInvalidMemObject if either \b AP or \b X object is 4906 * Invalid, or an image object rather than the buffer one; 4907 * - \b clblasOutOfHostMemory if the library can't allocate memory for 4908 * internal structures; 4909 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 4910 * - \b clblasInvalidContext if a context a passed command queue belongs 4911 * to was released; 4912 * - \b clblasInvalidOperation if kernel compilation relating to a previous 4913 * call has not completed for any of the target devices; 4914 * - \b clblasCompilerNotAvailable if a compiler is not available; 4915 * - \b clblasBuildProgramFailure if there is a failure to build a program 4916 * executable. 4917 * 4918 * @ingroup TPMV 4919 */ 4920 clblasStatus 4921 clblasStpmv( 4922 clblasOrder order, 4923 clblasUplo uplo, 4924 clblasTranspose trans, 4925 clblasDiag diag, 4926 size_t N, 4927 const cl_mem AP, 4928 size_t offa, 4929 cl_mem X, 4930 size_t offx, 4931 int incx, 4932 cl_mem scratchBuff, 4933 cl_uint numCommandQueues, 4934 cl_command_queue *commandQueues, 4935 cl_uint numEventsInWaitList, 4936 const cl_event *eventWaitList, 4937 cl_event *events); 4938 4939 /** 4940 * @example example_stpmv.c 4941 * Example of how to use the @ref clblasStpmv function. 4942 */ 4943 4944 /** 4945 * @brief Matrix-vector product with a packed triangular matrix and 4946 * double elements. 4947 * 4948 * Matrix-vector products: 4949 * - \f$ X \leftarrow A X \f$ 4950 * - \f$ X \leftarrow A^T X \f$ 4951 * 4952 * @param[in] order Row/column order. 4953 * @param[in] uplo The triangle in matrix being referenced. 4954 * @param[in] trans How matrix \b AP is to be transposed. 4955 * @param[in] diag Specify whether matrix \b AP is unit triangular. 
4956 * @param[in] N Number of rows/columns in matrix \b AP. 4957 * @param[in] AP Buffer object storing matrix \b AP in packed format. 4958 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 4959 * @param[out] X Buffer object storing vector \b X. 4960 * @param[in] offx Offset in number of elements for first element in vector \b X. 4961 * @param[in] incx Increment for the elements of \b X. Must not be zero. 4962 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 4963 * minimum of (1 + (N-1)*abs(incx)) elements 4964 * @param[in] numCommandQueues Number of OpenCL command queues in which the 4965 * task is to be performed. 4966 * @param[in] commandQueues OpenCL command queues. 4967 * @param[in] numEventsInWaitList Number of events in the event wait list. 4968 * @param[in] eventWaitList Event wait list. 4969 * @param[in] events Event objects per each command queue that identify 4970 * a particular kernel execution instance. 4971 * 4972 * @return 4973 * - \b clblasSuccess on success; 4974 * - \b clblasInvalidDevice if a target device does not support floating 4975 * point arithmetic with double precision; 4976 * - the same error codes as the clblasStpmv() function otherwise. 4977 * 4978 * @ingroup TPMV 4979 */ 4980 clblasStatus 4981 clblasDtpmv( 4982 clblasOrder order, 4983 clblasUplo uplo, 4984 clblasTranspose trans, 4985 clblasDiag diag, 4986 size_t N, 4987 const cl_mem AP, 4988 size_t offa, 4989 cl_mem X, 4990 size_t offx, 4991 int incx, 4992 cl_mem scratchBuff, 4993 cl_uint numCommandQueues, 4994 cl_command_queue *commandQueues, 4995 cl_uint numEventsInWaitList, 4996 const cl_event *eventWaitList, 4997 cl_event *events); 4998 4999 /** 5000 * @brief Matrix-vector product with a packed triangular matrix and 5001 * float-complex elements. 5002 * 5003 * Matrix-vector products: 5004 * - \f$ X \leftarrow A X \f$ 5005 * - \f$ X \leftarrow A^T X \f$ 5006 * 5007 * @param[in] order Row/column order. 
5008 * @param[in] uplo The triangle in matrix being referenced. 5009 * @param[in] trans How matrix \b AP is to be transposed. 5010 * @param[in] diag Specify whether matrix \b AP is unit triangular. 5011 * @param[in] N Number of rows/columns in matrix \b AP. 5012 * @param[in] AP Buffer object storing matrix \b AP in packed format. 5013 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5014 * @param[out] X Buffer object storing vector \b X. 5015 * @param[in] offx Offset in number of elements for first element in vector \b X. 5016 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5017 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 5018 * minimum of (1 + (N-1)*abs(incx)) elements 5019 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5020 * task is to be performed. 5021 * @param[in] commandQueues OpenCL command queues. 5022 * @param[in] numEventsInWaitList Number of events in the event wait list. 5023 * @param[in] eventWaitList Event wait list. 5024 * @param[in] events Event objects per each command queue that identify 5025 * a particular kernel execution instance. 5026 * 5027 * @return The same result as the clblasStpmv() function. 5028 * @ingroup TPMV 5029 */ 5030 clblasStatus 5031 clblasCtpmv( 5032 clblasOrder order, 5033 clblasUplo uplo, 5034 clblasTranspose trans, 5035 clblasDiag diag, 5036 size_t N, 5037 const cl_mem AP, 5038 size_t offa, 5039 cl_mem X, 5040 size_t offx, 5041 int incx, 5042 cl_mem scratchBuff, 5043 cl_uint numCommandQueues, 5044 cl_command_queue *commandQueues, 5045 cl_uint numEventsInWaitList, 5046 const cl_event *eventWaitList, 5047 cl_event *events); 5048 5049 /** 5050 * @brief Matrix-vector product with a packed triangular matrix and 5051 * double-complex elements. 5052 * 5053 * Matrix-vector products: 5054 * - \f$ X \leftarrow A X \f$ 5055 * - \f$ X \leftarrow A^T X \f$ 5056 * 5057 * @param[in] order Row/column order. 
5058 * @param[in] uplo The triangle in matrix being referenced. 5059 * @param[in] trans How matrix \b AP is to be transposed. 5060 * @param[in] diag Specify whether matrix \b AP is unit triangular. 5061 * @param[in] N Number of rows/columns in matrix \b AP. 5062 * @param[in] AP Buffer object storing matrix \b AP in packed format. 5063 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5064 * @param[out] X Buffer object storing vector \b X. 5065 * @param[in] offx Offset in number of elements for first element in vector \b X. 5066 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5067 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a 5068 * minimum of (1 + (N-1)*abs(incx)) elements 5069 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5070 * task is to be performed. 5071 * @param[in] commandQueues OpenCL command queues. 5072 * @param[in] numEventsInWaitList Number of events in the event wait list. 5073 * @param[in] eventWaitList Event wait list. 5074 * @param[in] events Event objects per each command queue that identify 5075 * a particular kernel execution instance. 5076 * 5077 * @return The same result as the clblasDtpmv() function. 5078 * @ingroup TPMV 5079 */ 5080 clblasStatus 5081 clblasZtpmv( 5082 clblasOrder order, 5083 clblasUplo uplo, 5084 clblasTranspose trans, 5085 clblasDiag diag, 5086 size_t N, 5087 const cl_mem AP, 5088 size_t offa, 5089 cl_mem X, 5090 size_t offx, 5091 int incx, 5092 cl_mem scratchBuff, 5093 cl_uint numCommandQueues, 5094 cl_command_queue *commandQueues, 5095 cl_uint numEventsInWaitList, 5096 const cl_event *eventWaitList, 5097 cl_event *events); 5098 /*@}*/ 5099 5100 5101 5102 /** 5103 * @defgroup TPSV TPSV - Triangular packed matrix vector solve 5104 * @ingroup BLAS2 5105 */ 5106 /*@{*/ 5107 5108 /** 5109 * @brief solving triangular packed matrix problems with float elements. 
5110 * 5111 * Matrix-vector products: 5112 * - \f$ A X \leftarrow X \f$ 5113 * - \f$ A^T X \leftarrow X \f$ 5114 * 5115 * @param[in] order Row/column order. 5116 * @param[in] uplo The triangle in matrix being referenced. 5117 * @param[in] trans How matrix \b A is to be transposed. 5118 * @param[in] diag Specify whether matrix \b A is unit triangular. 5119 * @param[in] N Number of rows/columns in matrix \b A. 5120 * @param[in] A Buffer object storing matrix in packed format.\b A. 5121 * @param[in] offa Offset in number of elements for first element in matrix \b A. 5122 * @param[out] X Buffer object storing vector \b X. 5123 * @param[in] offx Offset in number of elements for first element in vector \b X. 5124 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5125 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5126 * task is to be performed. 5127 * @param[in] commandQueues OpenCL command queues. 5128 * @param[in] numEventsInWaitList Number of events in the event wait list. 5129 * @param[in] eventWaitList Event wait list. 5130 * @param[in] events Event objects per each command queue that identify 5131 * a particular kernel execution instance. 
5132 * 5133 * @return 5134 * - \b clblasSuccess on success; 5135 * - \b clblasNotInitialized if clblasSetup() was not called; 5136 * - \b clblasInvalidValue if invalid parameters are passed: 5137 * - either \b N or \b incx is zero, or 5138 * - the leading dimension is invalid; 5139 * - \b clblasInvalidMemObject if either \b A or \b X object is 5140 * Invalid, or an image object rather than the buffer one; 5141 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5142 * internal structures; 5143 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5144 * - \b clblasInvalidContext if a context a passed command queue belongs 5145 * to was released; 5146 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5147 * call has not completed for any of the target devices; 5148 * - \b clblasCompilerNotAvailable if a compiler is not available; 5149 * - \b clblasBuildProgramFailure if there is a failure to build a program 5150 * executable. 5151 * 5152 * @ingroup TPSV 5153 */ 5154 5155 clblasStatus 5156 clblasStpsv( 5157 clblasOrder order, 5158 clblasUplo uplo, 5159 clblasTranspose trans, 5160 clblasDiag diag, 5161 size_t N, 5162 const cl_mem A, 5163 size_t offa, 5164 cl_mem X, 5165 size_t offx, 5166 int incx, 5167 cl_uint numCommandQueues, 5168 cl_command_queue *commandQueues, 5169 cl_uint numEventsInWaitList, 5170 const cl_event *eventWaitList, 5171 cl_event *events); 5172 5173 /** 5174 * @example example_stpsv.c 5175 * Example of how to use the @ref clblasStpsv function. 5176 */ 5177 5178 /** 5179 * @brief solving triangular packed matrix problems with double elements. 5180 * 5181 * Matrix-vector products: 5182 * - \f$ A X \leftarrow X \f$ 5183 * - \f$ A^T X \leftarrow X \f$ 5184 * 5185 * @param[in] order Row/column order. 5186 * @param[in] uplo The triangle in matrix being referenced. 5187 * @param[in] trans How matrix \b A is to be transposed. 5188 * @param[in] diag Specify whether matrix \b A is unit triangular. 
5189 * @param[in] N Number of rows/columns in matrix \b A. 5190 * @param[in] A Buffer object storing matrix \b A in packed format. 5191 * @param[in] offa Offset in number of elements for first element in matrix \b A. 5192 * @param[in,out] X Buffer object storing vector \b X. 5193 * @param[in] offx Offset in number of elements for first element in vector \b X. 5194 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5195 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5196 * task is to be performed. 5197 * @param[in] commandQueues OpenCL command queues. 5198 * @param[in] numEventsInWaitList Number of events in the event wait list. 5199 * @param[in] eventWaitList Event wait list. 5200 * @param[in] events Event objects per each command queue that identify 5201 * a particular kernel execution instance. 5202 * 5203 * @return 5204 * - \b clblasSuccess on success; 5205 * - \b clblasNotInitialized if clblasSetup() was not called; 5206 * - \b clblasInvalidValue if invalid parameters are passed: 5207 * - either \b N or \b incx is zero, or 5208 * - the leading dimension is invalid; 5209 * - \b clblasInvalidMemObject if either \b A or \b X object is 5210 * invalid, or an image object rather than the buffer one; 5211 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5212 * internal structures; 5213 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5214 * - \b clblasInvalidContext if a context a passed command queue belongs 5215 * to was released; 5216 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5217 * call has not completed for any of the target devices; 5218 * - \b clblasCompilerNotAvailable if a compiler is not available; 5219 * - \b clblasBuildProgramFailure if there is a failure to build a program 5220 * executable. 
5221 * 5222 * @ingroup TPSV 5223 */ 5224 5225 clblasStatus 5226 clblasDtpsv( 5227 clblasOrder order, 5228 clblasUplo uplo, 5229 clblasTranspose trans, 5230 clblasDiag diag, 5231 size_t N, 5232 const cl_mem A, 5233 size_t offa, 5234 cl_mem X, 5235 size_t offx, 5236 int incx, 5237 cl_uint numCommandQueues, 5238 cl_command_queue *commandQueues, 5239 cl_uint numEventsInWaitList, 5240 const cl_event *eventWaitList, 5241 cl_event *events); 5242 5243 /** 5244 * @brief solving triangular packed matrix problems with float complex elements. 5245 * 5246 * Triangular systems solved: 5247 * - \f$ A X \leftarrow X \f$ 5248 * - \f$ A^T X \leftarrow X \f$ 5249 * 5250 * @param[in] order Row/column order. 5251 * @param[in] uplo The triangle in matrix being referenced. 5252 * @param[in] trans How matrix \b A is to be transposed. 5253 * @param[in] diag Specify whether matrix \b A is unit triangular. 5254 * @param[in] N Number of rows/columns in matrix \b A. 5255 * @param[in] A Buffer object storing matrix \b A in packed format. 5256 * @param[in] offa Offset in number of elements for first element in matrix \b A. 5257 * @param[in,out] X Buffer object storing vector \b X. 5258 * @param[in] offx Offset in number of elements for first element in vector \b X. 5259 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5260 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5261 * task is to be performed. 5262 * @param[in] commandQueues OpenCL command queues. 5263 * @param[in] numEventsInWaitList Number of events in the event wait list. 5264 * @param[in] eventWaitList Event wait list. 5265 * @param[in] events Event objects per each command queue that identify 5266 * a particular kernel execution instance. 
5267 * 5268 * @return 5269 * - \b clblasSuccess on success; 5270 * - \b clblasNotInitialized if clblasSetup() was not called; 5271 * - \b clblasInvalidValue if invalid parameters are passed: 5272 * - either \b N or \b incx is zero, or 5273 * - the leading dimension is invalid; 5274 * - \b clblasInvalidMemObject if either \b A or \b X object is 5275 * invalid, or an image object rather than the buffer one; 5276 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5277 * internal structures; 5278 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5279 * - \b clblasInvalidContext if a context a passed command queue belongs 5280 * to was released; 5281 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5282 * call has not completed for any of the target devices; 5283 * - \b clblasCompilerNotAvailable if a compiler is not available; 5284 * - \b clblasBuildProgramFailure if there is a failure to build a program 5285 * executable. 5286 * 5287 * @ingroup TPSV 5288 */ 5289 5290 clblasStatus 5291 clblasCtpsv( 5292 clblasOrder order, 5293 clblasUplo uplo, 5294 clblasTranspose trans, 5295 clblasDiag diag, 5296 size_t N, 5297 const cl_mem A, 5298 size_t offa, 5299 cl_mem X, 5300 size_t offx, 5301 int incx, 5302 cl_uint numCommandQueues, 5303 cl_command_queue *commandQueues, 5304 cl_uint numEventsInWaitList, 5305 const cl_event *eventWaitList, 5306 cl_event *events); 5307 5308 /** 5309 * @brief solving triangular packed matrix problems with double complex elements. 5310 * 5311 * Triangular systems solved: 5312 * - \f$ A X \leftarrow X \f$ 5313 * - \f$ A^T X \leftarrow X \f$ 5314 * 5315 * @param[in] order Row/column order. 5316 * @param[in] uplo The triangle in matrix being referenced. 5317 * @param[in] trans How matrix \b A is to be transposed. 5318 * @param[in] diag Specify whether matrix \b A is unit triangular. 5319 * @param[in] N Number of rows/columns in matrix \b A. 
5320 * @param[in] A Buffer object storing matrix \b A in packed format. 5321 * @param[in] offa Offset in number of elements for first element in matrix \b A. 5322 * @param[in,out] X Buffer object storing vector \b X. 5323 * @param[in] offx Offset in number of elements for first element in vector \b X. 5324 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5325 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5326 * task is to be performed. 5327 * @param[in] commandQueues OpenCL command queues. 5328 * @param[in] numEventsInWaitList Number of events in the event wait list. 5329 * @param[in] eventWaitList Event wait list. 5330 * @param[in] events Event objects per each command queue that identify 5331 * a particular kernel execution instance. 5332 * 5333 * @return 5334 * - \b clblasSuccess on success; 5335 * - \b clblasNotInitialized if clblasSetup() was not called; 5336 * - \b clblasInvalidValue if invalid parameters are passed: 5337 * - either \b N or \b incx is zero, or 5338 * - the leading dimension is invalid; 5339 * - \b clblasInvalidMemObject if either \b A or \b X object is 5340 * invalid, or an image object rather than the buffer one; 5341 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5342 * internal structures; 5343 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5344 * - \b clblasInvalidContext if a context a passed command queue belongs 5345 * to was released; 5346 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5347 * call has not completed for any of the target devices; 5348 * - \b clblasCompilerNotAvailable if a compiler is not available; 5349 * - \b clblasBuildProgramFailure if there is a failure to build a program 5350 * executable. 
5351 * 5352 * @ingroup TPSV 5353 */ 5354 5355 clblasStatus 5356 clblasZtpsv( 5357 clblasOrder order, 5358 clblasUplo uplo, 5359 clblasTranspose trans, 5360 clblasDiag diag, 5361 size_t N, 5362 const cl_mem A, 5363 size_t offa, 5364 cl_mem X, 5365 size_t offx, 5366 int incx, 5367 cl_uint numCommandQueues, 5368 cl_command_queue *commandQueues, 5369 cl_uint numEventsInWaitList, 5370 const cl_event *eventWaitList, 5371 cl_event *events); 5372 /*@}*/ 5373 5374 5375 /** 5376 * @defgroup SPMV SPMV - Symmetric packed matrix vector multiply 5377 * @ingroup BLAS2 5378 */ 5379 5380 /*@{*/ 5381 5382 /** 5383 * @brief Matrix-vector product with a symmetric packed-matrix and float elements. 5384 * 5385 * Matrix-vector products: 5386 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 5387 * 5388 * @param[in] order Row/columns order. 5389 * @param[in] uplo The triangle in matrix being referenced. 5390 * @param[in] N Number of rows and columns in matrix \b AP. 5391 * @param[in] alpha The factor of matrix \b AP. 5392 * @param[in] AP Buffer object storing matrix \b AP. 5393 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5394 * @param[in] X Buffer object storing vector \b X. 5395 * @param[in] offx Offset of first element of vector \b X in buffer object. 5396 * Counted in elements. 5397 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 5398 * @param[in] beta The factor of vector \b Y. 5399 * @param[in,out] Y Buffer object storing vector \b Y. 5400 * @param[in] offy Offset of first element of vector \b Y in buffer object. 5401 * Counted in elements. 5402 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 5403 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5404 * task is to be performed. 5405 * @param[in] commandQueues OpenCL command queues. 5406 * @param[in] numEventsInWaitList Number of events in the event wait list. 5407 * @param[in] eventWaitList Event wait list. 
5408 * @param[in] events Event objects per each command queue that identify 5409 * a particular kernel execution instance. 5410 * 5411 * @return 5412 * - \b clblasSuccess on success; 5413 * - \b clblasNotInitialized if clblasSetup() was not called; 5414 * - \b clblasInvalidValue if invalid parameters are passed: 5415 * - \b N is zero, or 5416 * - either \b incx or \b incy is zero, or 5417 * - the matrix sizes or the vector sizes along with the increments lead to 5418 * accessing outside of any of the buffers; 5419 * - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is 5420 * invalid, or an image object rather than the buffer one; 5421 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5422 * internal structures; 5423 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5424 * - \b clblasInvalidContext if a context a passed command queue belongs to 5425 * was released; 5426 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5427 * call has not completed for any of the target devices; 5428 * - \b clblasCompilerNotAvailable if a compiler is not available; 5429 * - \b clblasBuildProgramFailure if there is a failure to build a program 5430 * executable. 5431 * 5432 * @ingroup SPMV 5433 */ 5434 clblasStatus 5435 clblasSspmv( 5436 clblasOrder order, 5437 clblasUplo uplo, 5438 size_t N, 5439 cl_float alpha, 5440 const cl_mem AP, 5441 size_t offa, 5442 const cl_mem X, 5443 size_t offx, 5444 int incx, 5445 cl_float beta, 5446 cl_mem Y, 5447 size_t offy, 5448 int incy, 5449 cl_uint numCommandQueues, 5450 cl_command_queue *commandQueues, 5451 cl_uint numEventsInWaitList, 5452 const cl_event *eventWaitList, 5453 cl_event *events); 5454 5455 /** 5456 * @example example_sspmv.c 5457 * This is an example of how to use the @ref clblasSspmv function. 5458 */ 5459 5460 /** 5461 * @brief Matrix-vector product with a symmetric packed-matrix and double elements. 
5462 * 5463 * Matrix-vector products: 5464 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 5465 * 5466 * @param[in] order Row/columns order. 5467 * @param[in] uplo The triangle in matrix being referenced. 5468 * @param[in] N Number of rows and columns in matrix \b AP. 5469 * @param[in] alpha The factor of matrix \b AP. 5470 * @param[in] AP Buffer object storing matrix \b AP. 5471 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5472 * @param[in] X Buffer object storing vector \b X. 5473 * @param[in] offx Offset of first element of vector \b X in buffer object. 5474 * Counted in elements. 5475 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 5476 * @param[in] beta The factor of vector \b Y. 5477 * @param[in,out] Y Buffer object storing vector \b Y. 5478 * @param[in] offy Offset of first element of vector \b Y in buffer object. 5479 * Counted in elements. 5480 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 5481 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5482 * task is to be performed. 5483 * @param[in] commandQueues OpenCL command queues. 5484 * @param[in] numEventsInWaitList Number of events in the event wait list. 5485 * @param[in] eventWaitList Event wait list. 5486 * @param[in] events Event objects per each command queue that identify 5487 * a particular kernel execution instance. 5488 * 5489 * @return 5490 * - \b clblasSuccess on success; 5491 * - \b clblasInvalidDevice if a target device does not support floating 5492 * point arithmetic with double precision; 5493 * - the same error codes as the clblasSspmv() function otherwise. 
5494 * 5495 * @ingroup SPMV 5496 */ 5497 clblasStatus 5498 clblasDspmv( 5499 clblasOrder order, 5500 clblasUplo uplo, 5501 size_t N, 5502 cl_double alpha, 5503 const cl_mem AP, 5504 size_t offa, 5505 const cl_mem X, 5506 size_t offx, 5507 int incx, 5508 cl_double beta, 5509 cl_mem Y, 5510 size_t offy, 5511 int incy, 5512 cl_uint numCommandQueues, 5513 cl_command_queue *commandQueues, 5514 cl_uint numEventsInWaitList, 5515 const cl_event *eventWaitList, 5516 cl_event *events); 5517 /*@}*/ 5518 5519 5520 5521 /** 5522 * @defgroup HPMV HPMV - Hermitian packed matrix-vector multiplication 5523 * @ingroup BLAS2 5524 */ 5525 5526 /*@{*/ 5527 5528 /** 5529 * @brief Matrix-vector product with a packed hermitian matrix and float-complex elements. 5530 * 5531 * Matrix-vector products: 5532 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 5533 * 5534 * @param[in] order Row/columns order. 5535 * @param[in] uplo The triangle in matrix being referenced. 5536 * @param[in] N Number of rows and columns in matrix \b AP. 5537 * @param[in] alpha The factor of matrix \b AP. 5538 * @param[in] AP Buffer object storing packed matrix \b AP. 5539 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5540 * @param[in] X Buffer object storing vector \b X. 5541 * @param[in] offx Offset of first element of vector \b X in buffer object. 5542 * Counted in elements. 5543 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 5544 * @param[in] beta The factor of vector \b Y. 5545 * @param[in,out] Y Buffer object storing vector \b Y. 5546 * @param[in] offy Offset of first element of vector \b Y in buffer object. 5547 * Counted in elements. 5548 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 5549 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5550 * task is to be performed. 5551 * @param[in] commandQueues OpenCL command queues. 
5552 * @param[in] numEventsInWaitList Number of events in the event wait list. 5553 * @param[in] eventWaitList Event wait list. 5554 * @param[in] events Event objects per each command queue that identify 5555 * a particular kernel execution instance. 5556 * 5557 * @return 5558 * - \b clblasSuccess on success; 5559 * - \b clblasNotInitialized if clblasSetup() was not called; 5560 * - \b clblasInvalidValue if invalid parameters are passed: 5561 * - \b N is zero, or 5562 * - either \b incx or \b incy is zero, or 5563 * - the matrix sizes or the vector sizes along with the increments lead to 5564 * accessing outside of any of the buffers; 5565 * - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is 5566 * invalid, or an image object rather than the buffer one; 5567 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5568 * internal structures; 5569 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5570 * - \b clblasInvalidContext if a context a passed command queue belongs to 5571 * was released; 5572 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5573 * call has not completed for any of the target devices; 5574 * - \b clblasCompilerNotAvailable if a compiler is not available; 5575 * - \b clblasBuildProgramFailure if there is a failure to build a program 5576 * executable. 5577 * 5578 * @ingroup HPMV 5579 */ 5580 clblasStatus 5581 clblasChpmv( 5582 clblasOrder order, 5583 clblasUplo uplo, 5584 size_t N, 5585 cl_float2 alpha, 5586 const cl_mem AP, 5587 size_t offa, 5588 const cl_mem X, 5589 size_t offx, 5590 int incx, 5591 cl_float2 beta, 5592 cl_mem Y, 5593 size_t offy, 5594 int incy, 5595 cl_uint numCommandQueues, 5596 cl_command_queue *commandQueues, 5597 cl_uint numEventsInWaitList, 5598 const cl_event *eventWaitList, 5599 cl_event *events); 5600 5601 /** 5602 * @example example_chpmv.c 5603 * This is an example of how to use the @ref clblasChpmv function. 
5604 */ 5605 5606 5607 /** 5608 * @brief Matrix-vector product with a packed hermitian matrix and double-complex elements. 5609 * 5610 * Matrix-vector products: 5611 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 5612 * 5613 * @param[in] order Row/columns order. 5614 * @param[in] uplo The triangle in matrix being referenced. 5615 * @param[in] N Number of rows and columns in matrix \b AP. 5616 * @param[in] alpha The factor of matrix \b AP. 5617 * @param[in] AP Buffer object storing packed matrix \b AP. 5618 * @param[in] offa Offset in number of elements for first element in matrix \b AP. 5619 * @param[in] X Buffer object storing vector \b X. 5620 * @param[in] offx Offset of first element of vector \b X in buffer object. 5621 * Counted in elements. 5622 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 5623 * @param[in] beta The factor of vector \b Y. 5624 * @param[in,out] Y Buffer object storing vector \b Y. 5625 * @param[in] offy Offset of first element of vector \b Y in buffer object. 5626 * Counted in elements. 5627 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 5628 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5629 * task is to be performed. 5630 * @param[in] commandQueues OpenCL command queues. 5631 * @param[in] numEventsInWaitList Number of events in the event wait list. 5632 * @param[in] eventWaitList Event wait list. 5633 * @param[in] events Event objects per each command queue that identify 5634 * a particular kernel execution instance. 5635 * 5636 * @return 5637 * - \b clblasSuccess on success; 5638 * - \b clblasInvalidDevice if a target device does not support floating 5639 * point arithmetic with double precision; 5640 * - the same error codes as the clblasChpmv() function otherwise. 
5641 * 5642 * @ingroup HPMV 5643 */ 5644 clblasStatus 5645 clblasZhpmv( 5646 clblasOrder order, 5647 clblasUplo uplo, 5648 size_t N, 5649 cl_double2 alpha, 5650 const cl_mem AP, 5651 size_t offa, 5652 const cl_mem X, 5653 size_t offx, 5654 int incx, 5655 cl_double2 beta, 5656 cl_mem Y, 5657 size_t offy, 5658 int incy, 5659 cl_uint numCommandQueues, 5660 cl_command_queue *commandQueues, 5661 cl_uint numEventsInWaitList, 5662 const cl_event *eventWaitList, 5663 cl_event *events); 5664 /*@}*/ 5665 5666 5667 /** 5668 * @defgroup SPR SPR - Symmetric packed matrix rank 1 update 5669 * 5670 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 5671 * symmetric rank 1 update operations on packed matrix 5672 * @ingroup BLAS2 5673 */ 5674 5675 /*@{*/ 5676 /** 5677 * @brief Symmetric rank 1 operation with a general triangular packed-matrix and 5678 * float elements. 5679 * 5680 * Symmetric rank 1 operation: 5681 * - \f$ A \leftarrow \alpha X X^T + A \f$ 5682 * 5683 * @param[in] order Row/column order. 5684 * @param[in] uplo The triangle in matrix being referenced. 5685 * @param[in] N Number of columns in matrix \b A. 5686 * @param[in] alpha The factor of matrix \b A. 5687 * @param[in] X Buffer object storing vector \b X. 5688 * @param[in] offx Offset of first element of vector \b X in buffer object. 5689 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5690 * @param[in,out] AP Buffer object storing packed-matrix \b AP. 5691 * @param[in] offa Offset of first element of matrix \b AP in buffer object. 5692 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5693 * task is to be performed. 5694 * @param[in] commandQueues OpenCL command queues. 5695 * @param[in] numEventsInWaitList Number of events in the event wait list. 5696 * @param[in] eventWaitList Event wait list. 5697 * @param[in] events Event objects per each command queue that identify 5698 * a particular kernel execution instance. 
5699 * 5700 * @return 5701 * - \b clblasSuccess on success; 5702 * - \b clblasNotInitialized if clblasSetup() was not called; 5703 * - \b clblasInvalidValue if invalid parameters are passed: 5704 * - \b N is zero, or 5705 * - \b incx is zero; 5706 * - \b clblasInvalidMemObject if either \b AP or \b X object is 5707 * invalid, or an image object rather than the buffer one; 5708 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5709 * internal structures; 5710 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5711 * - \b clblasInvalidContext if a context a passed command queue belongs 5712 * to was released; 5713 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5714 * call has not completed for any of the target devices; 5715 * - \b clblasCompilerNotAvailable if a compiler is not available; 5716 * - \b clblasBuildProgramFailure if there is a failure to build a program 5717 * executable. 5718 * 5719 * @ingroup SPR 5720 */ 5721 clblasStatus 5722 clblasSspr( 5723 clblasOrder order, 5724 clblasUplo uplo, 5725 size_t N, 5726 cl_float alpha, 5727 const cl_mem X, 5728 size_t offx, 5729 int incx, 5730 cl_mem AP, 5731 size_t offa, 5732 cl_uint numCommandQueues, 5733 cl_command_queue* commandQueues, 5734 cl_uint numEventsInWaitList, 5735 const cl_event* eventWaitList, 5736 cl_event* events); 5737 /** 5738 * @example example_sspr.c 5739 * Example of how to use the @ref clblasSspr function. 5740 */ 5741 5742 /** 5743 * @brief Symmetric rank 1 operation with a general triangular packed-matrix and 5744 * double elements. 5745 * 5746 * Symmetric rank 1 operation: 5747 * - \f$ A \leftarrow \alpha X X^T + A \f$ 5748 * 5749 * @param[in] order Row/column order. 5750 * @param[in] uplo The triangle in matrix being referenced. 5751 * @param[in] N Number of columns in matrix \b A. 5752 * @param[in] alpha The factor of matrix \b A. 5753 * @param[in] X Buffer object storing vector \b X. 
5754 * @param[in] offx Offset of first element of vector \b X in buffer object. 5755 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5756 * @param[in,out] AP Buffer object storing packed-matrix \b AP. 5757 * @param[in] offa Offset of first element of matrix \b AP in buffer object. 5758 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5759 * task is to be performed. 5760 * @param[in] commandQueues OpenCL command queues. 5761 * @param[in] numEventsInWaitList Number of events in the event wait list. 5762 * @param[in] eventWaitList Event wait list. 5763 * @param[in] events Event objects per each command queue that identify 5764 * a particular kernel execution instance. 5765 * 5766 * @return 5767 * - \b clblasSuccess on success; 5768 * - \b clblasInvalidDevice if a target device does not support floating 5769 * point arithmetic with double precision; 5770 * - the same error codes as the clblasSspr() function otherwise. 5771 * 5772 * @ingroup SPR 5773 */ 5774 5775 clblasStatus 5776 clblasDspr( 5777 clblasOrder order, 5778 clblasUplo uplo, 5779 size_t N, 5780 cl_double alpha, 5781 const cl_mem X, 5782 size_t offx, 5783 int incx, 5784 cl_mem AP, 5785 size_t offa, 5786 cl_uint numCommandQueues, 5787 cl_command_queue* commandQueues, 5788 cl_uint numEventsInWaitList, 5789 const cl_event* eventWaitList, 5790 cl_event* events); 5791 /*@}*/ 5792 5793 /** 5794 * @defgroup HPR HPR - Hermitian packed matrix rank 1 update 5795 * 5796 * The Level 2 Basic Linear Algebra Subprogram functions that perform 5797 * hermitian rank 1 operations on packed matrix 5798 * @ingroup BLAS2 5799 */ 5800 5801 /*@{*/ 5802 /** 5803 * @brief hermitian rank 1 operation with a general triangular packed-matrix and 5804 * float-complex elements. 5805 * 5806 * hermitian rank 1 operation: 5807 * - \f$ A \leftarrow \alpha X X^H + A \f$ 5808 * 5809 * @param[in] order Row/column order. 5810 * @param[in] uplo The triangle in matrix being referenced. 
5811 * @param[in] N Number of columns in matrix \b A. 5812 * @param[in] alpha The factor of matrix \b A (a scalar float value) 5813 * @param[in] X Buffer object storing vector \b X. 5814 * @param[in] offx Offset in number of elements for the first element in vector \b X. 5815 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5816 * @param[in,out] AP Buffer object storing matrix \b AP. 5817 * @param[in] offa Offset in number of elements for the first element in matrix \b AP. 5818 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5819 * task is to be performed. 5820 * @param[in] commandQueues OpenCL command queues. 5821 * @param[in] numEventsInWaitList Number of events in the event wait list. 5822 * @param[in] eventWaitList Event wait list. 5823 * @param[in] events Event objects per each command queue that identify 5824 * a particular kernel execution instance. 5825 * 5826 * @return 5827 * - \b clblasSuccess on success; 5828 * - \b clblasNotInitialized if clblasSetup() was not called; 5829 * - \b clblasInvalidValue if invalid parameters are passed: 5830 * - \b N is zero, or 5831 * - \b incx is zero; 5832 * - \b clblasInvalidMemObject if either \b AP or \b X object is 5833 * invalid, or an image object rather than the buffer one; 5834 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5835 * internal structures; 5836 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5837 * - \b clblasInvalidContext if a context a passed command queue belongs 5838 * to was released; 5839 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5840 * call has not completed for any of the target devices; 5841 * - \b clblasCompilerNotAvailable if a compiler is not available; 5842 * - \b clblasBuildProgramFailure if there is a failure to build a program 5843 * executable. 
5844 * 5845 * @ingroup HPR 5846 */ 5847 clblasStatus 5848 clblasChpr( 5849 clblasOrder order, 5850 clblasUplo uplo, 5851 size_t N, 5852 cl_float alpha, 5853 const cl_mem X, 5854 size_t offx, 5855 int incx, 5856 cl_mem AP, 5857 size_t offa, 5858 cl_uint numCommandQueues, 5859 cl_command_queue* commandQueues, 5860 cl_uint numEventsInWaitList, 5861 const cl_event* eventWaitList, 5862 cl_event* events); 5863 /** 5864 * @example example_chpr.c 5865 * Example of how to use the @ref clblasChpr function. 5866 */ 5867 5868 /** 5869 * @brief hermitian rank 1 operation with a general triangular packed-matrix and 5870 * double-complex elements. 5871 * 5872 * hermitian rank 1 operation: 5873 * - \f$ A \leftarrow \alpha X X^H + A \f$ 5874 * 5875 * @param[in] order Row/column order. 5876 * @param[in] uplo The triangle in matrix being referenced. 5877 * @param[in] N Number of columns in matrix \b A. 5878 * @param[in] alpha The factor of matrix \b A (a scalar double value) 5879 * @param[in] X Buffer object storing vector \b X. 5880 * @param[in] offx Offset in number of elements for the first element in vector \b X. 5881 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5882 * @param[in,out] AP Buffer object storing matrix \b AP. 5883 * @param[in] offa Offset in number of elements for the first element in matrix \b AP. 5884 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5885 * task is to be performed. 5886 * @param[in] commandQueues OpenCL command queues. 5887 * @param[in] numEventsInWaitList Number of events in the event wait list. 5888 * @param[in] eventWaitList Event wait list. 5889 * @param[in] events Event objects per each command queue that identify 5890 * a particular kernel execution instance. 
5891 * 5892 * @return 5893 * - \b clblasSuccess on success; 5894 * - \b clblasInvalidDevice if a target device does not support floating 5895 * point arithmetic with double precision; 5896 * - the same error codes as the clblasChpr() function otherwise. 5897 * 5898 * @ingroup HPR 5899 */ 5900 clblasStatus 5901 clblasZhpr( 5902 clblasOrder order, 5903 clblasUplo uplo, 5904 size_t N, 5905 cl_double alpha, 5906 const cl_mem X, 5907 size_t offx, 5908 int incx, 5909 cl_mem AP, 5910 size_t offa, 5911 cl_uint numCommandQueues, 5912 cl_command_queue* commandQueues, 5913 cl_uint numEventsInWaitList, 5914 const cl_event* eventWaitList, 5915 cl_event* events); 5916 /*@}*/ 5917 5918 /** 5919 * @defgroup SPR2 SPR2 - Symmetric packed matrix rank 2 update 5920 * 5921 * The Level 2 Basic Linear Algebra Subprograms are functions that perform 5922 * symmetric rank 2 update operations on packed matrices 5923 * @ingroup BLAS2 5924 */ 5925 5926 /*@{*/ 5927 /** 5928 * @brief Symmetric rank 2 operation with a general triangular packed-matrix and 5929 * float elements. 5930 * 5931 * Symmetric rank 2 operation: 5932 * - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$ 5933 * 5934 * @param[in] order Row/column order. 5935 * @param[in] uplo The triangle in matrix being referenced. 5936 * @param[in] N Number of columns in matrix \b A. 5937 * @param[in] alpha The factor of matrix \b A. 5938 * @param[in] X Buffer object storing vector \b X. 5939 * @param[in] offx Offset of first element of vector \b X in buffer object. 5940 * @param[in] incx Increment for the elements of \b X. Must not be zero. 5941 * @param[in] Y Buffer object storing vector \b Y. 5942 * @param[in] offy Offset of first element of vector \b Y in buffer object. 5943 * @param[in] incy Increment for the elements of \b Y. Must not be zero. 5944 * @param[in,out] AP Buffer object storing packed-matrix \b AP. 5945 * @param[in] offa Offset of first element of matrix \b AP in buffer object. 
5946 * @param[in] numCommandQueues Number of OpenCL command queues in which the 5947 * task is to be performed. 5948 * @param[in] commandQueues OpenCL command queues. 5949 * @param[in] numEventsInWaitList Number of events in the event wait list. 5950 * @param[in] eventWaitList Event wait list. 5951 * @param[in] events Event objects per each command queue that identify 5952 * a particular kernel execution instance. 5953 * 5954 * @return 5955 * - \b clblasSuccess on success; 5956 * - \b clblasNotInitialized if clblasSetup() was not called; 5957 * - \b clblasInvalidValue if invalid parameters are passed: 5958 * - \b N is zero, or 5959 * - either \b incx or \b incy is zero; 5960 * - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is 5961 * invalid, or an image object rather than the buffer one; 5962 * - \b clblasOutOfHostMemory if the library can't allocate memory for 5963 * internal structures; 5964 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 5965 * - \b clblasInvalidContext if a context a passed command queue belongs 5966 * to was released; 5967 * - \b clblasInvalidOperation if kernel compilation relating to a previous 5968 * call has not completed for any of the target devices; 5969 * - \b clblasCompilerNotAvailable if a compiler is not available; 5970 * - \b clblasBuildProgramFailure if there is a failure to build a program 5971 * executable. 5972 * 5973 * @ingroup SPR2 5974 */ 5975 5976 clblasStatus 5977 clblasSspr2( 5978 clblasOrder order, 5979 clblasUplo uplo, 5980 size_t N, 5981 cl_float alpha, 5982 const cl_mem X, 5983 size_t offx, 5984 int incx, 5985 const cl_mem Y, 5986 size_t offy, 5987 int incy, 5988 cl_mem AP, 5989 size_t offa, 5990 cl_uint numCommandQueues, 5991 cl_command_queue* commandQueues, 5992 cl_uint numEventsInWaitList, 5993 const cl_event* eventWaitList, 5994 cl_event* events); 5995 /** 5996 * @example example_sspr2.c 5997 * Example of how to use the @ref clblasSspr2 function. 
*/

/**
 * @brief Symmetric rank 2 operation with a general triangular packed-matrix and
 * double elements.
 *
 * Symmetric rank 2 operation:
 *   - \f$ A \leftarrow \alpha X Y^T + \alpha Y X^T + A \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] N Number of columns in matrix \b A.
 * @param[in] alpha Scalar factor applied to both rank 2 update terms
 *     (see the formula above).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] Y Buffer object storing vector \b Y.
 * @param[in] offy Offset of first element of vector \b Y in buffer object.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in,out] AP Buffer object storing packed-matrix \b AP (matrix \b A in
 *     the formula above); it is both an operand and the result.
 * @param[in] offa Offset of first element of matrix \b AP in buffer object.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasSspr2() function otherwise.
 *
 * @ingroup SPR2
 */

clblasStatus
clblasDspr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_double alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue* commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event* eventWaitList,
    cl_event* events);
/*@}*/

/**
 * @defgroup HPR2 HPR2 - Hermitian packed matrix rank 2 update
 *
 * The Level 2 Basic Linear Algebra Subprograms are functions that perform
 * hermitian rank 2 update operations on packed matrices.
 * @ingroup BLAS2
 */

/*@{*/
/**
 * @brief Hermitian rank 2 operation with a general triangular packed-matrix and
 * float-complex elements.
 *
 * Hermitian rank 2 operation:
 *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] N Number of columns in matrix \b A.
 * @param[in] alpha Scalar factor of the rank 2 update; its complex conjugate
 *     scales the second term (see the formula above).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset in number of elements for the first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] Y Buffer object storing vector \b Y.
 * @param[in] offy Offset in number of elements for the first element in vector \b Y.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in,out] AP Buffer object storing packed-matrix \b AP (matrix \b A in
 *     the formula above); it is both an operand and the result.
 * @param[in] offa Offset in number of elements for the first element in matrix \b AP.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b N is zero, or
 *     - either \b incx or \b incy is zero;
 *   - \b clblasInvalidMemObject if either \b AP, \b X, or \b Y object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup HPR2
 */
clblasStatus
clblasChpr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_float2 alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue* commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event* eventWaitList,
    cl_event* events);


/**
 * @brief Hermitian rank 2 operation with a general triangular packed-matrix and
 * double-complex elements.
*
 * Hermitian rank 2 operation:
 *   - \f$ A \leftarrow \alpha X Y^H + \overline{\alpha} Y X^H + A \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] N Number of columns in matrix \b A.
 * @param[in] alpha Scalar factor of the rank 2 update; its complex conjugate
 *     scales the second term (see the formula above).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset in number of elements for the first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] Y Buffer object storing vector \b Y.
 * @param[in] offy Offset in number of elements for the first element in vector \b Y.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in,out] AP Buffer object storing packed-matrix \b AP (matrix \b A in
 *     the formula above); it is both an operand and the result.
 * @param[in] offa Offset in number of elements for the first element in matrix \b AP.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasChpr2() function otherwise.
 *
 * @ingroup HPR2
 */
clblasStatus
clblasZhpr2(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    cl_double2 alpha,
    const cl_mem X,
    size_t offx,
    int incx,
    const cl_mem Y,
    size_t offy,
    int incy,
    cl_mem AP,
    size_t offa,
    cl_uint numCommandQueues,
    cl_command_queue* commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event* eventWaitList,
    cl_event* events);

/**
 * @example example_zhpr2.c
 * Example of how to use the @ref clblasZhpr2 function.
 */
/*@}*/



/**
 * @defgroup GBMV GBMV - General banded matrix-vector multiplication
 * @ingroup BLAS2
 */
/*@{*/

/**
 * @brief Matrix-vector product with a general rectangular banded matrix and
 * float elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] M Number of rows in banded matrix \b A.
 * @param[in] N Number of columns in banded matrix \b A.
 * @param[in] KL Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU Number of super-diagonals in banded matrix \b A.
 * @param[in] alpha The factor of banded matrix \b A.
 * @param[in] A Buffer object storing banded matrix \b A.
 * @param[in] offa Offset in number of elements for the first element in banded matrix \b A.
 * @param[in] lda Leading dimension of banded matrix \b A. It cannot be less
 *     than ( \b KL + \b KU + 1 ).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 *     Counted in elements.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] beta The factor of the vector \b Y.
* @param[in,out] Y Buffer object storing the vector \b Y; it is both an
 *     operand and the result (see the formulas above).
 * @param[in] offy Offset of first element of vector \b Y in buffer object.
 *     Counted in elements.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b M or \b N is zero, or
 *     - \b KL is greater than \b M - 1, or
 *     - \b KU is greater than \b N - 1, or
 *     - either \b incx or \b incy is zero, or
 *     - any of the leading dimensions is invalid;
 *     - the matrix size or the vector sizes along with the increments lead to
 *       accessing outside of any of the buffers;
 *   - \b clblasInvalidMemObject if either \b A, \b X, or \b Y object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasSgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_float alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_float beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @example example_sgbmv.c
 * Example of how to use the @ref clblasSgbmv function.
 */


/**
 * @brief Matrix-vector product with a general rectangular banded matrix and
 * double elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] M Number of rows in banded matrix \b A.
 * @param[in] N Number of columns in banded matrix \b A.
 * @param[in] KL Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU Number of super-diagonals in banded matrix \b A.
 * @param[in] alpha The factor of banded matrix \b A.
 * @param[in] A Buffer object storing banded matrix \b A.
 * @param[in] offa Offset in number of elements for the first element in banded matrix \b A.
 * @param[in] lda Leading dimension of banded matrix \b A. It cannot be less
 *     than ( \b KL + \b KU + 1 ).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 *     Counted in elements.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] beta The factor of the vector \b Y.
 * @param[in,out] Y Buffer object storing the vector \b Y; it is both an
 *     operand and the result (see the formulas above).
* @param[in] offy Offset of first element of vector \b Y in buffer object.
 *     Counted in elements.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasSgbmv() function otherwise.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasDgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_double alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_double beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);


/**
 * @brief Matrix-vector product with a general rectangular banded matrix and
 * float-complex elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] M Number of rows in banded matrix \b A.
 * @param[in] N Number of columns in banded matrix \b A.
 * @param[in] KL Number of sub-diagonals in banded matrix \b A.
* @param[in] KU Number of super-diagonals in banded matrix \b A.
 * @param[in] alpha The factor of banded matrix \b A.
 * @param[in] A Buffer object storing banded matrix \b A.
 * @param[in] offa Offset in number of elements for the first element in banded matrix \b A.
 * @param[in] lda Leading dimension of banded matrix \b A. It cannot be less
 *     than ( \b KL + \b KU + 1 ).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 *     Counted in elements.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] beta The factor of the vector \b Y.
 * @param[in,out] Y Buffer object storing the vector \b Y; it is both an
 *     operand and the result (see the formulas above).
 * @param[in] offy Offset of first element of vector \b Y in buffer object.
 *     Counted in elements.
 * @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return The same result as the clblasSgbmv() function.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasCgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_float2 alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_float2 beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);


/**
 * @brief Matrix-vector product with a general rectangular banded matrix and
 * double-complex elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *   - \f$ Y \leftarrow \alpha A^T X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] M Number of rows in banded matrix \b A.
 * @param[in] N Number of columns in banded matrix \b A.
 * @param[in] KL Number of sub-diagonals in banded matrix \b A.
 * @param[in] KU Number of super-diagonals in banded matrix \b A.
 * @param[in] alpha The factor of banded matrix \b A.
 * @param[in] A Buffer object storing banded matrix \b A.
 * @param[in] offa Offset in number of elements for the first element in banded matrix \b A.
 * @param[in] lda Leading dimension of banded matrix \b A. It cannot be less
 *     than ( \b KL + \b KU + 1 ).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 *     Counted in elements.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] beta The factor of the vector \b Y.
 * @param[in,out] Y Buffer object storing the vector \b Y; it is both an
 *     operand and the result (see the formulas above).
 * @param[in] offy Offset of first element of vector \b Y in buffer object.
 *     Counted in elements.
* @param[in] incy Increment for the elements of \b Y. Must not be zero.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return The same result as the clblasDgbmv() function.
 *
 * @ingroup GBMV
 */
clblasStatus
clblasZgbmv(
    clblasOrder order,
    clblasTranspose trans,
    size_t M,
    size_t N,
    size_t KL,
    size_t KU,
    cl_double2 alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_double2 beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/


/**
 * @defgroup TBMV TBMV - Triangular banded matrix vector multiply
 * @ingroup BLAS2
 */
/*@{*/

/**
 * @brief Matrix-vector product with a triangular banded matrix and
 * float elements.
 *
 * Matrix-vector products:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] diag Specify whether matrix \b A is unit triangular.
 * @param[in] N Number of rows/columns in banded matrix \b A.
 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
 * @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
 * @param[in] lda Leading dimension of matrix \b A. It cannot be less
 *     than ( \b K + 1 ).
 * @param[in,out] X Buffer object storing vector \b X; it is both the operand
 *     and the result (see the formulas above).
 * @param[in] offx Offset in number of elements for first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
 *     minimum of (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b N or \b incx is zero, or
 *     - \b K is greater than \b N - 1, or
 *     - the leading dimension is invalid;
 *   - \b clblasInvalidMemObject if either \b A or \b X object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasStbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @example example_stbmv.c
 * Example of how to use the @ref clblasStbmv function.
 */


/**
 * @brief Matrix-vector product with a triangular banded matrix and
 * double elements.
 *
 * Matrix-vector products:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] diag Specify whether matrix \b A is unit triangular.
 * @param[in] N Number of rows/columns in banded matrix \b A.
 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
 * @param[in] A Buffer object storing matrix \b A.
 * @param[in] offa Offset in number of elements for first element in matrix \b A.
 * @param[in] lda Leading dimension of matrix \b A. It cannot be less
 *     than ( \b K + 1 ).
 * @param[in,out] X Buffer object storing vector \b X; it is both the operand
 *     and the result (see the formulas above).
 * @param[in] offx Offset in number of elements for first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
 *     minimum of (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
* @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasInvalidDevice if a target device does not support floating
 *     point arithmetic with double precision;
 *   - the same error codes as the clblasStbmv() function otherwise.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasDtbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);


/**
 * @brief Matrix-vector product with a triangular banded matrix and
 * float-complex elements.
 *
 * Matrix-vector products:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] diag Specify whether matrix \b A is unit triangular.
 * @param[in] N Number of rows/columns in banded matrix \b A.
 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
 * @param[in] A Buffer object storing matrix \b A.
 * @param[in] offa Offset in number of elements for first element in matrix \b A.
 * @param[in] lda Leading dimension of matrix \b A. It cannot be less
 *     than ( \b K + 1 ).
 * @param[in,out] X Buffer object storing vector \b X; it is both the operand
 *     and the result (see the formulas above).
* @param[in] offx Offset in number of elements for first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
 *     minimum of (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return The same result as the clblasStbmv() function.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasCtbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);


/**
 * @brief Matrix-vector product with a triangular banded matrix and
 * double-complex elements.
 *
 * Matrix-vector products:
 *   - \f$ X \leftarrow A X \f$
 *   - \f$ X \leftarrow A^T X \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
 * @param[in] trans How matrix \b A is to be transposed.
 * @param[in] diag Specify whether matrix \b A is unit triangular.
 * @param[in] N Number of rows/columns in banded matrix \b A.
 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A.
 * @param[in] A Buffer object storing matrix \b A.
* @param[in] offa Offset in number of elements for first element in matrix \b A.
 * @param[in] lda Leading dimension of matrix \b A. It cannot be less
 *     than ( \b K + 1 ).
 * @param[in,out] X Buffer object storing vector \b X; it is both the operand
 *     and the result (see the formulas above).
 * @param[in] offx Offset in number of elements for first element in vector \b X.
 * @param[in] incx Increment for the elements of \b X. Must not be zero.
 * @param[in] scratchBuff Temporary cl_mem scratch buffer object which can hold a
 *     minimum of (1 + (N-1)*abs(incx)) elements.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return The same result as the clblasDtbmv() function.
 *
 * @ingroup TBMV
 */
clblasStatus
clblasZtbmv(
    clblasOrder order,
    clblasUplo uplo,
    clblasTranspose trans,
    clblasDiag diag,
    size_t N,
    size_t K,
    const cl_mem A,
    size_t offa,
    size_t lda,
    cl_mem X,
    size_t offx,
    int incx,
    cl_mem scratchBuff,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/*@}*/


/**
 * @defgroup SBMV SBMV - Symmetric banded matrix-vector multiplication
 * @ingroup BLAS2
 */
/*@{*/

/**
 * @brief Matrix-vector product with a symmetric banded matrix and float elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
* @param[in] N Number of rows and columns in banded matrix \b A.
 * @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A.
 * @param[in] alpha The factor of matrix \b A.
 * @param[in] A Buffer object storing matrix \b A.
 * @param[in] offa Offset in number of elements for first element in matrix \b A.
 * @param[in] lda Leading dimension of matrix \b A. It cannot be less
 *     than ( \b K + 1 ).
 * @param[in] X Buffer object storing vector \b X.
 * @param[in] offx Offset of first element of vector \b X in buffer object.
 *     Counted in elements.
 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero.
 * @param[in] beta The factor of vector \b Y.
 * @param[in,out] Y Buffer object storing vector \b Y; it is both an operand
 *     and the result (see the formula above).
 * @param[in] offy Offset of first element of vector \b Y in buffer object.
 *     Counted in elements.
 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
 * @param[in] numCommandQueues Number of OpenCL command queues in which the
 *     task is to be performed.
 * @param[in] commandQueues OpenCL command queues.
 * @param[in] numEventsInWaitList Number of events in the event wait list.
 * @param[in] eventWaitList Event wait list.
 * @param[out] events Event objects per each command queue that identify
 *     a particular kernel execution instance.
 *
 * @return
 *   - \b clblasSuccess on success;
 *   - \b clblasNotInitialized if clblasSetup() was not called;
 *   - \b clblasInvalidValue if invalid parameters are passed:
 *     - either \b N or \b incx is zero, or
 *     - \b K is greater than \b N - 1, or
 *     - the leading dimension is invalid;
 *   - \b clblasInvalidMemObject if either \b A or \b X object is
 *     invalid, or an image object rather than the buffer one;
 *   - \b clblasOutOfHostMemory if the library can't allocate memory for
 *     internal structures;
 *   - \b clblasInvalidCommandQueue if the passed command queue is invalid;
 *   - \b clblasInvalidContext if a context a passed command queue belongs
 *     to was released;
 *   - \b clblasInvalidOperation if kernel compilation relating to a previous
 *     call has not completed for any of the target devices;
 *   - \b clblasCompilerNotAvailable if a compiler is not available;
 *   - \b clblasBuildProgramFailure if there is a failure to build a program
 *     executable.
 *
 * @note (review) The list above names only \b incx, \b A and \b X; presumably
 *     the same checks apply to \b incy and \b Y - TODO confirm against the
 *     implementation.
 *
 * @ingroup SBMV
 */
clblasStatus
clblasSsbmv(
    clblasOrder order,
    clblasUplo uplo,
    size_t N,
    size_t K,
    cl_float alpha,
    const cl_mem A,
    size_t offa,
    size_t lda,
    const cl_mem X,
    size_t offx,
    int incx,
    cl_float beta,
    cl_mem Y,
    size_t offy,
    int incy,
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_uint numEventsInWaitList,
    const cl_event *eventWaitList,
    cl_event *events);
/**
 * @example example_ssbmv.c
 * This is an example of how to use the @ref clblasSsbmv function.
 */


/**
 * @brief Matrix-vector product with a symmetric banded matrix and double elements.
 *
 * Matrix-vector products:
 *   - \f$ Y \leftarrow \alpha A X + \beta Y \f$
 *
 * @param[in] order Row/column order.
 * @param[in] uplo The triangle in matrix being referenced.
6856 * @param[in] N Number of rows and columns in banded matrix \b A. 6857 * @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A. 6858 * @param[in] alpha The factor of matrix \b A. 6859 * @param[in] A Buffer object storing matrix \b A. 6860 * @param[in] offa Offset in number of elements for first element in matrix \b A. 6861 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 6862 * than ( \b K + 1 ). 6863 * @param[in] X Buffer object storing vector \b X. 6864 * @param[in] offx Offset of first element of vector \b X in buffer object. 6865 * Counted in elements. 6866 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 6867 * @param[in] beta The factor of vector \b Y. 6868 * @param[in,out] Y Buffer object storing vector \b Y; overwritten with the result. 6869 * @param[in] offy Offset of first element of vector \b Y in buffer object. 6870 * Counted in elements. 6871 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero. 6872 * @param[in] numCommandQueues Number of OpenCL command queues in which the 6873 * task is to be performed. 6874 * @param[in] commandQueues OpenCL command queues. 6875 * @param[in] numEventsInWaitList Number of events in the event wait list. 6876 * @param[in] eventWaitList Event wait list. 6877 * @param[out] events Event objects per each command queue that identify 6878 * a particular kernel execution instance. 6879 * 6880 * @return 6881 * - \b clblasSuccess on success; 6882 * - \b clblasInvalidDevice if a target device does not support floating 6883 * point arithmetic with double precision; 6884 * - the same error codes as the clblasSsbmv() function otherwise.
6885 * 6886 * @ingroup SBMV 6887 */ 6888 clblasStatus 6889 clblasDsbmv( 6890 clblasOrder order, 6891 clblasUplo uplo, 6892 size_t N, 6893 size_t K, 6894 cl_double alpha, 6895 const cl_mem A, 6896 size_t offa, 6897 size_t lda, 6898 const cl_mem X, 6899 size_t offx, 6900 int incx, 6901 cl_double beta, 6902 cl_mem Y, 6903 size_t offy, 6904 int incy, 6905 cl_uint numCommandQueues, 6906 cl_command_queue *commandQueues, 6907 cl_uint numEventsInWaitList, 6908 const cl_event *eventWaitList, 6909 cl_event *events); 6910 6911 /*@}*/ 6912 6913 6914 /** 6915 * @defgroup HBMV HBMV - Hermitian banded matrix-vector multiplication 6916 * @ingroup BLAS2 6917 */ 6918 /*@{*/ 6919 6920 /** 6921 * @brief Matrix-vector product with a Hermitian banded matrix and float-complex elements. 6922 * 6923 * Matrix-vector products: 6924 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 6925 * 6926 * @param[in] order Row/column order. 6927 * @param[in] uplo The triangle in matrix being referenced. 6928 * @param[in] N Number of rows and columns in banded matrix \b A. 6929 * @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A. 6930 * @param[in] alpha The factor of matrix \b A. 6931 * @param[in] A Buffer object storing matrix \b A. 6932 * @param[in] offa Offset in number of elements for first element in matrix \b A. 6933 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 6934 * than ( \b K + 1 ). 6935 * @param[in] X Buffer object storing vector \b X. 6936 * @param[in] offx Offset of first element of vector \b X in buffer object. 6937 * Counted in elements. 6938 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 6939 * @param[in] beta The factor of vector \b Y. 6940 * @param[in,out] Y Buffer object storing vector \b Y; overwritten with the result. 6941 * @param[in] offy Offset of first element of vector \b Y in buffer object. 6942 * Counted in elements. 6943 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
6944 * @param[in] numCommandQueues Number of OpenCL command queues in which the 6945 * task is to be performed. 6946 * @param[in] commandQueues OpenCL command queues. 6947 * @param[in] numEventsInWaitList Number of events in the event wait list. 6948 * @param[in] eventWaitList Event wait list. 6949 * @param[out] events Event objects per each command queue that identify 6950 * a particular kernel execution instance. 6951 * 6952 * @return 6953 * - \b clblasSuccess on success; 6954 * - \b clblasNotInitialized if clblasSetup() was not called; 6955 * - \b clblasInvalidValue if invalid parameters are passed: 6956 * - either \b N or \b incx is zero, or 6957 * - \b K is greater than \b N - 1, or 6958 * - the leading dimension is invalid; 6959 * - \b clblasInvalidMemObject if either \b A or \b X object is 6960 * invalid, or an image object rather than the buffer one; 6961 * - \b clblasOutOfHostMemory if the library can't allocate memory for 6962 * internal structures; 6963 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 6964 * - \b clblasInvalidContext if the context that a passed command queue belongs 6965 * to was released; 6966 * - \b clblasInvalidOperation if kernel compilation relating to a previous 6967 * call has not completed for any of the target devices; 6968 * - \b clblasCompilerNotAvailable if a compiler is not available; 6969 * - \b clblasBuildProgramFailure if there is a failure to build a program 6970 * executable.
6971 * 6972 * @ingroup HBMV 6973 */ 6974 clblasStatus 6975 clblasChbmv( 6976 clblasOrder order, 6977 clblasUplo uplo, 6978 size_t N, 6979 size_t K, 6980 cl_float2 alpha, 6981 const cl_mem A, 6982 size_t offa, 6983 size_t lda, 6984 const cl_mem X, 6985 size_t offx, 6986 int incx, 6987 cl_float2 beta, 6988 cl_mem Y, 6989 size_t offy, 6990 int incy, 6991 cl_uint numCommandQueues, 6992 cl_command_queue *commandQueues, 6993 cl_uint numEventsInWaitList, 6994 const cl_event *eventWaitList, 6995 cl_event *events); 6996 /** 6997 * @example example_chbmv.c 6998 * This is an example of how to use the @ref clblasChbmv function. 6999 */ 7000 7001 7002 /** 7003 * @brief Matrix-vector product with a Hermitian banded matrix and double-complex elements. 7004 * 7005 * Matrix-vector products: 7006 * - \f$ Y \leftarrow \alpha A X + \beta Y \f$ 7007 * 7008 * @param[in] order Row/column order. 7009 * @param[in] uplo The triangle in matrix being referenced. 7010 * @param[in] N Number of rows and columns in banded matrix \b A. 7011 * @param[in] K Number of sub-diagonals/super-diagonals in banded matrix \b A. 7012 * @param[in] alpha The factor of matrix \b A. 7013 * @param[in] A Buffer object storing matrix \b A. 7014 * @param[in] offa Offset in number of elements for first element in matrix \b A. 7015 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7016 * than ( \b K + 1 ). 7017 * @param[in] X Buffer object storing vector \b X. 7018 * @param[in] offx Offset of first element of vector \b X in buffer object. 7019 * Counted in elements. 7020 * @param[in] incx Increment for the elements of vector \b X. It cannot be zero. 7021 * @param[in] beta The factor of vector \b Y. 7022 * @param[in,out] Y Buffer object storing vector \b Y; overwritten with the result. 7023 * @param[in] offy Offset of first element of vector \b Y in buffer object. 7024 * Counted in elements. 7025 * @param[in] incy Increment for the elements of vector \b Y. It cannot be zero.
7026 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7027 * task is to be performed. 7028 * @param[in] commandQueues OpenCL command queues. 7029 * @param[in] numEventsInWaitList Number of events in the event wait list. 7030 * @param[in] eventWaitList Event wait list. 7031 * @param[out] events Event objects per each command queue that identify 7032 * a particular kernel execution instance. 7033 * 7034 * @return 7035 * - \b clblasSuccess on success; 7036 * - \b clblasInvalidDevice if a target device does not support floating 7037 * point arithmetic with double precision; 7038 * - the same error codes as the clblasChbmv() function otherwise. 7039 * 7040 * @ingroup HBMV 7041 */ 7042 clblasStatus 7043 clblasZhbmv( 7044 clblasOrder order, 7045 clblasUplo uplo, 7046 size_t N, 7047 size_t K, 7048 cl_double2 alpha, 7049 const cl_mem A, 7050 size_t offa, 7051 size_t lda, 7052 const cl_mem X, 7053 size_t offx, 7054 int incx, 7055 cl_double2 beta, 7056 cl_mem Y, 7057 size_t offy, 7058 int incy, 7059 cl_uint numCommandQueues, 7060 cl_command_queue *commandQueues, 7061 cl_uint numEventsInWaitList, 7062 const cl_event *eventWaitList, 7063 cl_event *events); 7064 7065 /*@}*/ 7066 7067 7068 /** 7069 * @defgroup TBSV TBSV - Solving triangular banded matrix 7070 * @ingroup BLAS2 7071 */ 7072 /*@{*/ 7073 7074 /** 7075 * @brief Solving triangular banded matrix problems with float elements. 7076 * 7077 * Triangular banded systems solved in place (\b X is overwritten with the solution): 7078 * - \f$ A X \leftarrow X \f$ 7079 * - \f$ A^T X \leftarrow X \f$ 7080 * 7081 * @param[in] order Row/column order. 7082 * @param[in] uplo The triangle in matrix being referenced. 7083 * @param[in] trans How matrix \b A is to be transposed. 7084 * @param[in] diag Specify whether matrix \b A is unit triangular. 7085 * @param[in] N Number of rows/columns in banded matrix \b A. 7086 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A. 7087 * @param[in] A Buffer object storing matrix \b A.
7088 * @param[in] offa Offset in number of elements for first element in matrix \b A. 7089 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7090 * than ( \b K + 1 ). 7091 * @param[in,out] X Buffer object storing vector \b X; overwritten with the solution. 7092 * @param[in] offx Offset in number of elements for first element in vector \b X. 7093 * @param[in] incx Increment for the elements of \b X. Must not be zero. 7094 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7095 * task is to be performed. 7096 * @param[in] commandQueues OpenCL command queues. 7097 * @param[in] numEventsInWaitList Number of events in the event wait list. 7098 * @param[in] eventWaitList Event wait list. 7099 * @param[out] events Event objects per each command queue that identify 7100 * a particular kernel execution instance. 7101 * 7102 * @return 7103 * - \b clblasSuccess on success; 7104 * - \b clblasNotInitialized if clblasSetup() was not called; 7105 * - \b clblasInvalidValue if invalid parameters are passed: 7106 * - either \b N or \b incx is zero, or 7107 * - \b K is greater than \b N - 1, or 7108 * - the leading dimension is invalid; 7109 * - \b clblasInvalidMemObject if either \b A or \b X object is 7110 * invalid, or an image object rather than the buffer one; 7111 * - \b clblasOutOfHostMemory if the library can't allocate memory for 7112 * internal structures; 7113 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 7114 * - \b clblasInvalidContext if the context that a passed command queue belongs 7115 * to was released; 7116 * - \b clblasInvalidOperation if kernel compilation relating to a previous 7117 * call has not completed for any of the target devices; 7118 * - \b clblasCompilerNotAvailable if a compiler is not available; 7119 * - \b clblasBuildProgramFailure if there is a failure to build a program 7120 * executable.
7121 * 7122 * @ingroup TBSV 7123 */ 7124 clblasStatus 7125 clblasStbsv( 7126 clblasOrder order, 7127 clblasUplo uplo, 7128 clblasTranspose trans, 7129 clblasDiag diag, 7130 size_t N, 7131 size_t K, 7132 const cl_mem A, 7133 size_t offa, 7134 size_t lda, 7135 cl_mem X, 7136 size_t offx, 7137 int incx, 7138 cl_uint numCommandQueues, 7139 cl_command_queue *commandQueues, 7140 cl_uint numEventsInWaitList, 7141 const cl_event *eventWaitList, 7142 cl_event *events); 7143 /** 7144 * @example example_stbsv.c 7145 * This is an example of how to use the @ref clblasStbsv function. 7146 */ 7147 7148 7149 /** 7150 * @brief Solving triangular banded matrix problems with double elements. 7151 * 7152 * Triangular banded systems solved in place (\b X is overwritten with the solution): 7153 * - \f$ A X \leftarrow X \f$ 7154 * - \f$ A^T X \leftarrow X \f$ 7155 * 7156 * @param[in] order Row/column order. 7157 * @param[in] uplo The triangle in matrix being referenced. 7158 * @param[in] trans How matrix \b A is to be transposed. 7159 * @param[in] diag Specify whether matrix \b A is unit triangular. 7160 * @param[in] N Number of rows/columns in banded matrix \b A. 7161 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A. 7162 * @param[in] A Buffer object storing matrix \b A. 7163 * @param[in] offa Offset in number of elements for first element in matrix \b A. 7164 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7165 * than ( \b K + 1 ). 7166 * @param[in,out] X Buffer object storing vector \b X; overwritten with the solution. 7167 * @param[in] offx Offset in number of elements for first element in vector \b X. 7168 * @param[in] incx Increment for the elements of \b X. Must not be zero. 7169 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7170 * task is to be performed. 7171 * @param[in] commandQueues OpenCL command queues. 7172 * @param[in] numEventsInWaitList Number of events in the event wait list. 7173 * @param[in] eventWaitList Event wait list.
7174 * @param[out] events Event objects per each command queue that identify 7175 * a particular kernel execution instance. 7176 * 7177 * @return 7178 * - \b clblasSuccess on success; 7179 * - \b clblasInvalidDevice if a target device does not support floating 7180 * point arithmetic with double precision; 7181 * - the same error codes as the clblasStbsv() function otherwise. 7182 * 7183 * @ingroup TBSV 7184 */ 7185 clblasStatus 7186 clblasDtbsv( 7187 clblasOrder order, 7188 clblasUplo uplo, 7189 clblasTranspose trans, 7190 clblasDiag diag, 7191 size_t N, 7192 size_t K, 7193 const cl_mem A, 7194 size_t offa, 7195 size_t lda, 7196 cl_mem X, 7197 size_t offx, 7198 int incx, 7199 cl_uint numCommandQueues, 7200 cl_command_queue *commandQueues, 7201 cl_uint numEventsInWaitList, 7202 const cl_event *eventWaitList, 7203 cl_event *events); 7204 7205 /** 7206 * @brief Solving triangular banded matrix problems with float-complex elements. 7207 * 7208 * Triangular banded systems solved in place (\b X is overwritten with the solution): 7209 * - \f$ A X \leftarrow X \f$ 7210 * - \f$ A^T X \leftarrow X \f$ 7211 * 7212 * @param[in] order Row/column order. 7213 * @param[in] uplo The triangle in matrix being referenced. 7214 * @param[in] trans How matrix \b A is to be transposed. 7215 * @param[in] diag Specify whether matrix \b A is unit triangular. 7216 * @param[in] N Number of rows/columns in banded matrix \b A. 7217 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A. 7218 * @param[in] A Buffer object storing matrix \b A. 7219 * @param[in] offa Offset in number of elements for first element in matrix \b A. 7220 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7221 * than ( \b K + 1 ). 7222 * @param[in,out] X Buffer object storing vector \b X; overwritten with the solution. 7223 * @param[in] offx Offset in number of elements for first element in vector \b X. 7224 * @param[in] incx Increment for the elements of \b X. Must not be zero.
7225 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7226 * task is to be performed. 7227 * @param[in] commandQueues OpenCL command queues. 7228 * @param[in] numEventsInWaitList Number of events in the event wait list. 7229 * @param[in] eventWaitList Event wait list. 7230 * @param[out] events Event objects per each command queue that identify 7231 * a particular kernel execution instance. 7232 * 7233 * @return The same result as the clblasStbsv() function. 7234 * 7235 * @ingroup TBSV 7236 */ 7237 clblasStatus 7238 clblasCtbsv( 7239 clblasOrder order, 7240 clblasUplo uplo, 7241 clblasTranspose trans, 7242 clblasDiag diag, 7243 size_t N, 7244 size_t K, 7245 const cl_mem A, 7246 size_t offa, 7247 size_t lda, 7248 cl_mem X, 7249 size_t offx, 7250 int incx, 7251 cl_uint numCommandQueues, 7252 cl_command_queue *commandQueues, 7253 cl_uint numEventsInWaitList, 7254 const cl_event *eventWaitList, 7255 cl_event *events); 7256 7257 /** 7258 * @brief Solving triangular banded matrix problems with double-complex elements. 7259 * 7260 * Triangular banded systems solved in place (\b X is overwritten with the solution): 7261 * - \f$ A X \leftarrow X \f$ 7262 * - \f$ A^T X \leftarrow X \f$ 7263 * 7264 * @param[in] order Row/column order. 7265 * @param[in] uplo The triangle in matrix being referenced. 7266 * @param[in] trans How matrix \b A is to be transposed. 7267 * @param[in] diag Specify whether matrix \b A is unit triangular. 7268 * @param[in] N Number of rows/columns in banded matrix \b A. 7269 * @param[in] K Number of sub-diagonals/super-diagonals in triangular banded matrix \b A. 7270 * @param[in] A Buffer object storing matrix \b A. 7271 * @param[in] offa Offset in number of elements for first element in matrix \b A. 7272 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7273 * than ( \b K + 1 ). 7274 * @param[in,out] X Buffer object storing vector \b X; overwritten with the solution. 7275 * @param[in] offx Offset in number of elements for first element in vector \b X.
7276 * @param[in] incx Increment for the elements of \b X. Must not be zero. 7277 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7278 * task is to be performed. 7279 * @param[in] commandQueues OpenCL command queues. 7280 * @param[in] numEventsInWaitList Number of events in the event wait list. 7281 * @param[in] eventWaitList Event wait list. 7282 * @param[out] events Event objects per each command queue that identify 7283 * a particular kernel execution instance. 7284 * 7285 * @return The same result as the clblasDtbsv() function. 7286 * 7287 * @ingroup TBSV 7288 */ 7289 clblasStatus 7290 clblasZtbsv( 7291 clblasOrder order, 7292 clblasUplo uplo, 7293 clblasTranspose trans, 7294 clblasDiag diag, 7295 size_t N, 7296 size_t K, 7297 const cl_mem A, 7298 size_t offa, 7299 size_t lda, 7300 cl_mem X, 7301 size_t offx, 7302 int incx, 7303 cl_uint numCommandQueues, 7304 cl_command_queue *commandQueues, 7305 cl_uint numEventsInWaitList, 7306 const cl_event *eventWaitList, 7307 cl_event *events); 7308 7309 /*@}*/ 7310 7311 7312 /** 7313 * @defgroup BLAS3 BLAS-3 functions 7314 * 7315 * The Level 3 Basic Linear Algebra Subprograms are functions that perform 7316 * matrix-matrix operations. 7317 */ 7318 /*@{*/ 7319 /*@}*/ 7320 7321 /** 7322 * @defgroup GEMM GEMM - General matrix-matrix multiplication 7323 * @ingroup BLAS3 7324 */ 7325 /*@{*/ 7326 7327 /** 7328 * @brief Matrix-matrix product of general rectangular matrices with float 7329 * elements. Extended version. 7330 * 7331 * Matrix-matrix products: 7332 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 7333 * - \f$ C \leftarrow \alpha A^T B + \beta C \f$ 7334 * - \f$ C \leftarrow \alpha A B^T + \beta C \f$ 7335 * - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$ 7336 * 7337 * @param[in] order Row/column order. 7338 * @param[in] transA How matrix \b A is to be transposed. 7339 * @param[in] transB How matrix \b B is to be transposed. 7340 * @param[in] M Number of rows in matrix \b A.
7341 * @param[in] N Number of columns in matrix \b B. 7342 * @param[in] K Number of columns in matrix \b A and rows in matrix \b B. 7343 * @param[in] alpha The factor of matrix \b A. 7344 * @param[in] A Buffer object storing matrix \b A. 7345 * @param[in] offA Offset of the first element of the matrix \b A in the 7346 * buffer object. Counted in elements. 7347 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7348 * than \b K when the \b order parameter is set to 7349 * \b clblasRowMajor,\n or less than \b M when the 7350 * parameter is set to \b clblasColumnMajor. 7351 * @param[in] B Buffer object storing matrix \b B. 7352 * @param[in] offB Offset of the first element of the matrix \b B in the 7353 * buffer object. Counted in elements. 7354 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 7355 * than \b N when the \b order parameter is set to 7356 * \b clblasRowMajor,\n or less than \b K 7357 * when it is set to \b clblasColumnMajor. 7358 * @param[in] beta The factor of matrix \b C. 7359 * @param[in,out] C Buffer object storing matrix \b C; overwritten with the result. 7360 * @param[in] offC Offset of the first element of the matrix \b C in the 7361 * buffer object. Counted in elements. 7362 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 7363 * than \b N when the \b order parameter is set to 7364 * \b clblasRowMajor,\n or less than \b M when 7365 * it is set to \b clblasColumnMajor. 7366 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7367 * task is to be performed. 7368 * @param[in] commandQueues OpenCL command queues. 7369 * @param[in] numEventsInWaitList Number of events in the event wait list. 7370 * @param[in] eventWaitList Event wait list. 7371 * @param[out] events Event objects per each command queue that identify 7372 * a particular kernel execution instance.
7373 * 7374 * @return 7375 * - \b clblasSuccess on success; 7376 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 7377 * the size of the respective buffer object; 7378 * - any other \b clblasStatus error code otherwise. 7379 * 7380 * @ingroup GEMM 7381 */ 7382 clblasStatus 7383 clblasSgemm( 7384 clblasOrder order, 7385 clblasTranspose transA, 7386 clblasTranspose transB, 7387 size_t M, 7388 size_t N, 7389 size_t K, 7390 cl_float alpha, 7391 const cl_mem A, 7392 size_t offA, 7393 size_t lda, 7394 const cl_mem B, 7395 size_t offB, 7396 size_t ldb, 7397 cl_float beta, 7398 cl_mem C, 7399 size_t offC, 7400 size_t ldc, 7401 cl_uint numCommandQueues, 7402 cl_command_queue *commandQueues, 7403 cl_uint numEventsInWaitList, 7404 const cl_event *eventWaitList, 7405 cl_event *events); 7406 7407 /** 7408 * @example example_sgemm.c 7409 * This is an example of how to use the @ref clblasSgemm function. 7410 */ 7411 7412 /** 7413 * @brief Matrix-matrix product of general rectangular matrices with double 7414 * elements. Extended version. 7415 * 7416 * Matrix-matrix products: 7417 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 7418 * - \f$ C \leftarrow \alpha A^T B + \beta C \f$ 7419 * - \f$ C \leftarrow \alpha A B^T + \beta C \f$ 7420 * - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$ 7421 * 7422 * @param[in] order Row/column order. 7423 * @param[in] transA How matrix \b A is to be transposed. 7424 * @param[in] transB How matrix \b B is to be transposed. 7425 * @param[in] M Number of rows in matrix \b A. 7426 * @param[in] N Number of columns in matrix \b B. 7427 * @param[in] K Number of columns in matrix \b A and rows in matrix \b B. 7428 * @param[in] alpha The factor of matrix \b A. 7429 * @param[in] A Buffer object storing matrix \b A. 7430 * @param[in] offA Offset of the first element of the matrix \b A in the 7431 * buffer object. Counted in elements. 7432 * @param[in] lda Leading dimension of matrix \b A.
For detailed description, 7433 * see clblasSgemm(). 7434 * @param[in] B Buffer object storing matrix \b B. 7435 * @param[in] offB Offset of the first element of the matrix \b B in the 7436 * buffer object. Counted in elements. 7437 * @param[in] ldb Leading dimension of matrix \b B. For detailed description, 7438 * see clblasSgemm(). 7439 * @param[in] beta The factor of matrix \b C. 7440 * @param[in,out] C Buffer object storing matrix \b C; overwritten with the result. 7441 * @param[in] offC Offset of the first element of the matrix \b C in the 7442 * buffer object. Counted in elements. 7443 * @param[in] ldc Leading dimension of matrix \b C. For detailed description, 7444 * see clblasSgemm(). 7445 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7446 * task is to be performed. 7447 * @param[in] commandQueues OpenCL command queues. 7448 * @param[in] numEventsInWaitList Number of events in the event wait list. 7449 * @param[in] eventWaitList Event wait list. 7450 * @param[out] events Event objects per each command queue that identify 7451 * a particular kernel execution instance. 7452 * 7453 * @return 7454 * - \b clblasSuccess on success; 7455 * - \b clblasInvalidDevice if a target device does not support floating 7456 * point arithmetic with double precision; 7457 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 7458 * the size of the respective buffer object; 7459 * - the same error codes as the clblasSgemm() function otherwise.
7460 * 7461 * @ingroup GEMM 7462 */ 7463 clblasStatus 7464 clblasDgemm( 7465 clblasOrder order, 7466 clblasTranspose transA, 7467 clblasTranspose transB, 7468 size_t M, 7469 size_t N, 7470 size_t K, 7471 cl_double alpha, 7472 const cl_mem A, 7473 size_t offA, 7474 size_t lda, 7475 const cl_mem B, 7476 size_t offB, 7477 size_t ldb, 7478 cl_double beta, 7479 cl_mem C, 7480 size_t offC, 7481 size_t ldc, 7482 cl_uint numCommandQueues, 7483 cl_command_queue *commandQueues, 7484 cl_uint numEventsInWaitList, 7485 const cl_event *eventWaitList, 7486 cl_event *events); 7487 7488 /** 7489 * @brief Matrix-matrix product of general rectangular matrices with 7490 * float-complex elements. Extended version. 7491 * 7492 * Matrix-matrix products: 7493 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 7494 * - \f$ C \leftarrow \alpha A^T B + \beta C \f$ 7495 * - \f$ C \leftarrow \alpha A B^T + \beta C \f$ 7496 * - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$ 7497 * 7498 * @param[in] order Row/column order. 7499 * @param[in] transA How matrix \b A is to be transposed. 7500 * @param[in] transB How matrix \b B is to be transposed. 7501 * @param[in] M Number of rows in matrix \b A. 7502 * @param[in] N Number of columns in matrix \b B. 7503 * @param[in] K Number of columns in matrix \b A and rows in matrix \b B. 7504 * @param[in] alpha The factor of matrix \b A. 7505 * @param[in] A Buffer object storing matrix \b A. 7506 * @param[in] offA Offset of the first element of the matrix \b A in the 7507 * buffer object. Counted in elements. 7508 * @param[in] lda Leading dimension of matrix \b A. For detailed description, 7509 * see clblasSgemm(). 7510 * @param[in] B Buffer object storing matrix \b B. 7511 * @param[in] offB Offset of the first element of the matrix \b B in the 7512 * buffer object. Counted in elements. 7513 * @param[in] ldb Leading dimension of matrix \b B. For detailed description, 7514 * see clblasSgemm(). 7515 * @param[in] beta The factor of matrix \b C.
7516 * @param[in,out] C Buffer object storing matrix \b C; overwritten with the result. 7517 * @param[in] offC Offset of the first element of the matrix \b C in the 7518 * buffer object. Counted in elements. 7519 * @param[in] ldc Leading dimension of matrix \b C. For detailed description, 7520 * see clblasSgemm(). 7521 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7522 * task is to be performed. 7523 * @param[in] commandQueues OpenCL command queues. 7524 * @param[in] numEventsInWaitList Number of events in the event wait list. 7525 * @param[in] eventWaitList Event wait list. 7526 * @param[out] events Event objects per each command queue that identify 7527 * a particular kernel execution instance. 7528 * 7529 * @return 7530 * - \b clblasSuccess on success; 7531 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 7532 * the size of the respective buffer object; 7533 * - the same error codes as the clblasSgemm() function otherwise. 7534 * 7535 * @ingroup GEMM 7536 */ 7537 clblasStatus 7538 clblasCgemm( 7539 clblasOrder order, 7540 clblasTranspose transA, 7541 clblasTranspose transB, 7542 size_t M, 7543 size_t N, 7544 size_t K, 7545 FloatComplex alpha, 7546 const cl_mem A, 7547 size_t offA, 7548 size_t lda, 7549 const cl_mem B, 7550 size_t offB, 7551 size_t ldb, 7552 FloatComplex beta, 7553 cl_mem C, 7554 size_t offC, 7555 size_t ldc, 7556 cl_uint numCommandQueues, 7557 cl_command_queue *commandQueues, 7558 cl_uint numEventsInWaitList, 7559 const cl_event *eventWaitList, 7560 cl_event *events); 7561 7562 /** 7563 * @brief Matrix-matrix product of general rectangular matrices with 7564 * double-complex elements. Extended version. 7565 * 7566 * Matrix-matrix products: 7567 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 7568 * - \f$ C \leftarrow \alpha A^T B + \beta C \f$ 7569 * - \f$ C \leftarrow \alpha A B^T + \beta C \f$ 7570 * - \f$ C \leftarrow \alpha A^T B^T + \beta C \f$ 7571 * 7572 * @param[in] order Row/column order.
7573 * @param[in] transA How matrix \b A is to be transposed. 7574 * @param[in] transB How matrix \b B is to be transposed. 7575 * @param[in] M Number of rows in matrix \b A. 7576 * @param[in] N Number of columns in matrix \b B. 7577 * @param[in] K Number of columns in matrix \b A and rows in matrix \b B. 7578 * @param[in] alpha The factor of matrix \b A. 7579 * @param[in] A Buffer object storing matrix \b A. 7580 * @param[in] offA Offset of the first element of the matrix \b A in the 7581 * buffer object. Counted in elements. 7582 * @param[in] lda Leading dimension of matrix \b A. For detailed description, 7583 * see clblasSgemm(). 7584 * @param[in] B Buffer object storing matrix \b B. 7585 * @param[in] offB Offset of the first element of the matrix \b B in the 7586 * buffer object. Counted in elements. 7587 * @param[in] ldb Leading dimension of matrix \b B. For detailed description, 7588 * see clblasSgemm(). 7589 * @param[in] beta The factor of matrix \b C. 7590 * @param[in,out] C Buffer object storing matrix \b C; overwritten with the result. 7591 * @param[in] offC Offset of the first element of the matrix \b C in the 7592 * buffer object. Counted in elements. 7593 * @param[in] ldc Leading dimension of matrix \b C. For detailed description, 7594 * see clblasSgemm(). 7595 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7596 * task is to be performed. 7597 * @param[in] commandQueues OpenCL command queues. 7598 * @param[in] numEventsInWaitList Number of events in the event wait list. 7599 * @param[in] eventWaitList Event wait list. 7600 * @param[out] events Event objects per each command queue that identify 7601 * a particular kernel execution instance.
7602 * 7603 * @return 7604 * - \b clblasSuccess on success; 7605 * - \b clblasInvalidDevice if a target device does not support floating 7606 * point arithmetic with double precision; 7607 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 7608 * the size of the respective buffer object; 7609 * - the same error codes as the clblasSgemm() function otherwise. 7610 * 7611 * @ingroup GEMM 7612 */ 7613 clblasStatus 7614 clblasZgemm( 7615 clblasOrder order, 7616 clblasTranspose transA, 7617 clblasTranspose transB, 7618 size_t M, 7619 size_t N, 7620 size_t K, 7621 DoubleComplex alpha, 7622 const cl_mem A, 7623 size_t offA, 7624 size_t lda, 7625 const cl_mem B, 7626 size_t offB, 7627 size_t ldb, 7628 DoubleComplex beta, 7629 cl_mem C, 7630 size_t offC, 7631 size_t ldc, 7632 cl_uint numCommandQueues, 7633 cl_command_queue *commandQueues, 7634 cl_uint numEventsInWaitList, 7635 const cl_event *eventWaitList, 7636 cl_event *events); 7637 7638 /*@}*/ 7639 7640 /** 7641 * @defgroup TRMM TRMM - Triangular matrix-matrix multiplication 7642 * @ingroup BLAS3 7643 */ 7644 /*@{*/ 7645 7646 /** 7647 * @brief Multiplying a matrix by a triangular matrix with float elements. 7648 * Extended version. 7649 * 7650 * Matrix-triangular matrix products: 7651 * - \f$ B \leftarrow \alpha A B \f$ 7652 * - \f$ B \leftarrow \alpha A^T B \f$ 7653 * - \f$ B \leftarrow \alpha B A \f$ 7654 * - \f$ B \leftarrow \alpha B A^T \f$ 7655 * 7656 * where \b A is an upper or lower triangular matrix. 7657 * 7658 * @param[in] order Row/column order. 7659 * @param[in] side The side of triangular matrix. 7660 * @param[in] uplo The triangle in matrix being referenced. 7661 * @param[in] transA How matrix \b A is to be transposed. 7662 * @param[in] diag Specify whether matrix is unit triangular. 7663 * @param[in] M Number of rows in matrix \b B. 7664 * @param[in] N Number of columns in matrix \b B. 7665 * @param[in] alpha The factor of matrix \b A.
7666 * @param[in] A Buffer object storing matrix \b A. 7667 * @param[in] offA Offset of the first element of the matrix \b A in the 7668 * buffer object. Counted in elements. 7669 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7670 * than \b M when the \b side parameter is set to 7671 * \b clblasLeft,\n or less than \b N when it is set 7672 * to \b clblasRight. 7673 * @param[in,out] B Buffer object storing matrix \b B; overwritten with the result. 7674 * @param[in] offB Offset of the first element of the matrix \b B in the 7675 * buffer object. Counted in elements. 7676 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 7677 * than \b N when the \b order parameter is set to 7678 * \b clblasRowMajor,\n or less than \b M 7679 * when it is set to \b clblasColumnMajor. 7680 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7681 * task is to be performed. 7682 * @param[in] commandQueues OpenCL command queues. 7683 * @param[in] numEventsInWaitList Number of events in the event wait list. 7684 * @param[in] eventWaitList Event wait list. 7685 * @param[out] events Event objects per each command queue that identify 7686 * a particular kernel execution instance. 7687 * 7688 * @return 7689 * - \b clblasSuccess on success; 7690 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 7691 * of the respective buffer object; 7692 * - any other \b clblasStatus error code otherwise.
7693 * 7694 * @ingroup TRMM 7695 */ 7696 clblasStatus 7697 clblasStrmm( 7698 clblasOrder order, 7699 clblasSide side, 7700 clblasUplo uplo, 7701 clblasTranspose transA, 7702 clblasDiag diag, 7703 size_t M, 7704 size_t N, 7705 cl_float alpha, 7706 const cl_mem A, 7707 size_t offA, 7708 size_t lda, 7709 cl_mem B, 7710 size_t offB, 7711 size_t ldb, 7712 cl_uint numCommandQueues, 7713 cl_command_queue *commandQueues, 7714 cl_uint numEventsInWaitList, 7715 const cl_event *eventWaitList, 7716 cl_event *events); 7717 7718 /** 7719 * @example example_strmm.c 7720 * This is an example of how to use the @ref clblasStrmm function. 7721 */ 7722 7723 /** 7724 * @brief Multiplying a matrix by a triangular matrix with double elements. 7725 * Extended version. 7726 * 7727 * Matrix-triangular matrix products: 7728 * - \f$ B \leftarrow \alpha A B \f$ 7729 * - \f$ B \leftarrow \alpha A^T B \f$ 7730 * - \f$ B \leftarrow \alpha B A \f$ 7731 * - \f$ B \leftarrow \alpha B A^T \f$ 7732 * 7733 * where \b A is an upper or lower triangular matrix. 7734 * 7735 * @param[in] order Row/column order. 7736 * @param[in] side The side of triangular matrix. 7737 * @param[in] uplo The triangle in matrix being referenced. 7738 * @param[in] transA How matrix \b A is to be transposed. 7739 * @param[in] diag Specify whether matrix is unit triangular. 7740 * @param[in] M Number of rows in matrix \b B. 7741 * @param[in] N Number of columns in matrix \b B. 7742 * @param[in] alpha The factor of matrix \b A. 7743 * @param[in] A Buffer object storing matrix \b A. 7744 * @param[in] offA Offset of the first element of the matrix \b A in the 7745 * buffer object. Counted in elements. 7746 * @param[in] lda Leading dimension of matrix \b A. For detailed 7747 * description, see clblasStrmm(). 7748 * @param[out] B Buffer object storing matrix \b B. 7749 * @param[in] offB Offset of the first element of the matrix \b B in the 7750 * buffer object. Counted in elements.
7751 * @param[in] ldb Leading dimension of matrix \b B. For detailed 7752 * description, see clblasStrmm(). 7753 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7754 * task is to be performed. 7755 * @param[in] commandQueues OpenCL command queues. 7756 * @param[in] numEventsInWaitList Number of events in the event wait list. 7757 * @param[in] eventWaitList Event wait list. 7758 * @param[in] events Event objects per each command queue that identify 7759 * a particular kernel execution instance. 7760 * 7761 * @return 7762 * - \b clblasSuccess on success; 7763 * - \b clblasInvalidDevice if a target device does not support floating 7764 * point arithmetic with double precision; 7765 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 7766 * of the respective buffer object; 7767 * - the same error codes as the clblasStrmm() function otherwise. 7768 * 7769 * @ingroup TRMM 7770 */ 7771 clblasStatus 7772 clblasDtrmm( 7773 clblasOrder order, 7774 clblasSide side, 7775 clblasUplo uplo, 7776 clblasTranspose transA, 7777 clblasDiag diag, 7778 size_t M, 7779 size_t N, 7780 cl_double alpha, 7781 const cl_mem A, 7782 size_t offA, 7783 size_t lda, 7784 cl_mem B, 7785 size_t offB, 7786 size_t ldb, 7787 cl_uint numCommandQueues, 7788 cl_command_queue *commandQueues, 7789 cl_uint numEventsInWaitList, 7790 const cl_event *eventWaitList, 7791 cl_event *events); 7792 7793 /** 7794 * @brief Multiplying a matrix by a triangular matrix with float complex 7795 * elements. Extended version. 7796 * 7797 * Matrix-triangular matrix products: 7798 * - \f$ B \leftarrow \alpha A B \f$ 7799 * - \f$ B \leftarrow \alpha A^T B \f$ 7800 * - \f$ B \leftarrow \alpha B A \f$ 7801 * - \f$ B \leftarrow \alpha B A^T \f$ 7802 * 7803 * where \b A is an upper or lower triangular matrix. 7804 * @param[in] order Row/column order. 7805 * @param[in] side The side of triangular matrix. 7806 * @param[in] uplo The triangle in matrix being referenced.
7807 * @param[in] transA How matrix \b A is to be transposed. 7808 * @param[in] diag Specify whether matrix is unit triangular. 7809 * @param[in] M Number of rows in matrix \b B. 7810 * @param[in] N Number of columns in matrix \b B. 7811 * @param[in] alpha The factor of matrix \b A. 7812 * @param[in] A Buffer object storing matrix \b A. 7813 * @param[in] offA Offset of the first element of the matrix \b A in the 7814 * buffer object. Counted in elements. 7815 * @param[in] lda Leading dimension of matrix \b A. For detailed 7816 * description, see clblasStrmm(). 7817 * @param[out] B Buffer object storing matrix \b B. 7818 * @param[in] offB Offset of the first element of the matrix \b B in the 7819 * buffer object. Counted in elements. 7820 * @param[in] ldb Leading dimension of matrix \b B. For detailed 7821 * description, see clblasStrmm(). 7822 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7823 * task is to be performed. 7824 * @param[in] commandQueues OpenCL command queues. 7825 * @param[in] numEventsInWaitList Number of events in the event wait list. 7826 * @param[in] eventWaitList Event wait list. 7827 * @param[in] events Event objects per each command queue that identify 7828 * a particular kernel execution instance. 7829 * 7830 * @return 7831 * - \b clblasSuccess on success; 7832 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 7833 * of the respective buffer object; 7834 * - the same error codes as clblasStrmm() otherwise.
7835 * 7836 * @ingroup TRMM 7837 */ 7838 clblasStatus 7839 clblasCtrmm( 7840 clblasOrder order, 7841 clblasSide side, 7842 clblasUplo uplo, 7843 clblasTranspose transA, 7844 clblasDiag diag, 7845 size_t M, 7846 size_t N, 7847 FloatComplex alpha, 7848 const cl_mem A, 7849 size_t offA, 7850 size_t lda, 7851 cl_mem B, 7852 size_t offB, 7853 size_t ldb, 7854 cl_uint numCommandQueues, 7855 cl_command_queue *commandQueues, 7856 cl_uint numEventsInWaitList, 7857 const cl_event *eventWaitList, 7858 cl_event *events); 7859 7860 /** 7861 * @brief Multiplying a matrix by a triangular matrix with double complex 7862 * elements. Extended version. 7863 * 7864 * Matrix-triangular matrix products: 7865 * - \f$ B \leftarrow \alpha A B \f$ 7866 * - \f$ B \leftarrow \alpha A^T B \f$ 7867 * - \f$ B \leftarrow \alpha B A \f$ 7868 * - \f$ B \leftarrow \alpha B A^T \f$ 7869 * 7870 * where \b A is an upper or lower triangular matrix. 7871 * 7872 * @param[in] order Row/column order. 7873 * @param[in] side The side of triangular matrix. 7874 * @param[in] uplo The triangle in matrix being referenced. 7875 * @param[in] transA How matrix \b A is to be transposed. 7876 * @param[in] diag Specify whether matrix is unit triangular. 7877 * @param[in] M Number of rows in matrix \b B. 7878 * @param[in] N Number of columns in matrix \b B. 7879 * @param[in] alpha The factor of matrix \b A. 7880 * @param[in] A Buffer object storing matrix \b A. 7881 * @param[in] offA Offset of the first element of the matrix \b A in the 7882 * buffer object. Counted in elements. 7883 * @param[in] lda Leading dimension of matrix \b A. For detailed 7884 * description, see clblasStrmm(). 7885 * @param[out] B Buffer object storing matrix \b B. 7886 * @param[in] offB Offset of the first element of the matrix \b B in the 7887 * buffer object. Counted in elements. 7888 * @param[in] ldb Leading dimension of matrix \b B. For detailed 7889 * description, see clblasStrmm().
7890 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7891 * task is to be performed. 7892 * @param[in] commandQueues OpenCL command queues. 7893 * @param[in] numEventsInWaitList Number of events in the event wait list. 7894 * @param[in] eventWaitList Event wait list. 7895 * @param[in] events Event objects per each command queue that identify 7896 * a particular kernel execution instance. 7897 * 7898 * @return 7899 * - \b clblasSuccess on success; 7900 * - \b clblasInvalidDevice if a target device does not support floating 7901 * point arithmetic with double precision; 7902 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 7903 * of the respective buffer object; 7904 * - the same error codes as the clblasStrmm() function otherwise. 7905 * 7906 * @ingroup TRMM 7907 */ 7908 clblasStatus 7909 clblasZtrmm( 7910 clblasOrder order, 7911 clblasSide side, 7912 clblasUplo uplo, 7913 clblasTranspose transA, 7914 clblasDiag diag, 7915 size_t M, 7916 size_t N, 7917 DoubleComplex alpha, 7918 const cl_mem A, 7919 size_t offA, 7920 size_t lda, 7921 cl_mem B, 7922 size_t offB, 7923 size_t ldb, 7924 cl_uint numCommandQueues, 7925 cl_command_queue *commandQueues, 7926 cl_uint numEventsInWaitList, 7927 const cl_event *eventWaitList, 7928 cl_event *events); 7929 7930 /*@}*/ 7931 7932 /** 7933 * @defgroup TRSM TRSM - Solving triangular systems of equations 7934 * @ingroup BLAS3 7935 */ 7936 /*@{*/ 7937 7938 /** 7939 * @brief Solving triangular systems of equations with multiple right-hand 7940 * sides and float elements. Extended version. 7941 * 7942 * Solving triangular systems of equations: 7943 * - \f$ B \leftarrow \alpha A^{-1} B \f$ 7944 * - \f$ B \leftarrow \alpha A^{-T} B \f$ 7945 * - \f$ B \leftarrow \alpha B A^{-1} \f$ 7946 * - \f$ B \leftarrow \alpha B A^{-T} \f$ 7947 * 7948 * where \b A is an upper or lower triangular matrix. 7949 * 7950 * @param[in] order Row/column order. 7951 * @param[in] side The side of triangular matrix.
7952 * @param[in] uplo The triangle in matrix being referenced. 7953 * @param[in] transA How matrix \b A is to be transposed. 7954 * @param[in] diag Specify whether matrix is unit triangular. 7955 * @param[in] M Number of rows in matrix \b B. 7956 * @param[in] N Number of columns in matrix \b B. 7957 * @param[in] alpha The factor of matrix \b A. 7958 * @param[in] A Buffer object storing matrix \b A. 7959 * @param[in] offA Offset of the first element of the matrix \b A in the 7960 * buffer object. Counted in elements. 7961 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 7962 * than \b M when the \b side parameter is set to 7963 * \b clblasLeft,\n or less than \b N 7964 * when it is set to \b clblasRight. 7965 * @param[out] B Buffer object storing matrix \b B. 7966 * @param[in] offB Offset of the first element of the matrix \b B in the 7967 * buffer object. Counted in elements. 7968 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 7969 * than \b N when the \b order parameter is set to 7970 * \b clblasRowMajor,\n or less than \b M 7971 * when it is set to \b clblasColumnMajor. 7972 * @param[in] numCommandQueues Number of OpenCL command queues in which the 7973 * task is to be performed. 7974 * @param[in] commandQueues OpenCL command queues. 7975 * @param[in] numEventsInWaitList Number of events in the event wait list. 7976 * @param[in] eventWaitList Event wait list. 7977 * @param[in] events Event objects per each command queue that identify 7978 * a particular kernel execution instance. 7979 * 7980 * @return 7981 * - \b clblasSuccess on success; 7982 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 7983 * of the respective buffer object; 7984 * - the same error codes as clblasStrsm() otherwise. 
7985 * 7986 * @ingroup TRSM 7987 */ 7988 clblasStatus 7989 clblasStrsm( 7990 clblasOrder order, 7991 clblasSide side, 7992 clblasUplo uplo, 7993 clblasTranspose transA, 7994 clblasDiag diag, 7995 size_t M, 7996 size_t N, 7997 cl_float alpha, 7998 const cl_mem A, 7999 size_t offA, 8000 size_t lda, 8001 cl_mem B, 8002 size_t offB, 8003 size_t ldb, 8004 cl_uint numCommandQueues, 8005 cl_command_queue *commandQueues, 8006 cl_uint numEventsInWaitList, 8007 const cl_event *eventWaitList, 8008 cl_event *events); 8009 8010 /** 8011 * @example example_strsm.c 8012 * This is an example of how to use the @ref clblasStrsm function. 8013 */ 8014 8015 /** 8016 * @brief Solving triangular systems of equations with multiple right-hand 8017 * sides and double elements. Extended version. 8018 * 8019 * Solving triangular systems of equations: 8020 * - \f$ B \leftarrow \alpha A^{-1} B \f$ 8021 * - \f$ B \leftarrow \alpha A^{-T} B \f$ 8022 * - \f$ B \leftarrow \alpha B A^{-1} \f$ 8023 * - \f$ B \leftarrow \alpha B A^{-T} \f$ 8024 * 8025 * where \b A is an upper or lower triangular matrix. 8026 * 8027 * @param[in] order Row/column order. 8028 * @param[in] side The side of triangular matrix. 8029 * @param[in] uplo The triangle in matrix being referenced. 8030 * @param[in] transA How matrix \b A is to be transposed. 8031 * @param[in] diag Specify whether matrix is unit triangular. 8032 * @param[in] M Number of rows in matrix \b B. 8033 * @param[in] N Number of columns in matrix \b B. 8034 * @param[in] alpha The factor of matrix \b A. 8035 * @param[in] A Buffer object storing matrix \b A. 8036 * @param[in] offA Offset of the first element of the matrix \b A in the 8037 * buffer object. Counted in elements. 8038 * @param[in] lda Leading dimension of matrix \b A. For detailed 8039 * description, see clblasStrsm(). 8040 * @param[out] B Buffer object storing matrix \b B. 8041 * @param[in] offB Offset of the first element of the matrix \b B in the 8042 * buffer object. Counted in elements.
8043 * @param[in] ldb Leading dimension of matrix \b B. For detailed 8044 * description, see clblasStrsm(). 8045 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8046 * task is to be performed. 8047 * @param[in] commandQueues OpenCL command queues. 8048 * @param[in] numEventsInWaitList Number of events in the event wait list. 8049 * @param[in] eventWaitList Event wait list. 8050 * @param[in] events Event objects per each command queue that identify 8051 * a particular kernel execution instance. 8052 * 8053 * @return 8054 * - \b clblasSuccess on success; 8055 * - \b clblasInvalidDevice if a target device does not support floating 8056 * point arithmetic with double precision; 8057 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 8058 * of the respective buffer object; 8059 * - the same error codes as the clblasStrsm() function otherwise. 8060 * 8061 * @ingroup TRSM 8062 */ 8063 clblasStatus 8064 clblasDtrsm( 8065 clblasOrder order, 8066 clblasSide side, 8067 clblasUplo uplo, 8068 clblasTranspose transA, 8069 clblasDiag diag, 8070 size_t M, 8071 size_t N, 8072 cl_double alpha, 8073 const cl_mem A, 8074 size_t offA, 8075 size_t lda, 8076 cl_mem B, 8077 size_t offB, 8078 size_t ldb, 8079 cl_uint numCommandQueues, 8080 cl_command_queue *commandQueues, 8081 cl_uint numEventsInWaitList, 8082 const cl_event *eventWaitList, 8083 cl_event *events); 8084 8085 /** 8086 * @brief Solving triangular systems of equations with multiple right-hand 8087 * sides and float complex elements. Extended version. 8088 * 8089 * Solving triangular systems of equations: 8090 * - \f$ B \leftarrow \alpha A^{-1} B \f$ 8091 * - \f$ B \leftarrow \alpha A^{-T} B \f$ 8092 * - \f$ B \leftarrow \alpha B A^{-1} \f$ 8093 * - \f$ B \leftarrow \alpha B A^{-T} \f$ 8094 * 8095 * where \b A is an upper or lower triangular matrix. 8096 * 8097 * @param[in] order Row/column order. 8098 * @param[in] side The side of triangular matrix.
8099 * @param[in] uplo The triangle in matrix being referenced. 8100 * @param[in] transA How matrix \b A is to be transposed. 8101 * @param[in] diag Specify whether matrix is unit triangular. 8102 * @param[in] M Number of rows in matrix \b B. 8103 * @param[in] N Number of columns in matrix \b B. 8104 * @param[in] alpha The factor of matrix \b A. 8105 * @param[in] A Buffer object storing matrix \b A. 8106 * @param[in] offA Offset of the first element of the matrix \b A in the 8107 * buffer object. Counted in elements. 8108 * @param[in] lda Leading dimension of matrix \b A. For detailed 8109 * description, see clblasStrsm(). 8110 * @param[out] B Buffer object storing matrix \b B. 8111 * @param[in] offB Offset of the first element of the matrix \b B in the 8112 * buffer object. Counted in elements. 8113 * @param[in] ldb Leading dimension of matrix \b B. For detailed 8114 * description, see clblasStrsm(). 8115 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8116 * task is to be performed. 8117 * @param[in] commandQueues OpenCL command queues. 8118 * @param[in] numEventsInWaitList Number of events in the event wait list. 8119 * @param[in] eventWaitList Event wait list. 8120 * @param[in] events Event objects per each command queue that identify 8121 * a particular kernel execution instance. 8122 * 8123 * @return 8124 * - \b clblasSuccess on success; 8125 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 8126 * of the respective buffer object; 8127 * - the same error codes as clblasStrsm() otherwise. 
8128 * 8129 * @ingroup TRSM 8130 */ 8131 clblasStatus 8132 clblasCtrsm( 8133 clblasOrder order, 8134 clblasSide side, 8135 clblasUplo uplo, 8136 clblasTranspose transA, 8137 clblasDiag diag, 8138 size_t M, 8139 size_t N, 8140 FloatComplex alpha, 8141 const cl_mem A, 8142 size_t offA, 8143 size_t lda, 8144 cl_mem B, 8145 size_t offB, 8146 size_t ldb, 8147 cl_uint numCommandQueues, 8148 cl_command_queue *commandQueues, 8149 cl_uint numEventsInWaitList, 8150 const cl_event *eventWaitList, 8151 cl_event *events); 8152 8153 /** 8154 * @brief Solving triangular systems of equations with multiple right-hand 8155 * sides and double complex elements. Extended version. 8156 * 8157 * Solving triangular systems of equations: 8158 * - \f$ B \leftarrow \alpha A^{-1} B \f$ 8159 * - \f$ B \leftarrow \alpha A^{-T} B \f$ 8160 * - \f$ B \leftarrow \alpha B A^{-1} \f$ 8161 * - \f$ B \leftarrow \alpha B A^{-T} \f$ 8162 * 8163 * where \b A is an upper or lower triangular matrix. 8164 * 8165 * @param[in] order Row/column order. 8166 * @param[in] side The side of triangular matrix. 8167 * @param[in] uplo The triangle in matrix being referenced. 8168 * @param[in] transA How matrix \b A is to be transposed. 8169 * @param[in] diag Specify whether matrix is unit triangular. 8170 * @param[in] M Number of rows in matrix \b B. 8171 * @param[in] N Number of columns in matrix \b B. 8172 * @param[in] alpha The factor of matrix \b A. 8173 * @param[in] A Buffer object storing matrix \b A. 8174 * @param[in] offA Offset of the first element of the matrix \b A in the 8175 * buffer object. Counted in elements. 8176 * @param[in] lda Leading dimension of matrix \b A. For detailed 8177 * description, see clblasStrsm(). 8178 * @param[out] B Buffer object storing matrix \b B. 8179 * @param[in] offB Offset of the first element of the matrix \b B in the 8180 * buffer object. Counted in elements. 8181 * @param[in] ldb Leading dimension of matrix \b B. For detailed 8182 * description, see clblasStrsm().
8183 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8184 * task is to be performed. 8185 * @param[in] commandQueues OpenCL command queues. 8186 * @param[in] numEventsInWaitList Number of events in the event wait list. 8187 * @param[in] eventWaitList Event wait list. 8188 * @param[in] events Event objects per each command queue that identify 8189 * a particular kernel execution instance. 8190 * 8191 * @return 8192 * - \b clblasSuccess on success; 8193 * - \b clblasInvalidDevice if a target device does not support floating 8194 * point arithmetic with double precision; 8195 * - \b clblasInvalidValue if either \b offA or \b offB exceeds the size 8196 * of the respective buffer object; 8197 * - the same error codes as the clblasStrsm() function otherwise. 8198 * 8199 * @ingroup TRSM 8200 */ 8201 clblasStatus 8202 clblasZtrsm( 8203 clblasOrder order, 8204 clblasSide side, 8205 clblasUplo uplo, 8206 clblasTranspose transA, 8207 clblasDiag diag, 8208 size_t M, 8209 size_t N, 8210 DoubleComplex alpha, 8211 const cl_mem A, 8212 size_t offA, 8213 size_t lda, 8214 cl_mem B, 8215 size_t offB, 8216 size_t ldb, 8217 cl_uint numCommandQueues, 8218 cl_command_queue *commandQueues, 8219 cl_uint numEventsInWaitList, 8220 const cl_event *eventWaitList, 8221 cl_event *events); 8222 8223 /*@}*/ 8224 8225 /** 8226 * @defgroup SYRK SYRK - Symmetric rank-k update of a matrix 8227 * @ingroup BLAS3 8228 */ 8229 8230 /*@{*/ 8231 8232 /** 8233 * @brief Rank-k update of a symmetric matrix with float elements. 8234 * Extended version. 8235 * 8236 * Rank-k updates: 8237 * - \f$ C \leftarrow \alpha A A^T + \beta C \f$ 8238 * - \f$ C \leftarrow \alpha A^T A + \beta C \f$ 8239 * 8240 * where \b C is a symmetric matrix. 8241 * 8242 * @param[in] order Row/column order. 8243 * @param[in] uplo The triangle in matrix \b C being referenced. 8244 * @param[in] transA How matrix \b A is to be transposed. 8245 * @param[in] N Number of rows and columns in matrix \b C.
8246 * @param[in] K Number of columns of the matrix \b A if it is not 8247 * transposed, and number of rows otherwise. 8248 * @param[in] alpha The factor of matrix \b A. 8249 * @param[in] A Buffer object storing the matrix \b A. 8250 * @param[in] offA Offset of the first element of the matrix \b A in the 8251 * buffer object. Counted in elements. 8252 * @param[in] lda Leading dimension of matrix \b A. It cannot be 8253 * less than \b K if \b A is 8254 * in the row-major format, and less than \b N 8255 * otherwise. 8256 * @param[in] beta The factor of the matrix \b C. 8257 * @param[out] C Buffer object storing matrix \b C. 8258 * @param[in] offC Offset of the first element of the matrix \b C in the 8259 * buffer object. Counted in elements. 8260 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8261 * than \b N. 8262 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8263 * task is to be performed. 8264 * @param[in] commandQueues OpenCL command queues. 8265 * @param[in] numEventsInWaitList Number of events in the event wait list. 8266 * @param[in] eventWaitList Event wait list. 8267 * @param[in] events Event objects per each command queue that identify 8268 * a particular kernel execution instance. 8269 * 8270 * @return 8271 * - \b clblasSuccess on success; 8272 * - \b clblasInvalidValue if either \b offA or \b offC exceeds the size 8273 * of the respective buffer object; 8274 * - the same error codes as the clblasSsyrk() function otherwise.
8275 * 8276 * @ingroup SYRK 8277 */ 8278 clblasStatus 8279 clblasSsyrk( 8280 clblasOrder order, 8281 clblasUplo uplo, 8282 clblasTranspose transA, 8283 size_t N, 8284 size_t K, 8285 cl_float alpha, 8286 const cl_mem A, 8287 size_t offA, 8288 size_t lda, 8289 cl_float beta, 8290 cl_mem C, 8291 size_t offC, 8292 size_t ldc, 8293 cl_uint numCommandQueues, 8294 cl_command_queue *commandQueues, 8295 cl_uint numEventsInWaitList, 8296 const cl_event *eventWaitList, 8297 cl_event *events); 8298 8299 /** 8300 * @example example_ssyrk.c 8301 * This is an example of how to use the @ref clblasSsyrk function. 8302 */ 8303 8304 /** 8305 * @brief Rank-k update of a symmetric matrix with double elements. 8306 * Extended version. 8307 * 8308 * Rank-k updates: 8309 * - \f$ C \leftarrow \alpha A A^T + \beta C \f$ 8310 * - \f$ C \leftarrow \alpha A^T A + \beta C \f$ 8311 * 8312 * where \b C is a symmetric matrix. 8313 * 8314 * @param[in] order Row/column order. 8315 * @param[in] uplo The triangle in matrix \b C being referenced. 8316 * @param[in] transA How matrix \b A is to be transposed. 8317 * @param[in] N Number of rows and columns in matrix \b C. 8318 * @param[in] K Number of columns of the matrix \b A if it is not 8319 * transposed, and number of rows otherwise. 8320 * @param[in] alpha The factor of matrix \b A. 8321 * @param[in] A Buffer object storing the matrix \b A. 8322 * @param[in] offA Offset of the first element of the matrix \b A in the 8323 * buffer object. Counted in elements. 8324 * @param[in] lda Leading dimension of matrix \b A. For detailed 8325 * description, see clblasSsyrk(). 8326 * @param[in] beta The factor of the matrix \b C. 8327 * @param[out] C Buffer object storing matrix \b C. 8328 * @param[in] offC Offset of the first element of the matrix \b C in the 8329 * buffer object. Counted in elements. 8330 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8331 * than \b N.
8332 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8333 * task is to be performed. 8334 * @param[in] commandQueues OpenCL command queues. 8335 * @param[in] numEventsInWaitList Number of events in the event wait list. 8336 * @param[in] eventWaitList Event wait list. 8337 * @param[in] events Event objects per each command queue that identify 8338 * a particular kernel execution instance. 8339 * 8340 * @return 8341 * - \b clblasSuccess on success; 8342 * - \b clblasInvalidDevice if a target device does not support floating 8343 * point arithmetic with double precision; 8344 * - \b clblasInvalidValue if either \b offA or \b offC exceeds the size 8345 * of the respective buffer object; 8346 * - the same error codes as the clblasSsyrk() function otherwise. 8347 * 8348 * @ingroup SYRK 8349 */ 8350 clblasStatus 8351 clblasDsyrk( 8352 clblasOrder order, 8353 clblasUplo uplo, 8354 clblasTranspose transA, 8355 size_t N, 8356 size_t K, 8357 cl_double alpha, 8358 const cl_mem A, 8359 size_t offA, 8360 size_t lda, 8361 cl_double beta, 8362 cl_mem C, 8363 size_t offC, 8364 size_t ldc, 8365 cl_uint numCommandQueues, 8366 cl_command_queue *commandQueues, 8367 cl_uint numEventsInWaitList, 8368 const cl_event *eventWaitList, 8369 cl_event *events); 8370 8371 /** 8372 * @brief Rank-k update of a symmetric matrix with complex float elements. 8373 * Extended version. 8374 * 8375 * Rank-k updates: 8376 * - \f$ C \leftarrow \alpha A A^T + \beta C \f$ 8377 * - \f$ C \leftarrow \alpha A^T A + \beta C \f$ 8378 * 8379 * where \b C is a symmetric matrix. 8380 * 8381 * @param[in] order Row/column order. 8382 * @param[in] uplo The triangle in matrix \b C being referenced. 8383 * @param[in] transA How matrix \b A is to be transposed. 8384 * @param[in] N Number of rows and columns in matrix \b C. 8385 * @param[in] K Number of columns of the matrix \b A if it is not 8386 * transposed, and number of rows otherwise. 8387 * @param[in] alpha The factor of matrix \b A.
8388 * @param[in] A Buffer object storing the matrix \b A. 8389 * @param[in] offA Offset of the first element of the matrix \b A in the 8390 * buffer object. Counted in elements. 8391 * @param[in] lda Leading dimension of matrix \b A. For detailed 8392 * description, see clblasSsyrk(). 8393 * @param[in] beta The factor of the matrix \b C. 8394 * @param[out] C Buffer object storing matrix \b C. 8395 * @param[in] offC Offset of the first element of the matrix \b C in the 8396 * buffer object. Counted in elements. 8397 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8398 * than \b N. 8399 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8400 * task is to be performed. 8401 * @param[in] commandQueues OpenCL command queues. 8402 * @param[in] numEventsInWaitList Number of events in the event wait list. 8403 * @param[in] eventWaitList Event wait list. 8404 * @param[in] events Event objects per each command queue that identify 8405 * a particular kernel execution instance. 8406 * 8407 * @return 8408 * - \b clblasSuccess on success; 8409 * - \b clblasInvalidValue if either \b offA or \b offC exceeds the size 8410 * of the respective buffer object; 8411 * - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans; 8412 * - the same error codes as the clblasSsyrk() function otherwise. 8413 * 8414 * @ingroup SYRK 8415 */ 8416 clblasStatus 8417 clblasCsyrk( 8418 clblasOrder order, 8419 clblasUplo uplo, 8420 clblasTranspose transA, 8421 size_t N, 8422 size_t K, 8423 FloatComplex alpha, 8424 const cl_mem A, 8425 size_t offA, 8426 size_t lda, 8427 FloatComplex beta, 8428 cl_mem C, 8429 size_t offC, 8430 size_t ldc, 8431 cl_uint numCommandQueues, 8432 cl_command_queue *commandQueues, 8433 cl_uint numEventsInWaitList, 8434 const cl_event *eventWaitList, 8435 cl_event *events); 8436 8437 /** 8438 * @brief Rank-k update of a symmetric matrix with complex double elements. 8439 * Extended version.
8440 * 8441 * Rank-k updates: 8442 * - \f$ C \leftarrow \alpha A A^T + \beta C \f$ 8443 * - \f$ C \leftarrow \alpha A^T A + \beta C \f$ 8444 * 8445 * where \b C is a symmetric matrix. 8446 * 8447 * @param[in] order Row/column order. 8448 * @param[in] uplo The triangle in matrix \b C being referenced. 8449 * @param[in] transA How matrix \b A is to be transposed. 8450 * @param[in] N Number of rows and columns in matrix \b C. 8451 * @param[in] K Number of columns of the matrix \b A if it is not 8452 * transposed, and number of rows otherwise. 8453 * @param[in] alpha The factor of matrix \b A. 8454 * @param[in] A Buffer object storing the matrix \b A. 8455 * @param[in] offA Offset of the first element of the matrix \b A in the 8456 * buffer object. Counted in elements. 8457 * @param[in] lda Leading dimension of matrix \b A. For detailed 8458 * description, see clblasSsyrk(). 8459 * @param[in] beta The factor of the matrix \b C. 8460 * @param[out] C Buffer object storing matrix \b C. 8461 * @param[in] offC Offset of the first element of the matrix \b C in the 8462 * buffer object. Counted in elements. 8463 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8464 * than \b N. 8465 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8466 * task is to be performed. 8467 * @param[in] commandQueues OpenCL command queues. 8468 * @param[in] numEventsInWaitList Number of events in the event wait list. 8469 * @param[in] eventWaitList Event wait list. 8470 * @param[in] events Event objects per each command queue that identify 8471 * a particular kernel execution instance. 
8472 * 8473 * @return 8474 * - \b clblasSuccess on success; 8475 * - \b clblasInvalidDevice if a target device does not support floating 8476 * point arithmetic with double precision; 8477 * - \b clblasInvalidValue if either \b offA or \b offC exceeds the size 8478 * of the respective buffer object; 8479 * - \b clblasInvalidValue if \b transA is set to \ref clblasConjTrans; 8480 * - the same error codes as the clblasSsyrk() function otherwise. 8481 * 8482 * @ingroup SYRK 8483 */ 8484 clblasStatus 8485 clblasZsyrk( 8486 clblasOrder order, 8487 clblasUplo uplo, 8488 clblasTranspose transA, 8489 size_t N, 8490 size_t K, 8491 DoubleComplex alpha, 8492 const cl_mem A, 8493 size_t offA, 8494 size_t lda, 8495 DoubleComplex beta, 8496 cl_mem C, 8497 size_t offC, 8498 size_t ldc, 8499 cl_uint numCommandQueues, 8500 cl_command_queue *commandQueues, 8501 cl_uint numEventsInWaitList, 8502 const cl_event *eventWaitList, 8503 cl_event *events); 8504 8505 /*@}*/ 8506 8507 /** 8508 * @defgroup SYR2K SYR2K - Symmetric rank-2k update to a matrix 8509 * @ingroup BLAS3 8510 */ 8511 8512 /*@{*/ 8513 8514 /** 8515 * @brief Rank-2k update of a symmetric matrix with float elements. 8516 * Extended version. 8517 * 8518 * Rank-2k updates: 8519 * - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$ 8520 * - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$ 8521 * 8522 * where \b C is a symmetric matrix. 8523 * 8524 * @param[in] order Row/column order. 8525 * @param[in] uplo The triangle in matrix \b C being referenced. 8526 * @param[in] transAB How matrices \b A and \b B are to be transposed. 8527 * @param[in] N Number of rows and columns in matrix \b C. 8528 * @param[in] K Number of columns of the matrices \b A and \b B if they 8529 * are not transposed, and number of rows otherwise. 8530 * @param[in] alpha The factor of matrices \b A and \b B. 8531 * @param[in] A Buffer object storing matrix \b A.
8532 * @param[in] offA Offset of the first element of the matrix \b A in the 8533 * buffer object. Counted in elements. 8534 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 8535 * than \b K if \b A is 8536 * in the row-major format, and less than \b N 8537 * otherwise. 8538 * @param[in] B Buffer object storing matrix \b B. 8539 * @param[in] offB Offset of the first element of the matrix \b B in the 8540 * buffer object. Counted in elements. 8541 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 8542 * than \b K if \b B matches to the op(\b B) matrix 8543 * in the row-major format, and less than \b N 8544 * otherwise. 8545 * @param[in] beta The factor of matrix \b C. 8546 * @param[out] C Buffer object storing matrix \b C. 8547 * @param[in] offC Offset of the first element of the matrix \b C in the 8548 * buffer object. Counted in elements. 8549 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8550 * than \b N. 8551 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8552 * task is to be performed. 8553 * @param[in] commandQueues OpenCL command queues. 8554 * @param[in] numEventsInWaitList Number of events in the event wait list. 8555 * @param[in] eventWaitList Event wait list. 8556 * @param[in] events Event objects per each command queue that identify 8557 * a particular kernel execution instance. 8558 * 8559 * @return 8560 * - \b clblasSuccess on success; 8561 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 8562 * the size of the respective buffer object; 8563 * - the same error codes as the clblasSsyr2k() function otherwise.
8564 * 8565 * @ingroup SYR2K 8566 */ 8567 clblasStatus 8568 clblasSsyr2k( 8569 clblasOrder order, 8570 clblasUplo uplo, 8571 clblasTranspose transAB, 8572 size_t N, 8573 size_t K, 8574 cl_float alpha, 8575 const cl_mem A, 8576 size_t offA, 8577 size_t lda, 8578 const cl_mem B, 8579 size_t offB, 8580 size_t ldb, 8581 cl_float beta, 8582 cl_mem C, 8583 size_t offC, 8584 size_t ldc, 8585 cl_uint numCommandQueues, 8586 cl_command_queue *commandQueues, 8587 cl_uint numEventsInWaitList, 8588 const cl_event *eventWaitList, 8589 cl_event *events); 8590 8591 /** 8592 * @example example_ssyr2k.c 8593 * This is an example of how to use the @ref clblasSsyr2kEx function. 8594 */ 8595 8596 /** 8597 * @brief Rank-2k update of a symmetric matrix with double elements. 8598 * Extended version. 8599 * 8600 * Rank-k updates: 8601 * - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$ 8602 * - \f$ C \leftarrow \alpha A^T B + \alpha B^T A \beta C \f$ 8603 * 8604 * where \b C is a symmetric matrix. 8605 * 8606 * @param[in] order Row/column order. 8607 * @param[in] uplo The triangle in matrix \b C being referenced. 8608 * @param[in] transAB How matrices \b A and \b B is to be transposed. 8609 * @param[in] N Number of rows and columns in matrix \b C. 8610 * @param[in] K Number of columns of the matrices \b A and \b B if they 8611 * are not transposed, and number of rows otherwise. 8612 * @param[in] alpha The factor of matrices \b A and \b B. 8613 * @param[in] A Buffer object storing matrix \b A. 8614 * @param[in] offA Offset of the first element of the matrix \b A in the 8615 * buffer object. Counted in elements. 8616 * @param[in] lda Leading dimension of matrix \b A. For detailed 8617 * description, see clblasSsyr2k(). 8618 * @param[in] B Buffer object storing matrix \b B. 8619 * @param[in] offB Offset of the first element of the matrix \b B in the 8620 * buffer object. Counted in elements. 8621 * @param[in] ldb Leading dimension of matrix \b B. 
For detailed 8622 * description, see clblasSsyr2k(). 8623 * @param[in] beta The factor of matrix \b C. 8624 * @param[in,out] C Buffer object storing matrix \b C. 8625 * @param[in] offC Offset of the first element of the matrix \b C in the 8626 * buffer object. Counted in elements. 8627 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8628 * than \b N. 8629 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8630 * task is to be performed. 8631 * @param[in] commandQueues OpenCL command queues. 8632 * @param[in] numEventsInWaitList Number of events in the event wait list. 8633 * @param[in] eventWaitList Event wait list. 8634 * @param[out] events Event objects per each command queue that identify 8635 * a particular kernel execution instance. 8636 * 8637 * @return 8638 * - \b clblasSuccess on success; 8639 * - \b clblasInvalidDevice if a target device does not support floating 8640 * point arithmetic with double precision; 8641 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 8642 * the size of the respective buffer object; 8643 * - the same error codes as the clblasSsyr2k() function otherwise. 8644 * 8645 * @ingroup SYR2K 8646 */ 8647 clblasStatus 8648 clblasDsyr2k( 8649 clblasOrder order, 8650 clblasUplo uplo, 8651 clblasTranspose transAB, 8652 size_t N, 8653 size_t K, 8654 cl_double alpha, 8655 const cl_mem A, 8656 size_t offA, 8657 size_t lda, 8658 const cl_mem B, 8659 size_t offB, 8660 size_t ldb, 8661 cl_double beta, 8662 cl_mem C, 8663 size_t offC, 8664 size_t ldc, 8665 cl_uint numCommandQueues, 8666 cl_command_queue *commandQueues, 8667 cl_uint numEventsInWaitList, 8668 const cl_event *eventWaitList, 8669 cl_event *events); 8670 8671 /** 8672 * @brief Rank-2k update of a symmetric matrix with complex float elements. 8673 * Extended version. 
8674 * 8675 * Rank-2k updates: 8676 * - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$ 8677 * - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$ 8678 * 8679 * where \b C is a symmetric matrix. 8680 * 8681 * @param[in] order Row/column order. 8682 * @param[in] uplo The triangle in matrix \b C being referenced. 8683 * @param[in] transAB How matrices \b A and \b B are to be transposed. 8684 * @param[in] N Number of rows and columns in matrix \b C. 8685 * @param[in] K Number of columns of the matrices \b A and \b B if they 8686 * are not transposed, and number of rows otherwise. 8687 * @param[in] alpha The factor of matrices \b A and \b B. 8688 * @param[in] A Buffer object storing matrix \b A. 8689 * @param[in] offA Offset of the first element of the matrix \b A in the 8690 * buffer object. Counted in elements. 8691 * @param[in] lda Leading dimension of matrix \b A. For detailed 8692 * description, see clblasSsyr2k(). 8693 * @param[in] B Buffer object storing matrix \b B. 8694 * @param[in] offB Offset of the first element of the matrix \b B in the 8695 * buffer object. Counted in elements. 8696 * @param[in] ldb Leading dimension of matrix \b B. For detailed 8697 * description, see clblasSsyr2k(). 8698 * @param[in] beta The factor of matrix \b C. 8699 * @param[in,out] C Buffer object storing matrix \b C. 8700 * @param[in] offC Offset of the first element of the matrix \b C in the 8701 * buffer object. Counted in elements. 8702 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8703 * than \b N. 8704 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8705 * task is to be performed. 8706 * @param[in] commandQueues OpenCL command queues. 8707 * @param[in] numEventsInWaitList Number of events in the event wait list. 8708 * @param[in] eventWaitList Event wait list. 8709 * @param[out] events Event objects per each command queue that identify 8710 * a particular kernel execution instance. 
8711 * 8712 * @return 8713 * - \b clblasSuccess on success; 8714 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 8715 * the size of the respective buffer object; 8716 * - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans; 8717 * - the same error codes as the clblasSsyr2k() function otherwise. 8718 * 8719 * @ingroup SYR2K 8720 */ 8721 clblasStatus 8722 clblasCsyr2k( 8723 clblasOrder order, 8724 clblasUplo uplo, 8725 clblasTranspose transAB, 8726 size_t N, 8727 size_t K, 8728 FloatComplex alpha, 8729 const cl_mem A, 8730 size_t offA, 8731 size_t lda, 8732 const cl_mem B, 8733 size_t offB, 8734 size_t ldb, 8735 FloatComplex beta, 8736 cl_mem C, 8737 size_t offC, 8738 size_t ldc, 8739 cl_uint numCommandQueues, 8740 cl_command_queue *commandQueues, 8741 cl_uint numEventsInWaitList, 8742 const cl_event *eventWaitList, 8743 cl_event *events); 8744 8745 /** 8746 * @brief Rank-2k update of a symmetric matrix with complex double elements. 8747 * Extended version. 8748 * 8749 * Rank-2k updates: 8750 * - \f$ C \leftarrow \alpha A B^T + \alpha B A^T + \beta C \f$ 8751 * - \f$ C \leftarrow \alpha A^T B + \alpha B^T A + \beta C \f$ 8752 * 8753 * where \b C is a symmetric matrix. 8754 * 8755 * @param[in] order Row/column order. 8756 * @param[in] uplo The triangle in matrix \b C being referenced. 8757 * @param[in] transAB How matrices \b A and \b B are to be transposed. 8758 * @param[in] N Number of rows and columns in matrix \b C. 8759 * @param[in] K Number of columns of the matrices \b A and \b B if they 8760 * are not transposed, and number of rows otherwise. 8761 * @param[in] alpha The factor of matrices \b A and \b B. 8762 * @param[in] A Buffer object storing matrix \b A. 8763 * @param[in] offA Offset of the first element of the matrix \b A in the 8764 * buffer object. Counted in elements. 8765 * @param[in] lda Leading dimension of matrix \b A. For detailed 8766 * description, see clblasSsyr2k(). 
8767 * @param[in] B Buffer object storing matrix \b B. 8768 * @param[in] offB Offset of the first element of the matrix \b B in the 8769 * buffer object. Counted in elements. 8770 * @param[in] ldb Leading dimension of matrix \b B. For detailed 8771 * description, see clblasSsyr2k(). 8772 * @param[in] beta The factor of matrix \b C. 8773 * @param[in,out] C Buffer object storing matrix \b C. 8774 * @param[in] offC Offset of the first element of the matrix \b C in the 8775 * buffer object. Counted in elements. 8776 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8777 * than \b N. 8778 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8779 * task is to be performed. 8780 * @param[in] commandQueues OpenCL command queues. 8781 * @param[in] numEventsInWaitList Number of events in the event wait list. 8782 * @param[in] eventWaitList Event wait list. 8783 * @param[out] events Event objects per each command queue that identify 8784 * a particular kernel execution instance. 8785 * 8786 * @return 8787 * - \b clblasSuccess on success; 8788 * - \b clblasInvalidDevice if a target device does not support floating 8789 * point arithmetic with double precision; 8790 * - \b clblasInvalidValue if either \b offA, \b offB or \b offC exceeds 8791 * the size of the respective buffer object; 8792 * - \b clblasInvalidValue if \b transAB is set to \ref clblasConjTrans; 8793 * - the same error codes as the clblasSsyr2k() function otherwise. 
8794 * 8795 * @ingroup SYR2K 8796 */ 8797 clblasStatus 8798 clblasZsyr2k( 8799 clblasOrder order, 8800 clblasUplo uplo, 8801 clblasTranspose transAB, 8802 size_t N, 8803 size_t K, 8804 DoubleComplex alpha, 8805 const cl_mem A, 8806 size_t offA, 8807 size_t lda, 8808 const cl_mem B, 8809 size_t offB, 8810 size_t ldb, 8811 DoubleComplex beta, 8812 cl_mem C, 8813 size_t offC, 8814 size_t ldc, 8815 cl_uint numCommandQueues, 8816 cl_command_queue *commandQueues, 8817 cl_uint numEventsInWaitList, 8818 const cl_event *eventWaitList, 8819 cl_event *events); 8820 /*@}*/ 8821 8822 8823 /** 8824 * @defgroup SYMM SYMM - Symmetric matrix-matrix multiply 8825 * @ingroup BLAS3 8826 */ 8827 /*@{*/ 8828 8829 /** 8830 * @brief Matrix-matrix product of symmetric rectangular matrices with float 8831 * elements. 8832 * 8833 * Matrix-matrix products: 8834 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 8835 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 8836 * 8837 * @param[in] order Row/column order. 8838 * @param[in] side The side on which the symmetric matrix \b A appears in the product. 8839 * @param[in] uplo The triangle in matrix being referenced. 8840 * @param[in] M Number of rows in matrices \b B and \b C. 8841 * @param[in] N Number of columns in matrices \b B and \b C. 8842 * @param[in] alpha The factor of matrix \b A. 8843 * @param[in] A Buffer object storing matrix \b A. 8844 * @param[in] offa Offset of the first element of the matrix \b A in the 8845 * buffer object. Counted in elements. 8846 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 8847 * than \b M when the \b side parameter is set to 8848 * \b clblasLeft,\n or less than \b N when the 8849 * parameter is set to \b clblasRight. 8850 * @param[in] B Buffer object storing matrix \b B. 8851 * @param[in] offb Offset of the first element of the matrix \b B in the 8852 * buffer object. Counted in elements. 8853 * @param[in] ldb Leading dimension of matrix \b B. 
It cannot be less 8854 * than \b N when the \b order parameter is set to 8855 * \b clblasRowMajor,\n or less than \b M 8856 * when it is set to \b clblasColumnMajor. 8857 * @param[in] beta The factor of matrix \b C. 8858 * @param[in,out] C Buffer object storing matrix \b C. 8859 * @param[in] offc Offset of the first element of the matrix \b C in the 8860 * buffer object. Counted in elements. 8861 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8862 * than \b N when the \b order parameter is set to 8863 * \b clblasRowMajor,\n or less than \b M when 8864 * it is set to \b clblasColumnMajor. 8865 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8866 * task is to be performed. 8867 * @param[in] commandQueues OpenCL command queues. 8868 * @param[in] numEventsInWaitList Number of events in the event wait list. 8869 * @param[in] eventWaitList Event wait list. 8870 * @param[out] events Event objects per each command queue that identify 8871 * a particular kernel execution instance. 
8872 * 8873 * @return 8874 * - \b clblasSuccess on success; 8875 * - \b clblasNotInitialized if clblasSetup() was not called; 8876 * - \b clblasInvalidValue if invalid parameters are passed: 8877 * - \b M or \b N is zero, or 8878 * - any of the leading dimensions is invalid; 8879 * - the matrix sizes lead to accessing outside of any of the buffers; 8880 * - \b clblasInvalidMemObject if A, B, or C object is invalid, 8881 * or an image object rather than the buffer one; 8882 * - \b clblasOutOfResources if you use image-based function implementation 8883 * and no suitable scratch image available; 8884 * - \b clblasOutOfHostMemory if the library can't allocate memory for 8885 * internal structures; 8886 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 8887 * - \b clblasInvalidContext if a context a passed command queue belongs to 8888 * was released; 8889 * - \b clblasInvalidOperation if kernel compilation relating to a previous 8890 * call has not completed for any of the target devices; 8891 * - \b clblasCompilerNotAvailable if a compiler is not available; 8892 * - \b clblasBuildProgramFailure if there is a failure to build a program 8893 * executable. 8894 * 8895 * @ingroup SYMM 8896 */ 8897 clblasStatus 8898 clblasSsymm( 8899 clblasOrder order, 8900 clblasSide side, 8901 clblasUplo uplo, 8902 size_t M, 8903 size_t N, 8904 cl_float alpha, 8905 const cl_mem A, 8906 size_t offa, 8907 size_t lda, 8908 const cl_mem B, 8909 size_t offb, 8910 size_t ldb, 8911 cl_float beta, 8912 cl_mem C, 8913 size_t offc, 8914 size_t ldc, 8915 cl_uint numCommandQueues, 8916 cl_command_queue *commandQueues, 8917 cl_uint numEventsInWaitList, 8918 const cl_event *eventWaitList, 8919 cl_event *events); 8920 /** 8921 * @example example_ssymm.c 8922 * This is an example of how to use the @ref clblasSsymm function. 8923 */ 8924 8925 8926 /** 8927 * @brief Matrix-matrix product of symmetric rectangular matrices with double 8928 * elements. 
8929 * 8930 * Matrix-matrix products: 8931 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 8932 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 8933 * 8934 * @param[in] order Row/column order. 8935 * @param[in] side The side on which the symmetric matrix \b A appears in the product. 8936 * @param[in] uplo The triangle in matrix being referenced. 8937 * @param[in] M Number of rows in matrices \b B and \b C. 8938 * @param[in] N Number of columns in matrices \b B and \b C. 8939 * @param[in] alpha The factor of matrix \b A. 8940 * @param[in] A Buffer object storing matrix \b A. 8941 * @param[in] offa Offset of the first element of the matrix \b A in the 8942 * buffer object. Counted in elements. 8943 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 8944 * than \b M when the \b side parameter is set to 8945 * \b clblasLeft,\n or less than \b N when the 8946 * parameter is set to \b clblasRight. 8947 * @param[in] B Buffer object storing matrix \b B. 8948 * @param[in] offb Offset of the first element of the matrix \b B in the 8949 * buffer object. Counted in elements. 8950 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 8951 * than \b N when the \b order parameter is set to 8952 * \b clblasRowMajor,\n or less than \b M 8953 * when it is set to \b clblasColumnMajor. 8954 * @param[in] beta The factor of matrix \b C. 8955 * @param[in,out] C Buffer object storing matrix \b C. 8956 * @param[in] offc Offset of the first element of the matrix \b C in the 8957 * buffer object. Counted in elements. 8958 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 8959 * than \b N when the \b order parameter is set to 8960 * \b clblasRowMajor,\n or less than \b M when 8961 * it is set to \b clblasColumnMajor. 8962 * @param[in] numCommandQueues Number of OpenCL command queues in which the 8963 * task is to be performed. 8964 * @param[in] commandQueues OpenCL command queues. 8965 * @param[in] numEventsInWaitList Number of events in the event wait list. 
8966 * @param[in] eventWaitList Event wait list. 8967 * @param[out] events Event objects per each command queue that identify 8968 * a particular kernel execution instance. 8969 * 8970 * @return 8971 * - \b clblasSuccess on success; 8972 * - \b clblasInvalidDevice if a target device does not support floating 8973 * point arithmetic with double precision; 8974 * - the same error codes as the clblasSsymm() function otherwise. 8975 * 8976 * @ingroup SYMM 8977 */ 8978 clblasStatus 8979 clblasDsymm( 8980 clblasOrder order, 8981 clblasSide side, 8982 clblasUplo uplo, 8983 size_t M, 8984 size_t N, 8985 cl_double alpha, 8986 const cl_mem A, 8987 size_t offa, 8988 size_t lda, 8989 const cl_mem B, 8990 size_t offb, 8991 size_t ldb, 8992 cl_double beta, 8993 cl_mem C, 8994 size_t offc, 8995 size_t ldc, 8996 cl_uint numCommandQueues, 8997 cl_command_queue *commandQueues, 8998 cl_uint numEventsInWaitList, 8999 const cl_event *eventWaitList, 9000 cl_event *events); 9001 9002 9003 /** 9004 * @brief Matrix-matrix product of symmetric rectangular matrices with 9005 * float-complex elements. 9006 * 9007 * Matrix-matrix products: 9008 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 9009 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 9010 * 9011 * @param[in] order Row/column order. 9012 * @param[in] side The side on which the symmetric matrix \b A appears in the product. 9013 * @param[in] uplo The triangle in matrix being referenced. 9014 * @param[in] M Number of rows in matrices \b B and \b C. 9015 * @param[in] N Number of columns in matrices \b B and \b C. 9016 * @param[in] alpha The factor of matrix \b A. 9017 * @param[in] A Buffer object storing matrix \b A. 9018 * @param[in] offa Offset of the first element of the matrix \b A in the 9019 * buffer object. Counted in elements. 9020 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 9021 * than \b M when the \b side parameter is set to 9022 * \b clblasLeft,\n or less than \b N when the 9023 * parameter is set to \b clblasRight. 
9024 * @param[in] B Buffer object storing matrix \b B. 9025 * @param[in] offb Offset of the first element of the matrix \b B in the 9026 * buffer object. Counted in elements. 9027 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 9028 * than \b N when the \b order parameter is set to 9029 * \b clblasRowMajor,\n or less than \b M 9030 * when it is set to \b clblasColumnMajor. 9031 * @param[in] beta The factor of matrix \b C. 9032 * @param[in,out] C Buffer object storing matrix \b C. 9033 * @param[in] offc Offset of the first element of the matrix \b C in the 9034 * buffer object. Counted in elements. 9035 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9036 * than \b N when the \b order parameter is set to 9037 * \b clblasRowMajor,\n or less than \b M when 9038 * it is set to \b clblasColumnMajor. 9039 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9040 * task is to be performed. 9041 * @param[in] commandQueues OpenCL command queues. 9042 * @param[in] numEventsInWaitList Number of events in the event wait list. 9043 * @param[in] eventWaitList Event wait list. 9044 * @param[out] events Event objects per each command queue that identify 9045 * a particular kernel execution instance. 9046 * 9047 * @return The same result as the clblasSsymm() function. 
9048 * 9049 * @ingroup SYMM 9050 */ 9051 clblasStatus 9052 clblasCsymm( 9053 clblasOrder order, 9054 clblasSide side, 9055 clblasUplo uplo, 9056 size_t M, 9057 size_t N, 9058 cl_float2 alpha, 9059 const cl_mem A, 9060 size_t offa, 9061 size_t lda, 9062 const cl_mem B, 9063 size_t offb, 9064 size_t ldb, 9065 cl_float2 beta, 9066 cl_mem C, 9067 size_t offc, 9068 size_t ldc, 9069 cl_uint numCommandQueues, 9070 cl_command_queue *commandQueues, 9071 cl_uint numEventsInWaitList, 9072 const cl_event *eventWaitList, 9073 cl_event *events); 9074 9075 /** 9076 * @brief Matrix-matrix product of symmetric rectangular matrices with 9077 * double-complex elements. 
9078 * 9079 * Matrix-matrix products: 9080 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 9081 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 9082 * 9083 * @param[in] order Row/column order. 9084 * @param[in] side The side on which the symmetric matrix \b A appears in the product. 9085 * @param[in] uplo The triangle in matrix being referenced. 9086 * @param[in] M Number of rows in matrices \b B and \b C. 9087 * @param[in] N Number of columns in matrices \b B and \b C. 9088 * @param[in] alpha The factor of matrix \b A. 9089 * @param[in] A Buffer object storing matrix \b A. 9090 * @param[in] offa Offset of the first element of the matrix \b A in the 9091 * buffer object. Counted in elements. 9092 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 9093 * than \b M when the \b side parameter is set to 9094 * \b clblasLeft,\n or less than \b N when the 9095 * parameter is set to \b clblasRight. 9096 * @param[in] B Buffer object storing matrix \b B. 9097 * @param[in] offb Offset of the first element of the matrix \b B in the 9098 * buffer object. Counted in elements. 9099 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 9100 * than \b N when the \b order parameter is set to 9101 * \b clblasRowMajor,\n or less than \b M 9102 * when it is set to \b clblasColumnMajor. 9103 * @param[in] beta The factor of matrix \b C. 9104 * @param[in,out] C Buffer object storing matrix \b C. 9105 * @param[in] offc Offset of the first element of the matrix \b C in the 9106 * buffer object. Counted in elements. 9107 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9108 * than \b N when the \b order parameter is set to 9109 * \b clblasRowMajor,\n or less than \b M when 9110 * it is set to \b clblasColumnMajor. 9111 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9112 * task is to be performed. 9113 * @param[in] commandQueues OpenCL command queues. 9114 * @param[in] numEventsInWaitList Number of events in the event wait list. 
9115 * @param[in] eventWaitList Event wait list. 9116 * @param[out] events Event objects per each command queue that identify 9117 * a particular kernel execution instance. 9118 * 9119 * @return The same result as the clblasDsymm() function. 9120 * 9121 * @ingroup SYMM 9122 */ 9123 clblasStatus 9124 clblasZsymm( 9125 clblasOrder order, 9126 clblasSide side, 9127 clblasUplo uplo, 9128 size_t M, 9129 size_t N, 9130 cl_double2 alpha, 9131 const cl_mem A, 9132 size_t offa, 9133 size_t lda, 9134 const cl_mem B, 9135 size_t offb, 9136 size_t ldb, 9137 cl_double2 beta, 9138 cl_mem C, 9139 size_t offc, 9140 size_t ldc, 9141 cl_uint numCommandQueues, 9142 cl_command_queue *commandQueues, 9143 cl_uint numEventsInWaitList, 9144 const cl_event *eventWaitList, 9145 cl_event *events); 9146 /*@}*/ 9147 9148 9149 /** 9150 * @defgroup HEMM HEMM - Hermitian matrix-matrix multiplication 9151 * @ingroup BLAS3 9152 */ 9153 /*@{*/ 9154 9155 /** 9156 * @brief Matrix-matrix product of hermitian rectangular matrices with 9157 * float-complex elements. 9158 * 9159 * Matrix-matrix products: 9160 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 9161 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 9162 * 9163 * @param[in] order Row/column order. 9164 * @param[in] side The side on which the hermitian matrix \b A appears in the product. 9165 * @param[in] uplo The triangle in matrix being referenced. 9166 * @param[in] M Number of rows in matrices \b B and \b C. 9167 * @param[in] N Number of columns in matrices \b B and \b C. 9168 * @param[in] alpha The factor of matrix \b A. 9169 * @param[in] A Buffer object storing matrix \b A. 9170 * @param[in] offa Offset of the first element of the matrix \b A in the 9171 * buffer object. Counted in elements. 9172 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 9173 * than \b M when the \b side parameter is set to 9174 * \b clblasLeft,\n or less than \b N when the 9175 * parameter is set to \b clblasRight. 9176 * @param[in] B Buffer object storing matrix \b B. 
9177 * @param[in] offb Offset of the first element of the matrix \b B in the 9178 * buffer object. Counted in elements. 9179 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 9180 * than \b N when the \b order parameter is set to 9181 * \b clblasRowMajor,\n or less than \b M 9182 * when it is set to \b clblasColumnMajor. 9183 * @param[in] beta The factor of matrix \b C. 9184 * @param[in,out] C Buffer object storing matrix \b C. 9185 * @param[in] offc Offset of the first element of the matrix \b C in the 9186 * buffer object. Counted in elements. 9187 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9188 * than \b N when the \b order parameter is set to 9189 * \b clblasRowMajor,\n or less than \b M when 9190 * it is set to \b clblasColumnMajor. 9191 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9192 * task is to be performed. 9193 * @param[in] commandQueues OpenCL command queues. 9194 * @param[in] numEventsInWaitList Number of events in the event wait list. 9195 * @param[in] eventWaitList Event wait list. 9196 * @param[out] events Event objects per each command queue that identify 9197 * a particular kernel execution instance. 
9198 * 9199 * @return 9200 * - \b clblasSuccess on success; 9201 * - \b clblasNotInitialized if clblasSetup() was not called; 9202 * - \b clblasInvalidValue if invalid parameters are passed: 9203 * - \b M or \b N is zero, or 9204 * - any of the leading dimensions is invalid; 9205 * - the matrix sizes lead to accessing outside of any of the buffers; 9206 * - \b clblasInvalidMemObject if A, B, or C object is invalid, 9207 * or an image object rather than the buffer one; 9208 * - \b clblasOutOfResources if you use image-based function implementation 9209 * and no suitable scratch image available; 9210 * - \b clblasOutOfHostMemory if the library can't allocate memory for 9211 * internal structures; 9212 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 9213 * - \b clblasInvalidContext if a context a passed command queue belongs to 9214 * was released; 9215 * - \b clblasInvalidOperation if kernel compilation relating to a previous 9216 * call has not completed for any of the target devices; 9217 * - \b clblasCompilerNotAvailable if a compiler is not available; 9218 * - \b clblasBuildProgramFailure if there is a failure to build a program 9219 * executable. 9220 * 9221 * @ingroup HEMM 9222 */ 9223 clblasStatus 9224 clblasChemm( 9225 clblasOrder order, 9226 clblasSide side, 9227 clblasUplo uplo, 9228 size_t M, 9229 size_t N, 9230 cl_float2 alpha, 9231 const cl_mem A, 9232 size_t offa, 9233 size_t lda, 9234 const cl_mem B, 9235 size_t offb, 9236 size_t ldb, 9237 cl_float2 beta, 9238 cl_mem C, 9239 size_t offc, 9240 size_t ldc, 9241 cl_uint numCommandQueues, 9242 cl_command_queue *commandQueues, 9243 cl_uint numEventsInWaitList, 9244 const cl_event *eventWaitList, 9245 cl_event *events); 9246 /** 9247 * @example example_chemm.cpp 9248 * This is an example of how to use the @ref clblasChemm function. 9249 */ 9250 9251 9252 /** 9253 * @brief Matrix-matrix product of hermitian rectangular matrices with 9254 * double-complex elements. 
9255 * 9256 * Matrix-matrix products: 9257 * - \f$ C \leftarrow \alpha A B + \beta C \f$ 9258 * - \f$ C \leftarrow \alpha B A + \beta C \f$ 9259 * 9260 * @param[in] order Row/column order. 9261 * @param[in] side The side on which the hermitian matrix \b A appears in the product. 9262 * @param[in] uplo The triangle in matrix being referenced. 9263 * @param[in] M Number of rows in matrices \b B and \b C. 9264 * @param[in] N Number of columns in matrices \b B and \b C. 9265 * @param[in] alpha The factor of matrix \b A. 9266 * @param[in] A Buffer object storing matrix \b A. 9267 * @param[in] offa Offset of the first element of the matrix \b A in the 9268 * buffer object. Counted in elements. 9269 * @param[in] lda Leading dimension of matrix \b A. It cannot be less 9270 * than \b M when the \b side parameter is set to 9271 * \b clblasLeft,\n or less than \b N when the 9272 * parameter is set to \b clblasRight. 9273 * @param[in] B Buffer object storing matrix \b B. 9274 * @param[in] offb Offset of the first element of the matrix \b B in the 9275 * buffer object. Counted in elements. 9276 * @param[in] ldb Leading dimension of matrix \b B. It cannot be less 9277 * than \b N when the \b order parameter is set to 9278 * \b clblasRowMajor,\n or less than \b M 9279 * when it is set to \b clblasColumnMajor. 9280 * @param[in] beta The factor of matrix \b C. 9281 * @param[in,out] C Buffer object storing matrix \b C. 9282 * @param[in] offc Offset of the first element of the matrix \b C in the 9283 * buffer object. Counted in elements. 9284 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9285 * than \b N when the \b order parameter is set to 9286 * \b clblasRowMajor,\n or less than \b M when 9287 * it is set to \b clblasColumnMajor. 9288 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9289 * task is to be performed. 9290 * @param[in] commandQueues OpenCL command queues. 9291 * @param[in] numEventsInWaitList Number of events in the event wait list. 
9292 * @param[in] eventWaitList Event wait list. 9293 * @param[out] events Event objects per each command queue that identify 9294 * a particular kernel execution instance. 9295 * 9296 * @return 9297 * - \b clblasSuccess on success; 9298 * - \b clblasInvalidDevice if a target device does not support floating 9299 * point arithmetic with double precision; 9300 * - the same error codes as the clblasChemm() function otherwise. 9301 * 9302 * @ingroup HEMM 9303 */ 9304 clblasStatus 9305 clblasZhemm( 9306 clblasOrder order, 9307 clblasSide side, 9308 clblasUplo uplo, 9309 size_t M, 9310 size_t N, 9311 cl_double2 alpha, 9312 const cl_mem A, 9313 size_t offa, 9314 size_t lda, 9315 const cl_mem B, 9316 size_t offb, 9317 size_t ldb, 9318 cl_double2 beta, 9319 cl_mem C, 9320 size_t offc, 9321 size_t ldc, 9322 cl_uint numCommandQueues, 9323 cl_command_queue *commandQueues, 9324 cl_uint numEventsInWaitList, 9325 const cl_event *eventWaitList, 9326 cl_event *events); 9327 /*@}*/ 9328 9329 9330 /** 9331 * @defgroup HERK HERK - Hermitian rank-k update to a matrix 9332 * @ingroup BLAS3 9333 */ 9334 /*@{*/ 9335 9336 /** 9337 * @brief Rank-k update of a hermitian matrix with float-complex elements. 9338 * 9339 * Rank-k updates: 9340 * - \f$ C \leftarrow \alpha A A^H + \beta C \f$ 9341 * - \f$ C \leftarrow \alpha A^H A + \beta C \f$ 9342 * 9343 * where \b C is a hermitian matrix. 9344 * 9345 * @param[in] order Row/column order. 9346 * @param[in] uplo The triangle in matrix \b C being referenced. 9347 * @param[in] transA How matrix \b A is to be transposed. 9348 * @param[in] N Number of rows and columns in matrix \b C. 9349 * @param[in] K Number of columns of the matrix \b A if it is not 9350 * transposed, and number of rows otherwise. 9351 * @param[in] alpha The factor of matrix \b A. 9352 * @param[in] A Buffer object storing the matrix \b A. 9353 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 9354 * @param[in] lda Leading dimension of matrix \b A. 
It cannot be 9355 * less than \b K if \b A is 9356 * in the row-major format, and less than \b N 9357 * otherwise. 9358 * @param[in] beta The factor of the matrix \b C. 9359 * @param[in,out] C Buffer object storing matrix \b C. 9360 * @param[in] offc Offset in number of elements for the first element in matrix \b C. 9361 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9362 * than \b N. 9363 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9364 * task is to be performed. 9365 * @param[in] commandQueues OpenCL command queues. 9366 * @param[in] numEventsInWaitList Number of events in the event wait list. 9367 * @param[in] eventWaitList Event wait list. 9368 * @param[out] events Event objects per each command queue that identify 9369 * a particular kernel execution instance. 9370 * 9371 * @return 9372 * - \b clblasSuccess on success; 9373 * - \b clblasNotInitialized if clblasSetup() was not called; 9374 * - \b clblasInvalidValue if invalid parameters are passed: 9375 * - either \b N or \b K is zero, or 9376 * - any of the leading dimensions is invalid; 9377 * - the matrix sizes lead to accessing outside of any of the buffers; 9378 * - \b clblasInvalidMemObject if either \b A or \b C object is 9379 * invalid, or an image object rather than the buffer one; 9380 * - \b clblasOutOfHostMemory if the library can't allocate memory for 9381 * internal structures; 9382 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 9383 * - \b clblasInvalidContext if a context a passed command queue belongs to 9384 * was released. 
9385 * 9386 * @ingroup HERK 9387 */ 9388 clblasStatus 9389 clblasCherk( 9390 clblasOrder order, 9391 clblasUplo uplo, 9392 clblasTranspose transA, 9393 size_t N, 9394 size_t K, 9395 float alpha, 9396 const cl_mem A, 9397 size_t offa, 9398 size_t lda, 9399 float beta, 9400 cl_mem C, 9401 size_t offc, 9402 size_t ldc, 9403 cl_uint numCommandQueues, 9404 cl_command_queue *commandQueues, 9405 cl_uint numEventsInWaitList, 9406 const cl_event *eventWaitList, 9407 cl_event *events); 9408 /** 9409 * @example example_cherk.cpp 9410 * This is an example of how to use the @ref clblasCherk function. 9411 */ 9412 9413 9414 /** 9415 * @brief Rank-k update of a hermitian matrix with double-complex elements. 9416 * 9417 * Rank-k updates: 9418 * - \f$ C \leftarrow \alpha A A^H + \beta C \f$ 9419 * - \f$ C \leftarrow \alpha A^H A + \beta C \f$ 9420 * 9421 * where \b C is a hermitian matrix. 9422 * 9423 * @param[in] order Row/column order. 9424 * @param[in] uplo The triangle in matrix \b C being referenced. 9425 * @param[in] transA How matrix \b A is to be transposed. 9426 * @param[in] N Number of rows and columns in matrix \b C. 9427 * @param[in] K Number of columns of the matrix \b A if it is not 9428 * transposed, and number of rows otherwise. 9429 * @param[in] alpha The factor of matrix \b A. 9430 * @param[in] A Buffer object storing the matrix \b A. 9431 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 9432 * @param[in] lda Leading dimension of matrix \b A. It cannot be 9433 * less than \b K if \b A is 9434 * in the row-major format, and less than \b N 9435 * otherwise. 9436 * @param[in] beta The factor of the matrix \b C. 9437 * @param[in,out] C Buffer object storing matrix \b C. 9438 * @param[in] offc Offset in number of elements for the first element in matrix \b C. 9439 * @param[in] ldc Leading dimension of matrix \b C. It cannot be less 9440 * than \b N. 
9441 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9442 * task is to be performed. 9443 * @param[in] commandQueues OpenCL command queues. 9444 * @param[in] numEventsInWaitList Number of events in the event wait list. 9445 * @param[in] eventWaitList Event wait list. 9446 * @param[in] events Event objects per each command queue that identify 9447 * a particular kernel execution instance. 9448 * 9449 * @return 9450 * - \b clblasSuccess on success; 9451 * - \b clblasInvalidDevice if a target device does not support floating 9452 * point arithmetic with double precision; 9453 * - the same error codes as the clblasCherk() function otherwise. 9454 * 9455 * @ingroup HERK 9456 */ 9457 clblasStatus 9458 clblasZherk( 9459 clblasOrder order, 9460 clblasUplo uplo, 9461 clblasTranspose transA, 9462 size_t N, 9463 size_t K, 9464 double alpha, 9465 const cl_mem A, 9466 size_t offa, 9467 size_t lda, 9468 double beta, 9469 cl_mem C, 9470 size_t offc, 9471 size_t ldc, 9472 cl_uint numCommandQueues, 9473 cl_command_queue *commandQueues, 9474 cl_uint numEventsInWaitList, 9475 const cl_event *eventWaitList, 9476 cl_event *events); 9477 /*@}*/ 9478 9479 9480 /** 9481 * @defgroup HER2K HER2K - Hermitian rank-2k update to a matrix 9482 * @ingroup BLAS3 9483 */ 9484 /*@{*/ 9485 9486 /** 9487 * @brief Rank-2k update of a hermitian matrix with float-complex elements. 9488 * 9489 * Rank-k updates: 9490 * - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$ 9491 * - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$ 9492 * 9493 * where \b C is a hermitian matrix. 9494 * 9495 * @param[in] order Row/column order. 9496 * @param[in] uplo The triangle in matrix \b C being referenced. 9497 * @param[in] trans How matrix \b A is to be transposed. 9498 * @param[in] N Number of rows and columns in matrix \b C. 9499 * @param[in] K Number of columns of the matrix \b A if it is not 9500 * transposed, and number of rows otherwise. 
9501 * @param[in] alpha The factor of matrix \b A. 9502 * @param[in] A Buffer object storing the matrix \b A. 9503 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 9504 * @param[in] lda Leading dimension of matrix \b A. It cannot be 9505 * less than \b K if \b A is 9506 * in the row-major format, and less than \b N 9507 * otherwise. Vice-versa for transpose case. 9508 * @param[in] B Buffer object storing the matrix \b B. 9509 * @param[in] offb Offset in number of elements for the first element in matrix \b B. 9510 * @param[in] ldb Leading dimension of matrix \b B. It cannot be 9511 * less than \b K if \b B is 9512 * in the row-major format, and less than \b N 9513 * otherwise. Vice-versa for transpose case 9514 * @param[in] beta The factor of the matrix \b C. 9515 * @param[out] C Buffer object storing matrix \b C. 9516 * @param[in] offc Offset in number of elements for the first element in matrix \b C. 9517 * @param[in] ldc Leading dimension of matric \b C. It cannot be less 9518 * than \b N. 9519 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9520 * task is to be performed. 9521 * @param[in] commandQueues OpenCL command queues. 9522 * @param[in] numEventsInWaitList Number of events in the event wait list. 9523 * @param[in] eventWaitList Event wait list. 9524 * @param[in] events Event objects per each command queue that identify 9525 * a particular kernel execution instance. 
9526 * 9527 * @return 9528 * - \b clblasSuccess on success; 9529 * - \b clblasNotInitialized if clblasSetup() was not called; 9530 * - \b clblasInvalidValue if invalid parameters are passed: 9531 * - either \b N or \b K is zero, or 9532 * - any of the leading dimensions is invalid; 9533 * - the matrix sizes lead to accessing outsize of any of the buffers; 9534 * - \b clblasInvalidMemObject if either \b A , \b B or \b C object is 9535 * invalid, or an image object rather than the buffer one; 9536 * - \b clblasOutOfHostMemory if the library can't allocate memory for 9537 * internal structures; 9538 * - \b clblasInvalidCommandQueue if the passed command queue is invalid; 9539 * - \b clblasInvalidContext if a context a passed command queue belongs to 9540 * was released. 9541 * 9542 * @ingroup HER2K 9543 */ 9544 clblasStatus 9545 clblasCher2k( 9546 clblasOrder order, 9547 clblasUplo uplo, 9548 clblasTranspose trans, 9549 size_t N, 9550 size_t K, 9551 FloatComplex alpha, 9552 const cl_mem A, 9553 size_t offa, 9554 size_t lda, 9555 const cl_mem B, 9556 size_t offb, 9557 size_t ldb, 9558 cl_float beta, 9559 cl_mem C, 9560 size_t offc, 9561 size_t ldc, 9562 cl_uint numCommandQueues, 9563 cl_command_queue *commandQueues, 9564 cl_uint numEventsInWaitList, 9565 const cl_event *eventWaitList, 9566 cl_event *events); 9567 /** 9568 * @example example_cher2k.c 9569 * This is an example of how to use the @ref clblasCher2k function. 9570 */ 9571 9572 9573 /** 9574 * @brief Rank-2k update of a hermitian matrix with double-complex elements. 9575 * 9576 * Rank-k updates: 9577 * - \f$ C \leftarrow \alpha A B^H + conj( \alpha ) B A^H + \beta C \f$ 9578 * - \f$ C \leftarrow \alpha A^H B + conj( \alpha ) B^H A + \beta C \f$ 9579 * 9580 * where \b C is a hermitian matrix. 9581 * 9582 * @param[in] order Row/column order. 9583 * @param[in] uplo The triangle in matrix \b C being referenced. 9584 * @param[in] trans How matrix \b A is to be transposed. 
9585 * @param[in] N Number of rows and columns in matrix \b C. 9586 * @param[in] K Number of columns of the matrix \b A if it is not 9587 * transposed, and number of rows otherwise. 9588 * @param[in] alpha The factor of matrix \b A. 9589 * @param[in] A Buffer object storing the matrix \b A. 9590 * @param[in] offa Offset in number of elements for the first element in matrix \b A. 9591 * @param[in] lda Leading dimension of matrix \b A. It cannot be 9592 * less than \b K if \b A is 9593 * in the row-major format, and less than \b N 9594 * otherwise. Vice-versa for transpose case. 9595 * @param[in] B Buffer object storing the matrix \b B. 9596 * @param[in] offb Offset in number of elements for the first element in matrix \b B. 9597 * @param[in] ldb Leading dimension of matrix \b B. It cannot be 9598 * less than \b K if B is 9599 * in the row-major format, and less than \b N 9600 * otherwise. Vice-versa for transpose case. 9601 * @param[in] beta The factor of the matrix \b C. 9602 * @param[out] C Buffer object storing matrix \b C. 9603 * @param[in] offc Offset in number of elements for the first element in matrix \b C. 9604 * @param[in] ldc Leading dimension of matric \b C. It cannot be less 9605 * than \b N. 9606 * @param[in] numCommandQueues Number of OpenCL command queues in which the 9607 * task is to be performed. 9608 * @param[in] commandQueues OpenCL command queues. 9609 * @param[in] numEventsInWaitList Number of events in the event wait list. 9610 * @param[in] eventWaitList Event wait list. 9611 * @param[in] events Event objects per each command queue that identify 9612 * a particular kernel execution instance. 9613 * 9614 * @return 9615 * - \b clblasSuccess on success; 9616 * - \b clblasInvalidDevice if a target device does not support floating 9617 * point arithmetic with double precision; 9618 * - the same error codes as the clblasCher2k() function otherwise. 
9619 * 9620 * @ingroup HER2K 9621 */ 9622 clblasStatus 9623 clblasZher2k( 9624 clblasOrder order, 9625 clblasUplo uplo, 9626 clblasTranspose trans, 9627 size_t N, 9628 size_t K, 9629 DoubleComplex alpha, 9630 const cl_mem A, 9631 size_t offa, 9632 size_t lda, 9633 const cl_mem B, 9634 size_t offb, 9635 size_t ldb, 9636 cl_double beta, 9637 cl_mem C, 9638 size_t offc, 9639 size_t ldc, 9640 cl_uint numCommandQueues, 9641 cl_command_queue *commandQueues, 9642 cl_uint numEventsInWaitList, 9643 const cl_event *eventWaitList, 9644 cl_event *events); 9645 /*@}*/ 9646 9647 /** 9648 * @brief Helper function to compute leading dimension and size of a matrix 9649 * 9650 * @param[in] order matrix ordering 9651 * @param[in] rows number of rows 9652 * @param[in] columns number of column 9653 * @param[in] elemsize element size 9654 * @param[in] padding additional padding on the leading dimension 9655 * @param[out] ld if non-NULL *ld is filled with the leading dimension 9656 * in elements 9657 * @param[out] fullsize if non-NULL *fullsize is filled with the byte size 9658 * 9659 * @return 9660 * - \b clblasSuccess for success 9661 * - \b clblasInvalidValue if: 9662 * - \b elementsize is 0 9663 * - \b row and \b colums are both equal to 0 9664 */ 9665 clblasStatus clblasMatrixSizeInfo( 9666 clblasOrder order, 9667 size_t rows, 9668 size_t columns, 9669 size_t elemsize, 9670 size_t padding, 9671 size_t * ld, 9672 size_t * fullsize); 9673 9674 /** 9675 * @brief Allocates matrix on device and computes ld and size 9676 * 9677 * @param[in] context OpenCL context 9678 * @param[in] order Row/column order. 
9679 * @param[in] rows number of rows 9680 * @param[in] columns number of columns 9681 * @param[in] elemsize element size 9682 * @param[in] padding additional padding on the leading dimension 9683 * @param[out] ld if non-NULL *ld is filled with the leading dimension 9684 * in elements 9685 * @param[out] fullsize if non-NULL *fullsize is filled with the byte size 9686 * @param[in] err Error code (see \b clCreateBuffer() ) 9687 * 9688 * @return 9689 * - OpenCL memory object of the allocated matrix 9690 */ 9691 cl_mem clblasCreateMatrix( 9692 cl_context context, 9693 clblasOrder order, 9694 size_t rows, 9695 size_t columns, 9696 size_t elemsize, 9697 size_t padding, 9698 size_t * ld, 9699 size_t * fullsize, 9700 cl_int * err); 9701 9702 9703 /** 9704 * @brief Allocates matrix on device with specified size and ld and computes its size 9705 * 9706 * @param[in] context OpenCL context 9707 * @param[in] order Row/column order. 9708 * @param[in] rows number of rows 9709 * @param[in] columns number of columns 9710 * @param[in] elemsize element size 9711 * @param[in] padding additional padding on the leading dimension 9712 * @param[out] ld the length of the leading dimensions. It cannot 9713 * be less than \b columns when the \b order parameter is set to 9714 * \b clblasRowMajor,\n or less than \b rows when the 9715 * parameter is set to \b clblasColumnMajor. 9716 * @param[out] fullsize if non-NULL *fullsize is filled with the byte size 9717 * @param[in] err Error code (see \b clCreateBuffer() ) 9718 * 9719 * @return 9720 * - OpenCL memory object of the allocated matrix 9721 */ 9722 cl_mem clblasCreateMatrixWithLd( cl_context context, 9723 clblasOrder order, 9724 size_t rows, 9725 size_t columns, 9726 size_t elemsize, 9727 size_t ld, 9728 size_t * fullsize, 9729 cl_int * err) ; 9730 9731 9732 /** 9733 * @brief Allocates matrix on device and initialize from existing similar matrix 9734 * on host. See \b clblasCreateMatrixBuffer(). 
9735 * 9736 * @param[in] ld leading dimension in elements 9737 * @param[in] host base address of host matrix data 9738 * @param[in] off_host host matrix offset in elements 9739 * @param[in] ld_host leading dimension of host matrix in elements 9740 * @param[in] command_queue specifies the OpenCL queue 9741 * @param[in] numEventsInWaitList specifies the number of OpenCL events 9742 * to wait for 9743 * @param[in] eventWaitList specifies the list of OpenCL events to 9744 * wait for 9745 * 9746 * @return 9747 * - OpenCL memory object of the allocated matrix 9748 */ 9749 cl_mem clblasCreateMatrixFromHost( 9750 cl_context context, 9751 clblasOrder order, 9752 size_t rows, 9753 size_t columns, 9754 size_t elemsize, 9755 size_t ld, 9756 void * host, 9757 size_t off_host, 9758 size_t ld_host, 9759 cl_command_queue command_queue, 9760 cl_uint numEventsInWaitList, 9761 const cl_event *eventWaitList, 9762 cl_int * err); 9763 9764 /** 9765 * @brief Copies synchronously a sub-matrix from host (A) to device (B). 
9766 * 9767 * @param[in] order matrix ordering 9768 * @param[in] element_size element size 9769 * @param[in] A specifies the source matrix on the host 9770 * @param[in] offA specifies the offset of matrix A in 9771 * elements 9772 * @param[in] ldA specifies the leading dimension of 9773 * matrix A in elements 9774 * @param[in] nrA specifies the number of rows of A 9775 * in elements 9776 * @param[in] ncA specifies the number of columns of A 9777 * in elements 9778 * @param[in] xA specifies the top-left x position to 9779 * copy from A 9780 * @param[in] yA specifies the top-left y position to 9781 * copy from A 9782 * @param[in] B specifies the destination matrix on the 9783 * device 9784 * @param[in] offB specifies the offset of matrix B in 9785 * elements 9786 * @param[in] ldB specifies the leading dimension of 9787 * matrix B in bytes 9788 * @param[in] nrB specifies the number of rows of B 9789 * in elements 9790 * @param[in] ncB specifies the number of columns of B 9791 * in elements 9792 * @param[in] xB specifies the top-left x position to 9793 * copy from B 9794 * @param[in] yB specifies the top-left y position to 9795 * copy from B 9796 * @param[in] nx specifies the number of elements to 9797 * copy according to the x dimension (rows) 9798 * @param[in] ny specifies the number of elements to 9799 * copy according to the y dimension 9800 * (columns) 9801 * @param[in] command_queue specifies the OpenCL queue 9802 * @param[in] numEventsInWaitList specifies the number of OpenCL events 9803 * to wait for 9804 * @param[in] eventWaitList specifies the list of OpenCL events to 9805 * wait for 9806 * 9807 * @return 9808 * - \b clblasSuccess for success 9809 * - \b clblasInvalidValue if: 9810 * - \b xA + \b offA + \b nx is superior to number of columns of A 9811 * - \b xB + \b offB + \b nx is superior to number of columns of B 9812 * - \b yA + \b ny is superior to number of rows of A 9813 * - \b yB + \b ny is superior to number of rows of B 9814 */ 9815 clblasStatus 
clblasWriteSubMatrix( 9816 clblasOrder order, 9817 size_t element_size, 9818 const void *A, size_t offA, size_t ldA, 9819 size_t nrA, size_t ncA, 9820 size_t xA, size_t yA, 9821 cl_mem B, size_t offB, size_t ldB, 9822 size_t nrB, size_t ncB, 9823 size_t xB, size_t yB, 9824 size_t nx, size_t ny, 9825 cl_command_queue command_queue, 9826 cl_uint numEventsInWaitList, 9827 const cl_event *eventWaitList); 9828 9829 /** 9830 * @brief Copies asynchronously a sub-matrix from host (A) to device (B). 9831 * See \b clblasWriteSubMatrix(). 9832 * 9833 * @param[out] event Event objects per each command queue that identify a 9834 * particular kernel execution instance. 9835 */ 9836 clblasStatus clblasWriteSubMatrixAsync( 9837 clblasOrder order, 9838 size_t element_size, 9839 const void *A, size_t offA, size_t ldA, 9840 size_t nrA, size_t ncA, 9841 size_t xA, size_t yA, 9842 cl_mem B, size_t offB, size_t ldB, 9843 size_t nrB, size_t ncB, 9844 size_t xB, size_t yB, 9845 size_t nx, size_t ny, 9846 cl_command_queue command_queue, 9847 cl_uint numEventsInWaitList, 9848 const cl_event *eventWaitList, 9849 cl_event *event); 9850 9851 /** 9852 * @brief Copies a sub-matrix from device (A) to host (B). 9853 * See \b clblasWriteSubMatrix(). 9854 * 9855 * @param[in] A specifies the source matrix on the device 9856 * @param[in] B specifies the destination matrix on the host 9857 * 9858 * @return 9859 * - see \b clblasWriteSubMatrix() 9860 */ 9861 clblasStatus clblasReadSubMatrix( 9862 clblasOrder order, 9863 size_t element_size, 9864 const cl_mem A, size_t offA, size_t ldA, 9865 size_t nrA, size_t ncA, 9866 size_t xA, size_t yA, 9867 void *B, size_t offB, size_t ldB, 9868 size_t nrB, size_t ncB, 9869 size_t xB, size_t yB, 9870 size_t nx, size_t ny, 9871 cl_command_queue command_queue, 9872 cl_uint numEventsInWaitList, 9873 const cl_event *eventWaitList); 9874 9875 /** 9876 * @brief Copies asynchronously a sub-matrix from device (A) to host (B). 
9877 * See \b clblasReadSubMatrix() and \b clblasWriteSubMatrixAsync(). 9878 */ 9879 clblasStatus clblasReadSubMatrixAsync( 9880 clblasOrder order, 9881 size_t element_size, 9882 const cl_mem A, size_t offA, size_t ldA, 9883 size_t nrA, size_t ncA, 9884 size_t xA, size_t yA, 9885 void *B, size_t offB, size_t ldB, 9886 size_t nrB, size_t ncB, 9887 size_t xB, size_t yB, 9888 size_t nx, size_t ny, 9889 cl_command_queue command_queue, 9890 cl_uint numEventsInWaitList, 9891 const cl_event *eventWaitList, 9892 cl_event *event); 9893 9894 /** 9895 * @brief Copies a sub-matrix from device (A) to device (B). 9896 * See \b clblasWriteSubMatrix(). 9897 * 9898 * @param[in] A specifies the source matrix on the device 9899 * @param[in] B specifies the destination matrix on the device 9900 * 9901 * @return 9902 * - see \b clblasWriteSubMatrix() 9903 */ 9904 clblasStatus clblasCopySubMatrix( 9905 clblasOrder order, 9906 size_t element_size, 9907 const cl_mem A, size_t offA, size_t ldA, 9908 size_t nrA, size_t ncA, 9909 size_t xA, size_t yA, 9910 cl_mem B, size_t offB, size_t ldB, 9911 size_t nrB, size_t ncB, 9912 size_t xB, size_t yB, 9913 size_t nx, size_t ny, 9914 cl_command_queue command_queue, 9915 cl_uint numEventsInWaitList, 9916 const cl_event *eventWaitList); 9917 9918 /** 9919 * @brief Copies asynchronously a sub-matrix from device (A) to device (B). 9920 * See \b clblasCopySubMatrix() and \b clblasWriteSubMatrixAsync(). 9921 */ 9922 clblasStatus clblasCopySubMatrixAsync( 9923 clblasOrder order, 9924 size_t element_size, 9925 const cl_mem A, size_t offA, size_t ldA, 9926 size_t nrA, size_t ncA, 9927 size_t xA, size_t yA, 9928 cl_mem B, size_t offB, size_t ldB, 9929 size_t nrB, size_t ncB, 9930 size_t xB, size_t yB, 9931 size_t nx, size_t ny, 9932 cl_command_queue command_queue, 9933 cl_uint numEventsInWaitList, 9934 const cl_event *eventWaitList, 9935 cl_event *event); 9936 9937 /** 9938 * @brief Copies synchronously a vector from host (A) to device (B). 
9939 * See \b clblasWriteSubMatrix(). 9940 * 9941 * @param[in] A specifies the source vector on the host 9942 * @param[in] B specifies the destination vector on the device 9943 * 9944 * @return 9945 * - see \b clblasWriteSubMatrix() 9946 */ 9947 clblasStatus clblasWriteVector( 9948 size_t nb_elem, 9949 size_t element_size, 9950 const void *A, size_t offA, 9951 cl_mem B, size_t offB, 9952 cl_command_queue command_queue, 9953 cl_uint numEventsInWaitList, 9954 const cl_event *eventWaitList); 9955 9956 /** 9957 * @brief Copies asynchronously a vector from host (A) to device (B). 9958 * See \b clblasWriteVector() and \b clblasWriteSubMatrixAsync(). 9959 */ 9960 clblasStatus clblasWriteVectorAsync( 9961 size_t nb_elem, 9962 size_t element_size, 9963 const void *A, size_t offA, 9964 cl_mem B, size_t offB, 9965 cl_command_queue command_queue, 9966 cl_uint numEventsInWaitList, 9967 const cl_event *eventWaitList, 9968 cl_event *events); 9969 9970 /** 9971 * @brief Copies synchronously a vector from device (A) to host (B). 9972 * See \b clblasReadSubMatrix(). 9973 * 9974 * @param[in] A specifies the source vector on the device 9975 * @param[in] B specifies the destination vector on the host 9976 * 9977 * @return 9978 * - see \b clblasReadSubMatrix() 9979 */ 9980 clblasStatus clblasReadVector( 9981 size_t nb_elem, 9982 size_t element_size, 9983 const cl_mem A, size_t offA, 9984 void * B, size_t offB, 9985 cl_command_queue command_queue, 9986 cl_uint numEventsInWaitList, 9987 const cl_event *eventWaitList); 9988 9989 /** 9990 * @brief Copies asynchronously a vector from device (A) to host (B). 9991 * See \b clblasReadVector() and \b clblasWriteSubMatrixAsync(). 
9992 */ 9993 clblasStatus clblasReadVectorAsync( 9994 size_t nb_elem, 9995 size_t element_size, 9996 const cl_mem A, size_t offA, 9997 void * B, size_t offB, 9998 cl_command_queue command_queue, 9999 cl_uint numEventsInWaitList, 10000 const cl_event *eventWaitList, 10001 cl_event *events); 10002 10003 /** 10004 * @brief Copies synchronously a vector from device (A) to device (B). 10005 * See \b clblasCopySubMatrix(). 10006 * 10007 * @param[in] A specifies the source vector on the device 10008 * @param[in] B specifies the destination vector on the device 10009 * 10010 * @return 10011 * - see \b clblasCopySubMatrix() 10012 */ 10013 clblasStatus clblasCopyVector( 10014 size_t nb_elem, 10015 size_t element_size, 10016 const cl_mem A, size_t offA, 10017 cl_mem B, size_t offB, 10018 cl_command_queue command_queue, 10019 cl_uint numEventsInWaitList, 10020 const cl_event *eventWaitList); 10021 10022 /** 10023 * @brief Copies asynchronously a vector from device (A) to device (B). 10024 * See \b clblasCopyVector() and \b clblasWriteSubMatrixAsync(). 10025 */ 10026 clblasStatus clblasCopyVectorAsync( 10027 size_t nb_elem, 10028 size_t element_size, 10029 const cl_mem A, size_t offA, 10030 cl_mem B, size_t offB, 10031 cl_command_queue command_queue, 10032 cl_uint numEventsInWaitList, 10033 const cl_event *eventWaitList, 10034 cl_event *events); 10035 10036 /** 10037 * @brief Copies synchronously a whole matrix from host (A) to device (B). 10038 * See \b clblasWriteSubMatrix(). 
10039 * 10040 * @param[in] A specifies the source matrix on the host 10041 * @param[in] B specifies the destination matrix on the device 10042 * 10043 * @return 10044 * - see \b clblasWriteSubMatrix() 10045 */ 10046 clblasStatus clblasWriteMatrix( 10047 clblasOrder order, 10048 size_t sx, size_t sy, 10049 size_t element_size, 10050 const void *A, size_t offA, size_t ldA, 10051 cl_mem B, size_t offB, size_t ldB, 10052 cl_command_queue command_queue, 10053 cl_uint numEventsInWaitList, 10054 const cl_event *eventWaitList); 10055 10056 /** 10057 * @brief Copies asynchronously a vector from host (A) to device (B). 10058 * See \b clblasWriteMatrix() and \b clblasWriteSubMatrixAsync(). 10059 */ 10060 clblasStatus clblasWriteMatrixAsync( 10061 clblasOrder order, 10062 size_t sx, size_t sy, 10063 size_t element_size, 10064 const void *A, size_t offA, size_t ldA, 10065 cl_mem B, size_t offB, size_t ldB, 10066 cl_command_queue command_queue, 10067 cl_uint numEventsInWaitList, 10068 const cl_event *eventWaitList, 10069 cl_event *events); 10070 10071 /** 10072 * @brief Copies synchronously a whole matrix from device (A) to host (B). 10073 * See \b clblasReadSubMatrix(). 10074 * 10075 * @param[in] A specifies the source vector on the device 10076 * @param[in] B specifies the destination vector on the host 10077 * 10078 * @return 10079 * - see \b clblasReadSubMatrix() 10080 */ 10081 clblasStatus clblasReadMatrix( 10082 clblasOrder order, 10083 size_t sx, size_t sy, 10084 size_t element_size, 10085 const cl_mem A, size_t offA, size_t ldA, 10086 void * B, size_t offB, size_t ldB, 10087 cl_command_queue command_queue, 10088 cl_uint numEventsInWaitList, 10089 const cl_event *eventWaitList); 10090 10091 /** 10092 * @brief Copies asynchronously a vector from device (A) to host (B). 10093 * See \b clblasReadMatrix() and \b clblasWriteSubMatrixAsync(). 
10094 */ 10095 clblasStatus clblasReadMatrixAsync( 10096 clblasOrder order, 10097 size_t sx, size_t sy, 10098 size_t element_size, 10099 const cl_mem A, size_t offA, size_t ldA, 10100 void * B, size_t offB, size_t ldB, 10101 cl_command_queue command_queue, 10102 cl_uint numEventsInWaitList, 10103 const cl_event *eventWaitList, 10104 cl_event *events); 10105 10106 /** 10107 * @brief Copies synchronously a whole matrix from device (A) to device (B). 10108 * See \b clblasCopySubMatrix(). 10109 * 10110 * @param[in] A specifies the source matrix on the device 10111 * @param[in] B specifies the destination matrix on the device 10112 * 10113 * @return 10114 * - see \b clblasCopySubMatrix() 10115 */ 10116 clblasStatus clblasCopyMatrix( 10117 clblasOrder order, 10118 size_t sx, size_t sy, 10119 size_t element_size, 10120 const cl_mem A, size_t offA, size_t ldA, 10121 cl_mem B, size_t offB, size_t ldB, 10122 cl_command_queue command_queue, 10123 cl_uint numEventsInWaitList, 10124 const cl_event *eventWaitList); 10125 10126 /** 10127 * @brief Copies asynchronously a vector from device (A) to device (B). 10128 * See \b clblasCopyMatrix() and \b clblasWriteSubMatrixAsync(). 10129 */ 10130 clblasStatus clblasCopyMatrixAsync( 10131 clblasOrder order, 10132 size_t sx, size_t sy, 10133 size_t element_size, 10134 const cl_mem A, size_t offA, size_t ldA, 10135 cl_mem B, size_t offB, size_t ldB, 10136 cl_command_queue command_queue, 10137 cl_uint numEventsInWaitList, 10138 const cl_event *eventWaitList, 10139 cl_event *events); 10140 10141 /** 10142 * @brief Fill synchronously a vector with a pattern of a size element_size_bytes 10143 * 10144 * @param[in] nb_elem specifies the number of element in buffer A 10145 * @param[in] element_size specifies the size of one element of A. 
Supported sizes correspond 10146 * element size used in clBLAS (1,2,4,8,16) 10147 * @param[in] A specifies the source vector on the device 10148 * @param[in] offA specifies the offset of matrix A in 10149 * elements 10150 * @param[in] pattern specifies the host address of the pattern to fill with (element_size_bytes) 10151 * @param[in] command_queue specifies the OpenCL queue 10152 * @param[in] numEventsInWaitList specifies the number of OpenCL events 10153 * to wait for 10154 * @param[in] eventWaitList specifies the list of OpenCL events to 10155 * wait for 10156 * @return 10157 * - see \b clblasWriteSubMatrix() 10158 */ 10159 clblasStatus clblasFillVector( 10160 size_t nb_elem, 10161 size_t element_size, 10162 cl_mem A, size_t offA, 10163 const void * host, 10164 cl_command_queue command_queue, 10165 cl_uint numEventsInWaitList, 10166 const cl_event *eventWaitList); 10167 10168 /** 10169 * @brief Fill asynchronously a vector with a pattern of a size element_size_bytes 10170 * See \b clblasFillVector(). 10171 */ 10172 clblasStatus clblasFillVectorAsync( 10173 size_t nb_elem, 10174 size_t element_size, 10175 cl_mem A, size_t offA, 10176 const void * pattern, 10177 cl_command_queue command_queue, 10178 cl_uint numEventsInWaitList, 10179 const cl_event *eventWaitList, 10180 cl_event *event); 10181 10182 /** 10183 * @brief Fill synchronously a matrix with a pattern of a size element_size_bytes 10184 * 10185 * @param[in] order specifies the matrix order 10186 * @param[in] element_size specifies the size of one element of A. 
Supported sizes correspond 10187 * element size used in clBLAS (1,2,4,8,16) 10188 * @param[in] A specifies the source vector on the device 10189 * @param[in] offA specifies the offset of matrix A in 10190 * @param[in] ldA specifies the leading dimension of A 10191 * @param[in] nrA specifies the number of row in A 10192 * @param[in] ncA specifies the number of column in A 10193 * @param[in] pattern specifies the host address of the pattern to fill with (element_size_bytes) 10194 * @param[in] command_queue specifies the OpenCL queue 10195 * @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for 10196 * @param[in] eventWaitList specifies the list of OpenCL events to wait for 10197 * @return 10198 * - see \b clblasWriteSubMatrix() 10199 */ 10200 clblasStatus clblasFillMatrix( 10201 clblasOrder order, 10202 size_t element_size, 10203 cl_mem A, size_t offA, size_t ldA, 10204 size_t nrA, size_t ncA, 10205 const void *pattern, 10206 cl_command_queue command_queue, 10207 cl_uint numEventsInWaitList, 10208 const cl_event *eventWaitList); 10209 10210 10211 /** 10212 * @brief Partially fill a sub-matrix with a pattern of a size element_size_bytes 10213 * 10214 * 10215 * @param[in] order specifies the matrix order 10216 * @param[in] element_size specifies the size of one element of A. 
Supported values 10217 * are to element sizes used in clBLAS - that is 1, 2, 4, 8 or 16 10218 * @param[in] offA specifies the offset of matrix A in elements 10219 * @param[in] ldA specifies the leading dimension of A in elements 10220 * @param[in] nrA specifies the number of rows of A 10221 * in elements 10222 * @param[in] ncA specifies the number of columns of A 10223 * in elements 10224 * @param[in] xA specifies the top-left x position to 10225 * copy from A 10226 * @param[in] yA specifies the top-left y position to 10227 * copy from A 10228 * @param[in] nx specifies the number of elements to 10229 * copy according to the x dimension (rows) 10230 * @param[in] ny specifies the number of elements to 10231 * copy according to the y dimension 10232 * (columns) 10233 * @param[in] pattern specifies the host address of the pattern to fill with (element_size_bytes) 10234 * @param[in] command_queue specifies the OpenCL queue 10235 * @param[in] numEventsInWaitList specifies the number of OpenCL events to wait for 10236 * @param[in] eventWaitList specifies the list of OpenCL events to wait for 10237 * @return 10238 * - see \b clblasWriteSubMatrix() 10239 */ 10240 10241 clblasStatus clblasFillSubMatrix( 10242 clblasOrder order, 10243 size_t element_size, 10244 cl_mem A, size_t offA, size_t ldA, 10245 size_t nrA, size_t ncA, 10246 size_t xA, size_t yA, 10247 size_t nx, size_t ny, 10248 const void *pattern, 10249 cl_command_queue command_queue, 10250 cl_uint numEventsInWaitList, 10251 const cl_event *eventWaitList); 10252 10253 /** 10254 * @brief Asynchronous asynchronously fill a sub-matrix with a pattern of a size element_size_bytes 10255 * See \b clblasFillSubMatrix(). 
10256 */ 10257 clblasStatus clblasFillSubMatrixAsync( 10258 clblasOrder order, 10259 size_t element_size, 10260 cl_mem A, size_t offA, size_t ldA, 10261 size_t sxA, size_t syA, 10262 int xA, int yA, 10263 size_t nx, size_t ny, 10264 const void *host, 10265 cl_command_queue command_queue, 10266 cl_uint numEventsInWaitList, 10267 const cl_event *eventWaitList, 10268 cl_event *event); 10269 10270 10271 10272 #ifdef __cplusplus 10273 } /* extern "C" { */ 10274 #endif 10275 10276 #endif /* CLBLAS_H_ */ 10277