1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 #ifndef KERNEL_EXTRA_H_ 19 #define KERNEL_EXTRA_H_ 20 21 #include <cltypes.h> 22 23 enum { 24 MAX_SOLVER_PRIVATE_SIZE = 16 25 }; 26 27 // 28 // Moving BUILD_OPTS_MAXLEN here. Originally in clblas-internal.h 29 // Including "clblas-internal.h" 30 enum { 31 MEMPAT_PER_BLASFN = 8, 32 BUILD_OPTS_MAXLEN = 256 33 }; 34 35 /** 36 * @internal 37 * @brief BLAS kernel type identifiers 38 * 39 * @ingroup BLAS_SOLVERIF_SPEC 40 */ 41 typedef enum CLBlasKernelType { 42 CLBLAS_COMPUTING_KERNEL, /**< Main computing kernel */ 43 CLBLAS_PREP_A_KERNEL, /**< Kernel preparing matrix A */ 44 CLBLAS_PREP_B_KERNEL, /**< Kernel preparing matrix B */ 45 MAX_CLBLAS_KERNELS_PER_STEP 46 } CLBlasKernelType; 47 48 /** 49 * @internal 50 * @defgroup BLAS_SOLVERIF_SPEC BLAS specifics 51 * @ingroup SOLVERIF 52 */ 53 54 /*@{*/ 55 56 /** 57 * @brief BLAS kernel flags 58 * 59 * These flags uniquely determine problem options kernels are generated for 60 */ 61 typedef enum KernelExtraFlags { 62 /** Matches to a problem without any options */ 63 KEXTRA_NO_FLAGS = 0, 64 KEXTRA_TRANS_A = 0x01, /**< Matrix A should be transposed */ 65 /** matrix A should be took in the conjugate form */ 66 KEXTRA_CONJUGATE_A = 0x02, 67 KEXTRA_TRANS_B = 0x04, /**< matrix B should be transposed */ 68 /** Matrix B should be taken in the conjugate form */ 69 KEXTRA_CONJUGATE_B = 0x08, 70 KEXTRA_COLUMN_MAJOR = 0x10, /**< Order is column major */ 71 /** 72 * Matrix A is upper triangular, it is lower triangular 73 * if this flag is not set 74 */ 75 KEXTRA_UPPER_TRIANG = 0x20, 76 /** 77 * Matrix A is placed on the right, it is placed 78 * on the left if this flag is not set 79 */ 80 KEXTRA_SIDE_RIGHT = 0x40, 81 /** 82 * Unit diagonal matrix 83 */ 84 KEXTRA_UNIT_DIAGONAL = 0x80, 85 /** kernel should process tails of upper level blocks in M dimension */ 86 KEXTRA_TAILS_M = 0x100, 87 /** kernel should process tails of upper level blocks in N dimension */ 88 KEXTRA_TAILS_N = 0x200, 89 /** kernel should process tails of upper level blocks in K dimension */ 90 KEXTRA_TAILS_K = 0x400, 91 /** Beta multiplier is zero */ 92 KEXTRA_BETA_ZERO = 0x800, 93 /** Disable vectorization at block copying for matrix A */ 94 KEXTRA_NO_COPY_VEC_A = 0x1000, 95 /** Disable vectorization at block copying for matrix B */ 96 KEXTRA_NO_COPY_VEC_B = 0x2000, 97 /** Disable vectorization at block copying for matrix C */ 98 KEXTRA_NO_COPY_VEC_C = 0x4000, 99 // SYRXK specific flags 100 /** Diagonal solution blocks are evaluated in a separate kernel */ 101 KEXTRA_SYRK_SEPARATE_DIAGONAL = 0x8000, 102 /** Evaluate diagonal solution blocks for a SYRXK function */ 103 KEXTRA_SYRK_EVALUATE_DIAGONAL = 0x10000, 104 /** 2k rank update */ 105 KEXTRA_SYRK_2K_RANK = 0x20000, 106 // BLAS2 specific flags 107 /** Incx increment is one */ 108 KEXTRA_INCX_ONE = 0x40000, 109 /** Incy increment is one */ 110 KEXTRA_INCY_ONE = 0x80000, 111 // Generator specific flags 112 /** MAD function can be used */ 113 // FIXME: throw this kludge away 114 KEXTRA_ENABLE_MAD = 0x100000, 115 // FIXME: It's a kludge, pass further DeviceIndent structure to generators 116 KEXTRA_VENDOR_AMD = 0x200000, 117 /* Flags showing not zero starting offsets for kernels */ 118 KEXTRA_STARTM_NOT_ZERO = 0x400000, 119 KEXTRA_STARTN_NOT_ZERO = 0x800000, 120 //KEXTRA_STARTK_NOT_ZERO = 0x2000000, 121 /** Matrix A offset in a memory object is not zero */ 122 KEXTRA_A_OFF_NOT_ZERO = 0x1000000, 123 /** Matrix B or vector X offset in a memory object is not zero */ 124 KEXTRA_BX_OFF_NOT_ZERO = 0x2000000, 125 /** Matrix C or vector Y offset in a memory object is not zero */ 126 KEXTRA_CY_OFF_NOT_ZERO = 0x4000000, 127 /** kernel should process tails of lower level blocks in M dimension */ 128 KEXTRA_TAILS_M_LOWER = 0x8000000, 129 /** kernel should process tails of lower level blocks in N dimension */ 130 KEXTRA_TAILS_N_LOWER = 0x10000000, 131 /** kernel should process tails of lower level blocks in K dimension */ 132 KEXTRA_TAILS_K_LOWER = 0x20000000 133 } KernelExtraFlags; 134 135 /** 136 * @internal 137 * @brief extra information CLBLAS kernel generator 138 * @ingroup BLAS_SOLVERIF_SPEC 139 */ 140 typedef struct CLBLASKernExtra { 141 DataType dtype; /**< Data type */ 142 KernelExtraFlags flags; /**< Kernel flags identifying a problem */ 143 CLBlasKernelType kernType; /**< Kernel type */ 144 // Fixme: Deprecate it; now it is just for backward compatibility 145 unsigned int vecLen; /**< vector length to evaluate with */ 146 /** vector length for matrix A elements to evaluate with */ 147 unsigned int vecLenA; 148 /** vector length for matrix B elements to evaluate with */ 149 unsigned int vecLenB; 150 /* 151 * FIXME: remove this kludge; vectorization for the result should be 152 * autodetected 153 */ 154 unsigned int vecLenC; 155 char solverPriv[MAX_SOLVER_PRIVATE_SIZE]; 156 char buildOptions[BUILD_OPTS_MAXLEN]; // Build Flags used for the kernel call 157 } CLBLASKernExtra; 158 159 /* 160 * function to compare blas kernels extra information 161 */ 162 int 163 clblasKernelExtraCmp(const void *extra, const void *extraKey); 164 165 /*@}*/ 166 167 #endif /* KERNEL_EXTRA_H_ */ 168