1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 #ifndef BLAS_KGEN_LEGACY_H_ 19 #define BLAS_KGEN_LEGACY_H_ 20 21 #include "../blas_kgen.h" 22 23 /** 24 * @internal 25 * @brief Block multiplier flags 26 * @ingroup BLAS_MAJOR_GENS 27 */ 28 typedef enum BlkmulFlags { 29 BLKMUL_NO_FLAGS, /**< No flags */ 30 BLKMUL_TRANSPOSE = 0x01, /**< Transpose result */ 31 BLKMUL_IMAGE_PACKED = 0x02, /**< Data in image are packed */ 32 /** 33 * Accumulate multiplication results to a 34 * private location provided by caller 35 */ 36 BLKMUL_OUTPUT_PRIVATE = 0x04, 37 BLKMUL_SKEW_ROW = 0x08, /**< Use skew over block rows */ 38 BLKMUL_SKEW_COLUMN = 0x10, /**< Use skew over block columns */ 39 BLKMUL_INLINE = 0x20, /**< Generate an inline version */ 40 BLKMUL_TRANSPOSED_B = 0x40, /**< Block B is transposed */ 41 /** Don't use "&" operation in cyclic address evaluation, use always "%" */ 42 BLKMUL_AVOID_AND = 0x80 43 } BlkMulFlags; 44 45 /** 46 * @internal 47 * @brief Block multiplier core 48 * @ingroup BLAS_MAJOR_GENS 49 */ 50 typedef enum BlkmulCore { 51 /** Use separate multiplication and summation implemented by hand */ 52 BLKMUL_SEPARATE_MULADD, 53 /** Use the 'dot' function */ 54 BLKMUL_DOT, 55 /** Use the 'mad' function */ 56 BLKMUL_MAD 57 } BlkmulCore; 58 59 
/**
 * @internal
 * @brief Argument names for the inline version of the block
 *        multiplier
 * @ingroup BLAS_MAJOR_GENS
 *
 * Each member is a string naming an identifier, not a value.
 */
typedef struct BlkmulArgNames {
    const char *coordA;     /**< Matrix A start coordinates */
    const char *coordB;     /**< Matrix B start coordinates */
    const char *skewRow;    /**< Skew over rows */
    const char *skewCol;    /**< Skew over columns */
    const char *k;          /**< Counter name in the loop over K */
    const char *vectBoundK; /**< Bound in the loop over K */
} BlkmulArgNames;

/**
 * @internal
 * @brief Options for the matrix block multiplication
 *        generator
 * @ingroup BLAS_MAJOR_GENS
 */
typedef struct BlkMulOpts {
    /** OpenCL memory object type storing matrix (whole or its blocks) A */
    CLMemType aMobj;
    /** OpenCL memory object type storing matrix (whole or its blocks) B */
    CLMemType bMobj;
    BlkMulFlags flags;      /**< Specific flags */
    BlkmulCore core;        /**< Multiply and add core */
    /** List of argument names for the inline version */
    BlkmulArgNames argNames;
} BlkMulOpts;

/**
 * @internal
 * @param ctx       Generator context
 *
 * NOTE(review): judging by the name, this declares the BLAS enumerations
 * in the source being generated; the implementation is not visible from
 * this header -- confirm before relying on it.
 */
void
declareBlasEnums(struct KgenContext *ctx);

/**
 * @internal
 * @defgroup BLAS_MAJOR_GENS BLAS specific generators
 * @ingroup MAJOR_GENS
 */
/*@{*/

/**
 * @internal
 * @brief Matrix block multiplication generator
 *
 * @param[out] ctx          Generator context
 * @param[in] subdims       Subproblem dimensions; the first level reflects
 *                          dimensions of the large blocks processed with the
 *                          whole work group, and the second level
 *                          reflects sizes of immediately multiplied small
 *                          blocks within the single work item
 * @param[in] dtype         Data type the multiplying function will be
 *                          generated for
 * @param[in] opts          Block multiplication options
 *
 * Generated functions have the following definitions: \n
 *\n
 * For the buffer based version:
 * @code
 * void
 * funcName(
 *     <type> alpha,
 *     LPtr A,
 *     LPtr B,
 *     LPtr C
 *     [, int2 skewRow]
 *     [, int skewCol]);
 * @endcode
 *
 * Function naming rule:
 * (type prefix)gemmBlock[Transp]_<width>_<height>
 *\n
 * It is assumed that A, B and C point to the start of the data to be
 * processed during this step.
 *\n
 * For the image based version: \n
 * @code
 * void
 * funcName(
 *     <type> alpha,
 *     __read_only image2d_t A,
 *     int2 coordA,
 *     __read_only image2d_t B,
 *     int2 coordB,
 *     LPtr C
 *     [, int2 skewRow]
 *     [, int skewCol]);
 * @endcode
 *
 * Where coordA and coordB are the start image coordinates to fetch data from.
 *\n
 * For the image based version a mixed variant is possible, where
 * either the A or the B blocks are passed through the local memory.
 *\n
 * 'skewRow' and 'skewCol' are optional arguments, present only if the
 * 'BLKMUL_SKEW_ROW' and 'BLKMUL_SKEW_COLUMN' flags are specified,
 * respectively. The 'y' field of the row skew is for the block A, and the
 * 'x' one is for the block B.
 *\n
 * The output result can be put directly into a private location provided by
 * the caller instead of the local one. This is achieved by using the
 * 'BLKMUL_OUTPUT_PRIVATE' flag.
 *\n
 * The pointer to this location should have one of the following types,
 * depending on the type of the processed data: \n
 * - float4 - for float
 * - float2 - for complex float
 * - double2 - for double and complex double
 *\n\n
 * Alpha is not taken in this case.
 *\n
 * The multiplier can be generated either in the form of a dedicated
 * function or in an inline form inserted into a kernel. \n In the case of
 * the inline version the block multiplier in fact becomes a tile
 * multiplier, and the caller should provide the iteration over K.
 *
 * @return 0 on success, -EOVERFLOW on source buffer overflowing
 */
int
blkMulGen(
    struct KgenContext *ctx,
    const SubproblemDim subdims[2],
    DataType dtype,
    const BlkMulOpts *opts);

/**
 * @internal
 * NOTE(review): this legacy entry point carries no documentation and its
 * contract (including the meaning of the returned int) cannot be determined
 * from this header -- consult the implementation before relying on it.
 */
int
updateResultGenOld(
    struct KgenContext *ctx,
    const BlasGenSettings *gset,
    UpdateResultOp op,
    UpdateResultFlags flags,
    const UpresVarNames *uvarNames);

/*@}*/

#endif /* BLAS_KGEN_LEGACY_H_ */