1 /* ************************************************************************ 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * ************************************************************************/ 16 17 18 /* 19 * API to manupulate with matrix tiles 20 */ 21 22 #ifndef TILE_H_ 23 #define TILE_H_ 24 25 #include <kerngen.h> 26 #include <blas_funcs.h> 27 28 #define tileLineElemNum forEachTile 29 30 struct BlasGenSettings; 31 32 enum { 33 MAX_TILE_BASE_NAMELEN = sizeof(Kstring) - 25, 34 /* 35 * It may be 16 vector components at maximum. Adding the length of the 36 * subscript and selector operator, 2 digit index, and the end-line symbol, 37 * to the maximum base name length we get the maximum tile element string 38 * length 39 */ 40 MAX_TILE_ELEMENT_STRLEN = sizeof(Kstring) - 1, 41 MAX_TILE_VECLEN = 8 42 }; 43 44 /** 45 * @internal 46 * @brief Flags showing tile storing specifics 47 * @ignroup TILES 48 */ 49 typedef enum TileCreationFlags { 50 /** Tile C should be forced to non-transposed form */ 51 TILE_C_FORCE_NOTRANS = 0x01, 52 /** tile vector length is equal to the length of fetched vectors */ 53 TILE_WITH_FETCH_VECLEN = 0x02, 54 /** 55 * If depending of transposing vector length is greater than 56 * number of rows or columns, store several rows or columns respectively 57 * in each vector 58 */ 59 TILE_PACKED = 0x04 60 } TileCreationFlags; 61 62 /** 63 * @internal 64 * @brief Type of storage in the private memory 65 * @ingroup TILES 66 */ 67 typedef enum PrivateStorageType { 68 /** Tile is stored in array */ 69 PRIV_STORAGE_ARRAY, 70 /** Tile is stored in a set of variables */ 71 PRIV_STORAGE_VARIABLE_SET 72 } PrivateStorageType; 73 74 typedef enum TileCopyOps { 75 TILECOPY_ASSIGN, 76 TILECOPY_ADD_ASSIGN, 77 TILECOPY_SUB_ASSIGN, 78 TILECOPY_MUL_ASSIGN, 79 TILECOPY_DIV_ASSIGN, 80 TILECOPY_MOD_ASSIGN 81 } TileCopyOps; 82 83 /** 84 * @internal 85 * @brief Tile element half types 86 * @ingroup TILES 87 */ 88 typedef enum TileElementHalf { 89 TE_HALF_LOW, 90 TE_HALF_HIGH 91 } TileElementHalf; 92 93 /** 94 * @internal 95 * @brief Matrix tile stored in a private area 96 * @ingroup TILES 97 */ 98 typedef struct Tile { 99 const char *baseName; 100 unsigned int nrRows; 101 unsigned int nrCols; 102 unsigned int vecLen; 103 DataType dtype; 104 PrivateStorageType storType; 105 /** Flag of storing tile in the transposed form */ 106 bool trans; 107 /* 108 * Depending on the transposing several rows or columns can be fit 109 * into single vector. It makes sense only when number of rows or column 110 * respectively is less than vector length 111 */ 112 bool packed; 113 } Tile; 114 115 /** 116 * @internal 117 * @brief Initialize tile 118 * 119 * @param[out] tile Tile description structure to fill 120 * @param[in] baseName Tile base name 121 * @param[in] nrRows Number of rows in the tile 122 * @param[in] nrCols Number of columns in the tile 123 * @param[in] vecLen Length of one native OpenCL element being a part of 124 * the tile 125 * @param[in] dtype Data type 126 * @param[in] storType Tile storate type 127 * @param[in] trans Shows if tile is stored in the transposed form 128 * or direct 129 * @param[in] packed Tile is stored in packed form. Has not effect if 130 * a single line can be fit into the single vector. 131 * 132 * If \b vecLen param is above MAX_TILE_VECLEN then will be truncated into 133 * MAX_TILE_VECLEN. 134 * 135 * @ingroup TILES 136 */ 137 void 138 initTile( 139 Tile *tile, 140 const char *baseName, 141 unsigned int nrRows, 142 unsigned int nrCols, 143 unsigned int vecLen, 144 DataType dtype, 145 PrivateStorageType storType, 146 bool trans, 147 bool packed); 148 149 /** 150 * @internal 151 * @brief Initialize matrix tile from generator settings 152 * 153 * @param[out] gset Generator settings which tile should be initialized in 154 * @param[in] funcID BLAS function ID 155 * @param[in] flags Tile creation flags 156 * @param[in] storType Storage type 157 * 158 * If \b baseName field of a tile structure in the generator settings is zero, 159 * it is initialized with the default value: "a" for the matrix A, "b" for 160 * the matrix B, "x" for the vector X, "c" for the matrix C, and "y" for the 161 * vector Y. 162 * 163 * As X and Y are column-vectors from the math point of view, tiles for them 164 * are always packed irrespectively the TileCreationFlags::TILE_PACKED flag 165 * is specified or not. 166 * 167 * 168 * Transposition of C tile matches transposition of C matrix by default, until 169 * the TILE_C_FORCE_NOTRANS flag is not set. If the flag is set, tile is 170 * forced to be initialized as non-transposed and veclen must be verified. 171 * 172 */ 173 void 174 initDefaultTiles( 175 struct BlasGenSettings *gset, 176 BlasFunctionID funcID, 177 TileCreationFlags flags, 178 PrivateStorageType storType); 179 180 /** 181 * @internal 182 * @brief Get entire number of vectors in the tile 183 * 184 * @param[in] tile Tile to get number of vectors of 185 */ 186 unsigned int 187 tileVectorsNum(const Tile *tile); 188 189 /** 190 * @internal 191 * @brief Size of entire tile storage in elements 192 * 193 * @param[in] tile Tile to get size of 194 */ 195 unsigned int 196 tileStorageSize(const Tile *tile); 197 198 /** 199 * @brief Get length of tile line segment 200 * 201 * @param[in] Tile Source tile 202 * 203 * Under that segment it is assumed such a part of line which doesn't cross over 204 * vector bound and row/column bound depending on the tile is transposed or not. 205 * In the other words, this is a piece of data which provides maximum possible 206 * vectorization don't breaking correctness. 207 */ 208 unsigned int 209 tileLineSegmentLen(const Tile *tile); 210 211 /** 212 * @internal 213 * @brief Declare variables needed to store a tile 214 * 215 * @param[out] ctx Generator context 216 * @param[in] gset Generator settings containing desctiptors of 217 * tiles to declare storages for 218 * 219 * If a tile is fit into a single variable of the native type matching 220 * to the tile's vector length, it is declared a single variable with the name 221 * matching the \b baseName field being a part of the @ref Tile structure. 222 * If not, the following rules are applied. If the tile is needed to be stored 223 * in a private array, variable name matches the base name and array size 224 * is sufficient to fit such a tile. If the tile is needed to be stored 225 * in a set of variables which names are arranged as the base name followed 226 * with an integer index starting from zero and incremented by one for each 227 * subsequent variable. 228 * 229 * @return 0 on success, and -EOVERFLOW if the source buffer is overflowed 230 * 231 * @ingroup TILES 232 */ 233 int 234 declareTileStorages(struct KgenContext *ctx, const struct BlasGenSettings *gset); 235 236 /** 237 * @internal 238 * @brief Declare variable needed to store one tile 239 * 240 * @param[out] ctx Generator context 241 * @param[in] tile Tile settings containing desctiptors of 242 * a tile to declare storages for 243 * 244 * If a tile is fit into a single variable of the native type matching 245 * to the tile's vector length, it is declared a single variable with the name 246 * matching the \b baseName field being a part of the @ref Tile structure. 247 * If not, the following rules are applied. If the tile is needed to be stored 248 * in a private array, variable name matches the base name and array size 249 * is sufficient to fit such a tile. If the tile is needed to be stored 250 * in a set of variables which names are arranged as the base name followed 251 * with an integer index starting from zero and incremented by one for each 252 * subsequent variable. 253 * 254 * @return 0 on success, and -EOVERFLOW if the source buffer is overflowed 255 * 256 * @ingroup TILES 257 */ 258 int 259 declareOneTileStorage(struct KgenContext *ctx, const Tile *tile); 260 261 /** 262 * @internal 263 * @brief Sprintf element composed of one or several data elements 264 * stored in the tile 265 * 266 * @param[out] str Kernel string object to store tile element 267 * expression 268 * @param[in] tile Tile description structure 269 * @param[in] row Row of the starting element 270 * @param[in] col Element column 271 * @param[in] len Number of tile elements needed to be captured by 272 * the expression 273 * 274 * \b row should be less than number of rows and \b col should be less than 275 * number of columns in the tile. Traversal of a tile line is not allowed. 276 * That means \b col plus \b len should be not greater than number of columns 277 * if the tile is stored in direct form, and \b row plus \b len should be not 278 * greater than number of rows if the tile is stored in transposed form. 279 * If it is not hold true in debug mode, an assertion is triggered. 280 * In the release may produce a wrong code which can be even not compilable. 281 * 282 * @ingroup TILES 283 */ 284 void 285 sprintfTileElement( 286 Kstring *str, 287 const Tile *tile, 288 unsigned int row, 289 unsigned int col, 290 unsigned int len); 291 292 /** 293 * @internal 294 * @brief Sprintf half of a single complex data element stored in the tile 295 * 296 * @param[out] str Kernel string object to store tile element 297 * expression 298 * @param[in] tile Tile description structure 299 * @param[in] row Row of the starting element 300 * @param[in] col Element column 301 * @param[in] half Half type 302 * 303 * The restrictions for \b row and \b col are the same as for 304 * sprintfTileElement(). This function is applicable only for tiles containing 305 * complex data and must not be used in case of real data. 306 * 307 * @ingroup TILES 308 */ 309 void 310 sprintfTileElementHalf( 311 Kstring *str, 312 const Tile *tile, 313 unsigned int row, 314 unsigned int col, 315 TileElementHalf half); 316 317 /** 318 * @internal 319 * @brief Sprintf element composed of one or several data elements 320 * stored in each of the tiles 321 * 322 * @param[out] kstrs Kernel string objects array to store element 323 * expression for each tile 324 * @param[in] row Vectorizable element row 325 * @param[in] col Vectorizable element column 326 * @param[in] num Number of tile description structure 327 * @param[in] first First tile description structure 328 * 329 * Decides how many vectored access in for each line of each tile will be and 330 * does sprintfTileElement() for each of tiles. This function can have got any 331 * value of \b row \b and \b col \b. \b kstrs \b and \b tile->baseName \b can 332 * have NULL, then no sprintfTileElement() will be executed. 333 * 334 * @return 0 if no sprintf tiles, or number of vectors in one line 335 * 336 * @ingroup TILES 337 */ 338 int 339 forEachTile(Kstring *kstrs, 340 unsigned int row, 341 unsigned int col, 342 unsigned int num, 343 Tile *first, 344 ...); 345 346 /** 347 * @internal 348 * @brief Generate assigning a tile element with zero 349 * 350 * @param[out] ctx Generator context 351 * @param[in] tile Tile description structure 352 * @param[in] row Row of the starting element 353 * @param[in] col Element column 354 * @param[in] len Number of elements needed to be assigned with zero 355 * 356 * See decription of sprintfTileElement() for more details about restrictions 357 * on \b row, \b col and \b len. 358 * 359 * @ingroup TILES 360 */ 361 void 362 genSetZeroInTile( 363 struct KgenContext *ctx, 364 const Tile *tile, 365 unsigned int row, 366 unsigned int col, 367 unsigned int len); 368 369 /** 370 * @internal 371 * @brief Generate assigning a tile element with unit 372 * 373 * @internal 374 * @brief Generate assigning a tile element with zero 375 * 376 * @param[out] ctx Generator context 377 * @param[in] tile Tile description structure 378 * @param[in] row Row of the starting element 379 * @param[in] col Element column 380 * 381 * \b row should be less than number of rows and \b col should be less than 382 * number of columns in the tile. If it is not hold true in debug mode, 383 * an assertion is triggered. In the release may produce a wrong code which 384 * can be even not compilable. 385 * 386 * @ingroup TILES 387 */ 388 void 389 genSetUnitInTile( 390 struct KgenContext *ctx, 391 const Tile *tile, 392 unsigned int row, 393 unsigned int col); 394 395 /** 396 * @internal 397 * @brief Generate zeroing an entire tile 398 * 399 * @param[out] ctx Generator context 400 * @param[in] tile Tile description structure 401 * 402 * @ingroup TILES 403 */ 404 void 405 genZeroTile(struct KgenContext *ctx, const Tile *tile); 406 407 /** 408 * @internal 409 * @brief Generate copying between 2 tiles 410 * 411 * @param[out] ctx Generator context 412 * @param[in] dst Destination tile 413 * @param[in] src Source tile 414 * 415 * @ingroup TILES 416 */ 417 void 418 genTileCopy( 419 struct KgenContext *ctx, 420 const Tile *dst, 421 const Tile *src, 422 TileCopyOps op); 423 424 #endif /* TILE_H_ */ 425