1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
/*
 * API for manipulating matrix tiles
 */
21 
22 #ifndef TILE_H_
23 #define TILE_H_
24 
25 #include <kerngen.h>
26 #include <blas_funcs.h>
27 
/* Alias: the preprocessor replaces every tileLineElemNum with forEachTile. */
#define tileLineElemNum forEachTile
29 
30 struct BlasGenSettings;
31 
32 enum {
33     MAX_TILE_BASE_NAMELEN = sizeof(Kstring) - 25,
34     /*
35      * It may be 16 vector components at maximum. Adding the length of the
36      * subscript and selector operator, 2 digit index, and the end-line symbol,
37      * to the maximum base name length we get the maximum tile element string
38      * length
39      */
40     MAX_TILE_ELEMENT_STRLEN = sizeof(Kstring) - 1,
41     MAX_TILE_VECLEN = 8
42 };
43 
/**
 * @internal
 * @brief Flags describing the specifics of how a tile is stored
 * @ingroup TILES
 *
 * Each flag occupies its own bit.
 */
typedef enum TileCreationFlags {
    /** Tile C should be forced to non-transposed form */
    TILE_C_FORCE_NOTRANS = 0x01,
    /** Tile vector length is equal to the length of the fetched vectors */
    TILE_WITH_FETCH_VECLEN = 0x02,
    /**
     * If, depending on transposition, the vector length is greater than
     * the number of rows or columns, store several rows or columns
     * respectively in each vector
     */
    TILE_PACKED = 0x04
} TileCreationFlags;
61 
/**
 * @internal
 * @brief Kind of private-memory storage used for a tile
 * @ingroup TILES
 */
typedef enum PrivateStorageType {
    /** The tile is kept in an array */
    PRIV_STORAGE_ARRAY = 0,
    /** The tile is kept in a set of variables */
    PRIV_STORAGE_VARIABLE_SET = 1
} PrivateStorageType;
73 
/**
 * @internal
 * @brief Operation selector accepted by genTileCopy()
 * @ingroup TILES
 *
 * NOTE(review): the enumerator names suggest the assignment operators
 * =, +=, -=, *=, /= and %= respectively - confirm against the
 * genTileCopy() implementation.
 */
typedef enum TileCopyOps {
    TILECOPY_ASSIGN,
    TILECOPY_ADD_ASSIGN,
    TILECOPY_SUB_ASSIGN,
    TILECOPY_MUL_ASSIGN,
    TILECOPY_DIV_ASSIGN,
    TILECOPY_MOD_ASSIGN
} TileCopyOps;
82 
/**
 * @internal
 * @brief Selector of one half of a tile element
 * @ingroup TILES
 *
 * Passed to sprintfTileElementHalf() to choose which half of a complex
 * element is printed.
 */
typedef enum TileElementHalf {
    /** Low half of the element */
    TE_HALF_LOW = 0,
    /** High half of the element */
    TE_HALF_HIGH = 1
} TileElementHalf;
92 
93 /**
94  * @internal
95  * @brief Matrix tile stored in a private area
96  * @ingroup TILES
97  */
98 typedef struct Tile {
99     const char *baseName;
100     unsigned int nrRows;
101     unsigned int nrCols;
102     unsigned int vecLen;
103     DataType dtype;
104     PrivateStorageType storType;
105     /** Flag of storing tile in the transposed form */
106     bool trans;
107     /*
108      * Depending on the transposing several rows or columns can be fit
109      * into single vector. It makes sense only when number of rows or column
110      * respectively is less than vector length
111      */
112     bool packed;
113 } Tile;
114 
115 /**
116  * @internal
117  * @brief Initialize tile
118  *
119  * @param[out] tile      Tile description structure to fill
120  * @param[in] baseName   Tile base name
121  * @param[in] nrRows     Number of rows in the tile
122  * @param[in] nrCols     Number of columns in the tile
123  * @param[in] vecLen     Length of one native OpenCL element being a part of
124  *                       the tile
125  * @param[in] dtype      Data type
126  * @param[in] storType   Tile storate type
127  * @param[in] trans      Shows if tile is stored in the transposed form
128  *                       or direct
129  * @param[in] packed     Tile is stored in packed form. Has not effect if
130  *                       a single line can be fit into the single vector.
131  *
132  * If \b vecLen param is above MAX_TILE_VECLEN then will be truncated into
133  * MAX_TILE_VECLEN.
134  *
135  * @ingroup TILES
136  */
137 void
138 initTile(
139     Tile *tile,
140     const char *baseName,
141     unsigned int nrRows,
142     unsigned int nrCols,
143     unsigned int vecLen,
144     DataType dtype,
145     PrivateStorageType storType,
146     bool trans,
147     bool packed);
148 
149 /**
150  * @internal
151  * @brief Initialize matrix tile from generator settings
152  *
153  * @param[out] gset      Generator settings which tile should be initialized in
154  * @param[in] funcID     BLAS function ID
155  * @param[in] flags      Tile creation flags
156  * @param[in] storType   Storage type
157  *
158  * If \b baseName field of a tile structure in the generator settings is zero,
159  * it is initialized with the default value: "a" for the matrix A, "b" for
160  * the matrix B, "x" for the vector X, "c" for the matrix C, and "y" for the
161  * vector Y.
162  *
163  * As X and Y are column-vectors from the math point of view, tiles for them
164  * are always packed irrespectively the TileCreationFlags::TILE_PACKED flag
165  * is specified or not.
166  *
167  *
168  * Transposition of C tile matches transposition of C matrix by default, until
169  * the TILE_C_FORCE_NOTRANS flag is not set. If the flag is set, tile is
170  * forced to be initialized as non-transposed and veclen must be verified.
171  *
172  */
173 void
174 initDefaultTiles(
175     struct BlasGenSettings *gset,
176     BlasFunctionID funcID,
177     TileCreationFlags flags,
178     PrivateStorageType storType);
179 
180 /**
181  * @internal
182  * @brief Get entire number of vectors in the tile
183  *
184  * @param[in] tile          Tile to get number of vectors of
185  */
186 unsigned int
187 tileVectorsNum(const Tile *tile);
188 
189 /**
190  * @internal
191  * @brief Size of entire tile storage in elements
192  *
193  * @param[in] tile          Tile to get size of
194  */
195 unsigned int
196 tileStorageSize(const Tile *tile);
197 
198 /**
199  * @brief Get length of tile line segment
200  *
201  * @param[in] Tile       Source tile
202  *
203  * Under that segment it is assumed such a part of line which doesn't cross over
204  * vector bound and row/column bound depending on the tile is transposed or not.
205  * In the other words, this is a piece of data which provides maximum possible
206  * vectorization don't breaking correctness.
207  */
208 unsigned int
209 tileLineSegmentLen(const Tile *tile);
210 
/**
 * @internal
 * @brief Declare the variables required for storing tiles
 *
 * @param[out] ctx        Generator context
 * @param[in] gset        Generator settings holding the descriptors of the
 *                        tiles whose storages are declared
 *
 * A tile that fits into a single variable of the native type matching the
 * tile's vector length is declared as one variable named after the
 * \b baseName field of the @ref Tile structure. Otherwise the declaration
 * depends on the requested storage:
 *  - private array: the array is named after the base name and its size is
 *    sufficient to fit the tile;
 *  - set of variables: each variable is named with the base name followed by
 *    an integer index that starts from zero and is incremented by one for
 *    each subsequent variable.
 *
 * @return 0 on success, and -EOVERFLOW if the source buffer is overflowed
 *
 * @ingroup TILES
 */
int declareTileStorages(
    struct KgenContext *ctx,
    const struct BlasGenSettings *gset);
235 
236 /**
237  * @internal
238  * @brief Declare variable needed to store one tile
239  *
240  * @param[out] ctx        Generator context
241  * @param[in] tile        Tile settings containing desctiptors of
242  *                        a tile to declare storages for
243  *
244  * If a tile is fit into a single variable of the native type matching
245  * to the tile's vector length, it is declared a single variable with the name
246  * matching the \b baseName field being a part of the @ref Tile structure.
247  * If not, the following rules are applied. If the tile is needed to be stored
248  * in a private array, variable name matches the base name and array size
249  * is sufficient to fit such a tile. If the tile is needed to be stored
250  * in a set of variables which names are arranged as the base name followed
251  * with an integer index starting from zero and incremented by one for each
252  * subsequent variable.
253  *
254  * @return 0 on success, and -EOVERFLOW if the source buffer is overflowed
255  *
256  * @ingroup TILES
257  */
258 int
259 declareOneTileStorage(struct KgenContext *ctx, const Tile *tile);
260 
261 /**
262  * @internal
263  * @brief Sprintf element composed of one or several data elements
264  *        stored in the tile
265  *
266  * @param[out] str          Kernel string object to store tile element
267  *                          expression
268  * @param[in] tile          Tile description structure
269  * @param[in] row           Row of the starting element
270  * @param[in] col           Element column
271  * @param[in] len           Number of tile elements needed to be captured by
272  *                          the expression
273  *
274  * \b row should be less than number of rows and \b col should be less than
275  * number of columns in the tile. Traversal of a tile line is not allowed.
276  * That means \b col plus \b len should be not greater than number of columns
277  * if the tile is stored in direct form, and \b row plus \b len should be not
278  * greater than number of rows if the tile is stored in transposed form.
279  * If it is not hold true in debug mode, an assertion is triggered.
280  * In the release may produce a wrong code which can be even not compilable.
281  *
282  * @ingroup TILES
283  */
284 void
285 sprintfTileElement(
286     Kstring *str,
287     const Tile *tile,
288     unsigned int row,
289     unsigned int col,
290     unsigned int len);
291 
292 /**
293  * @internal
294  * @brief Sprintf half of a single complex data element stored in the tile
295  *
296  * @param[out] str          Kernel string object to store tile element
297  *                          expression
298  * @param[in] tile          Tile description structure
299  * @param[in] row           Row of the starting element
300  * @param[in] col           Element column
301  * @param[in] half          Half type
302  *
303  * The restrictions for \b row and \b col are the same as for
304  * sprintfTileElement(). This function is applicable only for tiles containing
305  * complex data and must not be used in case of real data.
306  *
307  * @ingroup TILES
308  */
309 void
310 sprintfTileElementHalf(
311     Kstring *str,
312     const Tile *tile,
313     unsigned int row,
314     unsigned int col,
315     TileElementHalf half);
316 
317 /**
318  * @internal
319  * @brief Sprintf element composed of one or several data elements
320  *        stored in each of the tiles
321  *
322  * @param[out] kstrs        Kernel string objects array to store element
323  *                          expression for each tile
324  * @param[in] row           Vectorizable element row
325  * @param[in] col           Vectorizable element column
326  * @param[in] num           Number of tile description structure
327  * @param[in] first         First tile description structure
328  *
329  * Decides how many vectored access in for each line of each tile will be and
330  * does sprintfTileElement() for each of tiles. This function can have got any
331  * value of \b row \b and \b col \b. \b kstrs \b and \b tile->baseName \b can
332  * have NULL, then no sprintfTileElement() will be executed.
333  *
334  * @return 0 if no sprintf tiles, or number of vectors in one line
335  *
336  * @ingroup TILES
337  */
338 int
339 forEachTile(Kstring *kstrs,
340             unsigned int row,
341             unsigned int col,
342             unsigned int num,
343             Tile *first,
344             ...);
345 
346 /**
347  * @internal
348  * @brief Generate assigning a tile element with zero
349  *
350  * @param[out] ctx      Generator context
351  * @param[in] tile      Tile description structure
352  * @param[in] row       Row of the starting element
353  * @param[in] col       Element column
354  * @param[in] len       Number of elements needed to be assigned with zero
355  *
356  * See decription of sprintfTileElement() for more details about restrictions
357  * on \b row, \b col and \b len.
358  *
359  * @ingroup TILES
360  */
361 void
362 genSetZeroInTile(
363     struct KgenContext *ctx,
364     const Tile *tile,
365     unsigned int row,
366     unsigned int col,
367     unsigned int len);
368 
369 /**
370  * @internal
371  * @brief Generate assigning a tile element with unit
372  *
373  * @internal
374  * @brief Generate assigning a tile element with zero
375  *
376  * @param[out] ctx      Generator context
377  * @param[in] tile      Tile description structure
378  * @param[in] row       Row of the starting element
379  * @param[in] col       Element column
380  *
381  * \b row should be less than number of rows and \b col should be less than
382  * number of columns in the tile. If it is not hold true in debug mode,
383  * an assertion is triggered. In the release may produce a wrong code which
384  * can be even not compilable.
385  *
386  * @ingroup TILES
387  */
388 void
389 genSetUnitInTile(
390     struct KgenContext *ctx,
391     const Tile *tile,
392     unsigned int row,
393     unsigned int col);
394 
395 /**
396  * @internal
397  * @brief Generate zeroing an entire tile
398  *
399  * @param[out] ctx      Generator context
400  * @param[in] tile      Tile description structure
401  *
402  * @ingroup TILES
403  */
404 void
405 genZeroTile(struct KgenContext *ctx, const Tile *tile);
406 
407 /**
408  * @internal
409  * @brief Generate copying between 2 tiles
410  *
411  * @param[out] ctx      Generator context
412  * @param[in] dst       Destination tile
413  * @param[in] src       Source tile
414  *
415  * @ingroup TILES
416  */
417 void
418 genTileCopy(
419     struct KgenContext *ctx,
420     const Tile *dst,
421     const Tile *src,
422     TileCopyOps op);
423 
424 #endif /* TILE_H_ */
425