1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 #include <sys/types.h>
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <assert.h>
24 
25 #include <defbool.h>
26 #include <clblas_stddef.h>
27 
28 #include "blas_kgen.h"
29 
30 // assign tile's base name to 'name' if it is assigned to zero pointer
31 static __inline void
selectTileBaseName(Tile * tile,const char * name)32 selectTileBaseName(Tile *tile, const char *name)
33 {
34     if (tile->baseName == NULL) {
35         tile->baseName = name;
36     }
37 }
38 
39 static void
selectDefaultTileVecLen(Tile * tile,TileCreationFlags tflags,const BlasGenSettings * gset,BlasFunctionID funcID,MatrixRole mrole)40 selectDefaultTileVecLen(
41     Tile *tile,
42     TileCreationFlags tflags,
43     const BlasGenSettings *gset,
44     BlasFunctionID funcID,
45     MatrixRole mrole)
46 {
47     if (tflags & TILE_WITH_FETCH_VECLEN) {
48         tile->vecLen = getVecLen(gset, funcID, mrole);
49     }
50     else {
51         size_t w;
52 
53         w = (tile->trans) ? tile->nrRows : tile->nrCols;
54         if (tile->packed) {
55             size_t wpad, height;
56 
57             wpad = roundUpPow2(w);
58             height = (tile->trans) ? tile->nrCols : tile->nrRows;
59             tile->vecLen = (unsigned int)szmin(height * wpad, MAX_TILE_VECLEN);
60         }
61         else {
62             tile->vecLen = (unsigned int)roundUpPow2(w);
63             tile->vecLen = (unsigned int)szmin(tile->vecLen, MAX_TILE_VECLEN);
64         }
65     }
66 }
67 
68 // physical tile pitch, can be less than one vector in case of packed mode
69 static unsigned int
tilePitch(const Tile * tile)70 tilePitch(const Tile *tile)
71 {
72     unsigned int pitch;
73 
74     if (!tile->trans) {
75         if (tile->packed) {
76             pitch = (unsigned int)roundUpPow2(tile->nrCols);
77         }
78         else {
79             pitch = (unsigned int)roundUp(tile->nrCols, tile->vecLen);
80         }
81     }
82     else {
83         if (tile->packed) {
84             pitch = (unsigned int)roundUpPow2(tile->nrRows);
85         }
86         else {
87             pitch = (unsigned int)roundUp(tile->nrRows, tile->vecLen);
88         }
89     }
90 
91     return pitch;
92 }
93 
94 void
initTile(Tile * tile,const char * baseName,unsigned int nrRows,unsigned int nrCols,unsigned int vecLen,DataType dtype,PrivateStorageType storType,bool trans,bool packed)95 initTile(
96     Tile *tile,
97     const char *baseName,
98     unsigned int nrRows,
99     unsigned int nrCols,
100     unsigned int vecLen,
101     DataType dtype,
102     PrivateStorageType storType,
103     bool trans,
104     bool packed)
105 {
106     assert(baseName == NULL || strlen(baseName) <= MAX_TILE_BASE_NAMELEN);
107 
108     tile->baseName = baseName;
109     tile->nrRows = nrRows;
110     tile->nrCols = nrCols;
111     tile->vecLen = umin(MAX_TILE_VECLEN, vecLen);
112     tile->dtype = dtype;
113     tile->storType = storType;
114     tile->trans = trans;
115     tile->packed = packed;
116 }
117 
118 void
initDefaultTiles(BlasGenSettings * gset,BlasFunctionID funcID,TileCreationFlags flags,PrivateStorageType storType)119 initDefaultTiles(
120     BlasGenSettings *gset,
121     BlasFunctionID funcID,
122     TileCreationFlags flags,
123     PrivateStorageType storType)
124 {
125     const SubproblemDim *dim = &gset->subdims[1];
126     KernelExtraFlags kflags = gset->kextra->flags;
127     DataType dtype = gset->kextra->dtype;
128     Tile *tile;
129     const char *name;
130     int level;
131     bool packed;
132 
133     level = funcBlasLevel(funcID);
134     packed = ((flags & TILE_PACKED) != 0);
135 
136     tile = &gset->tileA;
137     selectTileBaseName(tile, "a");
138     initTile(tile, tile->baseName, (unsigned int)dim->y,
139              (unsigned int)dim->bwidth, 1, dtype, storType, false, packed);
140 
141     tile->trans = isMatrixAccessColMaj(funcID, kflags, MATRIX_A);
142     if (!(gset->flags & BGF_WHOLE_A)) {
143         if (tile->trans) {
144             tile->nrCols = 1;
145         }
146         else {
147             tile->nrRows = 1;
148         }
149     }
150     selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_A);
151 
152     tile = &gset->tileBX;
153     name = (level == 2) ? "x" : "b";
154     selectTileBaseName(tile, name);
155     initTile(tile, tile->baseName, (unsigned int)dim->bwidth,
156              (unsigned int)dim->x, 1, dtype, storType, false, packed);
157 
158     /*
159      * NOTE: Tiles for the level 2 functions are forced to be transposed
160      *       in order to allow user to fetch elements belonging to different
161      *       rows which is very useful in case of unit increment between
162      *       elements because provides faster access to the global memory.
163      */
164     if (level == 2) {
165         tile->trans = true;
166     }
167     else {
168         tile->trans = !isMatrixAccessColMaj(funcID, kflags, MATRIX_B);
169     }
170     selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_B);
171 
172     tile = &gset->tileCY;
173     name = (level == 2) ? "y" : "c";
174     selectTileBaseName(tile, name);
175 
176     initTile(tile, tile->baseName, (unsigned int)dim->y,
177              (unsigned int)dim->x, 1, dtype, storType, false,
178              packed);
179 
180     if (level == 2) {
181         tile->trans = true;
182     }
183     else if (!(flags & TILE_C_FORCE_NOTRANS)) {
184         tile->trans = isMatrixAccessColMaj(funcID, kflags, MATRIX_C);
185     }
186     selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_C);
187 
188     // FIXME: remove the restriction
189     /*if (isComplexType(tile->dtype)) {
190         tile->vecLen = 1;
191     }*/
192 }
193 
194 unsigned int
tileVectorsNum(const Tile * tile)195 tileVectorsNum(const Tile *tile)
196 {
197     size_t pitch, height;
198 
199     pitch = tilePitch(tile);
200     height = (tile->trans) ? tile->nrCols : tile->nrRows;
201 
202     return (unsigned int)divRoundUp(height * pitch, tile->vecLen);
203 }
204 
205 unsigned int
tileStorageSize(const Tile * tile)206 tileStorageSize(const Tile *tile)
207 {
208     unsigned int u;
209 
210     u = tileVectorsNum(tile) * tile->vecLen;
211 
212     return u;
213 }
214 
215 unsigned int
tileLineSegmentLen(const Tile * tile)216 tileLineSegmentLen(const Tile *tile)
217 {
218     unsigned int pitch;
219     unsigned int len;
220 
221     pitch = tilePitch(tile);
222     len = umin(pitch, tile->vecLen);
223     if (tile->trans) {
224         len = umin(len, tile->nrRows);
225     }
226     else {
227         len = umin(len, tile->nrCols);
228     }
229 
230     return len;
231 }
232 
233 int
declareOneTileStorage(struct KgenContext * ctx,const Tile * tile)234 declareOneTileStorage(struct KgenContext *ctx, const Tile *tile)
235 {
236     char tmp[1024];
237     const char *tname;
238     int r;
239     size_t size;
240 
241     getVectorTypeName(tile->dtype, tile->vecLen, &tname, NULL);
242     size = tileVectorsNum(tile);
243     if (tile->storType == PRIV_STORAGE_ARRAY) {
244         sprintf(tmp, "%s %s[%lu];\n", tname, tile->baseName, size);
245     }
246     else {
247         size_t i;
248         char *p;
249 
250         sprintf(tmp, "%s %s0", tname, tile->baseName);
251         p = tmp + strlen(tmp);
252         for (i = 1; i < size; i++) {
253             sprintf(p, ", %s%lu", tile->baseName, i);
254             p += strlen(p);
255         }
256         strcpy(p, ";\n");
257     }
258 
259     r = kgenAddStmt(ctx, tmp);
260 
261     return (r) ? -EOVERFLOW : 0;
262 }
263 
264 int
declareTileStorages(struct KgenContext * ctx,const BlasGenSettings * gset)265 declareTileStorages(struct KgenContext *ctx, const BlasGenSettings *gset)
266 {
267     int ret;
268 
269     ret = declareOneTileStorage(ctx, &gset->tileA);
270     if (!ret) {
271         ret = declareOneTileStorage(ctx, &gset->tileBX);
272     }
273     if (!ret) {
274         declareOneTileStorage(ctx, &gset->tileCY);
275     }
276 
277     return ret;
278 }
279 
280 void
sprintfTileElement(Kstring * str,const Tile * tile,unsigned int row,unsigned int col,unsigned int len)281 sprintfTileElement(
282     Kstring *str,
283     const Tile *tile,
284     unsigned int row,
285     unsigned int col,
286     unsigned int len)
287 {
288     unsigned int pitch;
289     unsigned int elemLen;
290     unsigned int off;
291     unsigned int vecLen = tile->vecLen;
292     char vchunk[24];
293 
294     if (len == 0) {
295         len = vecLen;
296     }
297 
298     pitch = tilePitch(tile);
299     elemLen = isComplexType(tile->dtype) ? 2 : 1;
300     if (!tile->trans) {
301         assert((row < tile->nrRows) && (col + len <= tile->nrCols));
302         off = (row * pitch + col) * elemLen;
303     }
304     else {
305         assert((row + len <= tile->nrRows) && (col < tile->nrCols));
306         off = (col * pitch + row) * elemLen;
307     }
308 
309     vecLen *= elemLen;
310     sprintfVecChunk(vchunk, vecLen, len * elemLen, off % vecLen);
311 
312     if (tile->storType == PRIV_STORAGE_ARRAY) {
313         sprintf(str->buf, "%s[%u]%s", tile->baseName, off / vecLen, vchunk);
314     }
315     else {
316         sprintf(str->buf, "%s%u%s", tile->baseName, off / vecLen, vchunk);
317     }
318 }
319 
320 void
sprintfTileElementHalf(Kstring * str,const Tile * tile,unsigned int row,unsigned int col,TileElementHalf half)321 sprintfTileElementHalf(
322     Kstring *str,
323     const Tile *tile,
324     unsigned int row,
325     unsigned int col,
326     TileElementHalf half)
327 {
328     int len;
329 
330     assert(isComplexType(tile->dtype));
331 
332     // sprintf the full element and the drop an unneded half
333     sprintfTileElement(str, tile, row, col, 1);
334     len = (int)strlen(str->buf);
335     if (half == TE_HALF_HIGH) {
336         str->buf[len - 2] = str->buf[len - 1];
337     }
338     str->buf[len - 1] = '\0';
339 }
340 
341 int
forEachTile(Kstring * kstr,unsigned int row,unsigned int col,unsigned int num,Tile * first,...)342 forEachTile(Kstring *kstr, unsigned int row, unsigned int col,
343             unsigned int num, Tile *first, ...)
344 {
345    unsigned int minVecLen = first->vecLen;
346    unsigned int valRow = first->nrRows;
347    unsigned int valCol = first->nrCols;
348    va_list argptr;
349    unsigned int i;
350 
351    va_start(argptr, first);
352    for (i = 1; i < num; i++) {
353        Tile * cur = va_arg( argptr, Tile * );
354        minVecLen = umin(minVecLen, cur->vecLen);
355    }
356    va_end(argptr);
357 
358    if (first->trans) {
359        valRow /= minVecLen;
360    }
361    else {
362        valCol /= minVecLen;
363    }
364 
365    if (row >= valRow || col >= valCol /*|| row < 0 || col < 0*/) { //would be signed
366        return 0;
367    }
368    if (kstr) {
369        va_start(argptr, first);
370        for (i = 0; i < num; i++) {
371            Tile * cur = i ? va_arg( argptr, Tile * ) : first;
372            if (cur->baseName) {
373                unsigned int vRow = (cur->trans ? row * minVecLen : row);
374                unsigned int vCol = (cur->trans ? col : col * minVecLen);
375                sprintfTileElement(&kstr[i], cur, vRow, vCol, minVecLen);
376            }
377        }
378        va_end(argptr);
379    }
380    return first->trans ? valRow : valCol;
381 }
382 
383 void
genSetZeroInTile(struct KgenContext * ctx,const Tile * tile,unsigned int row,unsigned int col,unsigned int len)384 genSetZeroInTile(
385     struct KgenContext *ctx,
386     const Tile *tile,
387     unsigned int row,
388     unsigned int col,
389     unsigned int len)
390 {
391     char tmp[1024];
392     Kstring elem;
393 
394     sprintfTileElement(&elem, tile, row, col, len);
395     sprintf(tmp, "%s = 0;\n", elem.buf);
396     kgenAddStmt(ctx, tmp);
397 }
398 
399 void
genSetUnitInTile(struct KgenContext * ctx,const Tile * tile,unsigned int row,unsigned int col)400 genSetUnitInTile(
401     struct KgenContext *ctx,
402     const Tile *tile,
403     unsigned int row,
404     unsigned int col)
405 {
406     char tmp[1024];
407     Kstring elem;
408     const char *s;
409 
410     sprintfTileElement(&elem, tile, row, col, 1);
411     s = strOne(tile->dtype);
412     sprintf(tmp, "%s = %s;\n", elem.buf, s);
413     kgenAddStmt(ctx, tmp);
414 }
415 
416 void
genZeroTile(struct KgenContext * ctx,const Tile * tile)417 genZeroTile(struct KgenContext *ctx, const Tile *tile)
418 {
419     char tmp[1024];
420     Kstring elem;
421     unsigned int incRows, incCols;
422     unsigned int i, j, v;
423 
424     v = tileLineSegmentLen(tile);
425     if (!tile->trans) {
426         incRows = 1;
427         incCols = v;
428     }
429     else {
430         incRows = v;
431         incCols = 1;
432     }
433 
434     for (i = 0; i < tile->nrRows; i += incRows) {
435         for (j = 0; j < tile->nrCols; j += incCols) {
436             sprintfTileElement(&elem, tile, i, j, v);
437             sprintf(tmp, "%s = 0;\n", elem.buf);
438             kgenAddStmt(ctx, tmp);
439         }
440     }
441 
442     kgenAddBlankLine(ctx);
443 }
444 
445 void
genTileCopy(struct KgenContext * ctx,const Tile * dst,const Tile * src,TileCopyOps op)446 genTileCopy(
447     struct KgenContext *ctx,
448     const Tile *dst,
449     const Tile *src,
450     TileCopyOps op)
451 {
452     char tmp[1024];
453     Kstring el1, el2;
454     unsigned int nrRows, nrCols;
455     unsigned int incRows, incCols;
456     unsigned int vlen;
457     unsigned int i, j;
458 
459     nrRows = umin(dst->nrRows, src->nrRows);
460     nrCols = umin(dst->nrCols, src->nrCols);
461     if (dst->trans != src->trans) {
462         vlen = 1;
463         incRows = incCols = 1;
464     }
465     else {
466         vlen = umin(dst->vecLen, src->vecLen);
467         if (!dst->trans) {
468             incRows = 1;
469             incCols = umin(dst->nrCols, src->nrCols);
470             incCols = umin(incCols, vlen);
471         }
472         else {
473             incRows = umin(dst->nrRows, src->nrRows);
474             incRows = umin(incRows, vlen);
475             incCols = 1;
476         }
477     }
478 
479     for (i = 0; i < nrRows; i += incRows) {
480         for (j = 0; j < nrCols; j += incCols) {
481             sprintfTileElement(&el1, dst, i, j, vlen);
482             sprintfTileElement(&el2, src, i, j, vlen);
483             switch( op )
484             {
485                 case TILECOPY_ASSIGN:
486                     sprintf(tmp, "%s = %s;\n", el1.buf, el2.buf);
487                     break;
488 
489                 case TILECOPY_ADD_ASSIGN:
490                     sprintf(tmp, "%s += %s;\n", el1.buf, el2.buf);
491                     break;
492 
493                 case TILECOPY_SUB_ASSIGN:
494                     sprintf(tmp, "%s -= %s;\n", el1.buf, el2.buf);
495                     break;
496 
497                 case TILECOPY_MUL_ASSIGN:
498                     sprintf(tmp, "%s *= %s;\n", el1.buf, el2.buf);
499                     break;
500 
501                 case TILECOPY_DIV_ASSIGN:
502                     sprintf(tmp, "%s /= %s;\n", el1.buf, el2.buf);
503                     break;
504 
505                 case TILECOPY_MOD_ASSIGN:
506                     sprintf(tmp, "%s %%= %s;\n", el1.buf, el2.buf);
507                     break;
508 
509                 default:
510                     break;
511             }
512             kgenAddStmt(ctx, tmp);
513         }
514     }
515 
516     kgenAddBlankLine(ctx);
517 }
518