1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 #include <sys/types.h>
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <assert.h>
24
25 #include <defbool.h>
26 #include <clblas_stddef.h>
27
28 #include "blas_kgen.h"
29
30 // assign tile's base name to 'name' if it is assigned to zero pointer
31 static __inline void
selectTileBaseName(Tile * tile,const char * name)32 selectTileBaseName(Tile *tile, const char *name)
33 {
34 if (tile->baseName == NULL) {
35 tile->baseName = name;
36 }
37 }
38
39 static void
selectDefaultTileVecLen(Tile * tile,TileCreationFlags tflags,const BlasGenSettings * gset,BlasFunctionID funcID,MatrixRole mrole)40 selectDefaultTileVecLen(
41 Tile *tile,
42 TileCreationFlags tflags,
43 const BlasGenSettings *gset,
44 BlasFunctionID funcID,
45 MatrixRole mrole)
46 {
47 if (tflags & TILE_WITH_FETCH_VECLEN) {
48 tile->vecLen = getVecLen(gset, funcID, mrole);
49 }
50 else {
51 size_t w;
52
53 w = (tile->trans) ? tile->nrRows : tile->nrCols;
54 if (tile->packed) {
55 size_t wpad, height;
56
57 wpad = roundUpPow2(w);
58 height = (tile->trans) ? tile->nrCols : tile->nrRows;
59 tile->vecLen = (unsigned int)szmin(height * wpad, MAX_TILE_VECLEN);
60 }
61 else {
62 tile->vecLen = (unsigned int)roundUpPow2(w);
63 tile->vecLen = (unsigned int)szmin(tile->vecLen, MAX_TILE_VECLEN);
64 }
65 }
66 }
67
68 // physical tile pitch, can be less than one vector in case of packed mode
69 static unsigned int
tilePitch(const Tile * tile)70 tilePitch(const Tile *tile)
71 {
72 unsigned int pitch;
73
74 if (!tile->trans) {
75 if (tile->packed) {
76 pitch = (unsigned int)roundUpPow2(tile->nrCols);
77 }
78 else {
79 pitch = (unsigned int)roundUp(tile->nrCols, tile->vecLen);
80 }
81 }
82 else {
83 if (tile->packed) {
84 pitch = (unsigned int)roundUpPow2(tile->nrRows);
85 }
86 else {
87 pitch = (unsigned int)roundUp(tile->nrRows, tile->vecLen);
88 }
89 }
90
91 return pitch;
92 }
93
94 void
initTile(Tile * tile,const char * baseName,unsigned int nrRows,unsigned int nrCols,unsigned int vecLen,DataType dtype,PrivateStorageType storType,bool trans,bool packed)95 initTile(
96 Tile *tile,
97 const char *baseName,
98 unsigned int nrRows,
99 unsigned int nrCols,
100 unsigned int vecLen,
101 DataType dtype,
102 PrivateStorageType storType,
103 bool trans,
104 bool packed)
105 {
106 assert(baseName == NULL || strlen(baseName) <= MAX_TILE_BASE_NAMELEN);
107
108 tile->baseName = baseName;
109 tile->nrRows = nrRows;
110 tile->nrCols = nrCols;
111 tile->vecLen = umin(MAX_TILE_VECLEN, vecLen);
112 tile->dtype = dtype;
113 tile->storType = storType;
114 tile->trans = trans;
115 tile->packed = packed;
116 }
117
118 void
initDefaultTiles(BlasGenSettings * gset,BlasFunctionID funcID,TileCreationFlags flags,PrivateStorageType storType)119 initDefaultTiles(
120 BlasGenSettings *gset,
121 BlasFunctionID funcID,
122 TileCreationFlags flags,
123 PrivateStorageType storType)
124 {
125 const SubproblemDim *dim = &gset->subdims[1];
126 KernelExtraFlags kflags = gset->kextra->flags;
127 DataType dtype = gset->kextra->dtype;
128 Tile *tile;
129 const char *name;
130 int level;
131 bool packed;
132
133 level = funcBlasLevel(funcID);
134 packed = ((flags & TILE_PACKED) != 0);
135
136 tile = &gset->tileA;
137 selectTileBaseName(tile, "a");
138 initTile(tile, tile->baseName, (unsigned int)dim->y,
139 (unsigned int)dim->bwidth, 1, dtype, storType, false, packed);
140
141 tile->trans = isMatrixAccessColMaj(funcID, kflags, MATRIX_A);
142 if (!(gset->flags & BGF_WHOLE_A)) {
143 if (tile->trans) {
144 tile->nrCols = 1;
145 }
146 else {
147 tile->nrRows = 1;
148 }
149 }
150 selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_A);
151
152 tile = &gset->tileBX;
153 name = (level == 2) ? "x" : "b";
154 selectTileBaseName(tile, name);
155 initTile(tile, tile->baseName, (unsigned int)dim->bwidth,
156 (unsigned int)dim->x, 1, dtype, storType, false, packed);
157
158 /*
159 * NOTE: Tiles for the level 2 functions are forced to be transposed
160 * in order to allow user to fetch elements belonging to different
161 * rows which is very useful in case of unit increment between
162 * elements because provides faster access to the global memory.
163 */
164 if (level == 2) {
165 tile->trans = true;
166 }
167 else {
168 tile->trans = !isMatrixAccessColMaj(funcID, kflags, MATRIX_B);
169 }
170 selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_B);
171
172 tile = &gset->tileCY;
173 name = (level == 2) ? "y" : "c";
174 selectTileBaseName(tile, name);
175
176 initTile(tile, tile->baseName, (unsigned int)dim->y,
177 (unsigned int)dim->x, 1, dtype, storType, false,
178 packed);
179
180 if (level == 2) {
181 tile->trans = true;
182 }
183 else if (!(flags & TILE_C_FORCE_NOTRANS)) {
184 tile->trans = isMatrixAccessColMaj(funcID, kflags, MATRIX_C);
185 }
186 selectDefaultTileVecLen(tile, flags, gset, funcID, MATRIX_C);
187
188 // FIXME: remove the restriction
189 /*if (isComplexType(tile->dtype)) {
190 tile->vecLen = 1;
191 }*/
192 }
193
194 unsigned int
tileVectorsNum(const Tile * tile)195 tileVectorsNum(const Tile *tile)
196 {
197 size_t pitch, height;
198
199 pitch = tilePitch(tile);
200 height = (tile->trans) ? tile->nrCols : tile->nrRows;
201
202 return (unsigned int)divRoundUp(height * pitch, tile->vecLen);
203 }
204
205 unsigned int
tileStorageSize(const Tile * tile)206 tileStorageSize(const Tile *tile)
207 {
208 unsigned int u;
209
210 u = tileVectorsNum(tile) * tile->vecLen;
211
212 return u;
213 }
214
215 unsigned int
tileLineSegmentLen(const Tile * tile)216 tileLineSegmentLen(const Tile *tile)
217 {
218 unsigned int pitch;
219 unsigned int len;
220
221 pitch = tilePitch(tile);
222 len = umin(pitch, tile->vecLen);
223 if (tile->trans) {
224 len = umin(len, tile->nrRows);
225 }
226 else {
227 len = umin(len, tile->nrCols);
228 }
229
230 return len;
231 }
232
233 int
declareOneTileStorage(struct KgenContext * ctx,const Tile * tile)234 declareOneTileStorage(struct KgenContext *ctx, const Tile *tile)
235 {
236 char tmp[1024];
237 const char *tname;
238 int r;
239 size_t size;
240
241 getVectorTypeName(tile->dtype, tile->vecLen, &tname, NULL);
242 size = tileVectorsNum(tile);
243 if (tile->storType == PRIV_STORAGE_ARRAY) {
244 sprintf(tmp, "%s %s[%lu];\n", tname, tile->baseName, size);
245 }
246 else {
247 size_t i;
248 char *p;
249
250 sprintf(tmp, "%s %s0", tname, tile->baseName);
251 p = tmp + strlen(tmp);
252 for (i = 1; i < size; i++) {
253 sprintf(p, ", %s%lu", tile->baseName, i);
254 p += strlen(p);
255 }
256 strcpy(p, ";\n");
257 }
258
259 r = kgenAddStmt(ctx, tmp);
260
261 return (r) ? -EOVERFLOW : 0;
262 }
263
264 int
declareTileStorages(struct KgenContext * ctx,const BlasGenSettings * gset)265 declareTileStorages(struct KgenContext *ctx, const BlasGenSettings *gset)
266 {
267 int ret;
268
269 ret = declareOneTileStorage(ctx, &gset->tileA);
270 if (!ret) {
271 ret = declareOneTileStorage(ctx, &gset->tileBX);
272 }
273 if (!ret) {
274 declareOneTileStorage(ctx, &gset->tileCY);
275 }
276
277 return ret;
278 }
279
280 void
sprintfTileElement(Kstring * str,const Tile * tile,unsigned int row,unsigned int col,unsigned int len)281 sprintfTileElement(
282 Kstring *str,
283 const Tile *tile,
284 unsigned int row,
285 unsigned int col,
286 unsigned int len)
287 {
288 unsigned int pitch;
289 unsigned int elemLen;
290 unsigned int off;
291 unsigned int vecLen = tile->vecLen;
292 char vchunk[24];
293
294 if (len == 0) {
295 len = vecLen;
296 }
297
298 pitch = tilePitch(tile);
299 elemLen = isComplexType(tile->dtype) ? 2 : 1;
300 if (!tile->trans) {
301 assert((row < tile->nrRows) && (col + len <= tile->nrCols));
302 off = (row * pitch + col) * elemLen;
303 }
304 else {
305 assert((row + len <= tile->nrRows) && (col < tile->nrCols));
306 off = (col * pitch + row) * elemLen;
307 }
308
309 vecLen *= elemLen;
310 sprintfVecChunk(vchunk, vecLen, len * elemLen, off % vecLen);
311
312 if (tile->storType == PRIV_STORAGE_ARRAY) {
313 sprintf(str->buf, "%s[%u]%s", tile->baseName, off / vecLen, vchunk);
314 }
315 else {
316 sprintf(str->buf, "%s%u%s", tile->baseName, off / vecLen, vchunk);
317 }
318 }
319
320 void
sprintfTileElementHalf(Kstring * str,const Tile * tile,unsigned int row,unsigned int col,TileElementHalf half)321 sprintfTileElementHalf(
322 Kstring *str,
323 const Tile *tile,
324 unsigned int row,
325 unsigned int col,
326 TileElementHalf half)
327 {
328 int len;
329
330 assert(isComplexType(tile->dtype));
331
332 // sprintf the full element and the drop an unneded half
333 sprintfTileElement(str, tile, row, col, 1);
334 len = (int)strlen(str->buf);
335 if (half == TE_HALF_HIGH) {
336 str->buf[len - 2] = str->buf[len - 1];
337 }
338 str->buf[len - 1] = '\0';
339 }
340
341 int
forEachTile(Kstring * kstr,unsigned int row,unsigned int col,unsigned int num,Tile * first,...)342 forEachTile(Kstring *kstr, unsigned int row, unsigned int col,
343 unsigned int num, Tile *first, ...)
344 {
345 unsigned int minVecLen = first->vecLen;
346 unsigned int valRow = first->nrRows;
347 unsigned int valCol = first->nrCols;
348 va_list argptr;
349 unsigned int i;
350
351 va_start(argptr, first);
352 for (i = 1; i < num; i++) {
353 Tile * cur = va_arg( argptr, Tile * );
354 minVecLen = umin(minVecLen, cur->vecLen);
355 }
356 va_end(argptr);
357
358 if (first->trans) {
359 valRow /= minVecLen;
360 }
361 else {
362 valCol /= minVecLen;
363 }
364
365 if (row >= valRow || col >= valCol /*|| row < 0 || col < 0*/) { //would be signed
366 return 0;
367 }
368 if (kstr) {
369 va_start(argptr, first);
370 for (i = 0; i < num; i++) {
371 Tile * cur = i ? va_arg( argptr, Tile * ) : first;
372 if (cur->baseName) {
373 unsigned int vRow = (cur->trans ? row * minVecLen : row);
374 unsigned int vCol = (cur->trans ? col : col * minVecLen);
375 sprintfTileElement(&kstr[i], cur, vRow, vCol, minVecLen);
376 }
377 }
378 va_end(argptr);
379 }
380 return first->trans ? valRow : valCol;
381 }
382
383 void
genSetZeroInTile(struct KgenContext * ctx,const Tile * tile,unsigned int row,unsigned int col,unsigned int len)384 genSetZeroInTile(
385 struct KgenContext *ctx,
386 const Tile *tile,
387 unsigned int row,
388 unsigned int col,
389 unsigned int len)
390 {
391 char tmp[1024];
392 Kstring elem;
393
394 sprintfTileElement(&elem, tile, row, col, len);
395 sprintf(tmp, "%s = 0;\n", elem.buf);
396 kgenAddStmt(ctx, tmp);
397 }
398
399 void
genSetUnitInTile(struct KgenContext * ctx,const Tile * tile,unsigned int row,unsigned int col)400 genSetUnitInTile(
401 struct KgenContext *ctx,
402 const Tile *tile,
403 unsigned int row,
404 unsigned int col)
405 {
406 char tmp[1024];
407 Kstring elem;
408 const char *s;
409
410 sprintfTileElement(&elem, tile, row, col, 1);
411 s = strOne(tile->dtype);
412 sprintf(tmp, "%s = %s;\n", elem.buf, s);
413 kgenAddStmt(ctx, tmp);
414 }
415
416 void
genZeroTile(struct KgenContext * ctx,const Tile * tile)417 genZeroTile(struct KgenContext *ctx, const Tile *tile)
418 {
419 char tmp[1024];
420 Kstring elem;
421 unsigned int incRows, incCols;
422 unsigned int i, j, v;
423
424 v = tileLineSegmentLen(tile);
425 if (!tile->trans) {
426 incRows = 1;
427 incCols = v;
428 }
429 else {
430 incRows = v;
431 incCols = 1;
432 }
433
434 for (i = 0; i < tile->nrRows; i += incRows) {
435 for (j = 0; j < tile->nrCols; j += incCols) {
436 sprintfTileElement(&elem, tile, i, j, v);
437 sprintf(tmp, "%s = 0;\n", elem.buf);
438 kgenAddStmt(ctx, tmp);
439 }
440 }
441
442 kgenAddBlankLine(ctx);
443 }
444
445 void
genTileCopy(struct KgenContext * ctx,const Tile * dst,const Tile * src,TileCopyOps op)446 genTileCopy(
447 struct KgenContext *ctx,
448 const Tile *dst,
449 const Tile *src,
450 TileCopyOps op)
451 {
452 char tmp[1024];
453 Kstring el1, el2;
454 unsigned int nrRows, nrCols;
455 unsigned int incRows, incCols;
456 unsigned int vlen;
457 unsigned int i, j;
458
459 nrRows = umin(dst->nrRows, src->nrRows);
460 nrCols = umin(dst->nrCols, src->nrCols);
461 if (dst->trans != src->trans) {
462 vlen = 1;
463 incRows = incCols = 1;
464 }
465 else {
466 vlen = umin(dst->vecLen, src->vecLen);
467 if (!dst->trans) {
468 incRows = 1;
469 incCols = umin(dst->nrCols, src->nrCols);
470 incCols = umin(incCols, vlen);
471 }
472 else {
473 incRows = umin(dst->nrRows, src->nrRows);
474 incRows = umin(incRows, vlen);
475 incCols = 1;
476 }
477 }
478
479 for (i = 0; i < nrRows; i += incRows) {
480 for (j = 0; j < nrCols; j += incCols) {
481 sprintfTileElement(&el1, dst, i, j, vlen);
482 sprintfTileElement(&el2, src, i, j, vlen);
483 switch( op )
484 {
485 case TILECOPY_ASSIGN:
486 sprintf(tmp, "%s = %s;\n", el1.buf, el2.buf);
487 break;
488
489 case TILECOPY_ADD_ASSIGN:
490 sprintf(tmp, "%s += %s;\n", el1.buf, el2.buf);
491 break;
492
493 case TILECOPY_SUB_ASSIGN:
494 sprintf(tmp, "%s -= %s;\n", el1.buf, el2.buf);
495 break;
496
497 case TILECOPY_MUL_ASSIGN:
498 sprintf(tmp, "%s *= %s;\n", el1.buf, el2.buf);
499 break;
500
501 case TILECOPY_DIV_ASSIGN:
502 sprintf(tmp, "%s /= %s;\n", el1.buf, el2.buf);
503 break;
504
505 case TILECOPY_MOD_ASSIGN:
506 sprintf(tmp, "%s %%= %s;\n", el1.buf, el2.buf);
507 break;
508
509 default:
510 break;
511 }
512 kgenAddStmt(ctx, tmp);
513 }
514 }
515
516 kgenAddBlankLine(ctx);
517 }
518