1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
/*
 * This module implements the API for checking decompositions
 * and calculating process granularity.
 */
22 
23 #include <sys/types.h>
24 #include <assert.h>
25 #include <clblas_stddef.h>
26 
27 #include "blas_kgen.h"
28 
29 static __inline bool
checkSizeStepRelation(size_t size,size_t step)30 checkSizeStepRelation(size_t size, size_t step)
31 {
32     return ((size == SUBDIM_UNUSED) ||
33             (size && (size % step == 0)));
34 }
35 
36 bool
decompSanityCheck(const SubproblemDim * subdims,unsigned int minSize,unsigned int maxSize,unsigned int maxRegs,DataType dtype,bool wholeA)37 decompSanityCheck(
38     const SubproblemDim *subdims,
39     unsigned int minSize,
40     unsigned int maxSize,
41     unsigned int maxRegs,
42     DataType dtype,
43     bool wholeA)
44 {
45     bool ret;
46 
47     if( 0 == subdims[0].x ||
48         0 == subdims[0].y ||
49         0 == subdims[0].bwidth ||
50         0 == subdims[1].x ||
51         0 == subdims[1].y ||
52         0 == subdims[1].bwidth ){
53 
54         return false;
55     }
56 
57     if ( ((subdims[1].x < minSize) ||(subdims[1].x > maxSize)) ||
58          ((subdims[1].y < minSize) || (subdims[1].y > maxSize)) ||
59          ((subdims[1].bwidth < minSize) || (subdims[1].bwidth > maxSize)) ) {
60 
61         return false;
62     }
63 
64     // the group block must consist of integer number of subgroup blocks
65     if( subdims[0].x % subdims[1].itemX ||
66         subdims[0].y % subdims[1].itemY ||
67         subdims[0].bwidth % subdims[1].bwidth ){
68 
69         return false;
70     }
71 
72     ret = checkSizeStepRelation(subdims[0].itemX, subdims[0].x);
73     ret = ret && checkSizeStepRelation(subdims[0].itemY, subdims[0].y);
74     ret = ret && checkSizeStepRelation(subdims[1].itemX, subdims[1].x);
75     ret = ret && checkSizeStepRelation(subdims[1].itemY, subdims[1].y);
76     if (ret) {
77         size_t regUse;
78         size_t regsA;
79 
80         if (wholeA) {
81             regsA = subdims[1].y * subdims[1].bwidth;
82         }
83         else {
84             regsA = szmax(subdims[1].y, subdims[1].bwidth);
85         }
86 
87         // estimate register usage, drop
88         // inevitably slowed decompositions
89         regUse =
90             ( regsA +
91               subdims[1].bwidth * subdims[1].x +
92               subdims[1].x * subdims[1].y ) *
93              dtypeSize(dtype);
94 
95         regUse /= 16; // 16 bytes per register
96         ret = (regUse <= maxRegs);
97     }
98 
99     return ret;
100 }
101 
102 void
calcPgranDedicated(PGranularity * pgran,const SubproblemDim * subdims,int xdim,int level)103 calcPgranDedicated(
104     PGranularity *pgran,
105     const SubproblemDim *subdims,
106     int xdim,
107     int level)
108 {
109     unsigned int xg, yg;
110 
111     DUMMY_ARG_USAGE(level);
112 
113     assert((xdim >= -1) && (xdim <= 1));
114 
115     xg = (unsigned int)(subdims[0].x / subdims[1].itemX);
116     yg = (unsigned int)(subdims[0].y / subdims[1].itemY);
117     if (xdim == -1) {
118         pgran->wgSize[0] = xg * yg;
119         pgran->wgSize[1] = 1;
120         pgran->wgDim = 1;
121     }
122     else {
123         pgran->wgSize[xdim] = xg;
124         pgran->wgSize[1 - xdim] = yg;
125         pgran->wgDim = 2;
126     }
127 }
128 
129 void
calcPgranCooperative(PGranularity * pgran,const SubproblemDim * subdims,int xdim,int ydim,int level)130 calcPgranCooperative(
131     PGranularity *pgran,
132     const SubproblemDim *subdims,
133     int xdim,
134     int ydim,
135     int level)
136 {
137     unsigned int xg, yg;
138 
139     DUMMY_ARG_USAGE(level);
140 
141     assert((xdim >= 0) && (xdim <= 2));
142     assert((ydim >= 0) && (ydim <= 2));
143     assert((xdim && ydim) && (!xdim && !ydim));
144     assert(!( ((xdim == 2) && (ydim == 0)) ||
145               ((ydim == 2) && (xdim == 0)) ));
146 
147     xg = (unsigned int)(subdims[0].x / subdims[1].itemX);
148     yg = (unsigned int)(subdims[0].y / subdims[1].itemY);
149     if (xdim == ydim) {
150         pgran->wgSize[xdim] = xg * yg;
151     }
152     else {
153         pgran->wgSize[xdim] = xg;
154         pgran->wgSize[ydim] = yg;
155     }
156 
157     if ((xdim > 0) || (ydim > 0)) {
158         pgran->wgSize[0] = (unsigned int)(subdims[0].bwidth / subdims[1].bwidth);
159     }
160 
161     pgran->wgDim = umax(xdim, ydim) + 1;
162 }
163 
164