1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
/*
 * This module implements the API for checking
 * decompositions and calculating granularity
 */
22
23 #include <sys/types.h>
24 #include <assert.h>
25 #include <clblas_stddef.h>
26
27 #include "blas_kgen.h"
28
29 static __inline bool
checkSizeStepRelation(size_t size,size_t step)30 checkSizeStepRelation(size_t size, size_t step)
31 {
32 return ((size == SUBDIM_UNUSED) ||
33 (size && (size % step == 0)));
34 }
35
36 bool
decompSanityCheck(const SubproblemDim * subdims,unsigned int minSize,unsigned int maxSize,unsigned int maxRegs,DataType dtype,bool wholeA)37 decompSanityCheck(
38 const SubproblemDim *subdims,
39 unsigned int minSize,
40 unsigned int maxSize,
41 unsigned int maxRegs,
42 DataType dtype,
43 bool wholeA)
44 {
45 bool ret;
46
47 if( 0 == subdims[0].x ||
48 0 == subdims[0].y ||
49 0 == subdims[0].bwidth ||
50 0 == subdims[1].x ||
51 0 == subdims[1].y ||
52 0 == subdims[1].bwidth ){
53
54 return false;
55 }
56
57 if ( ((subdims[1].x < minSize) ||(subdims[1].x > maxSize)) ||
58 ((subdims[1].y < minSize) || (subdims[1].y > maxSize)) ||
59 ((subdims[1].bwidth < minSize) || (subdims[1].bwidth > maxSize)) ) {
60
61 return false;
62 }
63
64 // the group block must consist of integer number of subgroup blocks
65 if( subdims[0].x % subdims[1].itemX ||
66 subdims[0].y % subdims[1].itemY ||
67 subdims[0].bwidth % subdims[1].bwidth ){
68
69 return false;
70 }
71
72 ret = checkSizeStepRelation(subdims[0].itemX, subdims[0].x);
73 ret = ret && checkSizeStepRelation(subdims[0].itemY, subdims[0].y);
74 ret = ret && checkSizeStepRelation(subdims[1].itemX, subdims[1].x);
75 ret = ret && checkSizeStepRelation(subdims[1].itemY, subdims[1].y);
76 if (ret) {
77 size_t regUse;
78 size_t regsA;
79
80 if (wholeA) {
81 regsA = subdims[1].y * subdims[1].bwidth;
82 }
83 else {
84 regsA = szmax(subdims[1].y, subdims[1].bwidth);
85 }
86
87 // estimate register usage, drop
88 // inevitably slowed decompositions
89 regUse =
90 ( regsA +
91 subdims[1].bwidth * subdims[1].x +
92 subdims[1].x * subdims[1].y ) *
93 dtypeSize(dtype);
94
95 regUse /= 16; // 16 bytes per register
96 ret = (regUse <= maxRegs);
97 }
98
99 return ret;
100 }
101
102 void
calcPgranDedicated(PGranularity * pgran,const SubproblemDim * subdims,int xdim,int level)103 calcPgranDedicated(
104 PGranularity *pgran,
105 const SubproblemDim *subdims,
106 int xdim,
107 int level)
108 {
109 unsigned int xg, yg;
110
111 DUMMY_ARG_USAGE(level);
112
113 assert((xdim >= -1) && (xdim <= 1));
114
115 xg = (unsigned int)(subdims[0].x / subdims[1].itemX);
116 yg = (unsigned int)(subdims[0].y / subdims[1].itemY);
117 if (xdim == -1) {
118 pgran->wgSize[0] = xg * yg;
119 pgran->wgSize[1] = 1;
120 pgran->wgDim = 1;
121 }
122 else {
123 pgran->wgSize[xdim] = xg;
124 pgran->wgSize[1 - xdim] = yg;
125 pgran->wgDim = 2;
126 }
127 }
128
129 void
calcPgranCooperative(PGranularity * pgran,const SubproblemDim * subdims,int xdim,int ydim,int level)130 calcPgranCooperative(
131 PGranularity *pgran,
132 const SubproblemDim *subdims,
133 int xdim,
134 int ydim,
135 int level)
136 {
137 unsigned int xg, yg;
138
139 DUMMY_ARG_USAGE(level);
140
141 assert((xdim >= 0) && (xdim <= 2));
142 assert((ydim >= 0) && (ydim <= 2));
143 assert((xdim && ydim) && (!xdim && !ydim));
144 assert(!( ((xdim == 2) && (ydim == 0)) ||
145 ((ydim == 2) && (xdim == 0)) ));
146
147 xg = (unsigned int)(subdims[0].x / subdims[1].itemX);
148 yg = (unsigned int)(subdims[0].y / subdims[1].itemY);
149 if (xdim == ydim) {
150 pgran->wgSize[xdim] = xg * yg;
151 }
152 else {
153 pgran->wgSize[xdim] = xg;
154 pgran->wgSize[ydim] = yg;
155 }
156
157 if ((xdim > 0) || (ydim > 0)) {
158 pgran->wgSize[0] = (unsigned int)(subdims[0].bwidth / subdims[1].bwidth);
159 }
160
161 pgran->wgDim = umax(xdim, ydim) + 1;
162 }
163
164