1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name(s) of the copyright holder(s) nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 // blksz_t query
36
bli_blksz_get_def(num_t dt,blksz_t * b)37 BLIS_INLINE dim_t bli_blksz_get_def
38 (
39 num_t dt,
40 blksz_t* b
41 )
42 {
43 return b->v[ dt ];
44 }
45
bli_blksz_get_max(num_t dt,blksz_t * b)46 BLIS_INLINE dim_t bli_blksz_get_max
47 (
48 num_t dt,
49 blksz_t* b
50 )
51 {
52 return b->e[ dt ];
53 }
54
55
56 // blksz_t modification
57
bli_blksz_set_def(dim_t val,num_t dt,blksz_t * b)58 BLIS_INLINE void bli_blksz_set_def
59 (
60 dim_t val,
61 num_t dt,
62 blksz_t* b
63 )
64 {
65 b->v[ dt ] = val;
66 }
67
bli_blksz_set_max(dim_t val,num_t dt,blksz_t * b)68 BLIS_INLINE void bli_blksz_set_max
69 (
70 dim_t val,
71 num_t dt,
72 blksz_t* b
73 )
74 {
75 b->e[ dt ] = val;
76 }
77
bli_blksz_copy(blksz_t * b_src,blksz_t * b_dst)78 BLIS_INLINE void bli_blksz_copy
79 (
80 blksz_t* b_src,
81 blksz_t* b_dst
82 )
83 {
84 *b_dst = *b_src;
85 }
86
bli_blksz_copy_if_pos(blksz_t * b_src,blksz_t * b_dst)87 BLIS_INLINE void bli_blksz_copy_if_pos
88 (
89 blksz_t* b_src,
90 blksz_t* b_dst
91 )
92 {
93 // Copy the blocksize values over to b_dst one-by-one so that
94 // we can skip the ones that are non-positive.
95
96 const dim_t v_s = bli_blksz_get_def( BLIS_FLOAT, b_src );
97 const dim_t v_d = bli_blksz_get_def( BLIS_DOUBLE, b_src );
98 const dim_t v_c = bli_blksz_get_def( BLIS_SCOMPLEX, b_src );
99 const dim_t v_z = bli_blksz_get_def( BLIS_DCOMPLEX, b_src );
100
101 const dim_t e_s = bli_blksz_get_max( BLIS_FLOAT, b_src );
102 const dim_t e_d = bli_blksz_get_max( BLIS_DOUBLE, b_src );
103 const dim_t e_c = bli_blksz_get_max( BLIS_SCOMPLEX, b_src );
104 const dim_t e_z = bli_blksz_get_max( BLIS_DCOMPLEX, b_src );
105
106 if ( v_s > 0 ) bli_blksz_set_def( v_s, BLIS_FLOAT, b_dst );
107 if ( v_d > 0 ) bli_blksz_set_def( v_d, BLIS_DOUBLE, b_dst );
108 if ( v_c > 0 ) bli_blksz_set_def( v_c, BLIS_SCOMPLEX, b_dst );
109 if ( v_z > 0 ) bli_blksz_set_def( v_z, BLIS_DCOMPLEX, b_dst );
110
111 if ( e_s > 0 ) bli_blksz_set_max( e_s, BLIS_FLOAT, b_dst );
112 if ( e_d > 0 ) bli_blksz_set_max( e_d, BLIS_DOUBLE, b_dst );
113 if ( e_c > 0 ) bli_blksz_set_max( e_c, BLIS_SCOMPLEX, b_dst );
114 if ( e_z > 0 ) bli_blksz_set_max( e_z, BLIS_DCOMPLEX, b_dst );
115 }
116
bli_blksz_copy_def_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)117 BLIS_INLINE void bli_blksz_copy_def_dt
118 (
119 num_t dt_src, blksz_t* b_src,
120 num_t dt_dst, blksz_t* b_dst
121 )
122 {
123 const dim_t val = bli_blksz_get_def( dt_src, b_src );
124
125 bli_blksz_set_def( val, dt_dst, b_dst );
126 }
127
bli_blksz_copy_max_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)128 BLIS_INLINE void bli_blksz_copy_max_dt
129 (
130 num_t dt_src, blksz_t* b_src,
131 num_t dt_dst, blksz_t* b_dst
132 )
133 {
134 const dim_t val = bli_blksz_get_max( dt_src, b_src );
135
136 bli_blksz_set_max( val, dt_dst, b_dst );
137 }
138
bli_blksz_copy_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)139 BLIS_INLINE void bli_blksz_copy_dt
140 (
141 num_t dt_src, blksz_t* b_src,
142 num_t dt_dst, blksz_t* b_dst
143 )
144 {
145 bli_blksz_copy_def_dt( dt_src, b_src, dt_dst, b_dst );
146 bli_blksz_copy_max_dt( dt_src, b_src, dt_dst, b_dst );
147 }
148
bli_blksz_scale_def(dim_t num,dim_t den,num_t dt,blksz_t * b)149 BLIS_INLINE void bli_blksz_scale_def
150 (
151 dim_t num,
152 dim_t den,
153 num_t dt,
154 blksz_t* b
155 )
156 {
157 const dim_t val = bli_blksz_get_def( dt, b );
158
159 bli_blksz_set_def( ( val * num ) / den, dt, b );
160 }
161
bli_blksz_scale_max(dim_t num,dim_t den,num_t dt,blksz_t * b)162 BLIS_INLINE void bli_blksz_scale_max
163 (
164 dim_t num,
165 dim_t den,
166 num_t dt,
167 blksz_t* b
168 )
169 {
170 const dim_t val = bli_blksz_get_max( dt, b );
171
172 bli_blksz_set_max( ( val * num ) / den, dt, b );
173 }
174
bli_blksz_scale_def_max(dim_t num,dim_t den,num_t dt,blksz_t * b)175 BLIS_INLINE void bli_blksz_scale_def_max
176 (
177 dim_t num,
178 dim_t den,
179 num_t dt,
180 blksz_t* b
181 )
182 {
183 bli_blksz_scale_def( num, den, dt, b );
184 bli_blksz_scale_max( num, den, dt, b );
185 }
186
187 // -----------------------------------------------------------------------------
188
189 BLIS_EXPORT_BLIS blksz_t* bli_blksz_create_ed
190 (
191 dim_t b_s, dim_t be_s,
192 dim_t b_d, dim_t be_d,
193 dim_t b_c, dim_t be_c,
194 dim_t b_z, dim_t be_z
195 );
196
197 BLIS_EXPORT_BLIS blksz_t* bli_blksz_create
198 (
199 dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z,
200 dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
201 );
202
203 BLIS_EXPORT_BLIS void bli_blksz_init_ed
204 (
205 blksz_t* b,
206 dim_t b_s, dim_t be_s,
207 dim_t b_d, dim_t be_d,
208 dim_t b_c, dim_t be_c,
209 dim_t b_z, dim_t be_z
210 );
211
212 BLIS_EXPORT_BLIS void bli_blksz_init
213 (
214 blksz_t* b,
215 dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z,
216 dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
217 );
218
219 BLIS_EXPORT_BLIS void bli_blksz_init_easy
220 (
221 blksz_t* b,
222 dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z
223 );
224
225 BLIS_EXPORT_BLIS void bli_blksz_free
226 (
227 blksz_t* b
228 );
229
230 // -----------------------------------------------------------------------------
231
// Disabled prototype: it sits inside an `#if 0` region, so the compiler
// never sees this declaration.
#if 0
BLIS_EXPORT_BLIS void bli_blksz_reduce_dt_to
     (
       num_t    dt_bm,
       blksz_t* bmult,
       num_t    dt_bs,
       blksz_t* blksz
     );
#endif
239
240 void bli_blksz_reduce_def_to
241 (
242 num_t dt_bm, blksz_t* bmult,
243 num_t dt_bs, blksz_t* blksz
244 );
245
246 void bli_blksz_reduce_max_to
247 (
248 num_t dt_bm, blksz_t* bmult,
249 num_t dt_bs, blksz_t* blksz
250 );
251 // -----------------------------------------------------------------------------
252
253 dim_t bli_determine_blocksize
254 (
255 dir_t direct,
256 dim_t i,
257 dim_t dim,
258 obj_t* obj,
259 bszid_t bszid,
260 cntx_t* cntx
261 );
262
263 dim_t bli_determine_blocksize_f
264 (
265 dim_t i,
266 dim_t dim,
267 obj_t* obj,
268 bszid_t bszid,
269 cntx_t* cntx
270 );
271
272 dim_t bli_determine_blocksize_b
273 (
274 dim_t i,
275 dim_t dim,
276 obj_t* obj,
277 bszid_t bszid,
278 cntx_t* cntx
279 );
280
281 dim_t bli_determine_blocksize_f_sub
282 (
283 dim_t i,
284 dim_t dim,
285 dim_t b_alg,
286 dim_t b_max
287 );
288
289 dim_t bli_determine_blocksize_b_sub
290 (
291 dim_t i,
292 dim_t dim,
293 dim_t b_alg,
294 dim_t b_max
295 );
296
297