1 /*
2
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
6
7 Copyright (C) 2014, The University of Texas at Austin
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name(s) of the copyright holder(s) nor the names of its
18 contributors may be used to endorse or promote products derived
19 from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #include "blis.h"
36
bli_blksz_create_ed(dim_t b_s,dim_t be_s,dim_t b_d,dim_t be_d,dim_t b_c,dim_t be_c,dim_t b_z,dim_t be_z)37 blksz_t* bli_blksz_create_ed
38 (
39 dim_t b_s, dim_t be_s,
40 dim_t b_d, dim_t be_d,
41 dim_t b_c, dim_t be_c,
42 dim_t b_z, dim_t be_z
43 )
44 {
45 blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
46
47 bli_blksz_init_ed
48 (
49 b,
50 b_s, be_s,
51 b_d, be_d,
52 b_c, be_c,
53 b_z, be_z
54 );
55
56 return b;
57 }
58
bli_blksz_create(dim_t b_s,dim_t b_d,dim_t b_c,dim_t b_z,dim_t be_s,dim_t be_d,dim_t be_c,dim_t be_z)59 blksz_t* bli_blksz_create
60 (
61 dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z,
62 dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
63 )
64 {
65 blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
66
67 bli_blksz_init
68 (
69 b,
70 b_s, b_d, b_c, b_z,
71 be_s, be_d, be_c, be_z
72 );
73
74 return b;
75 }
76
// Fill an existing blksz_t from (b_X, be_X) pairs, one pair per datatype.
//
// For each of the four datatypes, b_X is written to b->v[] and be_X to
// b->e[], indexed by the corresponding BLIS_* datatype constant. Entries
// for any other index are left untouched. b must point to a valid object.
void bli_blksz_init_ed
     (
       blksz_t* b,
       dim_t    b_s, dim_t be_s,
       dim_t    b_d, dim_t be_d,
       dim_t    b_c, dim_t be_c,
       dim_t    b_z, dim_t be_z
     )
{
	// Single-precision real.
	b->v[BLIS_FLOAT]    = b_s;
	b->e[BLIS_FLOAT]    = be_s;

	// Double-precision real.
	b->v[BLIS_DOUBLE]   = b_d;
	b->e[BLIS_DOUBLE]   = be_d;

	// Single-precision complex.
	b->v[BLIS_SCOMPLEX] = b_c;
	b->e[BLIS_SCOMPLEX] = be_c;

	// Double-precision complex.
	b->v[BLIS_DCOMPLEX] = b_z;
	b->e[BLIS_DCOMPLEX] = be_z;
}
96
// Fill an existing blksz_t from values grouped by kind: the four v[]
// values (b_s, b_d, b_c, b_z) followed by the four e[] values
// (be_s, be_d, be_c, be_z).
//
// The stores performed are exactly those of bli_blksz_init_ed(); only the
// parameter ordering differs, so this simply re-pairs the arguments per
// datatype and forwards them.
void bli_blksz_init
     (
       blksz_t* b,
       dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z,
       dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
     )
{
	bli_blksz_init_ed
	(
	  b,
	  b_s, be_s,
	  b_d, be_d,
	  b_c, be_c,
	  b_z, be_z
	);
}
114
// Fill an existing blksz_t using a single value per datatype: for each of
// the four datatypes the same value is stored in both b->e[] and b->v[].
void bli_blksz_init_easy
     (
       blksz_t* b,
       dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z
     )
{
	// In each pair the e[] entry is written first and the v[] entry is
	// then copied from it, matching the right-to-left evaluation of a
	// chained assignment.
	b->e[BLIS_FLOAT]    = b_s;
	b->v[BLIS_FLOAT]    = b->e[BLIS_FLOAT];

	b->e[BLIS_DOUBLE]   = b_d;
	b->v[BLIS_DOUBLE]   = b->e[BLIS_DOUBLE];

	b->e[BLIS_SCOMPLEX] = b_c;
	b->v[BLIS_SCOMPLEX] = b->e[BLIS_SCOMPLEX];

	b->e[BLIS_DCOMPLEX] = b_z;
	b->v[BLIS_DCOMPLEX] = b->e[BLIS_DCOMPLEX];
}
126
// Release a blksz_t by passing it to bli_free_intl(). This is the
// counterpart to bli_blksz_create() and bli_blksz_create_ed(), both of
// which obtain their storage from bli_malloc_intl().
void bli_blksz_free( blksz_t* b )
{
	bli_free_intl( b );
}
134
135 // -----------------------------------------------------------------------------
136
#if 0
// NOTE: This function is compiled out. It adjusts the default and the
// maximum blocksize in a single call; bli_blksz_reduce_def_to() and
// bli_blksz_reduce_max_to() below perform the same two adjustments
// separately.
void bli_blksz_reduce_dt_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	dim_t       def_val  = bli_blksz_get_def( dt_bs, blksz );
	dim_t       max_val  = bli_blksz_get_max( dt_bs, blksz );

	// Only the "def" entry of bmult is consulted; its "max" entry would
	// correspond to the packing dimension, which plays no role as a
	// blocksize multiple.
	const dim_t multiple = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to do.
	if ( multiple == 0 ) return;

	// Round both values down to the nearest multiple.
	def_val -= def_val % multiple;
	max_val -= max_val % multiple;

	// Never let either value drop below one full multiple.
	if ( def_val == 0 ) def_val = multiple;
	if ( max_val == 0 ) max_val = multiple;

	// Write the adjusted values back to the object.
	bli_blksz_set_def( def_val, dt_bs, blksz );
	bli_blksz_set_max( max_val, dt_bs, blksz );
}
#endif
170
171 // -----------------------------------------------------------------------------
172
// Round the default blocksize of blksz (for datatype dt_bs) down to a
// multiple of the default value stored in bmult (for datatype dt_bm),
// and store the result back into blksz.
//
// If the multiple is zero, blksz is left unmodified. If rounding down
// would produce zero, the multiple itself is stored instead.
void bli_blksz_reduce_def_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	dim_t       reduced  = bli_blksz_get_def( dt_bs, blksz );

	// Only the "def" entry of bmult is consulted; its "max" entry would
	// correspond to the packing dimension, which plays no role as a
	// blocksize multiple.
	const dim_t multiple = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to do.
	if ( multiple == 0 ) return;

	// Drop the remainder, i.e. round down to the nearest multiple.
	reduced -= reduced % multiple;

	// Never let the blocksize drop below one full multiple.
	if ( reduced == 0 ) reduced = multiple;

	// Write the adjusted default blocksize back to the object.
	bli_blksz_set_def( reduced, dt_bs, blksz );
}
200
201 // -----------------------------------------------------------------------------
202
// Round the maximum blocksize of blksz (for datatype dt_bs) down to a
// multiple of the default value stored in bmult (for datatype dt_bm),
// and store the result back into blksz.
//
// If the multiple is zero, blksz is left unmodified. If rounding down
// would produce zero, the multiple itself is stored instead.
void bli_blksz_reduce_max_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	dim_t       reduced  = bli_blksz_get_max( dt_bs, blksz );

	// Only the "def" entry of bmult is consulted; its "max" entry would
	// correspond to the packing dimension, which plays no role as a
	// blocksize multiple.
	const dim_t multiple = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to do.
	if ( multiple == 0 ) return;

	// Drop the remainder, i.e. round down to the nearest multiple.
	reduced -= reduced % multiple;

	// Never let the blocksize drop below one full multiple.
	if ( reduced == 0 ) reduced = multiple;

	// Write the adjusted maximum blocksize back to the object.
	bli_blksz_set_max( reduced, dt_bs, blksz );
}
229
230 // -----------------------------------------------------------------------------
231
// Direction-dispatching front end for blocksize selection.
//
// When direct is BLIS_FWD the request is forwarded to
// bli_determine_blocksize_f(); any other value is forwarded to
// bli_determine_blocksize_b(). The remaining arguments are passed through
// unchanged and the callee's result is returned.
dim_t bli_determine_blocksize
     (
       dir_t    direct,
       dim_t    i,
       dim_t    dim,
       obj_t*   obj,
       bszid_t  bszid,
       cntx_t*  cntx
     )
{
	return ( direct == BLIS_FWD )
	       ? bli_determine_blocksize_f( i, dim, obj, bszid, cntx )
	       : bli_determine_blocksize_b( i, dim, obj, bszid, cntx );
}
247
// Choose the blocksize for the iteration starting at offset i of a
// dimension of length dim, for an algorithm moving in the forward
// direction.
//
// The default and maximum blocksizes are looked up in the context entry
// identified by bszid, using the execution datatype of obj; the actual
// choice is then made by bli_determine_blocksize_f_sub().
dim_t bli_determine_blocksize_f
     (
       dim_t    i,
       dim_t    dim,
       obj_t*   obj,
       bszid_t  bszid,
       cntx_t*  cntx
     )
{
	// The execution datatype selects which per-datatype entries to read.
	const num_t dt     = bli_obj_exec_dt( obj );
	blksz_t*    bsize  = bli_cntx_get_blksz( bszid, cntx );

	// Default ("algorithmic") and maximum blocksizes for that datatype.
	const dim_t b_def  = bli_blksz_get_def( dt, bsize );
	const dim_t b_lim  = bli_blksz_get_max( dt, bsize );

	return bli_determine_blocksize_f_sub( i, dim, b_def, b_lim );
}
273
// Choose the blocksize for the iteration starting at offset i of a
// dimension of length dim, for an algorithm moving in the backward
// direction.
//
// The default and maximum blocksizes are looked up in the context entry
// identified by bszid, using the execution datatype of obj; the actual
// choice is then made by bli_determine_blocksize_b_sub().
dim_t bli_determine_blocksize_b
     (
       dim_t    i,
       dim_t    dim,
       obj_t*   obj,
       bszid_t  bszid,
       cntx_t*  cntx
     )
{
	// The execution datatype selects which per-datatype entries to read.
	const num_t dt     = bli_obj_exec_dt( obj );
	blksz_t*    bsize  = bli_cntx_get_blksz( bszid, cntx );

	// Default ("algorithmic") and maximum blocksizes for that datatype.
	const dim_t b_def  = bli_blksz_get_def( dt, bsize );
	const dim_t b_lim  = bli_blksz_get_max( dt, bsize );

	return bli_determine_blocksize_b_sub( i, dim, b_def, b_lim );
}
299
// Blocksize selection for an algorithm moving "forward" (ie: top to
// bottom, left to right, top-left to bottom-right).
//
//   i      offset of the current iteration within the dimension
//   dim    total length of the dimension
//   b_alg  default blocksize
//   b_max  maximum blocksize
//
// Returns dim - i when that remainder is less than or equal to b_max,
// and b_alg otherwise.
dim_t bli_determine_blocksize_f_sub
     (
       dim_t  i,
       dim_t  dim,
       dim_t  b_alg,
       dim_t  b_max
     )
{
	// How much of the dimension is left, including the chunk whose size
	// is being decided right now.
	const dim_t remaining = dim - i;

	// If everything that is left fits within the maximum blocksize, take
	// it all in one chunk; otherwise take a default-sized chunk.
	return ( remaining <= b_max ) ? remaining : b_alg;
}
333
// Blocksize selection for an algorithm moving "backward" (ie: bottom to
// top, right to left, bottom-right to top-left).
//
//   i      offset of the current iteration within the dimension
//   dim    total length of the dimension
//   b_alg  default blocksize; used as a divisor, so it must be nonzero
//          whenever dim - i is nonzero
//   b_max  maximum blocksize
//
// With left = dim - i and edge = left % b_alg, the result is:
//   0              if left == 0
//   b_alg          if edge == 0
//   left           if left <= b_max
//   b_alg + edge   if edge <= b_max - b_alg
//   edge           otherwise
// where the first matching case applies.
dim_t bli_determine_blocksize_b_sub
     (
       dim_t  i,
       dim_t  dim,
       dim_t  b_alg,
       dim_t  b_max
     )
{
	// How much of the dimension is left, including the chunk whose size
	// is being decided right now.
	const dim_t remaining = dim - i;

	// Nothing left: return zero without going any further (this also
	// keeps the modulo below from being evaluated).
	if ( remaining == 0 ) return 0;

	// Size of the partial ("edge") chunk that is left over once the
	// remainder is carved into b_alg-sized pieces.
	const dim_t edge = remaining % b_alg;

	// The remainder is an exact multiple of b_alg, so a full default
	// chunk can be used.
	if ( edge == 0 ) return b_alg;

	// Everything that is left fits within the maximum blocksize, so take
	// it all at once.
	if ( remaining <= b_max ) return remaining;

	// From here on, remaining > b_max. If the edge chunk is small enough
	// to be absorbed into a default chunk without exceeding b_max, merge
	// the two.
	if ( edge <= b_max - b_alg ) return b_alg + edge;

	// Otherwise the edge chunk is handled on its own.
	return edge;
}
391
392