1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #include "blis.h"
36 
bli_blksz_create_ed(dim_t b_s,dim_t be_s,dim_t b_d,dim_t be_d,dim_t b_c,dim_t be_c,dim_t b_z,dim_t be_z)37 blksz_t* bli_blksz_create_ed
38      (
39        dim_t b_s, dim_t be_s,
40        dim_t b_d, dim_t be_d,
41        dim_t b_c, dim_t be_c,
42        dim_t b_z, dim_t be_z
43      )
44 {
45 	blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
46 
47 	bli_blksz_init_ed
48 	(
49 	  b,
50 	  b_s, be_s,
51 	  b_d, be_d,
52 	  b_c, be_c,
53 	  b_z, be_z
54 	);
55 
56 	return b;
57 }
58 
bli_blksz_create(dim_t b_s,dim_t b_d,dim_t b_c,dim_t b_z,dim_t be_s,dim_t be_d,dim_t be_c,dim_t be_z)59 blksz_t* bli_blksz_create
60      (
61        dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z,
62        dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
63      )
64 {
65 	blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) );
66 
67 	bli_blksz_init
68 	(
69 	  b,
70 	  b_s,  b_d,  b_c,  b_z,
71 	  be_s, be_d, be_c, be_z
72 	);
73 
74 	return b;
75 }
76 
// Fill in an existing blksz_t from (b_?, be_?) pairs, one pair per datatype.
//
// For each datatype constant (BLIS_FLOAT, BLIS_DOUBLE, BLIS_SCOMPLEX,
// BLIS_DCOMPLEX) the b_? argument is stored in b->v[] and the be_? argument
// in b->e[]. The stores are the same ones bli_blksz_init() performs; only
// the order of the parameters differs between the two functions.
void bli_blksz_init_ed
     (
       blksz_t* b,
       dim_t b_s, dim_t be_s,
       dim_t b_d, dim_t be_d,
       dim_t b_c, dim_t be_c,
       dim_t b_z, dim_t be_z
     )
{
	// Single-precision real.
	b->v[BLIS_FLOAT] = b_s;
	b->e[BLIS_FLOAT] = be_s;

	// Double-precision real.
	b->v[BLIS_DOUBLE] = b_d;
	b->e[BLIS_DOUBLE] = be_d;

	// Single-precision complex.
	b->v[BLIS_SCOMPLEX] = b_c;
	b->e[BLIS_SCOMPLEX] = be_c;

	// Double-precision complex.
	b->v[BLIS_DCOMPLEX] = b_z;
	b->e[BLIS_DCOMPLEX] = be_z;
}
96 
// Fill in an existing blksz_t from four b_? values followed by four be_?
// values, each group listed in the order s, d, c, z.
//
// For each datatype constant (BLIS_FLOAT, BLIS_DOUBLE, BLIS_SCOMPLEX,
// BLIS_DCOMPLEX) the b_? argument is stored in b->v[] and the be_? argument
// in b->e[].
void bli_blksz_init
     (
       blksz_t* b,
       dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z,
       dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
     )
{
	// Single-precision real.
	b->v[BLIS_FLOAT] = b_s;
	b->e[BLIS_FLOAT] = be_s;

	// Double-precision real.
	b->v[BLIS_DOUBLE] = b_d;
	b->e[BLIS_DOUBLE] = be_d;

	// Single-precision complex.
	b->v[BLIS_SCOMPLEX] = b_c;
	b->e[BLIS_SCOMPLEX] = be_c;

	// Double-precision complex.
	b->v[BLIS_DCOMPLEX] = b_z;
	b->e[BLIS_DCOMPLEX] = be_z;
}
114 
// Fill in an existing blksz_t using one value per datatype: for each of
// BLIS_FLOAT, BLIS_DOUBLE, BLIS_SCOMPLEX and BLIS_DCOMPLEX, the given value
// is stored in b->e[] and the value just stored there is then copied into
// b->v[], so both entries end up holding the same number.
void bli_blksz_init_easy
     (
       blksz_t* b,
       dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z
     )
{
	b->e[BLIS_FLOAT]    = b_s;
	b->v[BLIS_FLOAT]    = b->e[BLIS_FLOAT];

	b->e[BLIS_DOUBLE]   = b_d;
	b->v[BLIS_DOUBLE]   = b->e[BLIS_DOUBLE];

	b->e[BLIS_SCOMPLEX] = b_c;
	b->v[BLIS_SCOMPLEX] = b->e[BLIS_SCOMPLEX];

	b->e[BLIS_DCOMPLEX] = b_z;
	b->v[BLIS_DCOMPLEX] = b->e[BLIS_DCOMPLEX];
}
126 
// Release a blksz_t by handing it to bli_free_intl().
//
// NOTE(review): presumably meant for objects obtained from
// bli_blksz_create() or bli_blksz_create_ed(), which allocate with
// bli_malloc_intl() -- confirm before using it on other blksz_t storage.
void bli_blksz_free
     (
       blksz_t* b
     )
{
	bli_free_intl( b );
}
134 
135 // -----------------------------------------------------------------------------
136 
#if 0
// Disabled at compile time. Combined variant that rounds BOTH the default
// and the maximum entry of blksz (for datatype dt_bs) down to a multiple of
// the default entry of bmult (for datatype dt_bm). The enabled functions
// bli_blksz_reduce_def_to() and bli_blksz_reduce_max_to() each perform one
// half of this work.
void bli_blksz_reduce_dt_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	const dim_t cur_def = bli_blksz_get_def( dt_bs, blksz );
	const dim_t cur_max = bli_blksz_get_max( dt_bs, blksz );

	// Only the default entry of bmult is consulted. (Per the original
	// author: its "max" entry would correspond to the packing dimension,
	// which plays no role as a blocksize multiple.)
	const dim_t mult = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to round to; leave blksz alone.
	if ( mult == 0 ) return;

	// Integer division truncates, so these are the largest multiples of
	// mult not exceeding the current values.
	const dim_t floor_def = ( cur_def / mult ) * mult;
	const dim_t floor_max = ( cur_max / mult ) * mult;

	// A result of zero is replaced by the multiple itself.
	const dim_t new_def = ( floor_def == 0 ) ? mult : floor_def;
	const dim_t new_max = ( floor_max == 0 ) ? mult : floor_max;

	// Write both adjusted values back into the object.
	bli_blksz_set_def( new_def, dt_bs, blksz );
	bli_blksz_set_max( new_max, dt_bs, blksz );
}
#endif
170 
171 // -----------------------------------------------------------------------------
172 
// Round the default entry of blksz (for datatype dt_bs) down to a multiple
// of the default entry of bmult (for datatype dt_bm), and store the result
// back into blksz. The maximum entry of blksz is not touched.
//
// If the multiple is zero the function returns without modifying blksz. If
// rounding down yields zero, the multiple itself is stored instead.
void bli_blksz_reduce_def_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	const dim_t cur_def = bli_blksz_get_def( dt_bs, blksz );

	// Only the default entry of bmult is consulted. (Per the original
	// author: its "max" entry would correspond to the packing dimension,
	// which plays no role as a blocksize multiple.)
	const dim_t mult = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to round to.
	if ( mult == 0 ) return;

	// Integer division truncates, so this is the largest multiple of mult
	// that does not exceed the current default.
	const dim_t floored = ( cur_def / mult ) * mult;

	// Never store zero: fall back to the multiple itself.
	const dim_t new_def = ( floored == 0 ) ? mult : floored;

	bli_blksz_set_def( new_def, dt_bs, blksz );
}
200 
201 // -----------------------------------------------------------------------------
202 
// Round the maximum entry of blksz (for datatype dt_bs) down to a multiple
// of the default entry of bmult (for datatype dt_bm), and store the result
// back into blksz. The default entry of blksz is not touched.
//
// If the multiple is zero the function returns without modifying blksz. If
// rounding down yields zero, the multiple itself is stored instead.
void bli_blksz_reduce_max_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     )
{
	const dim_t cur_max = bli_blksz_get_max( dt_bs, blksz );

	// Only the default entry of bmult is consulted. (Per the original
	// author: its "max" entry would correspond to the packing dimension,
	// which plays no role as a blocksize multiple.)
	const dim_t mult = bli_blksz_get_def( dt_bm, bmult );

	// A zero multiple means there is nothing to round to.
	if ( mult == 0 ) return;

	// Integer division truncates, so this is the largest multiple of mult
	// that does not exceed the current maximum.
	const dim_t floored = ( cur_max / mult ) * mult;

	// Never store zero: fall back to the multiple itself.
	const dim_t new_max = ( floored == 0 ) ? mult : floored;

	bli_blksz_set_max( new_max, dt_bs, blksz );
}
229 
230 // -----------------------------------------------------------------------------
231 
// Direction-dispatching front end: when direct equals BLIS_FWD the request
// is forwarded to bli_determine_blocksize_f(); for every other value of
// direct it is forwarded to bli_determine_blocksize_b(). The remaining
// arguments are passed through unchanged and the callee's result is
// returned.
dim_t bli_determine_blocksize
     (
       dir_t   direct,
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	return ( direct == BLIS_FWD )
	       ? bli_determine_blocksize_f( i, dim, obj, bszid, cntx )
	       : bli_determine_blocksize_b( i, dim, obj, bszid, cntx );
}
247 
// Forward-direction blocksize query.
//
// Looks up the blksz_t registered under bszid in cntx, reads its default and
// maximum entries for the execution datatype of obj, and lets
// bli_determine_blocksize_f_sub() pick the size of the block starting at
// offset i within a dimension of length dim.
dim_t bli_determine_blocksize_f
     (
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	// The execution datatype selects which entries of the blksz_t apply.
	const num_t dt    = bli_obj_exec_dt( obj );
	blksz_t*    blksz = bli_cntx_get_blksz( bszid, cntx );

	const dim_t b_def = bli_blksz_get_def( dt, blksz );
	const dim_t b_max = bli_blksz_get_max( dt, blksz );

	return bli_determine_blocksize_f_sub( i, dim, b_def, b_max );
}
273 
// Backward-direction blocksize query.
//
// Looks up the blksz_t registered under bszid in cntx, reads its default and
// maximum entries for the execution datatype of obj, and lets
// bli_determine_blocksize_b_sub() pick the size of the block at offset i
// within a dimension of length dim.
dim_t bli_determine_blocksize_b
     (
       dim_t   i,
       dim_t   dim,
       obj_t*  obj,
       bszid_t bszid,
       cntx_t* cntx
     )
{
	// The execution datatype selects which entries of the blksz_t apply.
	const num_t dt    = bli_obj_exec_dt( obj );
	blksz_t*    blksz = bli_cntx_get_blksz( bszid, cntx );

	const dim_t b_def = bli_blksz_get_def( dt, blksz );
	const dim_t b_max = bli_blksz_get_max( dt, blksz );

	return bli_determine_blocksize_b_sub( i, dim, b_def, b_max );
}
299 
// Pick the size of the next block for an algorithm moving "forward" (top to
// bottom, left to right, top-left to bottom-right).
//
//   i      offset of the block within the dimension
//   dim    total length of the dimension
//   b_alg  default blocksize
//   b_max  maximum blocksize
//
// Let left = dim - i, i.e. what remains including the block being sized now.
// If left is less than or equal to b_max, all of it is taken as one block;
// otherwise the block is b_alg.
dim_t bli_determine_blocksize_f_sub
     (
       dim_t  i,
       dim_t  dim,
       dim_t  b_alg,
       dim_t  b_max
     )
{
	const dim_t left = dim - i;

	// Whatever fits within the maximum is consumed in one go.
	return ( left <= b_max ) ? left : b_alg;
}
333 
// Pick the size of the next block for an algorithm moving "backward" (bottom
// to top, right to left, bottom-right to top-left).
//
//   i      offset of the block within the dimension
//   dim    total length of the dimension
//   b_alg  default blocksize
//   b_max  maximum blocksize
//
// Let left = dim - i (what remains, including the block being sized now) and
// edge = left % b_alg. The result is, testing in this order:
//
//   0             if left == 0
//   b_alg         if edge == 0
//   left          if left <= b_max
//   b_alg + edge  if edge <= b_max - b_alg
//   edge          otherwise
//
// NOTE(review): when left != 0 and b_alg == 0 the remainder below is
// undefined; callers presumably always supply a nonzero b_alg -- confirm.
dim_t bli_determine_blocksize_b_sub
     (
       dim_t  i,
       dim_t  dim,
       dim_t  b_alg,
       dim_t  b_max
     )
{
	const dim_t left = dim - i;

	// Nothing remains, so there is no block to size.
	if ( left == 0 ) return 0;

	const dim_t edge = left % b_alg;

	// The remainder splits evenly into default-sized blocks.
	if ( edge == 0 ) return b_alg;

	// Everything that remains fits within the maximum: take it all.
	if ( left <= b_max ) return left;

	// More than b_max remains. If the uneven part is small enough to be
	// merged with one default-sized block without exceeding b_max, take
	// them together.
	if ( edge <= b_max - b_alg ) return b_alg + edge;

	// Otherwise the uneven part is handled as a block of its own.
	return edge;
}
391 
392