/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
34 
// blksz_t query
36 
bli_blksz_get_def(num_t dt,blksz_t * b)37 BLIS_INLINE dim_t bli_blksz_get_def
38      (
39        num_t    dt,
40        blksz_t* b
41      )
42 {
43 	return b->v[ dt ];
44 }
45 
bli_blksz_get_max(num_t dt,blksz_t * b)46 BLIS_INLINE dim_t bli_blksz_get_max
47      (
48        num_t    dt,
49        blksz_t* b
50      )
51 {
52 	return b->e[ dt ];
53 }
54 
55 
// blksz_t modification
57 
bli_blksz_set_def(dim_t val,num_t dt,blksz_t * b)58 BLIS_INLINE void bli_blksz_set_def
59      (
60        dim_t    val,
61        num_t    dt,
62        blksz_t* b
63      )
64 {
65 	b->v[ dt ] = val;
66 }
67 
bli_blksz_set_max(dim_t val,num_t dt,blksz_t * b)68 BLIS_INLINE void bli_blksz_set_max
69      (
70        dim_t    val,
71        num_t    dt,
72        blksz_t* b
73      )
74 {
75 	b->e[ dt ] = val;
76 }
77 
bli_blksz_copy(blksz_t * b_src,blksz_t * b_dst)78 BLIS_INLINE void bli_blksz_copy
79      (
80        blksz_t* b_src,
81        blksz_t* b_dst
82      )
83 {
84 	*b_dst = *b_src;
85 }
86 
bli_blksz_copy_if_pos(blksz_t * b_src,blksz_t * b_dst)87 BLIS_INLINE void bli_blksz_copy_if_pos
88      (
89        blksz_t* b_src,
90        blksz_t* b_dst
91      )
92 {
93 	// Copy the blocksize values over to b_dst one-by-one so that
94 	// we can skip the ones that are non-positive.
95 
96 	const dim_t v_s = bli_blksz_get_def( BLIS_FLOAT,    b_src );
97 	const dim_t v_d = bli_blksz_get_def( BLIS_DOUBLE,   b_src );
98 	const dim_t v_c = bli_blksz_get_def( BLIS_SCOMPLEX, b_src );
99 	const dim_t v_z = bli_blksz_get_def( BLIS_DCOMPLEX, b_src );
100 
101 	const dim_t e_s = bli_blksz_get_max( BLIS_FLOAT,    b_src );
102 	const dim_t e_d = bli_blksz_get_max( BLIS_DOUBLE,   b_src );
103 	const dim_t e_c = bli_blksz_get_max( BLIS_SCOMPLEX, b_src );
104 	const dim_t e_z = bli_blksz_get_max( BLIS_DCOMPLEX, b_src );
105 
106 	if ( v_s > 0 ) bli_blksz_set_def( v_s, BLIS_FLOAT,    b_dst );
107 	if ( v_d > 0 ) bli_blksz_set_def( v_d, BLIS_DOUBLE,   b_dst );
108 	if ( v_c > 0 ) bli_blksz_set_def( v_c, BLIS_SCOMPLEX, b_dst );
109 	if ( v_z > 0 ) bli_blksz_set_def( v_z, BLIS_DCOMPLEX, b_dst );
110 
111 	if ( e_s > 0 ) bli_blksz_set_max( e_s, BLIS_FLOAT,    b_dst );
112 	if ( e_d > 0 ) bli_blksz_set_max( e_d, BLIS_DOUBLE,   b_dst );
113 	if ( e_c > 0 ) bli_blksz_set_max( e_c, BLIS_SCOMPLEX, b_dst );
114 	if ( e_z > 0 ) bli_blksz_set_max( e_z, BLIS_DCOMPLEX, b_dst );
115 }
116 
bli_blksz_copy_def_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)117 BLIS_INLINE void bli_blksz_copy_def_dt
118      (
119        num_t dt_src, blksz_t* b_src,
120        num_t dt_dst, blksz_t* b_dst
121      )
122 {
123 	const dim_t val = bli_blksz_get_def( dt_src, b_src );
124 
125 	bli_blksz_set_def( val, dt_dst, b_dst );
126 }
127 
bli_blksz_copy_max_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)128 BLIS_INLINE void bli_blksz_copy_max_dt
129      (
130        num_t dt_src, blksz_t* b_src,
131        num_t dt_dst, blksz_t* b_dst
132      )
133 {
134 	const dim_t val = bli_blksz_get_max( dt_src, b_src );
135 
136 	bli_blksz_set_max( val, dt_dst, b_dst );
137 }
138 
bli_blksz_copy_dt(num_t dt_src,blksz_t * b_src,num_t dt_dst,blksz_t * b_dst)139 BLIS_INLINE void bli_blksz_copy_dt
140      (
141        num_t dt_src, blksz_t* b_src,
142        num_t dt_dst, blksz_t* b_dst
143      )
144 {
145 	bli_blksz_copy_def_dt( dt_src, b_src, dt_dst, b_dst );
146 	bli_blksz_copy_max_dt( dt_src, b_src, dt_dst, b_dst );
147 }
148 
bli_blksz_scale_def(dim_t num,dim_t den,num_t dt,blksz_t * b)149 BLIS_INLINE void bli_blksz_scale_def
150      (
151        dim_t    num,
152        dim_t    den,
153        num_t    dt,
154        blksz_t* b
155      )
156 {
157 	const dim_t val = bli_blksz_get_def( dt, b );
158 
159 	bli_blksz_set_def( ( val * num ) / den, dt, b );
160 }
161 
bli_blksz_scale_max(dim_t num,dim_t den,num_t dt,blksz_t * b)162 BLIS_INLINE void bli_blksz_scale_max
163      (
164        dim_t    num,
165        dim_t    den,
166        num_t    dt,
167        blksz_t* b
168      )
169 {
170 	const dim_t val = bli_blksz_get_max( dt, b );
171 
172 	bli_blksz_set_max( ( val * num ) / den, dt, b );
173 }
174 
bli_blksz_scale_def_max(dim_t num,dim_t den,num_t dt,blksz_t * b)175 BLIS_INLINE void bli_blksz_scale_def_max
176      (
177        dim_t    num,
178        dim_t    den,
179        num_t    dt,
180        blksz_t* b
181      )
182 {
183 	bli_blksz_scale_def( num, den, dt, b );
184 	bli_blksz_scale_max( num, den, dt, b );
185 }
186 
// -----------------------------------------------------------------------------
188 
189 BLIS_EXPORT_BLIS blksz_t* bli_blksz_create_ed
190      (
191        dim_t b_s, dim_t be_s,
192        dim_t b_d, dim_t be_d,
193        dim_t b_c, dim_t be_c,
194        dim_t b_z, dim_t be_z
195      );
196 
197 BLIS_EXPORT_BLIS blksz_t* bli_blksz_create
198      (
199        dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z,
200        dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
201      );
202 
203 BLIS_EXPORT_BLIS void bli_blksz_init_ed
204      (
205        blksz_t* b,
206        dim_t    b_s, dim_t be_s,
207        dim_t    b_d, dim_t be_d,
208        dim_t    b_c, dim_t be_c,
209        dim_t    b_z, dim_t be_z
210      );
211 
212 BLIS_EXPORT_BLIS void bli_blksz_init
213      (
214        blksz_t* b,
215        dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z,
216        dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z
217      );
218 
219 BLIS_EXPORT_BLIS void bli_blksz_init_easy
220      (
221        blksz_t* b,
222        dim_t b_s,  dim_t b_d,  dim_t b_c,  dim_t b_z
223      );
224 
225 BLIS_EXPORT_BLIS void bli_blksz_free
226      (
227        blksz_t* b
228      );
229 
// -----------------------------------------------------------------------------
231 
// Disabled declaration: the #if 0 guard keeps it out of the build.
#if 0
BLIS_EXPORT_BLIS void bli_blksz_reduce_dt_to
     (
       num_t dt_bm, blksz_t* bmult,
       num_t dt_bs, blksz_t* blksz
     );
#endif
239 
240 void bli_blksz_reduce_def_to
241      (
242        num_t dt_bm, blksz_t* bmult,
243        num_t dt_bs, blksz_t* blksz
244      );
245 
246 void bli_blksz_reduce_max_to
247      (
248        num_t dt_bm, blksz_t* bmult,
249        num_t dt_bs, blksz_t* blksz
250      );
// -----------------------------------------------------------------------------
252 
253 dim_t bli_determine_blocksize
254      (
255        dir_t   direct,
256        dim_t   i,
257        dim_t   dim,
258        obj_t*  obj,
259        bszid_t bszid,
260        cntx_t* cntx
261      );
262 
263 dim_t bli_determine_blocksize_f
264      (
265        dim_t   i,
266        dim_t   dim,
267        obj_t*  obj,
268        bszid_t bszid,
269        cntx_t* cntx
270      );
271 
272 dim_t bli_determine_blocksize_b
273      (
274        dim_t   i,
275        dim_t   dim,
276        obj_t*  obj,
277        bszid_t bszid,
278        cntx_t* cntx
279      );
280 
281 dim_t bli_determine_blocksize_f_sub
282      (
283        dim_t  i,
284        dim_t  dim,
285        dim_t  b_alg,
286        dim_t  b_max
287      );
288 
289 dim_t bli_determine_blocksize_b_sub
290      (
291        dim_t  i,
292        dim_t  dim,
293        dim_t  b_alg,
294        dim_t  b_max
295      );
296 
297