1 /*
2 
3    BLIS
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
6 
7    Copyright (C) 2014, The University of Texas at Austin
8 
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name(s) of the copyright holder(s) nor the names of its
18       contributors may be used to endorse or promote products derived
19       from this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 // Guard the function definitions so that they are only compiled when
36 // #included from files that define the typed API macros.
37 #ifdef BLIS_ENABLE_TAPI
38 
39 //
40 // Define BLAS-like interfaces with typed operands.
41 //
42 
43 #undef  GENTFUNC
44 #define GENTFUNC( ctype, ch, opname, auxker ) \
45 \
46 void PASTEMAC2(ch,opname,EX_SUF) \
47      ( \
48        doff_t  diagoffx, \
49        diag_t  diagx, \
50        uplo_t  uplox, \
51        trans_t transx, \
52        dim_t   m, \
53        dim_t   n, \
54        ctype*  x, inc_t rs_x, inc_t cs_x, \
55        ctype*  y, inc_t rs_y, inc_t cs_y  \
56        BLIS_TAPI_EX_PARAMS  \
57      ) \
58 { \
59 	bli_init_once(); \
60 \
61 	BLIS_TAPI_EX_DECLS \
62 \
63 	if ( bli_zero_dim2( m, n ) ) return; \
64 \
65 	/* Obtain a valid context from the gks if necessary. */ \
66 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
67 \
68 	/* Invoke the helper variant, which loops over the appropriate kernel
69 	   to implement the current operation. */ \
70 	PASTEMAC2(ch,opname,_unb_var1) \
71 	( \
72 	  diagoffx, \
73 	  diagx, \
74 	  uplox, \
75 	  transx, \
76 	  m, \
77 	  n, \
78 	  x, rs_x, cs_x, \
79 	  y, rs_y, cs_y, \
80 	  cntx, \
81 	  rntm  \
82 	); \
83 \
84 	/* When the diagonal of an upper- or lower-stored matrix is unit,
85 	   we handle it with a separate post-processing step. */ \
86 	if ( bli_is_upper_or_lower( uplox ) && \
87 	     bli_is_unit_diag( diagx ) ) \
88 	{ \
89 		PASTEMAC2(ch,auxker,BLIS_TAPI_EX_SUF) \
90 		( \
91 		  diagoffx, \
92 		  diagx, \
93 		  transx, \
94 		  m, \
95 		  n, \
96 		  x, rs_x, cs_x, \
97 		  y, rs_y, cs_y, \
98 		  cntx, \
99 		  rntm  \
100 		); \
101 	} \
102 }
103 
104 INSERT_GENTFUNC_BASIC( addm, addd )
105 INSERT_GENTFUNC_BASIC( subm, subd )
106 
107 
108 #undef  GENTFUNC
109 #define GENTFUNC( ctype, ch, opname ) \
110 \
111 void PASTEMAC2(ch,opname,EX_SUF) \
112      ( \
113        doff_t  diagoffx, \
114        diag_t  diagx, \
115        uplo_t  uplox, \
116        trans_t transx, \
117        dim_t   m, \
118        dim_t   n, \
119        ctype*  x, inc_t rs_x, inc_t cs_x, \
120        ctype*  y, inc_t rs_y, inc_t cs_y  \
121        BLIS_TAPI_EX_PARAMS  \
122      ) \
123 { \
124 	bli_init_once(); \
125 \
126 	BLIS_TAPI_EX_DECLS \
127 \
128 	if ( bli_zero_dim2( m, n ) ) return; \
129 \
130 	/* Obtain a valid context from the gks if necessary. */ \
131 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
132 \
133 	/* Invoke the helper variant, which loops over the appropriate kernel
134 	   to implement the current operation. */ \
135 	PASTEMAC2(ch,opname,_unb_var1) \
136 	( \
137 	  diagoffx, \
138 	  diagx, \
139 	  uplox, \
140 	  transx, \
141 	  m, \
142 	  n, \
143 	  x, rs_x, cs_x, \
144 	  y, rs_y, cs_y, \
145 	  cntx, \
146 	  rntm  \
147 	); \
148 \
149 	/* When the diagonal of an upper- or lower-stored matrix is unit,
150 	   we handle it with a separate post-processing step. */ \
151 	if ( bli_is_upper_or_lower( uplox ) && \
152 	     bli_is_unit_diag( diagx ) ) \
153 	{ \
154 		doff_t diagoffy = diagoffx; \
155 		ctype* one      = PASTEMAC(ch,1); \
156 \
157 		if ( bli_does_trans( transx ) ) \
158 			bli_negate_diag_offset( &diagoffy ); \
159 \
160 		PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \
161 		( \
162 		  BLIS_NO_CONJUGATE, \
163 		  diagoffy, \
164 		  m, \
165 		  n, \
166 		  one, \
167 		  y, rs_y, cs_y, \
168 		  cntx, \
169 		  rntm  \
170 		); \
171 	} \
172 }
173 
174 INSERT_GENTFUNC_BASIC0( copym )
175 
176 
177 #undef  GENTFUNC
178 #define GENTFUNC( ctype, ch, opname ) \
179 \
180 void PASTEMAC2(ch,opname,EX_SUF) \
181      ( \
182        doff_t  diagoffx, \
183        diag_t  diagx, \
184        uplo_t  uplox, \
185        trans_t transx, \
186        dim_t   m, \
187        dim_t   n, \
188        ctype*  alpha, \
189        ctype*  x, inc_t rs_x, inc_t cs_x, \
190        ctype*  y, inc_t rs_y, inc_t cs_y  \
191        BLIS_TAPI_EX_PARAMS  \
192      ) \
193 { \
194 	bli_init_once(); \
195 \
196 	BLIS_TAPI_EX_DECLS \
197 \
198 	if ( bli_zero_dim2( m, n ) ) return; \
199 \
200 	/* If alpha is zero, then the entire operation is a no-op. */ \
201 	if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \
202 \
203 	/* Obtain a valid context from the gks if necessary. */ \
204 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
205 \
206 	/* Invoke the helper variant, which loops over the appropriate kernel
207 	   to implement the current operation. */ \
208 	PASTEMAC2(ch,opname,_unb_var1) \
209 	( \
210 	  diagoffx, \
211 	  diagx, \
212 	  uplox, \
213 	  transx, \
214 	  m, \
215 	  n, \
216 	  alpha, \
217 	  x, rs_x, cs_x, \
218 	  y, rs_y, cs_y, \
219 	  cntx, \
220 	  rntm  \
221 	); \
222 \
223 	/* When the diagonal of an upper- or lower-stored matrix is unit,
224 	   we handle it with a separate post-processing step. */ \
225 	if ( bli_is_upper_or_lower( uplox ) && \
226 	     bli_is_unit_diag( diagx ) ) \
227 	{ \
228 		PASTEMAC2(ch,axpyd,BLIS_TAPI_EX_SUF) \
229 		( \
230 		  diagoffx, \
231 		  diagx, \
232 		  transx, \
233 		  m, \
234 		  n, \
235 		  alpha, \
236 		  x, rs_x, cs_x, \
237 		  y, rs_y, cs_y, \
238 		  cntx, \
239 		  rntm  \
240 		); \
241 	} \
242 }
243 
244 INSERT_GENTFUNC_BASIC0( axpym )
245 
246 
247 #undef  GENTFUNC
248 #define GENTFUNC( ctype, ch, opname ) \
249 \
250 void PASTEMAC2(ch,opname,EX_SUF) \
251      ( \
252        doff_t  diagoffx, \
253        diag_t  diagx, \
254        uplo_t  uplox, \
255        trans_t transx, \
256        dim_t   m, \
257        dim_t   n, \
258        ctype*  alpha, \
259        ctype*  x, inc_t rs_x, inc_t cs_x, \
260        ctype*  y, inc_t rs_y, inc_t cs_y  \
261        BLIS_TAPI_EX_PARAMS  \
262      ) \
263 { \
264 	bli_init_once(); \
265 \
266 	BLIS_TAPI_EX_DECLS \
267 \
268 	if ( bli_zero_dim2( m, n ) ) return; \
269 \
270 	/* Obtain a valid context from the gks if necessary. */ \
271 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
272 \
273 	/* If alpha is zero, then we set the output matrix to zero. This
274 	   seemingly minor optimization is important because it will clear
275 	   any NaNs and Infs in x that would otherwise propogate. */ \
276 	if ( PASTEMAC(ch,eq0)( *alpha ) ) \
277 	{ \
278 \
279 		PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \
280 		( \
281 		  BLIS_NO_CONJUGATE, \
282 		  diagoffx, \
283 		  diagx, \
284 		  uplox, \
285 		  m, \
286 		  n, \
287 		  alpha, \
288 		  y, rs_y, cs_y, \
289 		  cntx, \
290 		  rntm  \
291 		); \
292 		return; \
293 	} \
294 \
295 	/* Invoke the helper variant, which loops over the appropriate kernel
296 	   to implement the current operation. */ \
297 	PASTEMAC2(ch,opname,_unb_var1) \
298 	( \
299 	  diagoffx, \
300 	  diagx, \
301 	  uplox, \
302 	  transx, \
303 	  m, \
304 	  n, \
305 	  alpha, \
306 	  x, rs_x, cs_x, \
307 	  y, rs_y, cs_y, \
308 	  cntx, \
309 	  rntm  \
310 	); \
311 \
312 	/* When the diagonal of an upper- or lower-stored matrix is unit,
313 	   we handle it with a separate post-processing step. */ \
314 	if ( bli_is_upper_or_lower( uplox ) && \
315 	     bli_is_unit_diag( diagx ) ) \
316 	{ \
317 		doff_t diagoffy = diagoffx; \
318 \
319 		if ( bli_does_trans( transx ) ) \
320 			bli_negate_diag_offset( &diagoffy ); \
321 \
322 		PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \
323 		( \
324 		  BLIS_NO_CONJUGATE, \
325 		  diagoffy, \
326 		  m, \
327 		  n, \
328 		  alpha, \
329 		  y, rs_y, cs_y, \
330 		  cntx, \
331 		  rntm  \
332 		); \
333 	} \
334 }
335 
336 INSERT_GENTFUNC_BASIC0( scal2m )
337 
338 
339 #undef  GENTFUNC
340 #define GENTFUNC( ctype, ch, opname ) \
341 \
342 void PASTEMAC2(ch,opname,EX_SUF) \
343      ( \
344        conj_t  conjalpha, \
345        doff_t  diagoffx, \
346        diag_t  diagx, \
347        uplo_t  uplox, \
348        dim_t   m, \
349        dim_t   n, \
350        ctype*  alpha, \
351        ctype*  x, inc_t rs_x, inc_t cs_x  \
352        BLIS_TAPI_EX_PARAMS  \
353      ) \
354 { \
355 	bli_init_once(); \
356 \
357 	BLIS_TAPI_EX_DECLS \
358 \
359 	if ( bli_zero_dim2( m, n ) ) return; \
360 \
361 	/* Obtain a valid context from the gks if necessary. */ \
362 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
363 \
364 	/* Invoke the helper variant, which loops over the appropriate kernel
365 	   to implement the current operation. */ \
366 	PASTEMAC2(ch,opname,_unb_var1) \
367 	( \
368 	  conjalpha, \
369 	  diagoffx, \
370 	  diagx, \
371 	  uplox, \
372 	  m, \
373 	  n, \
374 	  alpha, \
375 	  x, rs_x, cs_x, \
376 	  cntx, \
377 	  rntm  \
378 	); \
379 }
380 
381 INSERT_GENTFUNC_BASIC0( scalm )
382 INSERT_GENTFUNC_BASIC0( setm )
383 
384 
385 #undef  GENTFUNC
386 #define GENTFUNC( ctype, ch, opname ) \
387 \
388 void PASTEMAC2(ch,opname,EX_SUF) \
389      ( \
390        doff_t  diagoffx, \
391        diag_t  diagx, \
392        uplo_t  uplox, \
393        trans_t transx, \
394        dim_t   m, \
395        dim_t   n, \
396        ctype*  x, inc_t rs_x, inc_t cs_x, \
397        ctype*  beta, \
398        ctype*  y, inc_t rs_y, inc_t cs_y  \
399        BLIS_TAPI_EX_PARAMS  \
400      ) \
401 { \
402 	bli_init_once(); \
403 \
404 	BLIS_TAPI_EX_DECLS \
405 \
406 	if ( bli_zero_dim2( m, n ) ) return; \
407 \
408 	/* Obtain a valid context from the gks if necessary. */ \
409 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
410 \
411 	/* If beta is zero, then the operation reduces to copym. */ \
412 	if ( PASTEMAC(ch,eq0)( *beta ) ) \
413 	{ \
414 		PASTEMAC2(ch,copym,_unb_var1) \
415 		( \
416 		  diagoffx, \
417 		  diagx, \
418 		  uplox, \
419 		  transx, \
420 		  m, \
421 		  n, \
422 		  x, rs_x, cs_x, \
423 		  y, rs_y, cs_y, \
424 		  cntx, \
425 		  rntm  \
426 		); \
427 \
428 		return; \
429 	} \
430 \
431 	/* Invoke the helper variant, which loops over the appropriate kernel
432 	   to implement the current operation. */ \
433 	PASTEMAC2(ch,opname,_unb_var1) \
434 	( \
435 	  diagoffx, \
436 	  diagx, \
437 	  uplox, \
438 	  transx, \
439 	  m, \
440 	  n, \
441 	  x, rs_x, cs_x, \
442 	  beta, \
443 	  y, rs_y, cs_y, \
444 	  cntx, \
445 	  rntm  \
446 	); \
447 \
448 	/* When the diagonal of an upper- or lower-stored matrix is unit,
449 	   we handle it with a separate post-processing step. */ \
450 	if ( bli_is_upper_or_lower( uplox ) && \
451 	     bli_is_unit_diag( diagx ) ) \
452 	{ \
453 		PASTEMAC2(ch,xpbyd,BLIS_TAPI_EX_SUF) \
454 		( \
455 		  diagoffx, \
456 		  diagx, \
457 		  transx, \
458 		  m, \
459 		  n, \
460 		  x, rs_x, cs_x, \
461 		  beta, \
462 		  y, rs_y, cs_y, \
463 		  cntx, \
464 		  rntm  \
465 		); \
466 	} \
467 }
468 
469 INSERT_GENTFUNC_BASIC0( xpbym )
470 
471 
472 #undef  GENTFUNC2
473 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \
474 \
475 void PASTEMAC3(chx,chy,opname,EX_SUF) \
476      ( \
477        doff_t   diagoffx, \
478        diag_t   diagx, \
479        uplo_t   uplox, \
480        trans_t  transx, \
481        dim_t    m, \
482        dim_t    n, \
483        ctype_x* x, inc_t rs_x, inc_t cs_x, \
484        ctype_y* beta, \
485        ctype_y* y, inc_t rs_y, inc_t cs_y  \
486        BLIS_TAPI_EX_PARAMS  \
487      ) \
488 { \
489 	bli_init_once(); \
490 \
491 	BLIS_TAPI_EX_DECLS \
492 \
493 	if ( bli_zero_dim2( m, n ) ) return; \
494 \
495 	/* Obtain a valid context from the gks if necessary. */ \
496 	if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
497 \
498 	/* If beta is zero, then the operation reduces to copym. */ \
499 	if ( PASTEMAC(chy,eq0)( *beta ) ) \
500 	{ \
501 		PASTEMAC2(chx,chy,castm) \
502 		( \
503 		  transx, \
504 		  m, \
505 		  n, \
506 		  x, rs_x, cs_x, \
507 		  y, rs_y, cs_y  \
508 		); \
509 \
510 		return; \
511 	} \
512 \
513 	/* Invoke the helper variant, which loops over the appropriate kernel
514 	   to implement the current operation. */ \
515 	PASTEMAC3(chx,chy,opname,_unb_var1) \
516 	( \
517 	  diagoffx, \
518 	  diagx, \
519 	  uplox, \
520 	  transx, \
521 	  m, \
522 	  n, \
523 	  x, rs_x, cs_x, \
524 	  beta, \
525 	  y, rs_y, cs_y, \
526 	  cntx, \
527 	  rntm  \
528 	); \
529 }
530 
531 INSERT_GENTFUNC2_BASIC0( xpbym_md )
532 INSERT_GENTFUNC2_MIXDP0( xpbym_md )
533 
534 
535 #endif
536 
537