/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Guard the function definitions so that they are only compiled when
// #included from files that define the typed API macros.
#ifdef BLIS_ENABLE_TAPI

//
// Define BLAS-like interfaces with typed operands.
//

// Every generator below follows the same skeleton:
//   1. bli_init_once() and BLIS_TAPI_EX_DECLS,
//   2. early return when bli_zero_dim2( m, n ) is true,
//   3. substitute bli_gks_query_cntx() for a NULL cntx,
//   4. call the operation's _unb_var1 variant,
//   5. where applicable, a separate step for the diagonal when x is
//      upper- or lower-stored with a unit diagonal.
// Only /* */ comments are used inside the macro bodies, since a // comment
// would swallow the line continuation that follows it.


// addm, subm: run <opname>_unb_var1 and then, for a unit diagonal on an
// upper- or lower-stored x, invoke the auxiliary operation named by auxker
// (addd for addm, subd for subm).
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname, auxker ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      x, rs_x, cs_x, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
\
    /* When the diagonal of an upper- or lower-stored matrix is unit, \
       we handle it with a separate post-processing step. */ \
    if ( bli_is_upper_or_lower( uplox ) && \
         bli_is_unit_diag( diagx ) ) \
    { \
        PASTEMAC2(ch,auxker,BLIS_TAPI_EX_SUF) \
        ( \
          diagoffx, \
          diagx, \
          transx, \
          m, \
          n, \
          x, rs_x, cs_x, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
    } \
}

INSERT_GENTFUNC_BASIC( addm, addd )
INSERT_GENTFUNC_BASIC( subm, subd )


// copym: run copym_unb_var1 and then, for a unit diagonal on an upper- or
// lower-stored x, set the corresponding diagonal of y to one via setd. The
// diagonal offset is negated first when transx requests a transposition.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      x, rs_x, cs_x, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
\
    /* When the diagonal of an upper- or lower-stored matrix is unit, \
       we handle it with a separate post-processing step. */ \
    if ( bli_is_upper_or_lower( uplox ) && \
         bli_is_unit_diag( diagx ) ) \
    { \
        doff_t diagoffy = diagoffx; \
        ctype* one      = PASTEMAC(ch,1); \
\
        /* Express the diagonal offset relative to y. */ \
        if ( bli_does_trans( transx ) ) \
            bli_negate_diag_offset( &diagoffy ); \
\
        PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \
        ( \
          BLIS_NO_CONJUGATE, \
          diagoffy, \
          m, \
          n, \
          one, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
    } \
}

INSERT_GENTFUNC_BASIC0( copym )


// axpym: return immediately when *alpha is zero; otherwise run
// axpym_unb_var1 and then, for a unit diagonal on an upper- or lower-stored
// x, invoke axpyd.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* If alpha is zero, then the entire operation is a no-op. */ \
    if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      alpha, \
      x, rs_x, cs_x, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
\
    /* When the diagonal of an upper- or lower-stored matrix is unit, \
       we handle it with a separate post-processing step. */ \
    if ( bli_is_upper_or_lower( uplox ) && \
         bli_is_unit_diag( diagx ) ) \
    { \
        PASTEMAC2(ch,axpyd,BLIS_TAPI_EX_SUF) \
        ( \
          diagoffx, \
          diagx, \
          transx, \
          m, \
          n, \
          alpha, \
          x, rs_x, cs_x, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
    } \
}

INSERT_GENTFUNC_BASIC0( axpym )


// scal2m: when *alpha is zero, write alpha into y with setm and return;
// otherwise run scal2m_unb_var1 and then, for a unit diagonal on an upper-
// or lower-stored x, set the corresponding diagonal of y to alpha via setd.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* If alpha is zero, then we set the output matrix to zero. This \
       seemingly minor optimization is important because it will clear \
       any NaNs and Infs in x that would otherwise propagate. */ \
    /* NOTE(review): diagoffx and uplox are forwarded unchanged here even \
       when transx requests a transposition, whereas the unit-diagonal \
       step below negates the offset first -- confirm this is intended. */ \
    if ( PASTEMAC(ch,eq0)( *alpha ) ) \
    { \
        PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \
        ( \
          BLIS_NO_CONJUGATE, \
          diagoffx, \
          diagx, \
          uplox, \
          m, \
          n, \
          alpha, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
        return; \
    } \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      alpha, \
      x, rs_x, cs_x, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
\
    /* When the diagonal of an upper- or lower-stored matrix is unit, \
       we handle it with a separate post-processing step. */ \
    if ( bli_is_upper_or_lower( uplox ) && \
         bli_is_unit_diag( diagx ) ) \
    { \
        doff_t diagoffy = diagoffx; \
\
        /* Express the diagonal offset relative to y. */ \
        if ( bli_does_trans( transx ) ) \
            bli_negate_diag_offset( &diagoffy ); \
\
        PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \
        ( \
          BLIS_NO_CONJUGATE, \
          diagoffy, \
          m, \
          n, \
          alpha, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
    } \
}

INSERT_GENTFUNC_BASIC0( scal2m )


// scalm, setm: single-operand operations; after the common preamble they
// simply forward every argument to <opname>_unb_var1.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       conj_t  conjalpha, \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t rs_x, inc_t cs_x \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      conjalpha, \
      diagoffx, \
      diagx, \
      uplox, \
      m, \
      n, \
      alpha, \
      x, rs_x, cs_x, \
      cntx, \
      rntm \
    ); \
}

INSERT_GENTFUNC_BASIC0( scalm )
INSERT_GENTFUNC_BASIC0( setm )


// xpbym: when *beta is zero, delegate to the copym typed interface and
// return; otherwise run xpbym_unb_var1 and then, for a unit diagonal on an
// upper- or lower-stored x, invoke xpbyd.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  beta, \
       ctype*  y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* If beta is zero, then the operation reduces to copym. Go through \
       the copym typed interface rather than calling copym_unb_var1 \
       directly: the interface also performs the unit-diagonal \
       post-processing step, which the bare variant call would skip. */ \
    if ( PASTEMAC(ch,eq0)( *beta ) ) \
    { \
        PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \
        ( \
          diagoffx, \
          diagx, \
          uplox, \
          transx, \
          m, \
          n, \
          x, rs_x, cs_x, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
\
        return; \
    } \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC2(ch,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      x, rs_x, cs_x, \
      beta, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
\
    /* When the diagonal of an upper- or lower-stored matrix is unit, \
       we handle it with a separate post-processing step. */ \
    if ( bli_is_upper_or_lower( uplox ) && \
         bli_is_unit_diag( diagx ) ) \
    { \
        PASTEMAC2(ch,xpbyd,BLIS_TAPI_EX_SUF) \
        ( \
          diagoffx, \
          diagx, \
          transx, \
          m, \
          n, \
          x, rs_x, cs_x, \
          beta, \
          y, rs_y, cs_y, \
          cntx, \
          rntm \
        ); \
    } \
}

INSERT_GENTFUNC_BASIC0( xpbym )


// xpbym_md: variant of xpbym in which x and y (and beta) may have different
// element types. When *beta is zero it calls castm on x and y and returns;
// otherwise it forwards to xpbym_md_unb_var1. No diagonal post-processing
// step is performed here.
#undef  GENTFUNC2
#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \
\
void PASTEMAC3(chx,chy,opname,EX_SUF) \
     ( \
       doff_t   diagoffx, \
       diag_t   diagx, \
       uplo_t   uplox, \
       trans_t  transx, \
       dim_t    m, \
       dim_t    n, \
       ctype_x* x, inc_t rs_x, inc_t cs_x, \
       ctype_y* beta, \
       ctype_y* y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     ) \
{ \
    bli_init_once(); \
\
    BLIS_TAPI_EX_DECLS \
\
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* Obtain a valid context from the gks if necessary. */ \
    if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
    /* If beta is zero, then the operation reduces to a copy of x into y, \
       which is performed with castm since the two types may differ. */ \
    if ( PASTEMAC(chy,eq0)( *beta ) ) \
    { \
        PASTEMAC2(chx,chy,castm) \
        ( \
          transx, \
          m, \
          n, \
          x, rs_x, cs_x, \
          y, rs_y, cs_y \
        ); \
\
        return; \
    } \
\
    /* Invoke the helper variant, which loops over the appropriate kernel \
       to implement the current operation. */ \
    PASTEMAC3(chx,chy,opname,_unb_var1) \
    ( \
      diagoffx, \
      diagx, \
      uplox, \
      transx, \
      m, \
      n, \
      x, rs_x, cs_x, \
      beta, \
      y, rs_y, cs_y, \
      cntx, \
      rntm \
    ); \
}

INSERT_GENTFUNC2_BASIC0( xpbym_md )
INSERT_GENTFUNC2_MIXDP0( xpbym_md )


#endif
