1 /* ========================================================================== */ 2 /* === umf_config.h ========================================================= */ 3 /* ========================================================================== */ 4 #include "../../config.h" 5 6 /* -------------------------------------------------------------------------- */ 7 /* UMFPACK Version 4.4, Copyright (c) 2005 by Timothy A. Davis. CISE Dept, */ 8 /* Univ. of Florida. All Rights Reserved. See ../Doc/License for License. */ 9 /* web: http://www.cise.ufl.edu/research/sparse/umfpack */ 10 /* -------------------------------------------------------------------------- */ 11 12 /* 13 This file controls the compile-time configuration of UMFPACK. Modify the 14 Makefile, the architecture-dependent Make.* file, and this file if 15 necessary, to control these options. The following flags may be given 16 as options to your C compiler (as in "cc -DNBLAS", for example). These 17 flags are normally placed in your CONFIG string, defined in your Make.*. 18 19 All of these options, except for the timer, are for accessing the BLAS. 20 21 -DNBLAS 22 23 BLAS mode. If -DNBLAS is set, then no BLAS will be used. Vanilla 24 C code will be used instead. This is portable, and easier to 25 install, but you won't get the best performance. 26 27 If -DNBLAS is not set, then externally-available BLAS routines 28 (dgemm, dger, and dgemv or the equivalent C-BLAS routines) will be 29 used. This will give you the best performance, but perhaps at the 30 expense of portability. 31 32 The default is to use the BLAS, for both the C-callable libumfpack.a 33 library and the MATLAB mexFunction. If you have trouble installing 34 UMFPACK, set -DNBLAS (but then UMFPACK will be slow). 35 36 -DCBLAS 37 38 If -DCBLAS is set, then the C-BLAS interface to the BLAS is 39 used. If your vendor-supplied BLAS library does not have a C-BLAS 40 interface, you can obtain the ATLAS BLAS, available at 41 http://www.netlib.org/atlas. 

        This flag is ignored if -DNBLAS is set.

    -DLP64

        This should be defined if you are compiling in the LP64 model
        (32 bit int's, 64 bit long's, and 64 bit pointers).  In Solaris,
        this is obtained with the flags -xtarget=ultra -xarch=v9 for
        the cc compiler (for example).

    -DLONGBLAS

        If not defined, then the BLAS are not called in the long integer
        version of UMFPACK (the umfpack_*l_* routines).  The most common
        definitions of the BLAS, unfortunately, use int arguments, and
        are thus not suitable for use in the LP64 model.  As far as I can
        tell, only vendor libraries such as the Sun Performance Library
        and SGI's SCSL have a version of the BLAS that allows long integer
        (64-bit) input arguments.  This flag is set automatically, below,
        if you are using the Sun Performance BLAS or the SGI SCSL BLAS.
        You can set it yourself, too, if your BLAS routines can take long
        integer input arguments.

    -DNSUNPERF

        Applies only to Sun Solaris.  If -DNSUNPERF is set, then the Sun
        Performance Library BLAS will not be used.

        The Sun Performance Library BLAS is used by default when compiling
        the C-callable libumfpack.a library on Sun Solaris.

        This flag is ignored if -DNBLAS is set.

    -DNSCSL

        Applies only to SGI IRIX.  If -DNSCSL is set, then the SGI SCSL
        Scientific Library BLAS will not be used.

        The SGI SCSL Scientific Library BLAS is used by default when
        compiling the C-callable libumfpack.a library on SGI IRIX.

        This flag is ignored if -DNBLAS is set.

    -DNPOSIX

        If -DNPOSIX is set, then your Unix operating system is not POSIX-
        compliant, and the POSIX routines sysconf ( ) and times ( ) are
        not used.  These routines provide CPU time and wallclock time
        information.  If -DNPOSIX is set, then the ANSI C clock ( )
        routine is used.  If -DNPOSIX is not set, then sysconf ( ) and
        times ( ) are used in umfpack_tic and umfpack_toc.
        See umfpack_tictoc.c for more information.
93 The default is to use the POSIX routines, except for Windows, 94 which is not POSIX-compliant. 95 96 -DGETRUSAGE 97 98 If -DGETRUSAGE is set, then your system's getrusage ( ) routine 99 will be used for getting the process CPU time. Otherwise the ANSI 100 C clock ( ) routine will be used. The default is to use getrusage 101 ( ) on Unix systems, and to use clock on all other architectures. 102 103 -DNO_TIMER 104 105 If -DNO_TIMER is set, then no timing routines are used at all. 106 107 -DNUTIL 108 109 If -DNUTIL is set, then the internal MATLAB utMalloc, utFree, and 110 utRealloc routines are not used in the UMFPACK mexFunction. The 111 regular mxMalloc, mxFree, and mxRealloc routines are used instead. 112 These routines are not documented, but are available for use. For 113 Windows, -DNUTIL is defined below, because access to the ut* 114 routines is not available by default. 115 116 -DNRECIPROCAL 117 118 This option controls a tradeoff between speed and accuracy. Using 119 -DNRECIPROCAL can lead to more accurate results, but with perhaps 120 some cost in performance, particularly if floating-point division 121 is much more costly than floating-point multiplication. 122 123 This option determines the method used to scale the pivot column. 124 If set, or if the absolute value of the pivot is < 1e-12 (or is a 125 NaN), then the pivot column is divided by the pivot value. 126 Otherwise, the reciprocal of the pivot value is computed, and the 127 pivot column is multiplied by (1/pivot). Multiplying by the 128 reciprocal can be slightly less accurate than dividing by the 129 pivot, but it is often faster. See umf_scale.c. 130 131 This has a small effect on the performance of UMFPACK, at least on 132 a Pentium 4M. It may have a larger effect on other architectures 133 where floating-point division is much more costly than floating- 134 point multiplication. The RS 6000 is one such example. 
135 136 By default, the method chosen is to multiply by the reciprocal 137 (sacrificing accuracy for speed), except when compiling UMFPACK 138 as a built-in routine in MATLAB, or when gcc is being used. 139 140 When MATHWORKS is defined, -DNRECIPROCAL is forced on, and the pivot 141 column is divided by the pivot value. The only way of using the 142 other method in this case is to edit this file. 143 144 If -DNRECIPROCAL is enabled, then the row scaling factors are always 145 applied by dividing each row by the scale factor, rather than 146 multiplying by the reciprocal. If -DNRECIPROCAL is not enabled 147 (the default case), then the scale factors are normally applied by 148 multiplying by the reciprocal. If, however, the smallest scale 149 factor is tiny, then the scale factors are applied via division. 150 151 -DNO_DIVIDE_BY_ZERO 152 153 If the pivot is zero, and this flag is set, then no divide-by-zero 154 occurs. 155 156 You should normally not set these flags yourself: 157 158 -DBLAS_BY_VALUE if scalars are passed by value, not reference 159 -DBLAS_NO_UNDERSCORE if no underscore should be appended 160 -DBLAS_CHAR_ARG if BLAS options are single char's, not strings 161 162 The BLAS options are normally set automatically. If your 163 architecture cannot be determined (see UMFPACK_ARCHITECTURE, below) 164 then you may need to set these flags yourself. 165 166 The following options are controlled by amd_internal.h: 167 168 -DMATLAB_MEX_FILE 169 170 This flag is turned on when compiling the umfpack mexFunction for 171 use in MATLAB. When compiling the MATLAB mexFunction, the MATLAB 172 BLAS are used (unless -DNBLAS is set). The -DCBLAS, -DNSCSL, and 173 -DNSUNPERF flags are all ignored. The -DNRECIPROCAL flag is 174 forced on. Otherwise, [L,U,P,Q,R] = umfpack (A) would return 175 either L*U = P*(R\A)*Q or L*U = P*R*A*Q. 
Rather than returning a 176 flag stating how the scale factors R are to be applied, the umfpack 177 mexFunction always takes the more accurate route and returns 178 L*U = P*(R\A)*Q. 179 180 -DMATHWORKS 181 182 This flag is turned on when compiling umfpack as a built-in routine 183 in MATLAB. The MATLAB BLAS are used for all architectures (-DNBLAS, 184 -DCBLAS, -DNSCSL, and -DNSUNPERF flags are all ignored). Internal 185 routines utMalloc, utFree, utRealloc, utPrintf, utDivideComplex, 186 and utFdlibm_hypot are used, and the "util.h" file is included. 187 This avoids the problem discussed in the User Guide regarding memory 188 allocation in MATLAB. utMalloc returns NULL on failure, instead of 189 terminating the mexFunction (which is what mxMalloc does). However, 190 the ut* routines are not documented by The MathWorks, Inc., so I 191 cannot guarantee that you will always be able to use them. 192 The -DNRECIPROCAL flag is turned on. 193 194 -DNDEBUG 195 196 Debugging mode (if NDEBUG is not defined). The default, of course, 197 is no debugging. Turning on debugging takes some work (see below). 198 If you do not edit this file, then debugging is turned off anyway, 199 regardless of whether or not -DNDEBUG is specified in your compiler 200 options. 201 */ 202 203 /* ========================================================================== */ 204 /* === AMD configuration ==================================================== */ 205 /* ========================================================================== */ 206 207 /* NDEBUG, PRINTF defined in amd_internal.h */ 208 209 /* ========================================================================== */ 210 /* === reciprocal option ==================================================== */ 211 /* ========================================================================== */ 212 213 /* Force the definition NRECIPROCAL when MATHWORKS or MATLAB_MEX_FILE 214 * are defined. Do not multiply by the reciprocal in those cases. 
*/ 215 216 #ifndef NRECIPROCAL 217 #if defined (MATHWORKS) || defined (MATLAB_MEX_FILE) 218 #define NRECIPROCAL 219 #endif 220 #endif 221 222 /* ========================================================================== */ 223 /* === Microsoft Windows configuration ====================================== */ 224 /* ========================================================================== */ 225 226 #ifdef UMF_WINDOWS 227 /* Windows can't access the ut* routines, and it isn't Unix. */ 228 #define NUTIL 229 #define NPOSIX 230 #endif 231 232 /* ========================================================================== */ 233 /* === 0-based or 1-based printing ========================================== */ 234 /* ========================================================================== */ 235 236 #if defined (MATLAB_MEX_FILE) && defined (NDEBUG) 237 /* In MATLAB, matrices are 1-based to the user, but 0-based internally. */ 238 /* One is added to all row and column indices when printing matrices */ 239 /* for the MATLAB user. The +1 shift is turned off when debugging. */ 240 #define INDEX(i) ((i)+1) 241 #else 242 /* In ANSI C, matrices are 0-based and indices are reported as such. */ 243 /* This mode is also used for debug mode, and if MATHWORKS is defined rather */ 244 /* than MATLAB_MEX_FILE. */ 245 #define INDEX(i) (i) 246 #endif 247 248 /* ========================================================================== */ 249 /* === Timer ================================================================ */ 250 /* ========================================================================== */ 251 252 /* 253 If you have the getrusage routine (all Unix systems I've test do), then use 254 that. Otherwise, use the ANSI C clock function. Note that on many 255 systems, the ANSI clock function wraps around after only 2147 seconds, or 256 about 36 minutes. BE CAREFUL: if you compare the run time of UMFPACK with 257 other sparse matrix packages, be sure to use the same timer. 
See 258 umfpack_tictoc.c for the timer used internally by UMFPACK. See also 259 umfpack_timer.c for the timer used in an earlier version of UMFPACK. 260 That timer is still available as a user-callable routine, but it is no 261 longer used internally by UMFPACK. 262 */ 263 264 /* Sun Solaris, SGI Irix, Linux, Compaq Alpha, and IBM RS 6000 all have */ 265 /* getrusage. It's in BSD unix, so perhaps all unix systems have it. */ 266 #if defined (UMF_SOL2) || defined (UMF_SGI) || defined (UMF_LINUX) \ 267 || defined (UMF_ALPHA) || defined (UMF_AIX) 268 #define GETRUSAGE 269 #endif 270 271 272 /* ========================================================================== */ 273 /* === BLAS ================================================================= */ 274 /* ========================================================================== */ 275 276 /* 277 The adventure begins. Figure out how to call the BLAS ... 278 279 This works, but it is incredibly ugly. The C-BLAS was supposed to solve 280 this problem, and make it easier to interface a C program to the BLAS. 281 Unfortunately, the C-BLAS does not have a "long" integer (64 bit) version. 282 Various vendors have done their own 64-bit BLAS. Sun has dgemm_64 routines 283 with "long" integers, SGI has a 64-bit dgemm in their scsl_blas_i8 library 284 with "long long" integers, and so on. 285 286 Different vendors also have different ways of defining a complex number, 287 some using struct's. That's a bad idea. See umf_version.h for the better 288 way to do it (the method that was also chosen for the complex C-BLAS, 289 which is compatible and guaranteed to be portable with ANSI C). 290 291 To make matters worse, SGI's SCSL BLAS has a C-BLAS interface which 292 differs from the ATLAS C-BLAS interface (see immediately below); 293 although a more recent version of SGI's C-BLAS interface is correct 294 if SCSL_VOID_ARGS is defined. 
295 */ 296 297 298 /* -------------------------------------------------------------------------- */ 299 /* Determine which BLAS to use. */ 300 /* -------------------------------------------------------------------------- */ 301 302 #if defined (MATHWORKS) 303 #define USE_MATLAB_BLAS 304 305 #elif defined (NBLAS) 306 #define USE_NO_BLAS 307 308 #elif defined (MATLAB_MEX_FILE) 309 #define USE_MATLAB_BLAS 310 311 #elif defined (CBLAS) 312 #define USE_C_BLAS 313 314 #elif defined (UMF_SOL2) && !defined (NSUNPERF) 315 #define USE_SUNPERF_BLAS 316 #undef FC_FUNC 317 #define FC_FUNC(a,b) a 318 319 #elif defined (UMF_SGI) && !defined (NSCSL) 320 #define USE_SCSL_BLAS 321 322 #else 323 #define USE_FORTRAN_BLAS 324 #endif 325 326 /* -------------------------------------------------------------------------- */ 327 /* int vs. long integer arguments */ 328 /* -------------------------------------------------------------------------- */ 329 330 /* 331 Determine if the BLAS exists for the long integer version. It exists if 332 LONGBLAS is defined in the Makefile, or if using the BLAS from the 333 Sun Performance Library, or SGI's SCSL Scientific Library. 334 */ 335 336 #if defined (USE_SUNPERF_BLAS) || defined (USE_SCSL_BLAS) 337 #ifndef LONGBLAS 338 #define LONGBLAS 339 #endif 340 #endif 341 342 /* do not use the BLAS if Int's are long and LONGBLAS is not defined */ 343 #if defined (LONG_INTEGER) && !defined (LONGBLAS) && !defined (USE_NO_BLAS) 344 #define USE_NO_BLAS 345 #endif 346 347 348 /* -------------------------------------------------------------------------- */ 349 /* Use (void *) arguments for the SGI */ 350 /* -------------------------------------------------------------------------- */ 351 352 #if defined (UMF_SGI) 353 /* 354 Use (void *) pointers for complex types in SCSL. 355 The ATLAS C-BLAS, and the SGI C-BLAS differ. The former uses (void *) 356 arguments, the latter uses SCSL_ZOMPLEX_T, which are either scsl_zomplex 357 or (void *). 
Using (void *) is simpler, and is selected by defining 358 SCSL_VOID_ARGS, below. The cc compiler doesn't complain, but gcc is 359 more picky, and generates a warning without this next statement. 360 With gcc and the 07/09/98 version of SGI's cblas.h, spurious warnings 361 about complex BLAS arguments will be reported anyway. This is because this 362 older version of SGI's cblas.h does not make use of the SCSL_VOID_ARGS 363 parameter, which is present in the 12/6/01 version of SGI's cblas.h. You 364 can safely ignore these warnings. 365 */ 366 #define SCSL_VOID_ARGS 367 #endif 368 369 370 /* -------------------------------------------------------------------------- */ 371 /* The BLAS exists, construct appropriate macros */ 372 /* -------------------------------------------------------------------------- */ 373 374 #if !defined (USE_NO_BLAS) /* { */ 375 376 /* 377 If the compile-time flag -DNBLAS is defined, then the BLAS are not used, 378 portable vanilla C code is used instead, and the remainder of this file 379 is ignored. 380 381 Using the BLAS is much faster, but how C calls the Fortran BLAS is 382 machine-dependent and thus can cause portability problems. Thus, use 383 -DNBLAS to ensure portability (at the expense of speed). 384 385 Preferences: 386 387 *** The best interface to use, regardless of the option you select 388 below, is the standard C-BLAS interface. Not all BLAS libraries 389 use this interface. The only problem with this interface is that 390 it does not extend to the LP64 model. The C-BLAS does not provide 391 for a 64-bit integer. In addition, SGI's older cblas.h can cause 392 spurious warnings when using the C-BLAS interface. 393 394 1) often the most preferred (but see option (3)): use the 395 optimized vendor-supplied library (such as the Sun Performance 396 Library, or IBM's ESSL). This is often the fastest, but might not 397 be portable and might not always be available. 
When compiling a
            MATLAB mexFunction it might be difficult to get the mex compiler
            script to recognize the vendor-supplied BLAS.  Note that the
            freely-available BLAS (option 3) can be faster than the vendor-
            specific BLAS.  You are encouraged to try both options (1) and (3).

        2) When compiling the UMFPACK mexFunction to use UMFPACK in MATLAB, use
            the BLAS provided by The Mathworks, Inc.  This assumes you are using
            MATLAB V6 or higher, since the BLAS are not incorporated in V5 or
            earlier versions.  On my Sun workstation, the MATLAB BLAS gave
            slightly worse performance than the Sun Perf. BLAS.  The advantage
            of using the MATLAB BLAS is that it's available on any computer that
            has MATLAB V6 or higher.  I have not tried using MATLAB BLAS outside
            of a mexFunction in a stand-alone C code, but MATLAB (V6) allows for
            this.  This is well worth trying if you have MATLAB and don't want
            to bother installing the ATLAS BLAS (option 3b, below).  The only
            glitch to this is that MATLAB does not provide a portable interface
            to the BLAS (an underscore is required for some but not all
            architectures).  For Windows and MATLAB 6.0 or 6.1, you also need
            to copy the libmwlapack.dll file into your MATLAB installation
            directory; see the User Guide for details.

            In the current distribution, the only BLAS that the UMFPACK
            mexFunction will use is the internal MATLAB BLAS.  It's possible to
            use other BLAS, but handling the porting of using the mex compiler
            with different BLAS libraries is not trivial.

            As of MATLAB 6.5, the BLAS used internally in MATLAB is the ATLAS
            BLAS.

        3) Use a freely-available high-performance BLAS library:

            (a) The BLAS by Kazushige Goto and Robert van de Geijn, at
                http://www.cs.utexas.edu/users/flame/goto.  This BLAS increased
                the performance of UMFPACK by almost 50% as compared to the
                ATLAS BLAS (v3.2).

            (b) The ATLAS BLAS, available at http://www.netlib.org/atlas,
                by R. Clint Whaley, Antoine Petitet, and Jack Dongarra.
                This has a standard C interface, and thus the interface to it is
                fully portable.  Its performance rivals, and sometimes exceeds,
                the vendor-supplied BLAS on many computers.

            (c) The Fortran RISC BLAS by Michel Dayde', Iain Duff, Antoine
                Petitet, and Abderrahim Qrichi Aniba, available via anonymous
                ftp to ftp.enseeiht.fr in the pub/numerique/BLAS/RISC directory.
                See M. J. Dayde' and I. S. Duff, "The RISC BLAS:  A blocked
                implementation of level 3 BLAS for RISC processors," ACM Trans.
                Math. Software, vol. 25, no. 3., Sept. 1999.  This will give
                you good performance, but with the same C-to-Fortran portability
                problems as option (1).

        4) Use UMFPACK's built-in vanilla C code by setting -DNBLAS at compile
            time.  The key advantage is portability, which is guaranteed if you
            have an ANSI C compliant compiler.  You also don't need to download
            any other package - UMFPACK is stand-alone.  No Fortran is used
            anywhere in UMFPACK.  UMFPACK will be much slower than when using
            options (1) through (3), however.

        5) least preferred:  use the standard Fortran implementation of the
            BLAS, also available at Netlib (http://www.netlib.org/blas).  This
            will be no faster than option (4), and not portable because of
            C-to-Fortran calling conventions.  Don't bother trying option (5).

    The mechanics of how C calls the BLAS on various computers are as follows:

        * C-BLAS (from the ATLAS library, for example):
            The same interface is used on all computers.

        * Defaults for calling the Fortran BLAS:
            add underscore, pass scalars by reference, use string arguments.

        * The Fortran BLAS on Sun Solaris (when compiling the MATLAB mexFunction
            or when using the Fortran RISC BLAS), SGI IRIX, Linux, and Compaq
            Alpha:  use defaults.

        * Sun Solaris (when using the C-callable Sun Performance library):
            no underscore, pass scalars by value, use character arguments.

        * The Fortran BLAS (ESSL Library) on the IBM RS 6000, and HP Unix:
            no underscore, pass scalars by reference, use string arguments.

        * The Fortran BLAS on Windows:
            no underscore, pass scalars by reference, use string arguments.
            If you compile the umfpack mexFunction using umfpack_make, and are
            using the lcc compiler bundled with MATLAB, then you must first
            copy the umfpack\lcc_lib\libmwlapack.lib file into the
            <matlab>\extern\lib\win32\lcc\ directory, where <matlab> is the
            directory in which MATLAB is installed.  Next, type mex -setup
            at the MATLAB prompt, and ask MATLAB to select the lcc compiler.
            MATLAB has built-in BLAS, but it cannot be accessed by a program
            compiled by lcc without first copying this file.
*/



/* -------------------------------------------------------------------------- */
#ifdef USE_C_BLAS /* { */
/* -------------------------------------------------------------------------- */


/* -------------------------------------------------------------------------- */
/* use the C-BLAS (any computer) */
/* -------------------------------------------------------------------------- */

/*
    C-BLAS is the default interface, with the following exceptions.  Solaris
    uses the Sun Performance BLAS for libumfpack.a (the C-callable library).
    SGI IRIX uses the SCSL BLAS for libumfpack.a.  All architectures use
    MATLAB's internal BLAS for the mexFunction.  These options are set in the
    Make.* files.  The Make.generic file uses no BLAS at all.

    Note that "default" refers to the Make.* files:  within this file, the
    C-BLAS branch is taken only if USE_C_BLAS is defined, which the selection
    logic earlier in this file does only when -DCBLAS is given.

    If you use the ATLAS C-BLAS, then be sure to set the -I flag to
    -I/path/ATLAS/include, where /path/ATLAS is the ATLAS installation
    directory.  See Make.solaris for an example.
You do not need to do this 513 for the SGI, which has a /usr/include/cblas.h. 514 */ 515 516 #include "cblas.h" 517 518 #ifdef COMPLEX 519 #define BLAS_GEMM_ROUTINE cblas_zgemm 520 #define BLAS_TRSM_ROUTINE cblas_ztrsm 521 #define BLAS_TRSV_ROUTINE cblas_ztrsv 522 #define BLAS_GEMV_ROUTINE cblas_zgemv 523 #define BLAS_GER_ROUTINE cblas_zgeru 524 #define BLAS_SCAL_ROUTINE cblas_zscal 525 #define BLAS_COPY_ROUTINE cblas_zcopy 526 #define BLAS_DECLARE_SCALAR(x) double x [2] 527 #define BLAS_ASSIGN(x,xr,xi) { x [0] = xr ; x [1] = xi ; } 528 #else 529 #define BLAS_GEMM_ROUTINE cblas_dgemm 530 #define BLAS_TRSM_ROUTINE cblas_dtrsm 531 #define BLAS_TRSV_ROUTINE cblas_dtrsv 532 #define BLAS_GEMV_ROUTINE cblas_dgemv 533 #define BLAS_GER_ROUTINE cblas_dger 534 #define BLAS_SCAL_ROUTINE cblas_dscal 535 #define BLAS_COPY_ROUTINE cblas_dcopy 536 #define BLAS_DECLARE_SCALAR(x) double x 537 #define BLAS_ASSIGN(x,xr,xi) { x = xr ; } 538 #endif 539 540 #define BLAS_LOWER CblasLower 541 #define BLAS_UNIT_DIAGONAL CblasUnit 542 #define BLAS_RIGHT CblasRight 543 #define BLAS_NO_TRANSPOSE CblasNoTrans 544 #define BLAS_TRANSPOSE CblasTrans 545 #define BLAS_COLUMN_MAJOR_ORDER CblasColMajor, 546 #define BLAS_SCALAR(x) x 547 #define BLAS_INT_SCALAR(n) n 548 #define BLAS_ARRAY(a) a 549 550 551 552 /* -------------------------------------------------------------------------- */ 553 #else /* } USE_C_BLAS { */ 554 /* -------------------------------------------------------------------------- */ 555 556 /* -------------------------------------------------------------------------- */ 557 /* use Fortran (or other architecture-specific) BLAS */ 558 /* -------------------------------------------------------------------------- */ 559 560 /* No such argument when not using the C-BLAS */ 561 #define BLAS_COLUMN_MAJOR_ORDER 562 563 /* Determine which architecture we're on and set options accordingly. 
*/ 564 /* The default, if nothing is defined is to add an underscore, */ 565 /* pass scalars by reference, and use string arguments. */ 566 567 /* ---------------------------------- */ 568 /* Sun Performance BLAS */ 569 /* ---------------------------------- */ 570 571 #ifdef USE_SUNPERF_BLAS 572 #ifdef _SUNPERF_H 573 /* <sunperf.h> has been included somehow anyway, outside of umf_config.h */ 574 #error "sunperf.h must NOT be #include'd. See umf_config.h for details." 575 #endif 576 #define BLAS_BY_VALUE 577 #define BLAS_NO_UNDERSCORE 578 #define BLAS_CHAR_ARG 579 #endif /* USE_SUNPERF_BLAS */ 580 581 /* ---------------------------------- */ 582 /* SGI SCSL BLAS */ 583 /* ---------------------------------- */ 584 585 #ifdef USE_SCSL_BLAS 586 #if defined (LP64) 587 #include <scsl_blas_i8.h> 588 #else 589 #include <scsl_blas.h> 590 #endif 591 #define BLAS_BY_VALUE 592 #define BLAS_NO_UNDERSCORE 593 #endif /* USE_SCSL_BLAS */ 594 595 /* ---------------------------------- */ 596 /* IBM AIX, Windows, and HP Fortran BLAS */ 597 /* ---------------------------------- */ 598 599 #if defined (UMF_AIX) || defined (UMF_WINDOWS) || defined (UMF_HP) 600 #define BLAS_NO_UNDERSCORE 601 #endif 602 603 604 /* -------------------------------------------------------------------------- */ 605 /* BLAS names */ 606 /* -------------------------------------------------------------------------- */ 607 608 #if defined (LP64) && defined (USE_SUNPERF_BLAS) && defined (LONG_INTEGER) 609 610 /* 64-bit sunperf BLAS, for Sun Solaris only */ 611 #ifdef COMPLEX 612 #define BLAS_GEMM_ROUTINE zgemm_64 613 #define BLAS_TRSM_ROUTINE ztrsm_64 614 #define BLAS_TRSV_ROUTINE ztrsv_64 615 #define BLAS_GEMV_ROUTINE zgemv_64 616 #define BLAS_GER_ROUTINE zgeru_64 617 #define BLAS_SCAL_ROUTINE zscal_64 618 #define BLAS_COPY_ROUTINE zcopy_64 619 #else 620 #define BLAS_GEMM_ROUTINE dgemm_64 621 #define BLAS_TRSM_ROUTINE dtrsm_64 622 #define BLAS_TRSV_ROUTINE dtrsv_64 623 #define BLAS_GEMV_ROUTINE dgemv_64 624 
#define BLAS_GER_ROUTINE dger_64 625 #define BLAS_SCAL_ROUTINE dscal_64 626 #define BLAS_COPY_ROUTINE dcopy_64 627 #endif /* COMPLEX */ 628 629 #else 630 631 /* default, use names snooped with autoconf */ 632 633 #ifdef COMPLEX 634 635 /* naming convention (use underscore, or not) */ 636 #define BLAS_GEMM_ROUTINE FC_FUNC(zgemm,ZGEMM) 637 #define BLAS_TRSM_ROUTINE FC_FUNC(ztrsm,ZTRSM) 638 #define BLAS_TRSV_ROUTINE FC_FUNC(ztrsv,ZTRSV) 639 #define BLAS_GEMV_ROUTINE FC_FUNC(zgemv,ZGEMV) 640 #define BLAS_GER_ROUTINE FC_FUNC(zgeru,ZGERU) 641 #define BLAS_SCAL_ROUTINE FC_FUNC(zscal,ZSCAL) 642 #define BLAS_COPY_ROUTINE FC_FUNC(zcopy,ZCOPY) 643 644 #else 645 646 /* naming convention (use underscore, or not) */ 647 #define BLAS_GEMM_ROUTINE FC_FUNC(dgemm,DGEMM) 648 #define BLAS_TRSM_ROUTINE FC_FUNC(dtrsm,DTRSM) 649 #define BLAS_TRSV_ROUTINE FC_FUNC(dtrsv,DTRSV) 650 #define BLAS_GEMV_ROUTINE FC_FUNC(dgemv,DGEMV) 651 #define BLAS_GER_ROUTINE FC_FUNC(dger,DGER) 652 #define BLAS_SCAL_ROUTINE FC_FUNC(dscal,DSCAL) 653 #define BLAS_COPY_ROUTINE FC_FUNC(dcopy,DCOPY) 654 655 #endif /* COMPLEX */ 656 657 #endif /* LP64 && USE_SUNPERF_BLAS */ 658 659 660 /* -------------------------------------------------------------------------- */ 661 /* BLAS real or complex floating-point scalars */ 662 /* -------------------------------------------------------------------------- */ 663 664 #ifdef COMPLEX 665 666 /* 667 The SunPerf BLAS expects to see a doublecomplex scalar, but it 668 also will accept an array of size 2. See the manual, normally at 669 file:///opt/SUNWspro/WS6U1/lib/locale/C/html/manuals/perflib/user_guide 670 /plug_using_perflib.html . This manual is inconsistent with the man pages 671 for zgemm, zgemv, and zgeru and also inconsistent with the <sunperf.h> 672 include file. Use this instead, for SunPerf (only works if you do NOT 673 include sunperf.h). Fortunately, this file (umf_config.h) is not included 674 in any user code that calls UMFPACK. 
Thus, the caller may include 675 sunperf.h in his or her own code, and that is safely ignored here. 676 SGI's SCSL BLAS has yet a different kind of struct, but we can use a 677 double array of size 2 instead (since SCSL_VOID_ARGS is defined). 678 Most BLAS expect complex scalars as pointers to double arrays of size 2. 679 */ 680 681 #define BLAS_DECLARE_SCALAR(x) double x [2] 682 #define BLAS_ASSIGN(x,xr,xi) { x [0] = xr ; x [1] = xi ; } 683 #define BLAS_SCALAR(x) x 684 685 #else 686 687 #define BLAS_DECLARE_SCALAR(x) double x 688 #define BLAS_ASSIGN(x,xr,xi) { x = xr ; } 689 #ifdef BLAS_BY_VALUE 690 #define BLAS_SCALAR(x) x 691 #else 692 #define BLAS_SCALAR(x) &(x) 693 #endif 694 695 #endif /* COMPLEX */ 696 697 698 /* -------------------------------------------------------------------------- */ 699 /* BLAS integer scalars */ 700 /* -------------------------------------------------------------------------- */ 701 702 /* 703 Fortran requires integers to be passed by reference. 704 The SCSL BLAS requires long long arguments in LP64 mode. 705 */ 706 707 #if defined (USE_SCSL_BLAS) && defined (LP64) 708 #define BLAS_INT_SCALAR(n) ((long long) n) 709 #else 710 #ifdef BLAS_BY_VALUE 711 #define BLAS_INT_SCALAR(n) n 712 #else 713 #define BLAS_INT_SCALAR(n) &(n) 714 #endif 715 #endif 716 717 718 /* -------------------------------------------------------------------------- */ 719 /* BLAS strings */ 720 /* -------------------------------------------------------------------------- */ 721 722 /* 723 The Sun Performance BLAS wants a character instead of a string. 
724 */ 725 726 #ifdef BLAS_CHAR_ARG 727 #define BLAS_NO_TRANSPOSE 'N' 728 #define BLAS_TRANSPOSE 'T' 729 #define BLAS_LEFT 'L' 730 #define BLAS_RIGHT 'R' 731 #define BLAS_LOWER 'L' 732 #define BLAS_UNIT_DIAGONAL 'U' 733 #else 734 #define BLAS_NO_TRANSPOSE "N" 735 #define BLAS_TRANSPOSE "T" 736 #define BLAS_LEFT "L" 737 #define BLAS_RIGHT "R" 738 #define BLAS_LOWER "L" 739 #define BLAS_UNIT_DIAGONAL "U" 740 #endif 741 742 743 /* -------------------------------------------------------------------------- */ 744 /* BLAS arrays */ 745 /* -------------------------------------------------------------------------- */ 746 747 /* 748 The complex SunPerf BLAS expects to see a doublecomplex array of size s. 749 This is broken (see above, regarding complex scalars in sunperf.h). 750 For SunPerf BLAS, just pass a pointer to the array, and ignore sunperf.h. 751 With sunperf.h, you would need: 752 753 #define BLAS_ARRAY(a) ((doublecomplex *)(a)) 754 755 SGI's SCSL BLAS has yet a different kind of struct, but we can use a 756 double array of size 2 instead (since SCSL_VOID_ARGS is defined). 757 758 The real versions all use just a (double *) pointer. 759 760 In all cases, no typecast is required. This will break if <sunperf.h> is 761 included. 762 763 If you have read this far, I hope you see now why (void *) a much better 764 choice for complex BLAS prototypes, and why double x [2] is better than 765 an architecture dependent struct { double real ; double imag ; } 766 type definition. 
767 768 */ 769 770 #define BLAS_ARRAY(a) (a) 771 772 773 /* -------------------------------------------------------------------------- */ 774 #endif /* USE_C_BLAS } */ 775 /* -------------------------------------------------------------------------- */ 776 777 778 779 780 781 /* -------------------------------------------------------------------------- */ 782 /* BLAS macros, for all interfaces */ 783 /* -------------------------------------------------------------------------- */ 784 785 /* 786 All architecture dependent issues have now been taken into consideration, 787 and folded into the macros BLAS_DECLARE_SCALAR, BLAS_ASSIGN, BLAS_*_ROUTINE, 788 BLAS_COLUMN_MAJOR_ORDER, BLAS_NO_TRANSPOSE, BLAS_TRANSPOSE, BLAS_SCALAR, 789 BLAS_INT_SCALAR, BLAS_ARRAY, and Int. 790 791 You will note that there is not a *** single *** name, declaration, or 792 argument to the BLAS which is not somehow different in one or more versions 793 of the BLAS! 794 */ 795 796 797 /* C = C - A*B', where: 798 * A is m-by-k with leading dimension ldac 799 * B is k-by-n with leading dimension ldb 800 * C is m-by-n with leading dimension ldac */ 801 #define BLAS_GEMM(m,n,k,A,B,ldb,C,ldac) \ 802 { \ 803 BLAS_DECLARE_SCALAR (alpha) ; \ 804 BLAS_DECLARE_SCALAR (beta) ; \ 805 BLAS_ASSIGN (alpha, -1.0, 0.0) ; \ 806 BLAS_ASSIGN (beta, 1.0, 0.0) ; \ 807 (void) BLAS_GEMM_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \ 808 BLAS_NO_TRANSPOSE, BLAS_TRANSPOSE, \ 809 BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), BLAS_INT_SCALAR (k), \ 810 BLAS_SCALAR (alpha), \ 811 BLAS_ARRAY (A), BLAS_INT_SCALAR (ldac), \ 812 BLAS_ARRAY (B), BLAS_INT_SCALAR (ldb), BLAS_SCALAR (beta), \ 813 BLAS_ARRAY (C), BLAS_INT_SCALAR (ldac)) ; \ 814 } 815 816 /* A = A - x*y', where: 817 * A is m-by-n with leading dimension d 818 x is a column vector with stride 1 819 y is a column vector with stride 1 */ 820 #define BLAS_GER(m,n,x,y,A,d) \ 821 { \ 822 Int one = 1 ; \ 823 BLAS_DECLARE_SCALAR (alpha) ; \ 824 BLAS_ASSIGN (alpha, -1.0, 0.0) ; \ 825 (void) 
BLAS_GER_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \ 826 BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \ 827 BLAS_SCALAR (alpha), \ 828 BLAS_ARRAY (x), BLAS_INT_SCALAR (one), \ 829 BLAS_ARRAY (y), BLAS_INT_SCALAR (one), \ 830 BLAS_ARRAY (A), BLAS_INT_SCALAR (d)) ; \ 831 } 832 833 834 /* y = y - A*x, where A is m-by-n with leading dimension d, 835 x is a column vector with stride 1 836 y is a column vector with stride 1 */ 837 838 #define BLAS_GEMV(m,n,A,x,y,d) \ 839 { \ 840 Int one = 1 ; \ 841 BLAS_DECLARE_SCALAR (alpha) ; \ 842 BLAS_DECLARE_SCALAR (beta) ; \ 843 BLAS_ASSIGN (alpha, -1.0, 0.0) ; \ 844 BLAS_ASSIGN (beta, 1.0, 0.0) ; \ 845 (void) BLAS_GEMV_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \ 846 BLAS_NO_TRANSPOSE, \ 847 BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \ 848 BLAS_SCALAR (alpha), \ 849 BLAS_ARRAY (A), BLAS_INT_SCALAR (d), \ 850 BLAS_ARRAY (x), BLAS_INT_SCALAR (one), BLAS_SCALAR (beta), \ 851 BLAS_ARRAY (y), BLAS_INT_SCALAR (one)) ; \ 852 } 853 854 855 /* solve Lx=b, where: 856 * B is a column vector (m-by-1) with leading dimension d 857 * A is m-by-m with leading dimension d */ 858 #define BLAS_TRSV(m,A,b,d) \ 859 { \ 860 Int one = 1 ; \ 861 (void) BLAS_TRSV_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \ 862 BLAS_LOWER, BLAS_NO_TRANSPOSE, BLAS_UNIT_DIAGONAL, \ 863 BLAS_INT_SCALAR (m), \ 864 BLAS_ARRAY (A), BLAS_INT_SCALAR (d), \ 865 BLAS_ARRAY (b), BLAS_INT_SCALAR (one)) ; \ 866 } 867 868 /* solve XL'=B where: 869 * B is m-by-n with leading dimension ldb 870 * A is n-by-n with leading dimension lda */ 871 #define BLAS_TRSM_RIGHT(m,n,A,lda,B,ldb) \ 872 { \ 873 BLAS_DECLARE_SCALAR (alpha) ; \ 874 BLAS_ASSIGN (alpha, 1.0, 0.0) ; \ 875 (void) BLAS_TRSM_ROUTINE (BLAS_COLUMN_MAJOR_ORDER \ 876 BLAS_RIGHT, BLAS_LOWER, BLAS_TRANSPOSE, BLAS_UNIT_DIAGONAL, \ 877 BLAS_INT_SCALAR (m), BLAS_INT_SCALAR (n), \ 878 BLAS_SCALAR (alpha), \ 879 BLAS_ARRAY (A), BLAS_INT_SCALAR (lda), \ 880 BLAS_ARRAY (B), BLAS_INT_SCALAR (ldb)) ; \ 881 } 882 883 /* x = s*x, where x is a stride-1 vector of length n */ 884 
#define BLAS_SCAL(n,s,x) \ 885 { \ 886 Int one = 1 ; \ 887 BLAS_DECLARE_SCALAR (alpha) ; \ 888 BLAS_ASSIGN (alpha, REAL_COMPONENT (s), IMAG_COMPONENT (s)) ; \ 889 (void) BLAS_SCAL_ROUTINE ( \ 890 BLAS_INT_SCALAR (n), BLAS_SCALAR (alpha), \ 891 BLAS_ARRAY (x), BLAS_INT_SCALAR (one)) ; \ 892 } 893 894 /* x = y, where x and y are a stride-1 vectors of length n */ 895 #define BLAS_COPY(n,x,y) \ 896 { \ 897 Int one = 1 ; \ 898 (void) BLAS_COPY_ROUTINE ( \ 899 BLAS_INT_SCALAR (n), \ 900 BLAS_ARRAY (x), BLAS_INT_SCALAR (one), \ 901 BLAS_ARRAY (y), BLAS_INT_SCALAR (one)) ; \ 902 } 903 904 #endif /* !defined (USE_NO_BLAS) } */ 905