/* ---------------------------------------------------------------------
 *
 *  -- PBLAS routine (version 2.0) --
 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
 *     and University of California, Berkeley.
 *     April 1, 1998
 *
 * ---------------------------------------------------------------------
 */
/*
 * This file includes the standard C libraries, as well as system
 * dependent include files.  All PBLAS routines include this file.
 *
 * ---------------------------------------------------------------------
 * Machine Specific PBLAS macros
 * ---------------------------------------------------------------------
 */
#define _HAL_ 0
#define _T3D_ 1
#define _T3E_ 2

/*
 * _MACH_ is selected from the build flags T3D / T3E; when neither is
 * defined it falls back to _HAL_.
 */
#ifdef T3D
#define _MACH_ _T3D_
#endif
#ifdef T3E
#define _MACH_ _T3E_
#endif
#ifndef _MACH_
#define _MACH_ _HAL_
#endif
/*
 * CBRATIO is the ratio of the transfer cost per element for the combine
 * sum to one process and the broadcast operation.  This value is used
 * within the Level 3 PBLAS routines to decide on which algorithm to
 * select.
 */
#define CBRATIO 1.3
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include <stdio.h>
#include <stdlib.h>

/* Variable-argument support: ISO header when __STDC__, legacy otherwise */
#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

/* <fortran.h> is only pulled in for the T3D / T3E machine settings */
#if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )
#include <fortran.h>
#endif
/*
 * ---------------------------------------------------------------------
 * FORTRAN <-> C interface
 * ---------------------------------------------------------------------
 *
 * These macros identify how the PBLAS will be called, as follows:
 *
 * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be
 * in all lower case and to have an underscore appended to it (Suns,
 * Intel compilers expect this).
 *
 * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions
 * to be in all lower case (IBM RS6K compilers do this).
 *
 * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions
 * to be in all upper case (Cray compilers expect this).
 *
 * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C
 * converter.
 */
#define _F2C_ADD_     0
#define _F2C_NOCHANGE 1
#define _F2C_UPCASE   2
#define _F2C_F77ISF2C 3

/*
 * _F2C_CALL_ is selected from the build flags UpCase / NoChange / Add_ /
 * f77IsF2C; when none of them is defined it falls back to _F2C_ADD_.
 */
#ifdef UpCase
#define _F2C_CALL_ _F2C_UPCASE
#endif

#ifdef NoChange
#define _F2C_CALL_ _F2C_NOCHANGE
#endif

#ifdef Add_
#define _F2C_CALL_ _F2C_ADD_
#endif

#ifdef f77IsF2C
#define _F2C_CALL_ _F2C_F77ISF2C
#endif

#ifndef _F2C_CALL_
#define _F2C_CALL_ _F2C_ADD_
#endif
/*
 * ---------------------------------------------------------------------
 * TYPE DEFINITIONS AND CONVERSION UTILITIES
 * ---------------------------------------------------------------------
 */
#if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )

/* On these machines every later use of `float` is rewritten to `double` */
#define float double
/* Type of character argument in a FORTRAN call */
#define F_CHAR_T _fcd
/* Character conversion utilities */
#define F2C_CHAR(a) ( _fcdtocp( (a) ) )
#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) )
/* Type of FORTRAN functions */
#define F_VOID_FCT void fortran /* Subroutine */
#define F_INTG_FCT int fortran  /* INTEGER function */

#else

/* Type of character argument in a FORTRAN call */
typedef char * F_CHAR_T;
/* Character conversion utilities (identity on this path) */
#define F2C_CHAR(a) (a)
#define C2F_CHAR(a) (a)
/* Type of FORTRAN functions */
#define F_VOID_FCT void /* Subroutine */
#define F_INTG_FCT int  /* INTEGER function */

#endif
/*
 * ---------------------------------------------------------------------
 * typedef definitions
 * ---------------------------------------------------------------------
 */
/* Complex values are stored as two-element arrays ... */
typedef float  cmplx  [2];
typedef double cmplx16[2];

/* ... indexed with these two constants */
#define REAL_PART 0
#define IMAG_PART 1

/*
 * Function-pointer types used by the PBTYP_T table below.  Full
 * prototypes are given when __STDC__ is defined; otherwise the same
 * names are declared with empty (unprototyped) parameter lists.
 */
#ifdef __STDC__

/* Types of the BLACS function slots of PBTYP_T */
typedef void (*GESD2D_T) ( int, int, int,
                           char *, int, int,
                           int );
typedef void (*GERV2D_T) ( int, int, int,
                           char *, int, int,
                           int );
typedef void (*GEBS2D_T) ( int, char *, char *,
                           int, int, char *,
                           int );
typedef void (*GEBR2D_T) ( int, char *, char *,
                           int, int, char *,
                           int, int, int );
typedef void (*GSUM2D_T) ( int, char *, char *,
                           int, int, char *,
                           int, int, int );

/* Types of the addition, shift, dot, pad, set and scaling slots */
typedef F_VOID_FCT (*MMADD_T) ( int *, int *, char *,
                                char *, int *, char *,
                                char *, int * );
typedef F_VOID_FCT (*MMSHFT_T) ( int *, int *, int *,
                                 char *, int * );
typedef F_VOID_FCT (*VVDOT_T) ( int *, char *, char *,
                                int *, char *, int * );
typedef F_VOID_FCT (*VVSET_T) ( int *, char *, char *,
                                int * );
typedef F_VOID_FCT (*TZPAD_T) ( F_CHAR_T, F_CHAR_T, int *,
                                int *, int *, char *,
                                char *, char *, int * );
typedef F_VOID_FCT (*TZPADCPY_T) ( F_CHAR_T, F_CHAR_T, int *,
                                   int *, int *, char *,
                                   int *, char *, int * );
typedef F_VOID_FCT (*TZSET_T) ( F_CHAR_T, int *, int *,
                                int *, char *, char *,
                                char *, int * );
typedef F_VOID_FCT (*TZSCAL_T) ( F_CHAR_T, int *, int *,
                                 int *, char *, char *,
                                 int * );

/* Types of the Level 1 BLAS slots */
typedef F_VOID_FCT (*AXPY_T) ( int *, char *, char *,
                               int *, char *, int * );
typedef F_VOID_FCT (*COPY_T) ( int *, char *, int *,
                               char *, int * );
typedef F_VOID_FCT (*SWAP_T) ( int *, char *, int *,
                               char *, int * );

/* Types of the Level 2 BLAS slots */
typedef F_VOID_FCT (*GEMV_T) ( F_CHAR_T, int *, int *,
                               char *, char *, int *,
                               char *, int *, char *,
                               char *, int * );
typedef F_VOID_FCT (*AGEMV_T) ( F_CHAR_T, int *, int *,
                                char *, char *, int *,
                                char *, int *, char *,
                                char *, int * );
typedef F_VOID_FCT (*SYMV_T) ( F_CHAR_T, int *, char *,
                               char *, int *, char *,
                               int *, char *, char *,
                               int * );
typedef F_VOID_FCT (*ASYMV_T) ( F_CHAR_T, int *, char *,
                                char *, int *, char *,
                                int *, char *, char *,
                                int * );
typedef F_VOID_FCT (*HEMV_T) ( F_CHAR_T, int *, char *,
                               char *, int *, char *,
                               int *, char *, char *,
                               int * );
typedef F_VOID_FCT (*AHEMV_T) ( F_CHAR_T, int *, char *,
                                char *, int *, char *,
                                int *, char *, char *,
                                int * );
typedef F_VOID_FCT (*TRMV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T,
                               int *, char *, int *,
                               char *, int * );
typedef F_VOID_FCT (*ATRMV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T,
                                int *, char *, char *,
                                int *, char *, int *,
                                char *, char *, int * );
typedef F_VOID_FCT (*TRSV_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T,
                               int *, char *, int *,
                               char *, int * );
typedef F_VOID_FCT (*GERC_T) ( int *, int *, char *,
                               char *, int *, char *,
                               int *, char *, int * );
typedef F_VOID_FCT (*GERU_T) ( int *, int *, char *,
                               char *, int *, char *,
                               int *, char *, int * );
typedef F_VOID_FCT (*SYR_T) ( F_CHAR_T, int *, char *,
                              char *, int *, char *,
                              int * );
typedef F_VOID_FCT (*HER_T) ( F_CHAR_T, int *, char *,
                              char *, int *, char *,
                              int * );
typedef F_VOID_FCT (*SYR2_T) ( F_CHAR_T, int *, char *,
                               char *, int *, char *,
                               int *, char *, int * );
typedef F_VOID_FCT (*HER2_T) ( F_CHAR_T, int *, char *,
                               char *, int *, char *,
                               int *, char *, int * );

/* Types of the Level 3 BLAS slots */
typedef F_VOID_FCT (*GEMM_T) ( F_CHAR_T, F_CHAR_T, int *,
                               int *, int *, char *,
                               char *, int *, char *,
                               int *, char *, char *,
                               int * );
typedef F_VOID_FCT (*SYMM_T) ( F_CHAR_T, F_CHAR_T, int *,
                               int *, char *, char *,
                               int *, char *, int *,
                               char *, char *, int * );
typedef F_VOID_FCT (*HEMM_T) ( F_CHAR_T, F_CHAR_T, int *,
                               int *, char *, char *,
                               int *, char *, int *,
                               char *, char *, int * );
typedef F_VOID_FCT (*SYRK_T) ( F_CHAR_T, F_CHAR_T, int *,
                               int *, char *, char *,
                               int *, char *, char *,
                               int * );
typedef F_VOID_FCT (*HERK_T) ( F_CHAR_T, F_CHAR_T, int *,
                               int *, char *, char *,
                               int *, char *, char *,
                               int * );
typedef F_VOID_FCT (*SYR2K_T) ( F_CHAR_T, F_CHAR_T, int *,
                                int *, char *, char *,
                                int *, char *, int *,
                                char *, char *, int * );
typedef F_VOID_FCT (*HER2K_T) ( F_CHAR_T, F_CHAR_T, int *,
                                int *, char *, char *,
                                int *, char *, int *,
                                char *, char *, int * );
typedef F_VOID_FCT (*TRMM_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T,
                               F_CHAR_T, int *, int *,
                               char *, char *, int *,
                               char *, int * );
typedef F_VOID_FCT (*TRSM_T) ( F_CHAR_T, F_CHAR_T, F_CHAR_T,
                               F_CHAR_T, int *, int *,
                               char *, char *, int *,
                               char *, int * );

#else

typedef void (*GESD2D_T) ();
typedef void (*GERV2D_T) ();
typedef void (*GEBS2D_T) ();
typedef void (*GEBR2D_T) ();
typedef void (*GSUM2D_T) ();

typedef F_VOID_FCT (*MMADD_T) ();
typedef F_VOID_FCT (*MMSHFT_T) ();
typedef F_VOID_FCT (*VVDOT_T) ();
typedef F_VOID_FCT (*VVSET_T) ();
typedef F_VOID_FCT (*TZPAD_T) ();
typedef F_VOID_FCT (*TZPADCPY_T) ();
typedef F_VOID_FCT (*TZSET_T) ();
typedef F_VOID_FCT (*TZSCAL_T) ();

typedef F_VOID_FCT (*AXPY_T) ();
typedef F_VOID_FCT (*COPY_T) ();
typedef F_VOID_FCT (*SWAP_T) ();

typedef F_VOID_FCT (*GEMV_T) ();
typedef F_VOID_FCT (*AGEMV_T) ();
typedef F_VOID_FCT (*SYMV_T) ();
typedef F_VOID_FCT (*ASYMV_T) ();
typedef F_VOID_FCT (*HEMV_T) ();
typedef F_VOID_FCT (*AHEMV_T) ();
typedef F_VOID_FCT (*TRMV_T) ();
typedef F_VOID_FCT (*ATRMV_T) ();
typedef F_VOID_FCT (*TRSV_T) ();
typedef F_VOID_FCT (*GERC_T) ();
typedef F_VOID_FCT (*GERU_T) ();
typedef F_VOID_FCT (*SYR_T) ();
typedef F_VOID_FCT (*HER_T) ();
typedef F_VOID_FCT (*SYR2_T) ();
typedef F_VOID_FCT (*HER2_T) ();

typedef F_VOID_FCT (*GEMM_T) ();
typedef F_VOID_FCT (*SYMM_T) ();
typedef F_VOID_FCT (*HEMM_T) ();
typedef F_VOID_FCT (*SYRK_T) ();
typedef F_VOID_FCT (*HERK_T) ();
typedef F_VOID_FCT (*SYR2K_T) ();
typedef F_VOID_FCT (*HER2K_T) ();
typedef F_VOID_FCT (*TRMM_T) ();
typedef F_VOID_FCT (*TRSM_T) ();

#endif

/*
 * PBTYP_T: per-data-type descriptor.  It records the type encoding and
 * element sizes, pointers to constants of that type, and a table of
 * function pointers grouped by role.
 */
typedef struct
{
   char type;                    /* Encoding of the data type */
   int  usiz;                    /* length in bytes of elementary data type */
   int  size;                    /* length in bytes of data type */

   char * zero,
        * one,
        * negone;                /* pointers to constants of correct type */

   GESD2D_T   Cgesd2d;           /* BLACS functions */
   GERV2D_T   Cgerv2d;
   GEBS2D_T   Cgebs2d;
   GEBR2D_T   Cgebr2d;
   GSUM2D_T   Cgsum2d;

   MMADD_T    Fmmadd;            /* Addition functions */
   MMADD_T    Fmmcadd;
   MMADD_T    Fmmtadd;
   MMADD_T    Fmmtcadd;
   MMADD_T    Fmmdda;
   MMADD_T    Fmmddac;
   MMADD_T    Fmmddat;
   MMADD_T    Fmmddact;

   MMSHFT_T   Fcshft;            /* Shift functions */
   MMSHFT_T   Frshft;

   VVDOT_T    Fvvdotu;           /* Dot functions */
   VVDOT_T    Fvvdotc;

   TZPAD_T    Ftzpad;            /* Array pad function */
   TZPADCPY_T Ftzpadcpy;
   VVSET_T    Fset;

   TZSCAL_T   Ftzscal;           /* Scaling functions */
   TZSCAL_T   Fhescal;
   TZSCAL_T   Ftzcnjg;

   AXPY_T     Faxpy;             /* Level 1 BLAS */
   COPY_T     Fcopy;
   SWAP_T     Fswap;

   GEMV_T     Fgemv;             /* Level 2 BLAS */
   SYMV_T     Fsymv;
   HEMV_T     Fhemv;
   TRMV_T     Ftrmv;
   TRSV_T     Ftrsv;

   AGEMV_T    Fagemv;
   ASYMV_T    Fasymv;
   AHEMV_T    Fahemv;
   ATRMV_T    Fatrmv;

   GERC_T     Fgerc;
   GERU_T     Fgeru;
   SYR_T      Fsyr;
   HER_T      Fher;
   SYR2_T     Fsyr2;
   HER2_T     Fher2;

   GEMM_T     Fgemm;             /* Level 3 BLAS */
   SYMM_T     Fsymm;
   HEMM_T     Fhemm;
   SYRK_T     Fsyrk;
   HERK_T     Fherk;
   SYR2K_T    Fsyr2k;
   HER2K_T    Fher2k;
   TRMM_T     Ftrmm;
   TRSM_T     Ftrsm;

} PBTYP_T;

/*
 * Function-pointer types whose first argument is a PBTYP_T descriptor;
 * they must therefore be declared after PBTYP_T.
 */
#ifdef __STDC__

typedef void (*TZSYR_T) ( PBTYP_T *, char *, int,
                          int, int, int,
                          char *, char *, int,
                          char *, int, char *,
                          int );
typedef void (*TZSYR2_T) ( PBTYP_T *, char *, int,
                           int, int, int,
                           char *, char *, int,
                           char *, int, char *,
                           int, char *, int,
                           char *, int );
typedef void (*TZTRM_T) ( PBTYP_T *, char *, char *,
                          char *, char *, int,
                          int, int, int,
                          char *, char *, int,
                          char *, int, char *,
                          int );
typedef void (*TZSYM_T) ( PBTYP_T *, char *, char *,
                          int, int, int,
                          int, char *, char *,
                          int, char *, int,
                          char *, int, char *,
                          int, char *, int );
#else

typedef void (*TZSYR_T) ();
typedef void (*TZSYR2_T) ();
typedef void (*TZTRM_T) ();
typedef void (*TZSYM_T) ();

#endif

/*
 * PB_VM_T: block-layout record holding the diagonal offset and LCM
 * values together with matching row-side and column-side fields.
 */
typedef struct
{
   int offd;    /* Global diagonal offset */
   int lcmt00;  /* LCM value of first block */

   int mp;      /* Local number of rows */
   int imb1;    /* Size of first row block (global) */
   int imbloc;  /* Size of first local row block */
   int mb;      /* Row block size */
   int lmbloc;  /* Size of last local row block */
   int mblks;   /* Number of local row blocks */
   int iupp;    /* LCM row bound for first diagonal block */
   int upp;     /* LCM row bound for diagonal block */
   int prow;    /* Relative row process coordinate */
   int nprow;   /* Number of process rows */

   int nq;      /* Local number of columns */
   int inb1;    /* Size of first column block (global) */
   int inbloc;  /* Size of first local column block */
   int nb;      /* Column block size */
   int lnbloc;  /* Size of last local column block */
   int nblks;   /* Number of local column blocks */
   int ilow;    /* LCM column bound for first diagonal block */
   int low;     /* LCM column bound for diagonal block */
   int pcol;    /* Relative column process coordinate */
   int npcol;   /* Number of process columns */

   int lcmb;    /* Least common multiple of nprow * mb and npcol * nb */

} PB_VM_T;

/*
 * ---------------------------------------------------------------------
 * #define macro constants
 * ---------------------------------------------------------------------
 */
#define INT   'I' /* type identifiers */
#define SREAL 'S'
#define DREAL 'D'
#define SCPLX 'C'
#define DCPLX 'Z'

/*
 * NOTE(review): this mapping is unconditional in the visible text even
 * though _F2C_CALL_ defaults to _F2C_ADD_; it may have been meant to sit
 * under an upper-case naming check -- confirm against the full file.
 */
#define crot_ CROT