1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  This file includes the standard C libraries, as well as system depen-
12 *  dent include files. All PBLAS routines include this file.
13 *
14 *  ---------------------------------------------------------------------
15 *  Machine Specific PBLAS macros
16 *  ---------------------------------------------------------------------
17 */
18 #define    _HAL_               0       /* Default machine identifier */
19 #define    _T3D_               1    /* Identifier selected by flag T3D */
20 #define    _T3E_               2    /* Identifier selected by flag T3E */
21 /* _MACH_ is chosen from the build flags T3D / T3E, each tested on its own */
22 #ifdef T3D
23 #define    _MACH_              _T3D_
24 #endif
25 #ifdef T3E
26 #define    _MACH_              _T3E_
27 #endif
28 #ifndef _MACH_          /* Nothing selected above: fall back to _HAL_ */
29 #define    _MACH_              _HAL_
30 #endif
31 /*
32 *  CBRATIO is the ratio of the transfer cost per element for the combine
33 *  sum to one process and for the broadcast operation. This value is used
34 *  within the Level 3 PBLAS routines to decide which algorithm to select.
35 *  It is written as a floating-point literal.
36 */
37 #define    CBRATIO             1.3
38 /*
39 *  ---------------------------------------------------------------------
40 *  Include files
41 *  ---------------------------------------------------------------------
42 */
43 #include <stdio.h>
44 #include <stdlib.h>
45 
46 #ifdef __STDC__
47 #include <stdarg.h>
48 #else
49 #include <varargs.h>
50 #endif
51 
52 #if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )
53 #include <fortran.h>
54 #endif
55 /*
56 *  ---------------------------------------------------------------------
57 *  FORTRAN <-> C interface
58 *  ---------------------------------------------------------------------
59 *
60 *  These macros identify how the PBLAS will be called, as follows:
61 *
62 *  _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be
63 *  in all lower case and to have an underscore appended to it (Sun and
64 *  Intel compilers expect this).
65 *
66 *  _F2C_NOCHANGE: the FORTRAN compiler expects the name of  C  functions
67 *  to be in all lower case (IBM RS6K compilers do this).
68 *
69 *  _F2C_UPCASE: the  FORTRAN  compiler expects the name of  C  functions
70 *  to be in all upper case (Cray compilers expect this).
71 *
72 *  _F2C_F77ISF2C: the  FORTRAN  compiler in use is f2c, a  FORTRAN  to C
73 *  converter.
74 */
75 #define    _F2C_ADD_           0    /* lower case + trailing underscore */
76 #define    _F2C_NOCHANGE       1               /* lower case, unchanged */
77 #define    _F2C_UPCASE         2                          /* upper case */
78 #define    _F2C_F77ISF2C       3             /* FORTRAN compiler is f2c */
79 /* One build flag picks _F2C_CALL_; the tests are independent (no #elif) */
80 #ifdef UpCase
81 #define    _F2C_CALL_          _F2C_UPCASE
82 #endif
83 
84 #ifdef NoChange
85 #define    _F2C_CALL_          _F2C_NOCHANGE
86 #endif
87 
88 #ifdef Add_
89 #define    _F2C_CALL_          _F2C_ADD_
90 #endif
91 
92 #ifdef f77IsF2C
93 #define    _F2C_CALL_          _F2C_F77ISF2C
94 #endif
95 /* None of the flags above was defined: default to _F2C_ADD_ */
96 #ifndef _F2C_CALL_
97 #define    _F2C_CALL_          _F2C_ADD_
98 #endif
99 /*
100 *  ---------------------------------------------------------------------
101 *  TYPE DEFINITIONS AND CONVERSION UTILITIES
102 *  ---------------------------------------------------------------------
103 */
104 #if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )
105 /* T3D / T3E branch: from here on the token `float` expands to `double` */
106 #define    float               double
107                       /* Type of character argument in a FORTRAN call */
108 #define    F_CHAR_T            _fcd
109                                     /* Character conversion utilities */
110 #define    F2C_CHAR(a)         ( _fcdtocp( (a) ) )
111 #define    C2F_CHAR(a)         ( _cptofcd( (a), 1 ) )
112                                          /* Type of FORTRAN functions */
113 #define    F_VOID_FCT          void   fortran           /* Subroutine */
114 #define    F_INTG_FCT          int    fortran     /* INTEGER function */
115 /* _fcd, _fcdtocp, _cptofcd: presumably from <fortran.h> -- TODO confirm */
116 #else                 /* Type of character argument in a FORTRAN call */
117 /* Other machines: F_CHAR_T is a char pointer, conversions expand to (a) */
118 typedef    char *              F_CHAR_T;
119                                     /* Character conversion utilities */
120 #define    F2C_CHAR(a)            (a)
121 #define    C2F_CHAR(a)            (a)
122                                          /* Type of FORTRAN functions */
123 #define    F_VOID_FCT             void                  /* Subroutine */
124 #define    F_INTG_FCT             int             /* INTEGER function */
125 
126 #endif
127 /*
128 * ----------------------------------------------------------------------
129 *  #typedef definitions
130 *  ---------------------------------------------------------------------
131 */
132 typedef    float               cmplx  [2];   /* array of two `float` */
133 typedef    double              cmplx16[2];  /* array of two `double` */
134 /* Presumably the indices into cmplx / cmplx16 -- confirm at use sites */
135 #define    REAL_PART           0
136 #define    IMAG_PART           1
137 
138 #ifdef __STDC__
139 /* Pointer types of the BLACS function fields of PBTYP_T; return plain void */
140 typedef void           (*GESD2D_T)   ( int,       int,       int,
141                                        char *,    int,       int,
142                                        int );
143 typedef void           (*GERV2D_T)   ( int,       int,       int,
144                                        char *,    int,       int,
145                                        int );
146 typedef void           (*GEBS2D_T)   ( int,       char *,    char *,
147                                        int,       int,       char *,
148                                        int );
149 typedef void           (*GEBR2D_T)   ( int,       char *,    char *,
150                                        int,       int,       char *,
151                                        int,       int,       int );
152 typedef void           (*GSUM2D_T)   ( int,       char *,    char *,
153                                        int,       int,       char *,
154                                        int,       int,       int );
155 /* F_VOID_FCT (FORTRAN subroutine) pointer types: add, shift, dot, set, pad, scale */
156 typedef F_VOID_FCT     (*MMADD_T)    ( int  *,    int  *,    char *,
157                                        char *,    int  *,    char *,
158                                        char *,    int  * );
159 typedef F_VOID_FCT     (*MMSHFT_T)   ( int  *,    int  *,    int *,
160                                        char *,    int  * );
161 typedef F_VOID_FCT     (*VVDOT_T)    ( int  *,    char *,    char *,
162                                        int  *,    char *,    int  * );
163 typedef F_VOID_FCT     (*VVSET_T)    ( int  *,    char *,    char *,
164                                        int  * );
165 typedef F_VOID_FCT     (*TZPAD_T)    ( F_CHAR_T,  F_CHAR_T,  int  *,
166                                        int  *,    int  *,    char *,
167                                        char *,    char *,    int  * );
168 typedef F_VOID_FCT     (*TZPADCPY_T) ( F_CHAR_T,  F_CHAR_T,  int  *,
169                                        int  *,    int  *,    char *,
170                                        int *,     char *,    int  * );
171 typedef F_VOID_FCT     (*TZSET_T)    ( F_CHAR_T,  int  *,    int  *,
172                                        int  *,    char *,    char *,
173                                        char *,    int  * );
174 typedef F_VOID_FCT     (*TZSCAL_T)   ( F_CHAR_T,  int *,     int  *,
175                                        int  *,    char *,    char *,
176                                        int  * );
177 /* F_VOID_FCT pointer types of the Level 1 BLAS fields of PBTYP_T */
178 typedef F_VOID_FCT     (*AXPY_T)     ( int *,     char *,    char *,
179                                        int *,     char *,    int * );
180 typedef F_VOID_FCT     (*COPY_T)     ( int *,     char *,    int *,
181                                        char *,    int * );
182 typedef F_VOID_FCT     (*SWAP_T)     ( int *,     char *,    int *,
183                                        char *,    int * );
184 /* F_VOID_FCT pointer types of the Level 2 BLAS fields; flags are F_CHAR_T */
185 typedef F_VOID_FCT     (*GEMV_T)     ( F_CHAR_T,  int *,     int *,
186                                        char *,    char *,    int *,
187                                        char *,    int *,     char *,
188                                        char *,    int * );
189 typedef F_VOID_FCT     (*AGEMV_T)    ( F_CHAR_T,  int *,     int *,
190                                        char *,    char *,    int *,
191                                        char *,    int *,     char *,
192                                        char *,    int * );
193 typedef F_VOID_FCT     (*SYMV_T)     ( F_CHAR_T,  int *,     char *,
194                                        char *,    int *,     char *,
195                                        int *,     char *,    char *,
196                                        int * );
197 typedef F_VOID_FCT     (*ASYMV_T)    ( F_CHAR_T,  int *,     char *,
198                                        char *,    int *,     char *,
199                                        int *,     char *,    char *,
200                                        int * );
201 typedef F_VOID_FCT     (*HEMV_T)     ( F_CHAR_T,  int *,     char *,
202                                        char *,    int *,     char *,
203                                        int *,     char *,    char *,
204                                        int * );
205 typedef F_VOID_FCT     (*AHEMV_T)    ( F_CHAR_T,  int *,     char *,
206                                        char *,    int *,     char *,
207                                        int *,     char *,    char *,
208                                        int * );
209 typedef F_VOID_FCT     (*TRMV_T)     ( F_CHAR_T,  F_CHAR_T,  F_CHAR_T,
210                                        int *,     char *,    int *,
211                                        char *,    int * );
212 typedef F_VOID_FCT     (*ATRMV_T)    ( F_CHAR_T,  F_CHAR_T,  F_CHAR_T,
213                                        int *,     char *,    char *,
214                                        int *,     char *,    int *,
215                                        char *,    char *,    int * );
216 typedef F_VOID_FCT     (*TRSV_T)     ( F_CHAR_T,  F_CHAR_T,  F_CHAR_T,
217                                        int *,     char *,    int *,
218                                        char *,    int * );
219 typedef F_VOID_FCT     (*GERC_T)     ( int *,     int *,     char *,
220                                        char *,    int *,     char *,
221                                        int *,     char *,    int * );
222 typedef F_VOID_FCT     (*GERU_T)     ( int *,     int *,     char *,
223                                        char *,    int *,     char *,
224                                        int *,     char *,    int * );
225 typedef F_VOID_FCT     (*SYR_T)      ( F_CHAR_T,  int *,     char *,
226                                        char *,    int *,     char *,
227                                        int * );
228 typedef F_VOID_FCT     (*HER_T)      ( F_CHAR_T,  int *,     char *,
229                                        char *,    int *,     char *,
230                                        int * );
231 typedef F_VOID_FCT     (*SYR2_T)     ( F_CHAR_T,  int *,     char *,
232                                        char *,    int *,     char *,
233                                        int *,     char *,    int * );
234 typedef F_VOID_FCT     (*HER2_T)     ( F_CHAR_T,  int *,     char *,
235                                        char *,    int *,     char *,
236                                        int *,     char *,    int * );
237 /* F_VOID_FCT pointer types of the Level 3 BLAS fields; flags are F_CHAR_T */
238 typedef F_VOID_FCT     (*GEMM_T)     ( F_CHAR_T,  F_CHAR_T,  int *,
239                                        int *,     int *,     char *,
240                                        char *,    int *,     char *,
241                                        int *,     char *,    char *,
242                                        int * );
243 typedef F_VOID_FCT     (*SYMM_T)     ( F_CHAR_T,  F_CHAR_T,  int *,
244                                        int *,     char *,    char *,
245                                        int *,     char *,    int *,
246                                        char *,    char *,    int * );
247 typedef F_VOID_FCT     (*HEMM_T)     ( F_CHAR_T,  F_CHAR_T,  int *,
248                                        int *,     char *,    char *,
249                                        int *,     char *,    int *,
250                                        char *,    char *,    int * );
251 typedef F_VOID_FCT     (*SYRK_T)     ( F_CHAR_T,  F_CHAR_T,  int *,
252                                        int *,     char *,    char *,
253                                        int *,     char *,    char *,
254                                        int * );
255 typedef F_VOID_FCT     (*HERK_T)     ( F_CHAR_T,  F_CHAR_T,  int *,
256                                        int *,     char *,    char *,
257                                        int *,     char *,    char *,
258                                        int * );
259 typedef F_VOID_FCT     (*SYR2K_T)    ( F_CHAR_T,  F_CHAR_T,  int *,
260                                        int *,     char *,    char *,
261                                        int *,     char *,    int *,
262                                        char *,    char *,    int * );
263 typedef F_VOID_FCT     (*HER2K_T)    ( F_CHAR_T,  F_CHAR_T,  int *,
264                                        int *,     char *,    char *,
265                                        int *,     char *,    int *,
266                                        char *,    char *,    int * );
267 typedef F_VOID_FCT     (*TRMM_T)     ( F_CHAR_T,  F_CHAR_T,  F_CHAR_T,
268                                        F_CHAR_T,  int *,     int *,
269                                        char *,    char *,    int *,
270                                        char *,    int * );
271 typedef F_VOID_FCT     (*TRSM_T)     ( F_CHAR_T,  F_CHAR_T,  F_CHAR_T,
272                                        F_CHAR_T,  int *,     int *,
273                                        char *,    char *,    int *,
274                                        char *,    int * );
275 
276 #else
277 /* __STDC__ not defined: same pointer type names, no parameter lists */
278 typedef void           (*GESD2D_T)   ();
279 typedef void           (*GERV2D_T)   ();
280 typedef void           (*GEBS2D_T)   ();
281 typedef void           (*GEBR2D_T)   ();
282 typedef void           (*GSUM2D_T)   ();
283 
284 typedef F_VOID_FCT     (*MMADD_T)    ();
285 typedef F_VOID_FCT     (*MMSHFT_T)   ();
286 typedef F_VOID_FCT     (*VVDOT_T)    ();
287 typedef F_VOID_FCT     (*VVSET_T)    ();
288 typedef F_VOID_FCT     (*TZPAD_T)    ();
289 typedef F_VOID_FCT     (*TZPADCPY_T) ();
290 typedef F_VOID_FCT     (*TZSET_T)    ();
291 typedef F_VOID_FCT     (*TZSCAL_T)   ();
292 
293 typedef F_VOID_FCT     (*AXPY_T)     ();
294 typedef F_VOID_FCT     (*COPY_T)     ();
295 typedef F_VOID_FCT     (*SWAP_T)     ();
296 
297 typedef F_VOID_FCT     (*GEMV_T)     ();
298 typedef F_VOID_FCT     (*AGEMV_T)    ();
299 typedef F_VOID_FCT     (*SYMV_T)     ();
300 typedef F_VOID_FCT     (*ASYMV_T)    ();
301 typedef F_VOID_FCT     (*HEMV_T)     ();
302 typedef F_VOID_FCT     (*AHEMV_T)    ();
303 typedef F_VOID_FCT     (*TRMV_T)     ();
304 typedef F_VOID_FCT     (*ATRMV_T)    ();
305 typedef F_VOID_FCT     (*TRSV_T)     ();
306 typedef F_VOID_FCT     (*GERC_T)     ();
307 typedef F_VOID_FCT     (*GERU_T)     ();
308 typedef F_VOID_FCT     (*SYR_T)      ();
309 typedef F_VOID_FCT     (*HER_T)      ();
310 typedef F_VOID_FCT     (*SYR2_T)     ();
311 typedef F_VOID_FCT     (*HER2_T)     ();
312 
313 typedef F_VOID_FCT     (*GEMM_T)     ();
314 typedef F_VOID_FCT     (*SYMM_T)     ();
315 typedef F_VOID_FCT     (*HEMM_T)     ();
316 typedef F_VOID_FCT     (*SYRK_T)     ();
317 typedef F_VOID_FCT     (*HERK_T)     ();
318 typedef F_VOID_FCT     (*SYR2K_T)    ();
319 typedef F_VOID_FCT     (*HER2K_T)    ();
320 typedef F_VOID_FCT     (*TRMM_T)     ();
321 typedef F_VOID_FCT     (*TRSM_T)     ();
322 
323 #endif
324 /* PBTYP_T: one data type's code, sizes, constants and function pointers */
325 typedef struct
326 {
327    char           type;                  /* Encoding of the data type */
328    int            usiz;    /* length in bytes of elementary data type */
329    int            size;               /* length in bytes of data type */
330 
331    char           * zero,
332                   * one,
333                   * negone;  /* pointers to constants of correct type */
334 
335    GESD2D_T       Cgesd2d;                         /* BLACS functions */
336    GERV2D_T       Cgerv2d;
337    GEBS2D_T       Cgebs2d;
338    GEBR2D_T       Cgebr2d;
339    GSUM2D_T       Cgsum2d;
340 
341    MMADD_T        Fmmadd;                       /* Addition functions */
342    MMADD_T        Fmmcadd;
343    MMADD_T        Fmmtadd;
344    MMADD_T        Fmmtcadd;
345    MMADD_T        Fmmdda;
346    MMADD_T        Fmmddac;
347    MMADD_T        Fmmddat;
348    MMADD_T        Fmmddact;
349 
350    MMSHFT_T       Fcshft;                          /* Shift functions */
351    MMSHFT_T       Frshft;
352 
353    VVDOT_T        Fvvdotu;                           /* Dot functions */
354    VVDOT_T        Fvvdotc;
355 
356    TZPAD_T        Ftzpad;                       /* Array pad function */
357    TZPADCPY_T     Ftzpadcpy;
358    VVSET_T        Fset;
359 
360    TZSCAL_T       Ftzscal;                       /* Scaling functions */
361    TZSCAL_T       Fhescal;
362    TZSCAL_T       Ftzcnjg;
363 
364    AXPY_T         Faxpy;                              /* Level 1 BLAS */
365    COPY_T         Fcopy;
366    SWAP_T         Fswap;
367 
368    GEMV_T         Fgemv;                              /* Level 2 BLAS */
369    SYMV_T         Fsymv;
370    HEMV_T         Fhemv;
371    TRMV_T         Ftrmv;
372    TRSV_T         Ftrsv;
373 
374    AGEMV_T        Fagemv;
375    ASYMV_T        Fasymv;
376    AHEMV_T        Fahemv;
377    ATRMV_T        Fatrmv;
378 
379    GERC_T         Fgerc;
380    GERU_T         Fgeru;
381    SYR_T          Fsyr;
382    HER_T          Fher;
383    SYR2_T         Fsyr2;
384    HER2_T         Fher2;
385 
386    GEMM_T         Fgemm;                              /* Level 3 BLAS */
387    SYMM_T         Fsymm;
388    HEMM_T         Fhemm;
389    SYRK_T         Fsyrk;
390    HERK_T         Fherk;
391    SYR2K_T        Fsyr2k;
392    HER2K_T        Fher2k;
393    TRMM_T         Ftrmm;
394    TRSM_T         Ftrsm;
395 
396 } PBTYP_T;
397 
398 #ifdef __STDC__
399 /* Pointer types for void routines whose first argument is a PBTYP_T * */
400 typedef void           (*TZSYR_T)    ( PBTYP_T *, char *,    int,
401                                        int,       int,       int,
402                                        char *,    char *,    int,
403                                        char *,    int,       char *,
404                                        int );
405 typedef void           (*TZSYR2_T)   ( PBTYP_T *, char *,    int,
406                                        int,       int,       int,
407                                        char *,    char *,    int,
408                                        char *,    int,       char *,
409                                        int,       char *,    int,
410                                        char *,    int );
411 typedef void           (*TZTRM_T)    ( PBTYP_T *, char *,    char *,
412                                        char *,    char *,    int,
413                                        int,       int,       int,
414                                        char *,    char *,    int,
415                                        char *,    int,       char *,
416                                        int );
417 typedef void           (*TZSYM_T)    ( PBTYP_T *, char *,    char *,
418                                        int,       int,       int,
419                                        int,       char *,    char *,
420                                        int,       char *,    int,
421                                        char *,    int,       char *,
422                                        int,       char *,    int );
423 #else
424 /* __STDC__ not defined: same pointer type names, no parameter lists */
425 typedef void           (*TZSYR_T)    ();
426 typedef void           (*TZSYR2_T)   ();
427 typedef void           (*TZTRM_T)    ();
428 typedef void           (*TZSYM_T)    ();
429 
430 #endif
431 /* PB_VM_T: diagonal offset and LCM values plus row / column block layout */
432 typedef struct
433 {
434    int offd;                                /* Global diagonal offset */
435    int lcmt00;                            /* LCM value of first block */
436                                              /* Row-related members */
437    int mp;                                    /* Local number of rows */
438    int imb1;                      /* Size of first row block (global) */
439    int imbloc;                       /* Size of first local row block */
440    int mb;                                          /* Row block size */
441    int lmbloc;                        /* Size of last local row block */
442    int mblks;                           /* Number of local row blocks */
443    int iupp;                /* LCM row bound for first diagonal block */
444    int upp;                       /* LCM row bound for diagonal block */
445    int prow;                       /* Relative row process coordinate */
446    int nprow;                               /* Number of process rows */
447                                           /* Column-related members */
448    int nq;                                 /* Local number of columns */
449    int inb1;                   /* Size of first column block (global) */
450    int inbloc;                    /* Size of first local column block */
451    int nb;                                       /* Column block size */
452    int lnbloc;                     /* Size of last local column block */
453    int nblks;                        /* Number of local column blocks */
454    int ilow;             /* LCM column bound for first diagonal block */
455    int low;                    /* LCM column bound for diagonal block */
456    int pcol;                    /* Relative column process coordinate */
457    int npcol;                            /* Number of process columns */
458 
459    int lcmb;    /* Least common multiple of nprow * mb and npcol * nb */
460 
461 } PB_VM_T;
462 
463 /*
464 *  ---------------------------------------------------------------------
465 *  #define macro constants
466 *  ---------------------------------------------------------------------
467 */
468 #define    INT                 'I'                /* type identifiers */
469 #define    SREAL               'S'  /* presumably values of PBTYP_T.type */
470 #define    DREAL               'D'               /* -- TODO confirm -- */
471 #define    SCPLX               'C'
472 #define    DCPLX               'Z'
473 /* NOTE(review): crot_ -> CROT is unconditional here, not tied to _F2C_CALL_ */
474 #define crot_ CROT
475