1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS auxiliary routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
PB_Cptrm(PBTYP_T * TYPE,PBTYP_T * UTYP,char * SIDE,char * UPLO,char * TRANS,char * DIAG,int N,int K,char * ALPHA,char * A,int IA,int JA,int * DESCA,char * X,int LDX,char * Y,int LDY,TZTRM_T TRM)20 void PB_Cptrm( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE, char * UPLO,
21                char * TRANS, char * DIAG, int N, int K, char * ALPHA,
22                char * A, int IA, int JA, int * DESCA,
23                char * X, int LDX, char * Y, int LDY,
24                TZTRM_T TRM )
25 #else
26 void PB_Cptrm( TYPE, UTYP, SIDE, UPLO, TRANS, DIAG, N, K, ALPHA, A,
27                IA, JA, DESCA, X, LDX, Y, LDY, TRM )
28 /*
29 *  .. Scalar Arguments ..
30 */
31    char           * DIAG, * SIDE, * TRANS, * UPLO;
32    int            IA, JA, K, LDX, LDY, N;
33    char           * ALPHA;
34    PBTYP_T        * TYPE, * UTYP;
35    TZTRM_T        TRM;
36 /*
37 *  .. Array Arguments ..
38 */
39    int            * DESCA;
40    char           * A, * X, * Y;
41 #endif
42 {
43 /*
44 *  Purpose
45 *  =======
46 *
47 *  PB_Cptrm  performs a triangular matrix-matrix or matrix-vector multi-
48 *  plication.  In the following, sub( A )  denotes the triangular subma-
49 *  trix operand A( IA:IA+N-1, JA:JA+N-1 ).
50 *
51 *  Notes
52 *  =====
53 *
54 *  A description  vector  is associated with each 2D block-cyclicly dis-
55 *  tributed matrix.  This  vector  stores  the  information  required to
56 *  establish the  mapping  between a  matrix entry and its corresponding
57 *  process and memory location.
58 *
59 *  In  the  following  comments,   the character _  should  be  read  as
60 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
61 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
62 *
63 *  NOTATION         STORED IN       EXPLANATION
64 *  ---------------- --------------- ------------------------------------
65 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
67 *                                   the NPROW x NPCOL BLACS process grid
68 *                                   A  is  distributed over. The context
69 *                                   itself  is  global,  but  the handle
70 *                                   (the integer value) may vary.
71 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
72 *                                   ted matrix A, M_A >= 0.
73 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
74 *                                   buted matrix A, N_A >= 0.
75 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
76 *                                   block of the matrix A, IMB_A > 0.
77 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
78 *                                   left   block   of   the  matrix   A,
79 *                                   INB_A > 0.
80 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
81 *                                   bute the last  M_A-IMB_A  rows of A,
82 *                                   MB_A > 0.
83 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
84 *                                   bute the last  N_A-INB_A  columns of
85 *                                   A, NB_A > 0.
86 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
87 *                                   row of the matrix  A is distributed,
88 *                                   NPROW > RSRC_A >= 0.
89 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
90 *                                   first column of  A  is  distributed.
91 *                                   NPCOL > CSRC_A >= 0.
92 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
93 *                                   array  storing  the  local blocks of
94 *                                   the distributed matrix A,
95 *                                   IF( Lc( 1, N_A ) > 0 )
96 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
97 *                                   ELSE
98 *                                      LLD_A >= 1.
99 *
100 *  Let K be the number of  rows of a matrix A starting at the global in-
101 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103 *  receive if these K rows were distributed over NPROW processes.  If  K
104 *  is the number of columns of a matrix  A  starting at the global index
105 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
106 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
107 *  these K columns were distributed over NPCOL processes.
108 *
109 *  The values of Lr() and Lc() may be determined via a call to the func-
110 *  tion PB_Cnumroc:
111 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113 *
114 *  Arguments
115 *  =========
116 *
117 *  TYPE    (local input) pointer to a PBTYP_T structure
118 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
119 *          that contains type information (See pblas.h).
120 *
121 *  UTYP    (local input) pointer to a PBTYP_T structure
122 *          On entry,  UTYP  is a pointer to a structure of type PBTYP_T,
123 *          that contains type information for the Y's (See pblas.h).
124 *
125 *  SIDE    (global input) pointer to CHAR
126 *          On entry,  SIDE  specifies whether  op( sub( A ) ) multiplies
127 *          its operand X from the left or right as follows:
128 *
129 *          SIDE = 'L' or 'l'  Y := alpha*op( sub( A ) )*X + Y,
130 *
131 *          SIDE = 'R' or 'r'  Y := alpha*X*op( sub( A ) ) + Y.
132 *
133 *  UPLO    (global input) pointer to CHAR
134 *          On entry,  UPLO  specifies whether the submatrix  sub( A ) is
135 *          an upper or lower triangular submatrix as follows:
136 *
137 *             UPLO = 'U' or 'u'   sub( A ) is an upper triangular
138 *                                 submatrix,
139 *
140 *             UPLO = 'L' or 'l'   sub( A ) is a  lower triangular
141 *                                 submatrix.
142 *
143 *  TRANS   (global input) pointer to CHAR
144 *          On entry,  TRANS  specifies the  operation to be performed as
145 *          follows:
146 *
147 *             TRANS = 'N' or 'n'  Y := alpha * sub( A )  * X + Y,
148 *
149 *             TRANS = 'T' or 't'  Y := alpha * sub( A )' * X + Y,
150 *
151 *             TRANS = 'C' or 'c'  Y := alpha * sub( A )' * X + Y, or
152 *                                 Y := alpha * conjg(sub( A )') * X + Y.
153 *
154 *  DIAG    (global input) pointer to CHAR
155 *          On entry,  DIAG  specifies  whether or not  sub( A )  is unit
156 *          triangular as follows:
157 *
158 *             DIAG = 'U' or 'u'  sub( A )  is  assumed to be unit trian-
159 *                                gular,
160 *
161 *             DIAG = 'N' or 'n'  sub( A ) is not assumed to be unit tri-
162 *                                angular.
163 *
164 *  N       (global input) INTEGER
165 *          On entry,  N specifies the order of the  submatrix  sub( A ).
166 *          N must be at least zero.
167 *
168 *  K       (global input) INTEGER
169 *          On entry, K  specifies the local number of columns of the lo-
170 *          cal array X and the local number of rows of  the  local array
171 *          Y when SIDE is 'L' or 'l' and TRANS is 'N' or 'n', or SIDE is
172 *          'R' or 'r' and  TRANS  is 'T', 't', 'C' or 'c'.  Otherwise, K
173 *          specifies  the  local number of rows of the local array X and
174 *          the local number of columns of the local array Y. K mut be at
175 *          least zero.
176 *
177 *  ALPHA   (global input) pointer to CHAR
178 *          On entry, ALPHA specifies the scalar alpha.
179 *
180 *  A       (local input) pointer to CHAR
181 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
182 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
183 *          the local entries of the matrix A.
184 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
185 *          the local entries corresponding to the upper triangular  part
186 *          of the  triangular submatrix  sub( A ), and the local entries
187 *          corresponding to the  strictly lower triangular  of  sub( A )
188 *          are not  referenced.
189 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
190 *          the local entries corresponding to the lower triangular  part
191 *          of the  triangular submatrix  sub( A ), and the local entries
192 *          corresponding to the  strictly upper triangular  of  sub( A )
193 *          are not  referenced.
194 *          Note  that  when DIAG = 'U' or 'u', the local entries corres-
195 *          ponding to the  diagonal elements  of the submatrix  sub( A )
196 *          are not referenced either, but are assumed to be unity.
197 *
198 *  IA      (global input) INTEGER
199 *          On entry, IA  specifies A's global row index, which points to
200 *          the beginning of the submatrix sub( A ).
201 *
202 *  JA      (global input) INTEGER
203 *          On entry, JA  specifies A's global column index, which points
204 *          to the beginning of the submatrix sub( A ).
205 *
206 *  DESCA   (global and local input) INTEGER array
207 *          On entry, DESCA  is an integer array of dimension DLEN_. This
208 *          is the array descriptor for the matrix A.
209 *
210 *  X       (local input) pointer to CHAR
211 *          On entry,  X  is  an array of dimension (LDX,Kx), where Kx is
212 *          at least Lc( JA, N ) when SIDE is 'L' or 'l' and TRANS is 'N'
213 *          or 'n', or SIDE is 'R' or 'r' and  TRANS  is 'T', 't', 'C' or
214 *          'c', and K otherwise. Before  entry, this array contains  the
215 *          local entries of the matrix X.
216 *
217 *  LDX     (local input) INTEGER
218 *          On entry, LDX specifies the leading dimension of the array X.
219 *          LDX must be at least K when SIDE is 'L' or 'l' and  TRANS  is
220 *          'N' or 'n', or SIDE is 'R' or 'r' and TRANS  is 'T', 't', 'C'
221 *          or 'c', and max( 1, Lp( IA, N ) ) otherwise.
222 *
223 *  Y       (local input/local output) pointer to CHAR
224 *          On entry, Y is an array of dimension ( LDY, Ky ), where Ky is
225 *          at least max( 1, K ) when SIDE is 'L' or 'l' and TRANS is 'N'
226 *          or 'n', or SIDE is 'R' or 'r' and  TRANS  is 'T', 't', 'C' or
227 *          'c', and max( 1, Lc( JA, N ) ) otherwise. Before  entry, this
228 *          array contains the local entries of the matrix  Y.  On  exit,
229 *          this array contains the updated vector Y.
230 *
231 *  LDY     (local input) INTEGER
232 *          On entry, LDY specifies the leading dimension of the array Y.
233 *          LDY  must  be at least max( 1, Lr( IA, N ) ) when SIDE is 'L'
234 *          or 'l' and  TRANS  is 'N' or 'n', or  SIDE is 'R' or 'r' and
235 *          TRANS  is 'T', 't', 'C' or 'c', and max( 1, K ) otherwise.
236 *
237 *  TRM     (local input) pointer to function of type TZTRM_T
238 *          On entry, TRM  specifies the function performing the triangu-
239 *          lar multiplication of a single block.
240 *
241 *  -- Written on April 1, 1998 by
242 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
243 *
244 *  ---------------------------------------------------------------------
245 */
246 /*
247 *  .. Local Scalars ..
248 */
249    int            Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
250                   Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
251                   IsRowRepl, Xinc, Yinc, XYii=0, XYjj=0, XYoffi=-1, XYoffj=-1,
252                   XisRow, iimax, ilow, imbloc, inbloc, ioffd, ioffxy, iupp,
253                   jjmax, joffd, joffxy, lcmt, lcmt00, lmbloc, lnbloc, low,
254                   lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc,
255                   nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
256 /* ..
257 *  .. Executable Statements ..
258 *
259 */
260 /*
261 *  Quick return if possible
262 */
263    if( N <= 0 ) return;
264 /*
265 *  Retrieve process grid information
266 */
267    Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol );
268 /*
269 *  Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
270 */
271    PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
272                  &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
273 /*
274 *  Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in
275 *  all processes.
276 */
277    if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
278 
279    IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
280    IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
281    Amb  = DESCA[ MB_ ]; Anb  = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ];
282    size = TYPE->size;
283 
284    if( IsRowRepl && IsColRepl )
285    {
286       TRM( TYPE, SIDE, UPLO, TRANS, DIAG, Amp, Anq, K, 0, ALPHA,
287            Mptr( A, Aii, Ajj, Ald, size ), Ald, X, LDX, Y, LDY );
288       return;
289    }
290 
291    if( Mupcase( SIDE[0] ) == CLEFT )
292    {
293       if( Mupcase( TRANS[0] ) == CNOTRAN )
294            { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; }
295       else { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; }
296    }
297    else
298    {
299       if( Mupcase( TRANS[0] ) == CNOTRAN )
300            { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->size; }
301       else { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->size; }
302    }
303    upper = ( Mupcase( UPLO[0] ) == CUPPER );
304    lower = ( Mupcase( UPLO[0] ) == CLOWER );
305 /*
306 *  Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low,
307 *  iupp, and upp.
308 */
309    PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
310               &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
311               &iupp, &upp );
312 
313    iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
314    jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
315    pmb   = ( IsRowRepl ? Amb : nprow * Amb );
316    qnb   = ( IsColRepl ? Anb : npcol * Anb );
317 /*
318 *  Handle separately the first row and/or column of the LCM table. Update the
319 *  LCM value of the curent block lcmt00, as well as the number of rows and
320 *  columns mblks and nblks remaining in the LCM table.
321 */
322    GoSouth = ( lcmt00 > iupp );
323    GoEast  = ( lcmt00 < ilow );
324 
325    if( XisRow )
326    {
327 /*
328 *  Go through the table looking for blocks owning diagonal entries.
329 */
330       if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
331       {
332 /*
333 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
334 */
335          TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
336               Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
337               Y+XYii*Yinc, LDY );
338 /*
339 *  Decide whether one should go south or east in the table: Go east if
340 *  the block below the current one only owns lower entries. If this block,
341 *  however, owns diagonals, then go south.
342 */
343          GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
344 
345          if( GoSouth )
346          {
347 /*
348 *  When the upper triangular part of sub( A ) should be operated with and
349 *  one is planning to go south in the table, it is neccessary to take care
350 *  of the remaining columns of these imbloc rows immediately.
351 */
352             if( upper && ( Anq > inbloc ) )
353             {
354                tmp1 = Anq - inbloc;
355                TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
356                     Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald,
357                     X+(XYjj+inbloc)*Xinc, LDX, Y+XYii*Yinc, LDY );
358             }
359             Aii += imbloc; XYii += imbloc; m1 -= imbloc;
360          }
361          else
362          {
363 /*
364 *  When the lower triangular part of sub( A ) should be operated with and
365 *  one is planning to go east in the table, it is neccessary to take care
366 *  of the remaining rows of these inbloc columns immediately.
367 */
368             if( lower && ( Amp > imbloc ) )
369             {
370                tmp1 = Amp - imbloc;
371                TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
372                     Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, X+XYjj*Xinc,
373                     LDX, Y+(XYii+imbloc)*Yinc, LDY );
374             }
375             Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
376          }
377       }
378 
379       if( GoSouth )
380       {
381 /*
382 *  Go one step south in the LCM table. Adjust the current LCM value as well as
383 *  the local row indexes in A and XC.
384 */
385          lcmt00 -= ( iupp - upp + pmb ); mblks--;
386          Aoffi  += imbloc; XYoffi += imbloc;
387 /*
388 *  While there are blocks remaining that own upper entries, keep going south.
389 *  Adjust the current LCM value as well as the local row indexes in A and XC.
390 */
391          while( ( mblks > 0 ) && ( lcmt00 > upp ) )
392          { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
393 /*
394 *  Operate with the upper triangular part of sub( A ) we just skipped when
395 *  necessary.
396 */
397          tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
398          if( upper && ( tmp1 > 0 ) )
399          {
400             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
401                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
402                  LDX, Y+XYii*Yinc, LDY );
403             Aii += tmp1; XYii += tmp1; m1  -= tmp1;
404          }
405 /*
406 *  Return if no more row in the LCM table.
407 */
408          if( mblks <= 0 ) return;
409 /*
410 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
411 *  Save the current position in the LCM table. After this column has been
412 *  completely taken care of, re-start from this row and the next column of
413 *  the LCM table.
414 */
415          lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
416 
417          mbloc = Amb;
418          while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
419          {
420 /*
421 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
422 */
423             if( mblkd == 1 ) mbloc = lmbloc;
424             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
425                  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
426                  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
427             lcmt00 = lcmt;  lcmt   -= pmb;
428             mblks  = mblkd; mblkd--;
429             Aoffi  = ioffd; XYoffi  = ioffxy;
430             ioffd += mbloc; ioffxy += mbloc;
431          }
432 /*
433 *  Operate with the lower triangular part of sub( A ).
434 */
435          tmp1 = m1 - ioffd + Aii - 1;
436          if( lower && ( tmp1 > 0 ) )
437          {
438             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
439                  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
440                  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
441          }
442          tmp1    = Aoffi - Aii + 1;
443          m1     -= tmp1;
444          n1     -= inbloc;
445          lcmt00 += low - ilow + qnb;
446          nblks--;
447          Aoffj  += inbloc;
448          XYoffj += inbloc;
449 /*
450 *  Operate with the upper triangular part of sub( A ).
451 */
452          if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
453          {
454             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
455                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
456                  LDX, Y+XYii*Yinc, LDY );
457          }
458          Aii  = Aoffi  + 1; Ajj  = Aoffj  + 1;
459          XYii = XYoffi + 1; XYjj = XYoffj + 1;
460       }
461       else if( GoEast )
462       {
463 /*
464 *  Go one step east in the LCM table. Adjust the current LCM value as well as
465 *  the local column index in A and XR.
466 */
467          lcmt00 += low - ilow + qnb; nblks--;
468          Aoffj  += inbloc; XYoffj += inbloc;
469 /*
470 *  While there are blocks remaining that own lower entries, keep going east.
471 *  Adjust the current LCM value as well as the local column index in A and XR.
472 */
473          while( ( nblks > 0 ) && ( lcmt00 < low ) )
474          { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
475 /*
476 *  Operate with the lower triangular part of sub( A ).
477 */
478          tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
479          if( lower && ( tmp1 > 0 ) )
480          {
481             TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
482                  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
483                  Y+XYii*Yinc, LDY );
484             Ajj += tmp1; XYjj += tmp1; n1  -= tmp1;
485          }
486 /*
487 *  Return if no more column in the LCM table.
488 */
489          if( nblks <= 0 ) return;
490 /*
491 *  lcmt00 >= low. The current block owns either diagonals or upper entries.
492 *  Save the current position in the LCM table. After this row has been
493 *  completely taken care of, re-start from this column and the next row of
494 *  the LCM table.
495 */
496          lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
497 
498          nbloc = Anb;
499          while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
500          {
501 /*
502 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
503 */
504             if( nblkd == 1 ) nbloc = lnbloc;
505             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
506                  ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald,
507                  X+(joffxy+1)*Xinc, LDX, Y+XYii*Yinc, LDY );
508             lcmt00 = lcmt;  lcmt   += qnb;
509             nblks  = nblkd; nblkd--;
510             Aoffj  = joffd; XYoffj  = joffxy;
511             joffd += nbloc; joffxy += nbloc;
512          }
513 /*
514 *  Operate with the upper triangular part of sub( A ).
515 */
516          tmp1 = n1 - joffd + Ajj - 1;
517          if( upper && ( tmp1 > 0 ) )
518          {
519             TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
520                  Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+(joffxy+1)*Xinc,
521                  LDX, Y+XYii*Yinc, LDY );
522          }
523          tmp1    = Aoffj - Ajj + 1;
524          m1     -= imbloc;
525          n1     -= tmp1;
526          lcmt00 -= ( iupp - upp + pmb );
527          mblks--;
528          Aoffi  += imbloc;
529          XYoffi += imbloc;
530 /*
531 *  Operate with the lower triangular part of sub( A ).
532 */
533          if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
534          {
535             TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
536                  Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
537                  Y+(XYoffi+1)*Yinc, LDY );
538          }
539          Aii  = Aoffi  + 1; Ajj  = Aoffj  + 1;
540          XYii = XYoffi + 1; XYjj = XYoffj + 1;
541       }
542 /*
543 *  Loop over the remaining columns of the LCM table.
544 */
545       nbloc = Anb;
546       while( nblks > 0 )
547       {
548          if( nblks == 1 ) nbloc = lnbloc;
549 /*
550 *  While there are blocks remaining that own upper entries, keep going south.
551 *  Adjust the current LCM value as well as the local row index in A and XC.
552 */
553          while( ( mblks > 0 ) && ( lcmt00 > upp ) )
554          { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
555 /*
556 *  Operate with the upper triangular part of sub( A ).
557 */
558          tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
559          if( upper && ( tmp1 > 0 ) )
560          {
561             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
562                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
563                  LDX, Y+XYii*Yinc, LDY );
564             Aii  += tmp1;
565             XYii += tmp1;
566             m1   -= tmp1;
567          }
568 /*
569 *  Return if no more row in the LCM table.
570 */
571          if( mblks <= 0 ) return;
572 /*
573 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
574 *  Save the current position in the LCM table. After this column has been
575 *  completely taken care of, re-start from this row and the next column of
576 *  the LCM table.
577 */
578          lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
579 
580          mbloc = Amb;
581          while( ( mblkd > 0 ) && ( lcmt >= low ) )
582          {
583 /*
584 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
585 */
586             if( mblkd == 1 ) mbloc = lmbloc;
587             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
588                  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
589                  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
590             lcmt00 = lcmt;  lcmt   -= pmb;
591             mblks  = mblkd; mblkd--;
592             Aoffi  = ioffd; XYoffi = ioffxy;
593             ioffd += mbloc; ioffxy += mbloc;
594          }
595 /*
596 *  Operate with the lower triangular part of sub( A ).
597 */
598          tmp1 = m1 - ioffd + Aii - 1;
599          if( lower && ( tmp1 > 0 ) )
600          {
601             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
602                  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
603                  X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
604          }
605 
606          tmp1    = MIN( Aoffi, iimax ) - Aii + 1;
607          m1     -= tmp1;
608          n1     -= nbloc;
609          lcmt00 += qnb;
610          nblks--;
611          Aoffj  += nbloc;
612          XYoffj += nbloc;
613 /*
614 *  Operate with the upper triangular part of sub( A ).
615 */
616          if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
617          {
618             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
619                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
620                  LDX, Y+XYii*Yinc, LDY );
621          }
622          Aii  = Aoffi  + 1;  Ajj = Aoffj  + 1;
623          XYii = XYoffi + 1; XYjj = XYoffj + 1;
624       }
625    }
626    else
627    {
628 /*
629 *  Go through the table looking for blocks owning diagonal entries.
630 */
631       if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
632       {
633 /*
634 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
635 */
636          TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
637               Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
638               Y+XYjj*Yinc, LDY );
639 /*
640 *  Decide whether one should go south or east in the table: Go east if
641 *  the block below the current one only owns lower entries. If this block,
642 *  however, owns diagonals, then go south.
643 */
644          GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
645 
646          if( GoSouth )
647          {
648 /*
649 *  When the upper triangular part of sub( A ) should be operated with and
650 *  one is planning to go south in the table, it is neccessary to take care
651 *  of the remaining columns of these imbloc rows immediately.
652 */
653             if( upper && ( Anq > inbloc ) )
654             {
655                tmp1 = Anq - inbloc;
656                TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
657                     Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, X+XYii*Xinc,
658                     LDX, Y+(XYjj+inbloc)*Yinc, LDY );
659             }
660             Aii += imbloc; XYii += imbloc; m1 -= imbloc;
661          }
662          else
663          {
664 /*
665 *  When the lower triangular part of sub( A ) should be operated with and
666 *  one is planning to go east in the table, it is neccessary to take care
667 *  of the remaining rows of these inbloc columns immediately.
668 */
669             if( lower && ( Amp > imbloc ) )
670             {
671                tmp1 = Amp - imbloc;
672                TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
673                     Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald,
674                     X+(XYii+imbloc)*Xinc, LDX, Y+XYjj*Yinc, LDY );
675             }
676             Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
677          }
678       }
679 
680       if( GoSouth )
681       {
682 /*
683 *  Go one step south in the LCM table. Adjust the current LCM value as well as
684 *  the local row indexes in A and XC.
685 */
686          lcmt00 -= ( iupp - upp + pmb ); mblks--;
687          Aoffi  += imbloc; XYoffi  += imbloc;
688 /*
689 *  While there are blocks remaining that own upper entries, keep going south.
690 *  Adjust the current LCM value as well as the local row indexes in A and XC.
691 */
692          while( ( mblks > 0 ) && ( lcmt00 > upp ) )
693          { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
694 /*
695 *  Operate with the upper triangular part of sub( A ) we just skipped when
696 *  necessary.
697 */
698          tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
699          if( upper && ( tmp1 > 0 ) )
700          {
701             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
702                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
703                  Y+(XYoffj+1)*Yinc, LDY );
704             Aii += tmp1; XYii += tmp1; m1  -= tmp1;
705          }
706 /*
707 *  Return if no more row in the LCM table.
708 */
709          if( mblks <= 0 ) return;
710 /*
711 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
712 *  Save the current position in the LCM table. After this column has been
713 *  completely taken care of, re-start from this row and the next column of
714 *  the LCM table.
715 */
716          lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
717 
718          mbloc = Amb;
719          while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
720          {
721 /*
722 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
723 */
724             if( mblkd == 1 ) mbloc = lmbloc;
725             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
726                  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
727                  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
728             lcmt00 = lcmt;  lcmt   -= pmb;
729             mblks  = mblkd; mblkd--;
730             Aoffi  = ioffd; XYoffi  = ioffxy;
731             ioffd += mbloc; ioffxy += mbloc;
732          }
733 /*
734 *  Operate with the lower triangular part of sub( A ).
735 */
736          tmp1 = m1 - ioffd + Aii - 1;
737          if( lower && ( tmp1 > 0 ) )
738          {
739             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
740                  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
741                  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
742          }
743          tmp1    = Aoffi - Aii + 1;
744          m1     -= tmp1;
745          n1     -= inbloc;
746          lcmt00 += low - ilow + qnb;
747          nblks--;
748          Aoffj  += inbloc;
749          XYoffj += inbloc;
750 /*
751 *  Operate with the upper triangular part of sub( A ).
752 */
753          if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
754          {
755             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
756                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
757                  Y+(XYoffj+1)*Yinc, LDY );
758          }
759          Aii  = Aoffi  + 1; Ajj  = Aoffj  + 1;
760          XYii = XYoffi + 1; XYjj = XYoffj + 1;
761       }
762       else if( GoEast )
763       {
764 /*
765 *  Go one step east in the LCM table. Adjust the current LCM value as well as
766 *  the local column index in A and XR.
767 */
768          lcmt00 += low - ilow + qnb; nblks--;
769          Aoffj  += inbloc; XYoffj += inbloc;
770 /*
771 *  While there are blocks remaining that own lower entries, keep going east.
772 *  Adjust the current LCM value as well as the local column index in A and XR.
773 */
774          while( ( nblks > 0 ) && ( lcmt00 < low ) )
775          { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
776 /*
777 *  Operate with the lower triangular part of sub( A ).
778 */
779          tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
780          if( lower && ( tmp1 > 0 ) )
781          {
782             TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
783                  Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
784                  Y+XYjj*Yinc, LDY );
785             Ajj += tmp1; XYjj += tmp1; n1  -= tmp1;
786          }
787 /*
788 *  Return if no more column in the LCM table.
789 */
790          if( nblks <= 0 ) return;
791 /*
792 *  lcmt00 >= low. The current block owns either diagonals or upper entries.
793 *  Save the current position in the LCM table. After this row has been
794 *  completely taken care of, re-start from this column and the next row of
795 *  the LCM table.
796 */
797          lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
798 
799          nbloc = Anb;
800          while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
801          {
802 /*
803 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
804 */
805             if( nblkd == 1 ) nbloc = lnbloc;
806             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
807                  ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc,
808                  LDX, Y+(joffxy+1)*Yinc, LDY );
809             lcmt00 = lcmt;  lcmt   += qnb;
810             nblks  = nblkd; nblkd--;
811             Aoffj  = joffd; XYoffj  = joffxy;
812             joffd += nbloc; joffxy += nbloc;
813          }
814 /*
815 *  Operate with the upper triangular part of sub( A ).
816 */
817          tmp1 = n1 - joffd + Ajj - 1;
818          if( upper && ( tmp1 > 0 ) )
819          {
820             TRM( TYPE, SIDE, ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
821                  Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
822                  Y+(joffxy+1)*Yinc, LDY );
823          }
824          tmp1    = Aoffj - Ajj + 1;
825          m1     -= imbloc;
826          n1     -= tmp1;
827          lcmt00 -= ( iupp - upp + pmb );
828          mblks--;
829          Aoffi  += imbloc;
830          XYoffi += imbloc;
831 /*
832 *  Operate with the lower triangular part of sub( A ).
833 */
834          if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
835          {
836             TRM( TYPE, SIDE, ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
837                  Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+(XYoffi+1)*Xinc,
838                  LDX, Y+XYjj*Yinc, LDY );
839          }
840          Aii  = Aoffi  + 1; Ajj  = Aoffj  + 1;
841          XYii = XYoffi + 1; XYjj = XYoffj + 1;
842       }
843 /*
844 *  Loop over the remaining columns of the LCM table.
845 */
846       nbloc = Anb;
847       while( nblks > 0 )
848       {
849          if( nblks == 1 ) nbloc = lnbloc;
850 /*
851 *  While there are blocks remaining that own upper entries, keep going south.
852 *  Adjust the current LCM value as well as the local row index in A and XC.
853 */
854          while( ( mblks > 0 ) && ( lcmt00 > upp ) )
855          { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
856 /*
857 *  Operate with the upper triangular part of sub( A ).
858 */
859          tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
860          if( upper && ( tmp1 > 0 ) )
861          {
862             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
863                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
864                  Y+(XYoffj+1)*Yinc, LDY );
865             Aii  += tmp1;
866             XYii += tmp1;
867             m1   -= tmp1;
868          }
869 /*
870 *  Return if no more row in the LCM table.
871 */
872          if( mblks <= 0 ) return;
873 /*
874 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
875 *  Save the current position in the LCM table. After this column has been
876 *  completely taken care of, re-start from this row and the next column of
877 *  the LCM table.
878 */
879          lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
880 
881          mbloc = Amb;
882          while( ( mblkd > 0 ) && ( lcmt >= low ) )
883          {
884 /*
885 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
886 */
887             if( mblkd == 1 ) mbloc = lmbloc;
888             TRM( TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
889                  ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
890                  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
891             lcmt00 = lcmt;  lcmt   -= pmb;
892             mblks  = mblkd; mblkd--;
893             Aoffi  = ioffd; XYoffi = ioffxy;
894             ioffd += mbloc; ioffxy += mbloc;
895          }
896 /*
897 *  Operate with the lower triangular part of sub( A ).
898 */
899          tmp1 = m1 - ioffd + Aii - 1;
900          if( lower && ( tmp1 > 0 ) )
901          {
902             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
903                  Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
904                  X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
905          }
906 
907          tmp1    = MIN( Aoffi, iimax ) - Aii + 1;
908          m1     -= tmp1;
909          n1     -= nbloc;
910          lcmt00 += qnb;
911          nblks--;
912          Aoffj  += nbloc;
913          XYoffj += nbloc;
914 /*
915 *  Operate with the upper triangular part of sub( A ).
916 */
917          if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
918          {
919             TRM( TYPE, SIDE, ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
920                  Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
921                  Y+(XYoffj+1)*Yinc, LDY );
922          }
923          Aii  = Aoffi  + 1;  Ajj = Aoffj  + 1;
924          XYii = XYoffi + 1; XYjj = XYoffj + 1;
925       }
926    }
927 /*
928 *  End of PB_Cptrm
929 */
930 }
931