1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS auxiliary routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
PB_Cpsym(PBTYP_T * TYPE,PBTYP_T * UTYP,char * SIDE,char * UPLO,int N,int K,char * ALPHA,char * A,int IA,int JA,int * DESCA,char * XC,int LDXC,char * XR,int LDXR,char * YC,int LDYC,char * YR,int LDYR,TZSYM_T SYM)20 void PB_Cpsym( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE,
21                char * UPLO, int N, int K, char * ALPHA, char * A,
22                int IA, int JA, int * DESCA, char * XC, int LDXC,
23                char * XR, int LDXR, char * YC, int LDYC, char * YR,
24                int LDYR, TZSYM_T SYM )
25 #else
26 void PB_Cpsym( TYPE, UTYP, SIDE, UPLO, N, K, ALPHA, A, IA, JA, DESCA,
27                XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR, SYM )
28 /*
29 *  .. Scalar Arguments ..
30 */
31    char           * SIDE, * UPLO;
32    int            IA, JA, K, LDXC, LDXR, LDYC, LDYR, N;
33    char           * ALPHA;
34    PBTYP_T        * TYPE, * UTYP;
35    TZSYM_T        SYM;
36 /*
37 *  .. Array Arguments ..
38 */
39    int            * DESCA;
40    char           * A, * XC, * XR, * YC, * YR;
41 #endif
42 {
43 /*
44 *  Purpose
45 *  =======
46 *
47 *  PB_Cpsym  performs  a symmetric or Hermitian matrix-matrix or matrix-
48 *  vector multiplication.  In the following, sub( A ) denotes the symme-
49 *  tric or Hermitian submatrix operand A( IA:IA+N-1, JA:JA+N-1 ).
50 *
51 *  Notes
52 *  =====
53 *
54 *  A description  vector  is associated with each 2D block-cyclicly dis-
55 *  tributed matrix.  This  vector  stores  the  information  required to
56 *  establish the  mapping  between a  matrix entry and its corresponding
57 *  process and memory location.
58 *
59 *  In  the  following  comments,   the character _  should  be  read  as
60 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
61 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
62 *
63 *  NOTATION         STORED IN       EXPLANATION
64 *  ---------------- --------------- ------------------------------------
65 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
67 *                                   the NPROW x NPCOL BLACS process grid
68 *                                   A  is  distributed over. The context
69 *                                   itself  is  global,  but  the handle
70 *                                   (the integer value) may vary.
71 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
72 *                                   ted matrix A, M_A >= 0.
73 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
74 *                                   buted matrix A, N_A >= 0.
75 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
76 *                                   block of the matrix A, IMB_A > 0.
77 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
78 *                                   left   block   of   the  matrix   A,
79 *                                   INB_A > 0.
80 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
81 *                                   bute the last  M_A-IMB_A  rows of A,
82 *                                   MB_A > 0.
83 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
84 *                                   bute the last  N_A-INB_A  columns of
85 *                                   A, NB_A > 0.
86 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
87 *                                   row of the matrix  A is distributed,
88 *                                   NPROW > RSRC_A >= 0.
89 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
90 *                                   first column of  A  is  distributed.
91 *                                   NPCOL > CSRC_A >= 0.
92 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
93 *                                   array  storing  the  local blocks of
94 *                                   the distributed matrix A,
95 *                                   IF( Lc( 1, N_A ) > 0 )
96 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
97 *                                   ELSE
98 *                                      LLD_A >= 1.
99 *
100 *  Let K be the number of  rows of a matrix A starting at the global in-
101 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103 *  receive if these K rows were distributed over NPROW processes.  If  K
104 *  is the number of columns of a matrix  A  starting at the global index
105 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
106 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
107 *  these K columns were distributed over NPCOL processes.
108 *
109 *  The values of Lr() and Lc() may be determined via a call to the func-
110 *  tion PB_Cnumroc:
111 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113 *
114 *  Arguments
115 *  =========
116 *
117 *  TYPE    (local input) pointer to a PBTYP_T structure
118 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
119 *          that contains type information (See pblas.h).
120 *
121 *  UTYP    (local input) pointer to a PBTYP_T structure
122 *          On entry,  UTYP  is a pointer to a structure of type PBTYP_T,
123 *          that contains type information for the Y's (See pblas.h).
124 *
125 *  SIDE    (global input) pointer to CHAR
126 *          On entry,  SIDE  specifies whether  op( sub( A ) ) multiplies
127 *          its operand X from the left or right as follows:
128 *
129 *          SIDE = 'L' or 'l'  Y := alpha*op( sub( A ) )*X + Y,
130 *
131 *          SIDE = 'R' or 'r'  Y := alpha*X*op( sub( A ) ) + Y.
132 *
133 *  UPLO    (global input) pointer to CHAR
134 *          On  entry,   UPLO  specifies  whether  the  local  pieces  of
135 *          the array  A  containing the  upper or lower triangular  part
136 *          of the symmetric or Hermitian submatrix  sub( A )  are to  be
137 *          referenced as follows:
138 *
139 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
140 *                                 the upper triangular part of  the sym-
141 *                                 metric or Hermitian submatrix sub( A )
142 *                                 are to be referenced,
143 *
144 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
145 *                                 the  lower triangular part of the sym-
146 *                                 metric or Hermitian submatrix sub( A )
147 *                                 are to be referenced.
148 *
149 *  N       (global input) INTEGER
150 *          On entry,  N specifies the order of the  submatrix  sub( A ).
151 *          N must be at least zero.
152 *
153 *  K       (global input) INTEGER
154 *          On entry, K  specifies the local number of columns of the lo-
155 *          cal array XC  and the local number of rows of the local array
156 *          XR. K mut be at least zero.
157 *
158 *  ALPHA   (global input) pointer to CHAR
159 *          On entry, ALPHA specifies the scalar alpha.
160 *
161 *  A       (local input) pointer to CHAR
162 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
163 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
164 *          the local entries of the matrix A.
165 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
166 *          the local entries corresponding to the upper triangular  part
167 *          of the  @(syhec)  submatrix  sub( A ), and the local entries
168 *          corresponding to the  strictly lower triangular  of  sub( A )
169 *          are not  referenced.
170 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
171 *          the local entries corresponding to the lower triangular  part
172 *          of the  @(syhec)  submatrix  sub( A ), and the local entries
173 *          corresponding to the  strictly upper triangular  of  sub( A )
174 *          are not  referenced.
175 *
176 *  IA      (global input) INTEGER
177 *          On entry, IA  specifies A's global row index, which points to
178 *          the beginning of the submatrix sub( A ).
179 *
180 *  JA      (global input) INTEGER
181 *          On entry, JA  specifies A's global column index, which points
182 *          to the beginning of the submatrix sub( A ).
183 *
184 *  DESCA   (global and local input) INTEGER array
185 *          On entry, DESCA  is an integer array of dimension DLEN_. This
186 *          is the array descriptor for the matrix A.
187 *
188 *  XC      (local input) pointer to CHAR
189 *          On entry, XC is an array of dimension (LDXC,K). Before entry,
190 *          this array contains the local entries of the matrix XC.
191 *
192 *  LDXC    (local input) INTEGER
193 *          On entry, LDXC  specifies  the leading dimension of the array
194 *          XC. LDXC must be at least max( 1, Lp( IA, N ) ).
195 *
196 *  XR      (local input) pointer to CHAR
197 *          On entry, XR is an array of dimension (LDXR,Kx),  where Kx is
198 *          at least Lc( JA, N ). Before  entry, this array contains  the
199 *          local entries of the matrix XR.
200 *
201 *  LDXR    (local input) INTEGER
202 *          On entry, LDXR  specifies  the leading dimension of the array
203 *          XR. LDXR must be at least max( 1, K ).
204 *
205 *  YC      (local input/local output) pointer to CHAR
206 *          On entry, YC is an array of dimension (LDYC,K). Before entry,
207 *          this  array  contains  the local entries of the matrix YC. On
208 *          exit, this array contains the updated vector YC.
209 *
210 *  LDYC    (local input) INTEGER
211 *          On entry, LDYC  specifies  the leading dimension of the array
212 *          YC. LDYC must be at least max( 1, Lp( IA, N ) ).
213 *
214 *  YR      (local input/local output) pointer to CHAR
215 *          On entry, YR is an array of dimension (LDYR,Ky),  where Ky is
216 *          at least Lc( JA, N ).  Before  entry, this array contains the
217 *          local entries of the matrix YR. On  exit, this array contains
218 *          the updated vector YR.
219 *
220 *  LDYR    (local input) INTEGER
221 *          On entry, LDYR  specifies  the leading dimension of the array
222 *          YR. LDYR must be at least max( 1, K ).
223 *
224 *  SYM     (local input) pointer to function of type TZSYM_T
225 *          On entry, SYM specifies the function performing the symmetric
226 *          or  Hermitian  matrix-vector  or  matrix-matrix multiply of a
227 *          single block.
228 *
229 *  -- Written on April 1, 1998 by
230 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
231 *
232 *  ---------------------------------------------------------------------
233 */
234 /*
235 *  .. Local Scalars ..
236 */
237    int            Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
238                   Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
239                   IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1,
240                   YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp,
241                   jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower,
242                   m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd,
243                   nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
244 /* ..
245 *  .. Executable Statements ..
246 *
247 */
248 /*
249 *  Quick return if possible
250 */
251    if( N <= 0 ) return;
252 /*
253 *  Retrieve process grid information
254 */
255    Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol );
256 /*
257 *  Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
258 */
259    PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
260                  &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
261 /*
262 *  Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in
263 *  all processes.
264 */
265    if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
266 
267    IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
268    IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
269    Amb  = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ];
270    size = TYPE->size;
271 
272    if( IsRowRepl && IsColRepl )
273    {
274       SYM( TYPE, SIDE, UPLO, Amp, Anq, K, 0, ALPHA,  Mptr( A, Aii, Ajj, Ald,
275            size ), Ald, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR );
276       return;
277    }
278 
279    XCinc = size;         XRinc = LDXR * size;
280    YCinc = UTYP->size;   YRinc = LDYR * UTYP->size;
281    upper = ( Mupcase( UPLO[0] ) == CUPPER );
282    lower = ( Mupcase( UPLO[0] ) == CLOWER );
283 /*
284 *  Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low,
285 *  iupp, and upp.
286 */
287    PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
288               &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
289               &iupp, &upp );
290 
291    iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
292    jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
293    pmb   = ( IsRowRepl ? Amb : nprow * Amb );
294    qnb   = ( IsColRepl ? Anb : npcol * Anb );
295 /*
296 *  Handle separately the first row and/or column of the LCM table. Update the
297 *  LCM value of the curent block lcmt00, as well as the number of rows and
298 *  columns mblks and nblks remaining in the LCM table.
299 */
300    GoSouth = ( lcmt00 > iupp );
301    GoEast  = ( lcmt00 < ilow );
302 /*
303 *  Go through the table looking for blocks owning diagonal entries.
304 */
305    if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
306    {
307 /*
308 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
309 */
310       SYM( TYPE, SIDE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA, Mptr( A, Aii,
311            Ajj, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR,
312            YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR );
313 /*
314 *  Decide whether one should go south or east in the table: Go east if the
315 *  block below the current one only owns lower entries. If this block, however,
316 *  owns diagonals, then go south.
317 */
318       GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
319 
320       if( GoSouth )
321       {
322 /*
323 *  When the upper triangular part of sub( A ) should be operated with and
324 *  one is planning to go south in the table, it is neccessary to take care
325 *  of the remaining columns of these imbloc rows immediately.
326 */
327          if( upper && ( Anq > inbloc ) )
328          {
329             tmp1 = Anq - inbloc;
330             SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0,
331                  ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald,
332                  XC+Xii*XCinc, LDXC, XR+(Xjj+inbloc)*XRinc, LDXR,
333                  YC+Xii*YCinc, LDYC, YR+(Xjj+inbloc)*YRinc, LDYR );
334          }
335          Aii += imbloc; Xii += imbloc; m1  -= imbloc;
336       }
337       else
338       {
339 /*
340 *  When the lower triangular part of sub( A ) should be operated with and
341 *  one is planning to go east in the table, it is neccessary to take care
342 *  of the remaining rows of these inbloc columns immediately.
343 */
344          if( lower && ( Amp > imbloc ) )
345          {
346             tmp1 = Amp - imbloc;
347             SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0,
348                  ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald,
349                  XC+(Xii+imbloc)*XCinc, LDXC, XR+Xjj*XRinc, LDXR,
350                  YC+(Xii+imbloc)*YCinc, LDYC, YR+Xjj*YRinc, LDYR );
351          }
352          Ajj += inbloc; Xjj += inbloc; n1  -= inbloc;
353       }
354    }
355 
356    if( GoSouth )
357    {
358 /*
359 *  Go one step south in the LCM table. Adjust the current LCM value as well as
360 *  the local row indexes in A and XC.
361 */
362       lcmt00 -= ( iupp - upp + pmb ); mblks--;
363       Aoffi  += imbloc; Xoffi  += imbloc;
364 /*
365 *  While there are blocks remaining that own upper entries, keep going south.
366 *  Adjust the current LCM value as well as the local row indexes in A and XC.
367 */
368       while( ( mblks > 0 ) && ( lcmt00 > upp ) )
369       { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
370 /*
371 *  Operate with the upper triangular part of sub( A ) we just skipped when
372 *  necessary.
373 */
374       tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
375       if( upper && ( tmp1 > 0 ) )
376       {
377          SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0,
378               ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald,
379               XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
380               YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
381          Aii += tmp1; Xii += tmp1; m1  -= tmp1;
382       }
383 /*
384 *  Return if no more row in the LCM table.
385 */
386       if( mblks <= 0 ) return;
387 /*
388 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
389 *  Save the current position in the LCM table. After this column has been
390 *  completely taken care of, re-start from this row and the next column of
391 *  the LCM table.
392 */
393       lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
394 
395       mbloc = Amb;
396       while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
397       {
398 /*
399 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
400 */
401          if( mblkd == 1 ) mbloc = lmbloc;
402          SYM( TYPE, SIDE, UPLO, mbloc, inbloc, K, lcmt,
403               ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
404               XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
405               YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
406          lcmt00 = lcmt;  lcmt  -= pmb;
407          mblks  = mblkd; mblkd--;
408          Aoffi  = ioffd; ioffd += mbloc;
409          Xoffi  = ioffx; ioffx += mbloc;
410       }
411 /*
412 *  Operate with the lower triangular part of sub( A ).
413 */
414       tmp1 = m1 - ioffd + Aii - 1;
415       if( lower && ( tmp1 > 0 ) )
416          SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0,
417               ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
418               XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
419               YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
420 
421       tmp1    = Aoffi - Aii + 1;
422       m1     -= tmp1;
423       n1     -= inbloc;
424       lcmt00 += low - ilow + qnb;
425       nblks--;
426       Aoffj  += inbloc;
427       Xoffj  += inbloc;
428 /*
429 *  Operate with the upper triangular part of sub( A ).
430 */
431       if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
432          SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0,
433               ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald,
434               XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
435               YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
436       Aii = Aoffi + 1; Ajj = Aoffj + 1;
437       Xii = Xoffi + 1; Xjj = Xoffj + 1;
438    }
439    else if( GoEast )
440    {
441 /*
442 *  Go one step east in the LCM table. Adjust the current LCM value as well as
443 *  the local column index in A and XR.
444 */
445       lcmt00 += low - ilow + qnb; nblks--;
446       Aoffj  += inbloc; Xoffj  += inbloc;
447 /*
448 *  While there are blocks remaining that own lower entries, keep going east.
449 *  Adjust the current LCM value as well as the local column index in A and XR.
450 */
451       while( ( nblks > 0 ) && ( lcmt00 < low ) )
452       { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; }
453 /*
454 *  Operate with the lower triangular part of sub( A ).
455 */
456       tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
457       if( lower && ( tmp1 > 0 ) )
458       {
459          SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0,
460               ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald,
461               XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR,
462               YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR );
463          Ajj += tmp1; Xjj += tmp1; n1  -= tmp1;
464       }
465 /*
466 *  Return if no more column in the LCM table.
467 */
468       if( nblks <= 0 ) return;
469 /*
470 *  lcmt00 >= low. The current block owns either diagonals or upper entries.
471 *  Save the current position in the LCM table. After this row has been
472 *  completely taken care of, re-start from this column and the next row of
473 *  the LCM table.
474 */
475       lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj;
476 
477       nbloc = Anb;
478       while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
479       {
480 /*
481 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
482 */
483          if( nblkd == 1 ) nbloc = lnbloc;
484          SYM( TYPE, SIDE, UPLO, imbloc, nbloc, K, lcmt,
485               ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald,
486               XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR,
487               YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR );
488          lcmt00 = lcmt;  lcmt  += qnb;
489          nblks  = nblkd; nblkd--;
490          Aoffj  = joffd; joffd += nbloc;
491          Xoffj  = joffx; joffx += nbloc;
492       }
493 /*
494 *  Operate with the upper triangular part of sub( A ).
495 */
496       tmp1 = n1 - joffd + Ajj - 1;
497       if( upper && ( tmp1 > 0 ) )
498          SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0,
499               ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald,
500               XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR,
501               YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR );
502 
503       tmp1    = Aoffj - Ajj + 1;
504       m1     -= imbloc;
505       n1     -= tmp1;
506       lcmt00 -= ( iupp - upp + pmb );
507       mblks--;
508       Aoffi  += imbloc;
509       Xoffi  += imbloc;
510 /*
511 *  Operate with the lower triangular part of sub( A ).
512 */
513       if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
514          SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0,
515               ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald,
516               XC+(Xoffi+1)*XCinc, LDXC, XR+Xjj*XRinc, LDXR,
517               YC+(Xoffi+1)*YCinc, LDYC, YR+Xjj*YRinc, LDYR );
518       Aii = Aoffi + 1; Ajj = Aoffj + 1;
519       Xii = Xoffi + 1; Xjj = Xoffj + 1;
520    }
521 /*
522 *  Loop over the remaining columns of the LCM table.
523 */
524    nbloc = Anb;
525    while( nblks > 0 )
526    {
527       if( nblks == 1 ) nbloc = lnbloc;
528 /*
529 *  While there are blocks remaining that own upper entries, keep going south.
530 *  Adjust the current LCM value as well as the local row index in A and XC.
531 */
532       while( ( mblks > 0 ) && ( lcmt00 > upp ) )
533       { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
534 /*
535 *  Operate with the upper triangular part of sub( A ).
536 */
537       tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
538       if( upper && ( tmp1 > 0 ) )
539       {
540          SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0,
541               ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald,
542               XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
543               YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
544          Aii += tmp1;
545          Xii += tmp1;
546          m1  -= tmp1;
547       }
548 /*
549 *  Return if no more row in the LCM table.
550 */
551       if( mblks <= 0 ) return;
552 /*
553 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
554 *  Save the current position in the LCM table. After this column has been
555 *  completely taken care of, re-start from this row and the next column of
556 *  the LCM table.
557 */
558       lcmt  = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
559 
560       mbloc = Amb;
561       while( ( mblkd > 0 ) && ( lcmt >= low ) )
562       {
563 /*
564 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
565 */
566          if( mblkd == 1 ) mbloc = lmbloc;
567          SYM( TYPE, SIDE, UPLO, mbloc, nbloc, K, lcmt,
568               ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
569               XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
570               YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
571          lcmt00 = lcmt;  lcmt  -= pmb;
572          mblks  = mblkd; mblkd--;
573          Aoffi  = ioffd; Xoffi  = ioffx;
574          ioffd += mbloc; ioffx += mbloc;
575       }
576 /*
577 *  Operate with the lower triangular part of sub( A ).
578 */
579       tmp1 = m1 - ioffd + Aii - 1;
580       if( lower && ( tmp1 > 0 ) )
581          SYM( TYPE, SIDE, ALL, tmp1, nbloc, K, 0,
582               ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
583               XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
584               YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
585 
586       tmp1    = MIN( Aoffi, iimax ) - Aii + 1;
587       m1     -= tmp1;
588       n1     -= nbloc;
589       lcmt00 += qnb;
590       nblks--;
591       Aoffj  += nbloc;
592       Xoffj  += nbloc;
593 /*
594 *  Operate with the upper triangular part of sub( A ).
595 */
596       if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
597          SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0,
598               ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald,
599               XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR,
600               YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR );
601       Aii = Aoffi + 1; Ajj = Aoffj + 1;
602       Xii = Xoffi + 1; Xjj = Xoffj + 1;
603    }
604 /*
605 *  End of PB_Cpsym
606 */
607 }
608