1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS auxiliary routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
PB_Cpsyr2(PBTYP_T * TYPE,char * UPLO,int N,int K,char * ALPHA,char * XC,int LDXC,char * XR,int LDXR,char * YC,int LDYC,char * YR,int LDYR,char * A,int IA,int JA,int * DESCA,TZSYR2_T SYR2)20 void PB_Cpsyr2( PBTYP_T * TYPE, char * UPLO, int N, int K,
21                 char * ALPHA, char * XC, int LDXC, char * XR, int LDXR,
22                 char * YC, int LDYC, char * YR, int LDYR, char * A,
23                 int IA, int JA, int * DESCA, TZSYR2_T SYR2 )
24 #else
25 void PB_Cpsyr2( TYPE, UPLO, N, K, ALPHA, XC, LDXC, XR, LDXR,
26                 YC, LDYC, YR, LDYR, A, IA, JA, DESCA, SYR2 )
27 /*
28 *  .. Scalar Arguments ..
29 */
30    char           * UPLO;
31    int            IA, JA, K, LDXC, LDXR, LDYC, LDYR, N;
32    char           * ALPHA;
33    PBTYP_T        * TYPE;
34    TZSYR2_T       SYR2;
35 /*
36 *  .. Array Arguments ..
37 */
38    int            * DESCA;
39    char           * A, * XC, * XR, * YC, * YR;
40 #endif
41 {
42 /*
43 *  Purpose
44 *  =======
45 *
46 *  PB_Cpsyr2 performs a symmetric or Hermitian rank-2 update of the sub-
47 *  matrix sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ).
48 *
49 *  Notes
50 *  =====
51 *
52 *  A description  vector  is associated with each 2D block-cyclicly dis-
53 *  tributed matrix.  This  vector  stores  the  information  required to
54 *  establish the  mapping  between a  matrix entry and its corresponding
55 *  process and memory location.
56 *
57 *  In  the  following  comments,   the character _  should  be  read  as
58 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
59 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
60 *
61 *  NOTATION         STORED IN       EXPLANATION
62 *  ---------------- --------------- ------------------------------------
63 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
64 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
65 *                                   the NPROW x NPCOL BLACS process grid
66 *                                   A  is  distributed over. The context
67 *                                   itself  is  global,  but  the handle
68 *                                   (the integer value) may vary.
69 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
70 *                                   ted matrix A, M_A >= 0.
71 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
72 *                                   buted matrix A, N_A >= 0.
73 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
74 *                                   block of the matrix A, IMB_A > 0.
75 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
76 *                                   left   block   of   the  matrix   A,
77 *                                   INB_A > 0.
78 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
79 *                                   bute the last  M_A-IMB_A  rows of A,
80 *                                   MB_A > 0.
81 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
82 *                                   bute the last  N_A-INB_A  columns of
83 *                                   A, NB_A > 0.
84 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
85 *                                   row of the matrix  A is distributed,
86 *                                   NPROW > RSRC_A >= 0.
87 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
88 *                                   first column of  A  is  distributed.
89 *                                   NPCOL > CSRC_A >= 0.
90 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
91 *                                   array  storing  the  local blocks of
92 *                                   the distributed matrix A,
93 *                                   IF( Lc( 1, N_A ) > 0 )
94 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
95 *                                   ELSE
96 *                                      LLD_A >= 1.
97 *
98 *  Let K be the number of  rows of a matrix A starting at the global in-
99 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
100 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
101 *  receive if these K rows were distributed over NPROW processes.  If  K
102 *  is the number of columns of a matrix  A  starting at the global index
103 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
104 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
105 *  these K columns were distributed over NPCOL processes.
106 *
107 *  The values of Lr() and Lc() may be determined via a call to the func-
108 *  tion PB_Cnumroc:
109 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
110 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
111 *
112 *  Arguments
113 *  =========
114 *
115 *  TYPE    (local input) pointer to a PBTYP_T structure
116 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
117 *          that contains type information (See pblas.h).
118 *
119 *  UPLO    (global input) pointer to CHAR
120 *          On  entry,   UPLO  specifies  whether  the  local  pieces  of
121 *          the array  A  containing the  upper or lower triangular  part
122 *          of the symmetric or Hermitian submatrix  sub( A )  are to  be
123 *          referenced as follows:
124 *
125 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
126 *                                 the upper triangular part of  the sym-
127 *                                 metric or Hermitian submatrix sub( A )
128 *                                 are to be referenced,
129 *
130 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
131 *                                 the  lower triangular part of the sym-
132 *                                 metric or Hermitian submatrix sub( A )
133 *                                 are to be referenced.
134 *
135 *  N       (global input) INTEGER
136 *          On entry,  N specifies the order of the  submatrix  sub( A ).
137 *          N must be at least zero.
138 *
139 *  K       (global input) INTEGER
140 *          On entry, K  specifies the local number of columns of the lo-
141 *          cal array XC  and the local number of rows of the local array
142 *          XR. K mut be at least zero.
143 *
144 *  ALPHA   (global input) pointer to CHAR
145 *          On entry, ALPHA specifies the scalar alpha.
146 *
147 *  XC      (local input) pointer to CHAR
148 *          On entry, XC is an array of dimension (LDXC,K). Before entry,
149 *          this array contains the local entries of the matrix XC.
150 *
151 *  LDXC    (local input) INTEGER
152 *          On entry, LDXC  specifies  the leading dimension of the array
153 *          XC. LDXC must be at least max( 1, Lp( IA, N ) ).
154 *
155 *  YC      (local input) pointer to CHAR
156 *          On entry, YC is an array of dimension (LDYC,K). Before entry,
157 *          this array contains the local entries of the matrix YC.
158 *
159 *  LDYC    (local input) INTEGER
160 *          On entry, LDYC  specifies  the leading dimension of the array
161 *          YC. LDYC must be at least max( 1, Lp( IA, N ) ).
162 *
163 *  XR      (local input) pointer to CHAR
164 *          On entry, XR is an array of dimension (LDXR,Kx),  where Kx is
165 *          at least Lc( JA, N ). Before  entry, this array contains  the
166 *          local entries of the matrix XR.
167 *
168 *  LDXR    (local input) INTEGER
169 *          On entry, LDXR  specifies  the leading dimension of the array
170 *          XR. LDXR must be at least max( 1, K ).
171 *
172 *  YR      (local input) pointer to CHAR
173 *          On entry, YR is an array of dimension (LDYR,Ky),  where Ky is
174 *          at least Lc( JA, N ).  Before  entry, this array contains the
175 *          local entries of the matrix YR.
176 *
177 *  LDYR    (local input) INTEGER
178 *          On entry, LDYR  specifies  the leading dimension of the array
179 *          YR. LDYR must be at least max( 1, K ).
180 *
181 *  A       (local input/local output) pointer to CHAR
182 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
183 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
184 *          the local entries of the matrix A.
185 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
186 *          the local entries corresponding to the upper triangular  part
187 *          of the  @(syhec)  submatrix  sub( A ), and the local entries
188 *          corresponding to the  strictly lower triangular  of  sub( A )
189 *          are not  referenced.  On exit,  the upper triangular part  of
190 *          sub( A ) is overwritten by the  upper triangular part  of the
191 *          updated submatrix.
192 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
193 *          the local entries corresponding to the lower triangular  part
194 *          of the  @(syhec)  submatrix  sub( A ), and the local entries
195 *          corresponding to the  strictly upper triangular  of  sub( A )
196 *          are not  referenced.  On exit,  the lower triangular part  of
197 *          sub( A ) is overwritten by the  lower triangular part  of the
198 *          updated submatrix.
199 *
200 *  IA      (global input) INTEGER
201 *          On entry, IA  specifies A's global row index, which points to
202 *          the beginning of the submatrix sub( A ).
203 *
204 *  JA      (global input) INTEGER
205 *          On entry, JA  specifies A's global column index, which points
206 *          to the beginning of the submatrix sub( A ).
207 *
208 *  DESCA   (global and local input) INTEGER array
209 *          On entry, DESCA  is an integer array of dimension DLEN_. This
210 *          is the array descriptor for the matrix A.
211 *
212 *  SYR2    (local input) pointer to function of type TZSYR2_T
213 *          On entry,  SYR2  specifies the function performing the update
214 *          of a single block.
215 *
216 *  -- Written on April 1, 1998 by
217 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
218 *
219 *  ---------------------------------------------------------------------
220 */
221 /*
222 *  .. Local Scalars ..
223 */
224    int            Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
225                   Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
226                   IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1,
227                   YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp,
228                   jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower,
229                   m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd,
230                   nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
231 /* ..
232 *  .. Executable Statements ..
233 *
234 */
235 /*
236 *  Quick return if possible
237 */
238    if( N <= 0 ) return;
239 /*
240 *  Retrieve process grid information
241 */
242    Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol );
243 /*
244 *  Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
245 */
246    PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
247                  &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
248 /*
249 *  Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in
250 *  all processes.
251 */
252    if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
253 
254    IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
255    IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
256    Amb  = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[LLD_];
257    size = TYPE->size;
258 
259    if( IsRowRepl && IsColRepl )
260    {
261       SYR2( TYPE, UPLO, Amp, Anq, K, 0, ALPHA, XC, LDXC, YC, LDYC, XR, LDXR,
262             YR, LDYR, Mptr( A, Aii, Ajj, Ald, size ), Ald );
263       return;
264    }
265 
266    XCinc = size;         XRinc = LDXR * size;
267    YCinc = size;         YRinc = LDYR * size;
268    upper = ( Mupcase( UPLO[0] ) == CUPPER );
269    lower = ( Mupcase( UPLO[0] ) == CLOWER );
270 /*
271 *  Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low,
272 *  iupp, and upp.
273 */
274    PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
275               &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
276               &iupp, &upp );
277 
278    iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
279    jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
280    pmb   = ( IsRowRepl ? Amb : nprow * Amb );
281    qnb   = ( IsColRepl ? Anb : npcol * Anb );
282 /*
283 *  Handle separately the first row and/or column of the LCM table. Update the
284 *  LCM value of the curent block lcmt00, as well as the number of rows and
285 *  columns mblks and nblks remaining in the LCM table.
286 */
287    GoSouth = ( lcmt00 > iupp );
288    GoEast  = ( lcmt00 < ilow );
289 /*
290 *  Go through the table looking for blocks owning diagonal entries.
291 */
292    if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
293    {
294 /*
295 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
296 */
297       SYR2( TYPE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA,
298             XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
299             XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
300             Mptr( A, Aii, Ajj, Ald, size ), Ald );
301 /*
302 *  Decide whether one should go south or east in the table: Go east if
303 *  the block below the current one only owns lower entries. If this block,
304 *  however, owns diagonals, then go south.
305 */
306       GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
307 
308       if( GoSouth )
309       {
310 /*
311 *  When the upper triangular part of sub( A ) should be updated and one is
312 *  planning to go south in the table, it is neccessary to take care of the
313 *  remaining columns of these imbloc rows immediately.
314 */
315          if( upper && ( Anq > inbloc ) )
316          {
317             tmp1 = Anq - inbloc;
318             SYR2( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA,
319                   XC+Xii*XCinc,          LDXC, YC+Xii*YCinc,          LDYC,
320                   XR+(Xjj+inbloc)*XRinc, LDXR, YR+(Xjj+inbloc)*YRinc, LDYR,
321                   Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald );
322          }
323          Aii += imbloc; Xii += imbloc; m1  -= imbloc;
324       }
325       else
326       {
327 /*
328 *  When the lower triangular part of sub( A ) should be updated and one is
329 *  planning to go east in the table, it is neccessary to take care of the
330 *  remaining rows of these inbloc columns immediately.
331 */
332          if( lower && ( Amp > imbloc ) )
333          {
334             tmp1 = Amp - imbloc;
335             SYR2( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA,
336                   XC+(Xii+imbloc)*XCinc, LDXC, YC+(Xii+imbloc)*YCinc, LDYC,
337                   XR+Xjj*XRinc,          LDXR, YR+Xjj*YRinc,          LDYR,
338                   Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald );
339          }
340          Ajj += inbloc; Xjj += inbloc; n1  -= inbloc;
341       }
342    }
343 
344    if( GoSouth )
345    {
346 /*
347 *  Go one step south in the LCM table. Adjust the current LCM value as well as
348 *  the local row indexes in A and XC.
349 */
350       lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc;
351 /*
352 *  While there are blocks remaining that own upper entries, keep going south.
353 *  Adjust the current LCM value as well as the local row indexes in A and XC.
354 */
355       while( ( mblks > 0 ) && ( lcmt00 > upp ) )
356       { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
357 /*
358 *  Update the upper triangular part of sub( A ) we just skipped when necessary.
359 */
360       tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
361       if( upper && ( tmp1 > 0 ) )
362       {
363          SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA,
364                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
365                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
366                Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
367          Aii += tmp1; Xii += tmp1; m1  -= tmp1;
368       }
369 /*
370 *  Return if no more row in the LCM table.
371 */
372       if( mblks <= 0 ) return;
373 /*
374 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
375 *  Save the current position in the LCM table. After this column has been
376 *  completely taken care of, re-start from this row and the next column of
377 *  the LCM table.
378 */
379       lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
380 
381       mbloc = Amb;
382       while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
383       {
384 /*
385 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
386 */
387          if( mblkd == 1 ) mbloc = lmbloc;
388          SYR2( TYPE, UPLO, mbloc, inbloc, K, lcmt, ALPHA,
389                XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
390                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
391                Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
392          lcmt00 = lcmt;  lcmt  -= pmb;
393          mblks  = mblkd; mblkd--;
394          Aoffi  = ioffd; ioffd += mbloc;
395          Xoffi  = ioffx; ioffx += mbloc;
396       }
397 /*
398 *  Update the lower triangular part of sub( A ).
399 */
400       tmp1 = m1 - ioffd + Aii - 1;
401       if( lower && ( tmp1 > 0 ) )
402          SYR2( TYPE, ALL, tmp1, inbloc, K, 0, ALPHA,
403                XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
404                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
405                Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
406 
407       tmp1    = Aoffi - Aii + 1;
408       m1     -= tmp1;
409       n1     -= inbloc;
410       lcmt00 += low - ilow + qnb;
411       nblks--;
412       Aoffj  += inbloc;
413       Xoffj  += inbloc;
414 /*
415 *  Update the upper triangular part of sub( A ).
416 */
417       if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
418          SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA,
419                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
420                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
421                Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
422       Aii = Aoffi + 1; Ajj = Aoffj + 1;
423       Xii = Xoffi + 1; Xjj = Xoffj + 1;
424    }
425    else if( GoEast )
426    {
427 /*
428 *  Go one step east in the LCM table. Adjust the current LCM value as well as
429 *  the local column index in A and XR.
430 */
431       lcmt00 += low - ilow + qnb; nblks--; Aoffj  += inbloc; Xoffj  += inbloc;
432 /*
433 *  While there are blocks remaining that own lower entries, keep going east.
434 *  Adjust the current LCM value as well as the local column index in A and XR.
435 */
436       while( ( nblks > 0 ) && ( lcmt00 < low ) )
437       { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; }
438 /*
439 *  Update the lower triangular part of sub( A ).
440 */
441       tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1;
442       if( lower && ( tmp1 > 0 ) )
443       {
444          SYR2( TYPE, ALL, m1, tmp1, K, 0, ALPHA,
445                XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
446                XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
447                Mptr( A, Aii, Ajj, Ald, size ), Ald );
448          Ajj += tmp1; Xjj += tmp1; n1  -= tmp1;
449       }
450 /*
451 *  Return if no more column in the LCM table.
452 */
453       if( nblks <= 0 ) return;
454 /*
455 *  lcmt00 >= low. The current block owns either diagonals or upper entries.
456 *  Save the current position in the LCM table. After this row has been
457 *  completely taken care of, re-start from this column and the next row of
458 *  the LCM table.
459 */
460       lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj;
461 
462       nbloc = Anb;
463       while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
464       {
465 /*
466 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
467 */
468          if( nblkd == 1 ) nbloc = lnbloc;
469          SYR2( TYPE, UPLO, imbloc, nbloc, K, lcmt, ALPHA,
470                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
471                XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR,
472                Mptr( A, Aii, joffd+1, Ald, size ), Ald );
473          lcmt00 = lcmt;  lcmt  += qnb;
474          nblks  = nblkd; nblkd--;
475          Aoffj  = joffd; joffd += nbloc;
476          Xoffj  = joffx; joffx += nbloc;
477       }
478 /*
479 *  Update the upper triangular part of sub( A ).
480 */
481       tmp1 = n1 - joffd + Ajj - 1;
482       if( upper && ( tmp1 > 0 ) )
483          SYR2( TYPE, ALL, imbloc, tmp1, K, 0, ALPHA,
484                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
485                XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR,
486                Mptr( A, Aii, joffd+1, Ald, size ), Ald );
487 
488       tmp1    = Aoffj - Ajj + 1;
489       m1     -= imbloc;
490       n1     -= tmp1;
491       lcmt00 -= ( iupp - upp + pmb );
492       mblks--;
493       Aoffi  += imbloc;
494       Xoffi  += imbloc;
495 /*
496 *  Update the lower triangular part of sub( A ).
497 */
498       if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
499          SYR2( TYPE, ALL, m1, tmp1, K, 0, ALPHA,
500                XC+(Xoffi+1)*XCinc, LDXC, YC+(Xoffi+1)*YCinc, LDYC,
501                XR+Xjj*XRinc,       LDXR, YR+Xjj*YRinc,       LDYR,
502                Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald );
503       Aii = Aoffi + 1; Ajj = Aoffj + 1;
504       Xii = Xoffi + 1; Xjj = Xoffj + 1;
505    }
506 /*
507 *  Loop over the remaining columns of the LCM table.
508 */
509    nbloc = Anb;
510    while( nblks > 0 )
511    {
512       if( nblks == 1 ) nbloc = lnbloc;
513 /*
514 *  While there are blocks remaining that own upper entries, keep going south.
515 *  Adjust the current LCM value as well as the local row index in A and XC.
516 */
517       while( ( mblks > 0 ) && ( lcmt00 > upp ) )
518       { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
519 /*
520 *  Update the upper triangular part of sub( A ).
521 */
522       tmp1 = MIN( Aoffi, iimax ) - Aii + 1;
523       if( upper && ( tmp1 > 0 ) )
524       {
525          SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA,
526                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
527                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
528                Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
529          Aii += tmp1;
530          Xii += tmp1;
531          m1  -= tmp1;
532       }
533 /*
534 *  Return if no more row in the LCM table.
535 */
536       if( mblks <= 0 ) return;
537 /*
538 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
539 *  Save the current position in the LCM table. After this column has been
540 *  completely taken care of, re-start from this row and the next column of
541 *  the LCM table.
542 */
543       lcmt  = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
544 
545       mbloc = Amb;
546       while( ( mblkd > 0 ) && ( lcmt >= low ) )
547       {
548 /*
549 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
550 */
551          if( mblkd == 1 ) mbloc = lmbloc;
552          SYR2( TYPE, UPLO, mbloc, nbloc, K, lcmt, ALPHA,
553                XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
554                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
555                Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
556          lcmt00 = lcmt;  lcmt  -= pmb;
557          mblks  = mblkd; mblkd--;
558          Aoffi  = ioffd; Xoffi  = ioffx;
559          ioffd += mbloc; ioffx += mbloc;
560       }
561 /*
562 *  Update the lower triangular part of sub( A ).
563 */
564       tmp1 = m1 - ioffd + Aii - 1;
565       if( lower && ( tmp1 > 0 ) )
566          SYR2( TYPE, ALL, tmp1, nbloc, K, 0, ALPHA,
567                XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
568                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
569                Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
570 
571       tmp1    = MIN( Aoffi, iimax ) - Aii + 1;
572       m1     -= tmp1;
573       n1     -= nbloc;
574       lcmt00 += qnb;
575       nblks--;
576       Aoffj  += nbloc;
577       Xoffj  += nbloc;
578 /*
579 *  Update the upper triangular part of sub( A ).
580 */
581       if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
582          SYR2( TYPE, ALL, tmp1, n1, K, 0, ALPHA,
583                XC+Xii*XCinc,       LDXC, YC+Xii*YCinc,       LDYC,
584                XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
585                Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
586       Aii = Aoffi + 1; Ajj = Aoffj + 1;
587       Xii = Xoffi + 1; Xjj = Xoffj + 1;
588    }
589 /*
590 *  End of PB_Cpsyr2
591 */
592 }
593