1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS auxiliary routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
PB_CVMpack(PBTYP_T * TYPE,PB_VM_T * VM,char * VROCS,char * ROCS,char * UNPA,char * TRANS,int MN,int K,char * ALPHA,char * A,int LDA,char * BETA,char * B,int LDB)20 int PB_CVMpack( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS,
21                 char * UNPA, char * TRANS, int MN, int K,
22                 char * ALPHA, char * A, int LDA,
23                 char * BETA,  char * B, int LDB )
24 #else
25 int PB_CVMpack( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26                 LDA, BETA,  B, LDB )
27 /*
28 *  .. Scalar Arguments ..
29 */
30    int            K, LDA, LDB, MN;
31    char           * ALPHA, * BETA;
32 /*
33 *  .. Array Arguments ..
34 */
35    char           * VROCS, * ROCS, * UNPA, * TRANS;
36    PBTYP_T        * TYPE;
37    PB_VM_T        * VM;
38    char           * A, * B;
39 #endif
40 {
41 /*
42 *  Purpose
43 *  =======
44 *
45 *  PB_CVMpack  packs a  one-dimensional  distributed  array A into B, or
46 *  unpacks  an array B  into a one-dimensional distributed array A. This
47 *  operation is triggered by a virtual distributed array.
48 *
49 *  Arguments
50 *  =========
51 *
52 *  TYPE    (local input) pointer to a PBTYP_T structure
53 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
54 *          that contains type information (see pblas.h).
55 *
56 *  VM      (local input) pointer to a VM structure
57 *          On entry,  VM  is  a  pointer to a structure of type PB_VM_T,
58 *          that contains the virtual matrix information (see pblas.h).
59 *
60 *  VROCS   (local input) pointer to CHAR
61 *          On entry,  VROCS specifies if the rows or columns of the vir-
62 *          tual distributed array grid should be used for the packing or
63 *          unpacking operation as follows:
64 *             VROCS = 'R' or 'r', the rows should be used,
65 *             VROCS = 'C' or 'c', the columns should be used.
66 *
67 *  ROCS    (local input) pointer to CHAR
68 *          On entry,  ROCS  specifies if rows or columns  should be used
69 *          packed or unpacked as follows:
70 *             ROCS = 'R' or 'r', rows should be (un)packed,
71 *             ROCS = 'C' or 'c', columns should be (un)packed.
72 *
73 *  UNPA    (local input) pointer to CHAR
74 *          On entry,  UNPA specifies if the data should be packed or un-
75 *          packed as follows:
76 *             UNPA = 'P' or 'p', packing,
77 *             UNPA = 'U' or 'u', unpacking.
78 *
79 *  TRANS   (local input) pointer to CHAR
80 *          On entry,  TRANS  specifies if conjugation,  transposition or
81 *          conjugate transposition  should occur during the  (un)packing
82 *          operation as follows:
83 *             TRANS = 'N' or 'n', natural (un)packing,
84 *             TRANS = 'Z' or 'z', conjugated (un)packing,
85 *             TRANS = 'T' or 't', transposed (un)packing,
86 *             TRANS = 'C' or 'c', conjugate transposed (un)packing.
87 *
88 *  MN      (local input) INTEGER
89 *          On entry, MN  specifies the length of the distributed  dimen-
90 *          sion to be (un)packed. MN must be at least zero.
91 *
92 *  K       (local input) INTEGER
93 *          On entry,  K  specifies the length of the non-distributed di-
94 *          mension to be (un)packed. K must be at least zero.
95 *
96 *  ALPHA   (local input) pointer to CHAR
97 *          On entry, ALPHA specifies the scalar alpha.
98 *
99 *  A       (local input/local output) pointer to CHAR
100 *          On entry, A points to an array of  dimension (LDA, Ka), where
101 *          Ka is K when ROCS is 'R' or 'r' and  when ROCS is 'C' or 'c',
102 *          Ka is IMBLOC+(MBLKS-2)*MB+LMB  when  VROCS  is 'R' or 'r' and
103 *          when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
104 *          array contains unpacked data.
105 *
106 *  LDA     (local input) INTEGER
107 *          On entry, LDA specifies the leading dimension of the array A.
108 *          LDA must be at least MAX( 1, K ) when ROCS = 'C' or  'c'  and
109 *          MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
110 *          VROCS  is  'R'  or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
111 *          when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
112 *
113 *  BETA    (local input) pointer to CHAR
114 *          On entry, BETA specifies the scalar beta.
115 *
116 *  B       (local input/local output) pointer to CHAR
117 *          On entry,  B  points  to an array of  dimension (LDB,*). When
118 *          ROCS  is  'C'  or  'c',  and TRANS is 'N', 'n', 'Z' or 'Z', B
119 *          points to an K by MN  array.  When  TRANS is 'T', 't', 'C' or
120 *          'c', B points to  an MN  by K array. When ROCS is 'R' or 'r',
121 *          and TRANS is 'T', 't', 'C' or 'c', B points to an K by MN ar-
122 *          ray. When TRANS is 'N', 'n', 'Z' or 'z', B points to an MN by
123 *          K array. This array contains the packed data.
124 *
125 *  LDB     (local input) INTEGER
126 *          On entry, LDB specifies the leading dimension of the array B.
127 *
128 *  -- Written on April 1, 1998 by
129 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
130 *
131 *  ---------------------------------------------------------------------
132 */
133 /*
134 *  .. Local Scalars ..
135 */
136    int            GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137                   lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138                   * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139                   nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
140    char           * aptrd;
141    MMADD_T        add;
142 /* ..
143 *  .. Executable Statements ..
144 *
145 */
146    mblks = VM->mblks; nblks = VM->nblks;
147 /*
148 *  Quick return if I don't own any blocks.
149 */
150    if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
151 /*
152 *  Retrieve the contents of VM structure fields
153 */
154    lcmt00 = VM->lcmt00;
155    imbloc = VM->imbloc; mb    = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
156    iupp   = VM->iupp;   nprow = VM->nprow;
157    inbloc = VM->inbloc; nb    = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
158    ilow   = VM->ilow;   npcol = VM->npcol;
159 
160    if( Mupcase( UNPA[0] ) == CPACKING )
161    {
162 /*
163 *  B is the target packed buffer, A is the distributed source
164 */
165       if( Mupcase( TRANS[0] ) == CNOTRAN )
166       {
167 /*
168 *  Add A to B
169 */
170          notran = 1; add = TYPE->Fmmadd;
171       }
172       else if( Mupcase( TRANS[0] ) == CCONJG )
173       {
174 /*
175 *  Add the conjugate of A to B
176 */
177          notran = 1; add = TYPE->Fmmcadd;
178       }
179       else if( Mupcase( TRANS[0] ) == CTRAN )
180       {
181 /*
182 *  Add the tranpose of A to B
183 */
184          notran = 0; add = TYPE->Fmmtadd;
185       }
186       else
187       {
188 /*
189 *  Add the conjugate tranpose of A to B
190 */
191          notran = 0; add = TYPE->Fmmtcadd;
192       }
193    }
194    else
195    {
196 /*
197 *  B is the source packed buffer, A is the distributed target
198 */
199       if( Mupcase( TRANS[0] ) == CNOTRAN )
200       {
201 /*
202 *  Add B to A
203 */
204          notran = 1; add = TYPE->Fmmdda;
205       }
206       else if( Mupcase( TRANS[0] ) == CCONJG )
207       {
208 /*
209 *  Add the conjugate of B to A
210 */
211          notran = 1; add = TYPE->Fmmddac;
212       }
213       else if( Mupcase( TRANS[0] ) == CTRAN )
214       {
215 /*
216 *  Add the tranpose of B to A
217 */
218          notran = 0; add = TYPE->Fmmddat;
219       }
220       else
221       {
222 /*
223 *  Add the conjugate tranpose of B to A
224 */
225          notran = 0; add = TYPE->Fmmddact;
226       }
227    }
228 
229    size = TYPE->size;
230    rows = ( Mupcase( ROCS[0] ) == CROW );
231 
232    if( Mupcase( VROCS[0] ) == CROW )
233    {
234 /*
235 *  (un)packing using rows of virtual matrix
236 */
237       if( rows )
238       {
239 /*
240 *  (un)packing rows of mn by k array A.
241 */
242          inca = size;
243          incb = ( notran ? size : LDB * size );
244          m    = &tmp2;
245          n    = &K;
246       }
247       else
248       {
249 /*
250 *  (un)packing columns of k by mn array A
251 */
252          inca = LDA * size;
253          incb = ( notran ? LDB * size : size );
254          m    = &K;
255          n    = &tmp2;
256       }
257       kb  = MN;
258 /*
259 *  From the (un)packing point of view the only valuable shortcut is when the
260 *  virtual grid and the blocks are square, and the offset is zero or the grid
261 *  is 1x1.
262 */
263       if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
264             ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
265       {
266          if( VM->prow == VM->pcol )
267          {
268             npq = ( ( mblks <  2 ) ? imbloc :
269                     imbloc + ( mblks - 2 ) * mb + lmbloc );
270             npq = MIN( npq, kb );
271             if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272             else       add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
273          }
274          return( npq );
275       }
276       pmb = nprow * mb;
277       qnb = npcol * nb;
278 /*
279 *  Handle separately the first row and/or column of the LCM table. Update the
280 *  LCM value of the curent block lcmt00, as well as the number of rows and
281 *  columns mblks and nblks remaining in the LCM table.
282 */
283       GoSouth = ( lcmt00 > iupp );
284       GoEast  = ( lcmt00 < ilow );
285 
286       if( !( GoSouth ) && !( GoEast ) )
287       {
288 /*
289 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
290 */
291          if( lcmt00 >= 0 )
292          {
293             tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
294             tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
295             add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
296          }
297          else
298          {
299             tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
300             tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
301             add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
302          }
303          if( ( kb -= tmp2 ) == 0 ) return( npq );
304          B += tmp2 * incb;
305 /*
306 *  Decide whether one should go south or east in the table: Go east if
307 *  the block below the current one only owns lower entries. If this block,
308 *  however, owns diagonals, then go south.
309 */
310          GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
311       }
312 
313       if( GoSouth )
314       {
315 /*
316 *  Go one step south in the LCM table. Adjust the current LCM value as well as
317 *  the pointer to A. The pointer to B remains unchanged.
318 */
319          lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
320 /*
321 *  While there are blocks remaining that own upper entries, keep going south.
322 *  Adjust the current LCM value as well as the pointer to A accordingly.
323 */
324          while( mblks && ( lcmt00 > upp ) )
325          { lcmt00 -= pmb; mblks--; A += mb * inca; }
326 /*
327 *  Return if no more row in the LCM table.
328 */
329          if( mblks <= 0 ) return( npq );
330 /*
331 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
332 *  Save the current position in the LCM table. After this column has been
333 *  completely taken care of, re-start from this row and the next column of
334 *  the LCM table.
335 */
336          lcmt = lcmt00; mblkd = mblks; aptrd = A;
337 
338          while( mblkd && ( lcmt >= ilow ) )
339          {
340 /*
341 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
342 */
343             mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
344             if( lcmt >= 0 )
345             {
346                tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
347                tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
348                add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
349             }
350             else
351             {
352                tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
353                tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
354                add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
355             }
356             if( ( kb -= tmp2 ) == 0 ) return( npq );
357 /*
358 *  Keep going south until there are no more blocks owning diagonals
359 */
360             lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
361          }
362 /*
363 *  I am done with the first column of the LCM table. Go to the next column.
364 */
365          lcmt00 += low - ilow + qnb; nblks--;
366       }
367       else if( GoEast )
368       {
369 /*
370 *  Go one step east in the LCM table. Adjust the current LCM value as
371 *  well as the pointer to B. The pointer to A remains unchanged.
372 */
373          lcmt00 += low - ilow + qnb; nblks--;
374 /*
375 *  While there are blocks remaining that own lower entries, keep going east
376 *  in the LCM table. Adjust the current LCM value as well as the pointer to
377 *  B accordingly.
378 */
379          while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
380 /*
381 *  Return if no more column in the LCM table.
382 */
383          if( nblks <= 0 ) return( npq );
384 /*
385 *  lcmt00 >= low. The current block owns either diagonals or upper entries. Save
386 *  the current position in the LCM table. After this row has been completely
387 *  taken care of, re-start from this column and the next row of the LCM table.
388 */
389          lcmt = lcmt00; nblkd = nblks;
390 
391          while( nblkd && ( lcmt <= iupp ) )
392          {
393 /*
394 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
395 */
396             nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
397             if( lcmt >= 0 )
398             {
399                tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
400                tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
401                add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
402             }
403             else
404             {
405                tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
406                tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
407                add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
408             }
409             if( ( kb  -= tmp2 ) == 0 ) return( npq );
410 /*
411 *  Keep going east until there are no more blocks owning diagonals.
412 */
413             lcmt += qnb; nblkd--; B += tmp2 * incb;
414          }
415 /*
416 *  I am done with the first row of the LCM table. Go to the next row.
417 */
418          lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
419       }
420 /*
421 *  Loop over the remaining columns of the LCM table.
422 */
423       do
424       {
425 /*
426 *  If the current block does not have diagonal elements, find the closest one in
427 *  the LCM table having some.
428 */
429          if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
430          {
431             while( mblks && nblks )
432             {
433                while( mblks && ( lcmt00 > upp ) )
434                { lcmt00 -= pmb; mblks--; A += mb*inca; }
435                if( lcmt00 >= low ) break;
436                while( nblks && ( lcmt00 < low ) )
437                { lcmt00 += qnb; nblks--; }
438                if( lcmt00 <= upp ) break;
439             }
440          }
441          if( !( mblks ) || !( nblks ) ) return( npq );
442 /*
443 *  The current block owns diagonals. Save the current position in the LCM table.
444 *  After this column has been completely taken care of, re-start from this row
445 *  and the next column in the LCM table.
446 */
447          nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448          lcmt = lcmt00; mblkd = mblks; aptrd = A;
449 
450          while( mblkd && ( lcmt >= low ) )
451          {
452 /*
453 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
454 */
455             mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
456             if( lcmt >= 0 )
457             {
458                tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
459                tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
460                add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
461             }
462             else
463             {
464                tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
465                tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
466                add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
467             }
468             if( ( kb  -= tmp2 ) == 0 ) return( npq );
469 /*
470 *  Keep going south until there are no more blocks owning diagonals
471 */
472             lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
473          }
474 /*
475 *  I am done with this column of the LCM table. Go to the next column ...
476 */
477          lcmt00 += qnb; nblks--;
478 /*
479 *  ... until there are no more columns.
480 */
481       } while( nblks > 0 );
482 /*
483 *  Return the number of diagonals found.
484 */
485       return( npq );
486    }
487    else
488    {
489 /*
490 *  (un)packing using columns of virtual matrix
491 */
492       if( rows )
493       {
494 /*
495 *  (un)packing rows of mn by k array A
496 */
497          inca = size;
498          incb = ( notran ? size : LDB * size );
499          m    = &tmp2;
500          n    = &K;
501       }
502       else
503       {
504 /*
505 *  (un)packing columns of k by mn array A
506 */
507          inca = LDA * size;
508          incb = ( notran ? LDB * size : size );
509          m    = &K;
510          n    = &tmp2;
511       }
512       kb  = MN;
513 /*
514 *  From the (un)packing point of view the only valuable shortcut is when the
515 *  virtual grid and the blocks are square, and the offset is zero or the grid
516 *  is 1x1.
517 */
518       if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
519             ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
520       {
521          if(  VM->prow == VM->pcol )
522          {
523             npq = ( ( nblks <  2 ) ? inbloc :
524                     inbloc + ( nblks - 2 ) * nb + lnbloc );
525             npq = MIN( npq, kb );
526             if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527             else       add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
528          }
529          return( npq );
530       }
531       pmb = nprow * mb;
532       qnb = npcol * nb;
533 /*
534 *  Handle separately the first row and/or column of the LCM table. Update the
535 *  LCM value of the curent block lcmt00, as well as the number of rows and
536 *  columns mblks and nblks remaining in the LCM table.
537 */
538       GoSouth = ( lcmt00 > iupp );
539       GoEast  = ( lcmt00 < ilow );
540 
541       if( !( GoSouth ) && !( GoEast ) )
542       {
543 /*
544 *  The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
545 */
546          if( lcmt00 >= 0 )
547          {
548             tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
549             tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
550             add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
551          }
552          else
553          {
554             tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
555             tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
556             add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
557          }
558          if( ( kb -= tmp2 ) == 0 ) return( npq );
559          B += tmp2 * incb;
560 /*
561 *  Decide whether one should go south or east in the table: Go east if
562 *  the block below the current one only owns lower entries. If this block,
563 *  however, owns diagonals, then go south.
564 */
565          GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
566       }
567 
568       if( GoSouth )
569       {
570 /*
571 *  Go one step south in the LCM table. Adjust the current LCM value as well as
572 *  the pointer to B. The pointer to A remains unchanged.
573 */
574          lcmt00 -= iupp - upp + pmb; mblks--;
575 /*
576 *  While there are blocks remaining that own upper entries, keep going south.
577 *  Adjust the current LCM value as well as the pointer to B accordingly.
578 */
579          while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
580 /*
581 *  Return if no more row in the LCM table.
582 */
583          if( mblks <= 0 ) return( npq );
584 /*
585 *  lcmt00 <= upp. The current block owns either diagonals or lower entries.
586 *  Save the current position in the LCM table. After this column has been
587 *  completely taken care of, re-start from this row and the next column of
588 *  the LCM table.
589 */
590          lcmt  = lcmt00; mblkd = mblks;
591 
592          while( mblkd && ( lcmt >= ilow ) )
593          {
594 /*
595 *  A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
596 */
597             mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
598             if( lcmt >= 0 )
599             {
600                tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
601                tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
602                add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
603             }
604             else
605             {
606                tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
607                tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
608                add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
609             }
610             if( ( kb  -= tmp2 ) == 0 ) return( npq );
611 /*
612 *  Keep going south until there are no more blocks owning diagonals
613 */
614             lcmt  -= pmb; mblkd--; B += tmp2 * incb;
615          }
616 /*
617 *  I am done with the first column of the LCM table. Go to the next column.
618 */
619          lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
620       }
621       else if( GoEast )
622       {
623 /*
624 *  Go one step east in the LCM table. Adjust the current LCM value as
625 *  well as the pointer to A. The pointer to B remains unchanged.
626 */
627          lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
628 /*
629 *  While there are blocks remaining that own lower entries, keep going east
630 *  in the LCM table. Adjust the current LCM value as well as the pointer to
631 *  A accordingly.
632 */
633          while( nblks && ( lcmt00 < low ) )
634          { lcmt00 += qnb; nblks--; A += nb * inca; }
635 /*
636 *  Return if no more column in the LCM table.
637 */
638          if( nblks <= 0 ) return( npq );
639 /*
640 *  lcmt00 >= low. The current block owns either diagonals or upper entries. Save
641 *  the current position in the LCM table. After this row has been completely
642 *  taken care of, re-start from this column and the next row of the LCM table.
643 */
644          lcmt  = lcmt00; nblkd = nblks; aptrd = A;
645 
646          while( nblkd && ( lcmt <= iupp ) )
647          {
648 /*
649 *  A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
650 */
651             nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
652             if( lcmt >= 0 )
653             {
654                tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
655                tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
656                add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
657             }
658             else
659             {
660                tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
661                tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
662                add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
663             }
664             if( ( kb  -= tmp2 ) == 0 ) return( npq );
665 /*
666 *  Keep going east until there are no more blocks owning diagonals.
667 */
668             lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
669          }
670 /*
671 *  I am done with the first row of the LCM table. Go to the next row.
672 */
673          lcmt00 -= iupp - upp + pmb; mblks--;
674       }
675 /*
676 *  Loop over the remaining columns of the LCM table.
677 */
678       do
679       {
680 /*
681 *  If the current block does not have diagonal elements, find the closest one in
682 *  the LCM table having some.
683 */
684          if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
685          {
686             while( mblks && nblks )
687             {
688                while( mblks && ( lcmt00 > upp ) )
689                { lcmt00 -= pmb; mblks--; }
690                if( lcmt00 >= low ) break;
691                while( nblks && ( lcmt00 < low ) )
692                { lcmt00 += qnb; nblks--; A += nb*inca; }
693                if( lcmt00 <= upp ) break;
694             }
695          }
696          if( !( mblks ) || !( nblks ) ) return( npq );
697 /*
698 *  The current block owns diagonals. Save the current position in the LCM table.
699 *  After this column has been completely taken care of, re-start from this row
700 *  and the next column in the LCM table.
701 */
702          nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703          lcmt = lcmt00; mblkd = mblks;
704 
705          while( mblkd && ( lcmt >= low ) )
706          {
707 /*
708 *  A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
709 */
710             mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
711             if( lcmt >= 0 )
712             {
713                tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
714                tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
715                add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
716             }
717             else
718             {
719                tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
720                tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
721                add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
722             }
723             if( ( kb  -= tmp2 ) == 0 ) return( npq );
724 /*
725 *  Keep going south until there are no more blocks owning diagonals
726 */
727             lcmt -= pmb; mblkd--; B += tmp2 * incb;
728          }
729 /*
730 *  I am done with this column of the LCM table. Go to the next column ...
731 */
732          lcmt00 += qnb; nblks--; A += nbloc * inca;
733 /*
734 *  ... until there are no more columns.
735 */
736       } while( nblks > 0 );
737 /*
738 *  Return the number of diagonals found.
739 */
740       return( npq );
741    }
742 /*
743 *  End of PB_CVMpack
744 */
745 }
746