1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
19 #ifdef __STDC__
PB_CVMpack(PBTYP_T * TYPE,PB_VM_T * VM,char * VROCS,char * ROCS,char * UNPA,char * TRANS,int MN,int K,char * ALPHA,char * A,int LDA,char * BETA,char * B,int LDB)20 int PB_CVMpack( PBTYP_T * TYPE, PB_VM_T * VM, char * VROCS, char * ROCS,
21 char * UNPA, char * TRANS, int MN, int K,
22 char * ALPHA, char * A, int LDA,
23 char * BETA, char * B, int LDB )
24 #else
25 int PB_CVMpack( TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
26 LDA, BETA, B, LDB )
27 /*
28 * .. Scalar Arguments ..
29 */
30 int K, LDA, LDB, MN;
31 char * ALPHA, * BETA;
32 /*
33 * .. Array Arguments ..
34 */
35 char * VROCS, * ROCS, * UNPA, * TRANS;
36 PBTYP_T * TYPE;
37 PB_VM_T * VM;
38 char * A, * B;
39 #endif
40 {
41 /*
42 * Purpose
43 * =======
44 *
45 * PB_CVMpack packs a one-dimensional distributed array A into B, or
46 * unpacks an array B into a one-dimensional distributed array A. This
47 * operation is triggered by a virtual distributed array.
48 *
49 * Arguments
50 * =========
51 *
52 * TYPE (local input) pointer to a PBTYP_T structure
53 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
54 * that contains type information (see pblas.h).
55 *
56 * VM (local input) pointer to a VM structure
57 * On entry, VM is a pointer to a structure of type PB_VM_T,
58 * that contains the virtual matrix information (see pblas.h).
59 *
60 * VROCS (local input) pointer to CHAR
61 * On entry, VROCS specifies if the rows or columns of the vir-
62 * tual distributed array grid should be used for the packing or
63 * unpacking operation as follows:
64 * VROCS = 'R' or 'r', the rows should be used,
65 * VROCS = 'C' or 'c', the columns should be used.
66 *
67 * ROCS (local input) pointer to CHAR
68 * On entry, ROCS specifies if rows or columns should be used
69 * packed or unpacked as follows:
70 * ROCS = 'R' or 'r', rows should be (un)packed,
71 * ROCS = 'C' or 'c', columns should be (un)packed.
72 *
73 * UNPA (local input) pointer to CHAR
74 * On entry, UNPA specifies if the data should be packed or un-
75 * packed as follows:
76 * UNPA = 'P' or 'p', packing,
77 * UNPA = 'U' or 'u', unpacking.
78 *
79 * TRANS (local input) pointer to CHAR
80 * On entry, TRANS specifies if conjugation, transposition or
81 * conjugate transposition should occur during the (un)packing
82 * operation as follows:
83 * TRANS = 'N' or 'n', natural (un)packing,
84 * TRANS = 'Z' or 'z', conjugated (un)packing,
85 * TRANS = 'T' or 't', transposed (un)packing,
86 * TRANS = 'C' or 'c', conjugate transposed (un)packing.
87 *
88 * MN (local input) INTEGER
89 * On entry, MN specifies the length of the distributed dimen-
90 * sion to be (un)packed. MN must be at least zero.
91 *
92 * K (local input) INTEGER
93 * On entry, K specifies the length of the non-distributed di-
94 * mension to be (un)packed. K must be at least zero.
95 *
96 * ALPHA (local input) pointer to CHAR
97 * On entry, ALPHA specifies the scalar alpha.
98 *
99 * A (local input/local output) pointer to CHAR
100 * On entry, A points to an array of dimension (LDA, Ka), where
101 * Ka is K when ROCS is 'R' or 'r' and when ROCS is 'C' or 'c',
102 * Ka is IMBLOC+(MBLKS-2)*MB+LMB when VROCS is 'R' or 'r' and
103 * when VROCS is 'C' or 'c', Ka is INBLOC+(NBLKS-2)*NB+LNB. This
104 * array contains unpacked data.
105 *
106 * LDA (local input) INTEGER
107 * On entry, LDA specifies the leading dimension of the array A.
108 * LDA must be at least MAX( 1, K ) when ROCS = 'C' or 'c' and
109 * MAX( 1, IMBLOC+(MBLKS-2)*MB+LMB ) when ROCS is 'R' or 'r' and
110 * VROCS is 'R' or 'r', and MAX( 1, INBLOC+(NBLKS-2)*NB+LNB )
111 * when ROCS is 'R' or 'r' and VROCS is 'C' or 'c'.
112 *
113 * BETA (local input) pointer to CHAR
114 * On entry, BETA specifies the scalar beta.
115 *
116 * B (local input/local output) pointer to CHAR
117 * On entry, B points to an array of dimension (LDB,*). When
118 * ROCS is 'C' or 'c', and TRANS is 'N', 'n', 'Z' or 'Z', B
119 * points to an K by MN array. When TRANS is 'T', 't', 'C' or
120 * 'c', B points to an MN by K array. When ROCS is 'R' or 'r',
121 * and TRANS is 'T', 't', 'C' or 'c', B points to an K by MN ar-
122 * ray. When TRANS is 'N', 'n', 'Z' or 'z', B points to an MN by
123 * K array. This array contains the packed data.
124 *
125 * LDB (local input) INTEGER
126 * On entry, LDB specifies the leading dimension of the array B.
127 *
128 * -- Written on April 1, 1998 by
129 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
130 *
131 * ---------------------------------------------------------------------
132 */
133 /*
134 * .. Local Scalars ..
135 */
136 int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
140 char * aptrd;
141 MMADD_T add;
142 /* ..
143 * .. Executable Statements ..
144 *
145 */
146 mblks = VM->mblks; nblks = VM->nblks;
147 /*
148 * Quick return if I don't own any blocks.
149 */
150 if( ( mblks == 0 ) || ( nblks == 0 ) ) return( 0 );
151 /*
152 * Retrieve the contents of VM structure fields
153 */
154 lcmt00 = VM->lcmt00;
155 imbloc = VM->imbloc; mb = VM->mb; lmbloc = VM->lmbloc; upp = VM->upp;
156 iupp = VM->iupp; nprow = VM->nprow;
157 inbloc = VM->inbloc; nb = VM->nb; lnbloc = VM->lnbloc; low = VM->low;
158 ilow = VM->ilow; npcol = VM->npcol;
159
160 if( Mupcase( UNPA[0] ) == CPACKING )
161 {
162 /*
163 * B is the target packed buffer, A is the distributed source
164 */
165 if( Mupcase( TRANS[0] ) == CNOTRAN )
166 {
167 /*
168 * Add A to B
169 */
170 notran = 1; add = TYPE->Fmmadd;
171 }
172 else if( Mupcase( TRANS[0] ) == CCONJG )
173 {
174 /*
175 * Add the conjugate of A to B
176 */
177 notran = 1; add = TYPE->Fmmcadd;
178 }
179 else if( Mupcase( TRANS[0] ) == CTRAN )
180 {
181 /*
182 * Add the tranpose of A to B
183 */
184 notran = 0; add = TYPE->Fmmtadd;
185 }
186 else
187 {
188 /*
189 * Add the conjugate tranpose of A to B
190 */
191 notran = 0; add = TYPE->Fmmtcadd;
192 }
193 }
194 else
195 {
196 /*
197 * B is the source packed buffer, A is the distributed target
198 */
199 if( Mupcase( TRANS[0] ) == CNOTRAN )
200 {
201 /*
202 * Add B to A
203 */
204 notran = 1; add = TYPE->Fmmdda;
205 }
206 else if( Mupcase( TRANS[0] ) == CCONJG )
207 {
208 /*
209 * Add the conjugate of B to A
210 */
211 notran = 1; add = TYPE->Fmmddac;
212 }
213 else if( Mupcase( TRANS[0] ) == CTRAN )
214 {
215 /*
216 * Add the tranpose of B to A
217 */
218 notran = 0; add = TYPE->Fmmddat;
219 }
220 else
221 {
222 /*
223 * Add the conjugate tranpose of B to A
224 */
225 notran = 0; add = TYPE->Fmmddact;
226 }
227 }
228
229 size = TYPE->size;
230 rows = ( Mupcase( ROCS[0] ) == CROW );
231
232 if( Mupcase( VROCS[0] ) == CROW )
233 {
234 /*
235 * (un)packing using rows of virtual matrix
236 */
237 if( rows )
238 {
239 /*
240 * (un)packing rows of mn by k array A.
241 */
242 inca = size;
243 incb = ( notran ? size : LDB * size );
244 m = &tmp2;
245 n = &K;
246 }
247 else
248 {
249 /*
250 * (un)packing columns of k by mn array A
251 */
252 inca = LDA * size;
253 incb = ( notran ? LDB * size : size );
254 m = &K;
255 n = &tmp2;
256 }
257 kb = MN;
258 /*
259 * From the (un)packing point of view the only valuable shortcut is when the
260 * virtual grid and the blocks are square, and the offset is zero or the grid
261 * is 1x1.
262 */
263 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
264 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
265 {
266 if( VM->prow == VM->pcol )
267 {
268 npq = ( ( mblks < 2 ) ? imbloc :
269 imbloc + ( mblks - 2 ) * mb + lmbloc );
270 npq = MIN( npq, kb );
271 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
273 }
274 return( npq );
275 }
276 pmb = nprow * mb;
277 qnb = npcol * nb;
278 /*
279 * Handle separately the first row and/or column of the LCM table. Update the
280 * LCM value of the curent block lcmt00, as well as the number of rows and
281 * columns mblks and nblks remaining in the LCM table.
282 */
283 GoSouth = ( lcmt00 > iupp );
284 GoEast = ( lcmt00 < ilow );
285
286 if( !( GoSouth ) && !( GoEast ) )
287 {
288 /*
289 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
290 */
291 if( lcmt00 >= 0 )
292 {
293 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
294 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
295 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
296 }
297 else
298 {
299 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
300 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
301 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
302 }
303 if( ( kb -= tmp2 ) == 0 ) return( npq );
304 B += tmp2 * incb;
305 /*
306 * Decide whether one should go south or east in the table: Go east if
307 * the block below the current one only owns lower entries. If this block,
308 * however, owns diagonals, then go south.
309 */
310 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
311 }
312
313 if( GoSouth )
314 {
315 /*
316 * Go one step south in the LCM table. Adjust the current LCM value as well as
317 * the pointer to A. The pointer to B remains unchanged.
318 */
319 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
320 /*
321 * While there are blocks remaining that own upper entries, keep going south.
322 * Adjust the current LCM value as well as the pointer to A accordingly.
323 */
324 while( mblks && ( lcmt00 > upp ) )
325 { lcmt00 -= pmb; mblks--; A += mb * inca; }
326 /*
327 * Return if no more row in the LCM table.
328 */
329 if( mblks <= 0 ) return( npq );
330 /*
331 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
332 * Save the current position in the LCM table. After this column has been
333 * completely taken care of, re-start from this row and the next column of
334 * the LCM table.
335 */
336 lcmt = lcmt00; mblkd = mblks; aptrd = A;
337
338 while( mblkd && ( lcmt >= ilow ) )
339 {
340 /*
341 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
342 */
343 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
344 if( lcmt >= 0 )
345 {
346 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
347 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
348 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
349 }
350 else
351 {
352 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
353 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
354 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
355 }
356 if( ( kb -= tmp2 ) == 0 ) return( npq );
357 /*
358 * Keep going south until there are no more blocks owning diagonals
359 */
360 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
361 }
362 /*
363 * I am done with the first column of the LCM table. Go to the next column.
364 */
365 lcmt00 += low - ilow + qnb; nblks--;
366 }
367 else if( GoEast )
368 {
369 /*
370 * Go one step east in the LCM table. Adjust the current LCM value as
371 * well as the pointer to B. The pointer to A remains unchanged.
372 */
373 lcmt00 += low - ilow + qnb; nblks--;
374 /*
375 * While there are blocks remaining that own lower entries, keep going east
376 * in the LCM table. Adjust the current LCM value as well as the pointer to
377 * B accordingly.
378 */
379 while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
380 /*
381 * Return if no more column in the LCM table.
382 */
383 if( nblks <= 0 ) return( npq );
384 /*
385 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
386 * the current position in the LCM table. After this row has been completely
387 * taken care of, re-start from this column and the next row of the LCM table.
388 */
389 lcmt = lcmt00; nblkd = nblks;
390
391 while( nblkd && ( lcmt <= iupp ) )
392 {
393 /*
394 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
395 */
396 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
397 if( lcmt >= 0 )
398 {
399 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
400 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
401 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
402 }
403 else
404 {
405 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
406 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
407 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
408 }
409 if( ( kb -= tmp2 ) == 0 ) return( npq );
410 /*
411 * Keep going east until there are no more blocks owning diagonals.
412 */
413 lcmt += qnb; nblkd--; B += tmp2 * incb;
414 }
415 /*
416 * I am done with the first row of the LCM table. Go to the next row.
417 */
418 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
419 }
420 /*
421 * Loop over the remaining columns of the LCM table.
422 */
423 do
424 {
425 /*
426 * If the current block does not have diagonal elements, find the closest one in
427 * the LCM table having some.
428 */
429 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
430 {
431 while( mblks && nblks )
432 {
433 while( mblks && ( lcmt00 > upp ) )
434 { lcmt00 -= pmb; mblks--; A += mb*inca; }
435 if( lcmt00 >= low ) break;
436 while( nblks && ( lcmt00 < low ) )
437 { lcmt00 += qnb; nblks--; }
438 if( lcmt00 <= upp ) break;
439 }
440 }
441 if( !( mblks ) || !( nblks ) ) return( npq );
442 /*
443 * The current block owns diagonals. Save the current position in the LCM table.
444 * After this column has been completely taken care of, re-start from this row
445 * and the next column in the LCM table.
446 */
447 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448 lcmt = lcmt00; mblkd = mblks; aptrd = A;
449
450 while( mblkd && ( lcmt >= low ) )
451 {
452 /*
453 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
454 */
455 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
456 if( lcmt >= 0 )
457 {
458 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
459 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
460 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
461 }
462 else
463 {
464 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
465 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
466 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
467 }
468 if( ( kb -= tmp2 ) == 0 ) return( npq );
469 /*
470 * Keep going south until there are no more blocks owning diagonals
471 */
472 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
473 }
474 /*
475 * I am done with this column of the LCM table. Go to the next column ...
476 */
477 lcmt00 += qnb; nblks--;
478 /*
479 * ... until there are no more columns.
480 */
481 } while( nblks > 0 );
482 /*
483 * Return the number of diagonals found.
484 */
485 return( npq );
486 }
487 else
488 {
489 /*
490 * (un)packing using columns of virtual matrix
491 */
492 if( rows )
493 {
494 /*
495 * (un)packing rows of mn by k array A
496 */
497 inca = size;
498 incb = ( notran ? size : LDB * size );
499 m = &tmp2;
500 n = &K;
501 }
502 else
503 {
504 /*
505 * (un)packing columns of k by mn array A
506 */
507 inca = LDA * size;
508 incb = ( notran ? LDB * size : size );
509 m = &K;
510 n = &tmp2;
511 }
512 kb = MN;
513 /*
514 * From the (un)packing point of view the only valuable shortcut is when the
515 * virtual grid and the blocks are square, and the offset is zero or the grid
516 * is 1x1.
517 */
518 if( ( ( lcmt00 == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
519 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
520 {
521 if( VM->prow == VM->pcol )
522 {
523 npq = ( ( nblks < 2 ) ? inbloc :
524 inbloc + ( nblks - 2 ) * nb + lnbloc );
525 npq = MIN( npq, kb );
526 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
528 }
529 return( npq );
530 }
531 pmb = nprow * mb;
532 qnb = npcol * nb;
533 /*
534 * Handle separately the first row and/or column of the LCM table. Update the
535 * LCM value of the curent block lcmt00, as well as the number of rows and
536 * columns mblks and nblks remaining in the LCM table.
537 */
538 GoSouth = ( lcmt00 > iupp );
539 GoEast = ( lcmt00 < ilow );
540
541 if( !( GoSouth ) && !( GoEast ) )
542 {
543 /*
544 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp
545 */
546 if( lcmt00 >= 0 )
547 {
548 tmp1 = imbloc - lcmt00; tmp1 = MAX( 0, tmp1 );
549 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
550 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
551 }
552 else
553 {
554 tmp1 = inbloc + lcmt00; tmp1 = MAX( 0, tmp1 );
555 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
556 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
557 }
558 if( ( kb -= tmp2 ) == 0 ) return( npq );
559 B += tmp2 * incb;
560 /*
561 * Decide whether one should go south or east in the table: Go east if
562 * the block below the current one only owns lower entries. If this block,
563 * however, owns diagonals, then go south.
564 */
565 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
566 }
567
568 if( GoSouth )
569 {
570 /*
571 * Go one step south in the LCM table. Adjust the current LCM value as well as
572 * the pointer to B. The pointer to A remains unchanged.
573 */
574 lcmt00 -= iupp - upp + pmb; mblks--;
575 /*
576 * While there are blocks remaining that own upper entries, keep going south.
577 * Adjust the current LCM value as well as the pointer to B accordingly.
578 */
579 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
580 /*
581 * Return if no more row in the LCM table.
582 */
583 if( mblks <= 0 ) return( npq );
584 /*
585 * lcmt00 <= upp. The current block owns either diagonals or lower entries.
586 * Save the current position in the LCM table. After this column has been
587 * completely taken care of, re-start from this row and the next column of
588 * the LCM table.
589 */
590 lcmt = lcmt00; mblkd = mblks;
591
592 while( mblkd && ( lcmt >= ilow ) )
593 {
594 /*
595 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found.
596 */
597 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
598 if( lcmt >= 0 )
599 {
600 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
601 tmp2 = MIN( tmp1, inbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
602 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
603 }
604 else
605 {
606 tmp1 = inbloc + lcmt; tmp1 = MAX( 0, tmp1 );
607 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
608 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
609 }
610 if( ( kb -= tmp2 ) == 0 ) return( npq );
611 /*
612 * Keep going south until there are no more blocks owning diagonals
613 */
614 lcmt -= pmb; mblkd--; B += tmp2 * incb;
615 }
616 /*
617 * I am done with the first column of the LCM table. Go to the next column.
618 */
619 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
620 }
621 else if( GoEast )
622 {
623 /*
624 * Go one step east in the LCM table. Adjust the current LCM value as
625 * well as the pointer to A. The pointer to B remains unchanged.
626 */
627 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
628 /*
629 * While there are blocks remaining that own lower entries, keep going east
630 * in the LCM table. Adjust the current LCM value as well as the pointer to
631 * A accordingly.
632 */
633 while( nblks && ( lcmt00 < low ) )
634 { lcmt00 += qnb; nblks--; A += nb * inca; }
635 /*
636 * Return if no more column in the LCM table.
637 */
638 if( nblks <= 0 ) return( npq );
639 /*
640 * lcmt00 >= low. The current block owns either diagonals or upper entries. Save
641 * the current position in the LCM table. After this row has been completely
642 * taken care of, re-start from this column and the next row of the LCM table.
643 */
644 lcmt = lcmt00; nblkd = nblks; aptrd = A;
645
646 while( nblkd && ( lcmt <= iupp ) )
647 {
648 /*
649 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found.
650 */
651 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
652 if( lcmt >= 0 )
653 {
654 tmp1 = imbloc - lcmt; tmp1 = MAX( 0, tmp1 );
655 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
656 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
657 }
658 else
659 {
660 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
661 tmp2 = MIN( tmp1, imbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
662 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
663 }
664 if( ( kb -= tmp2 ) == 0 ) return( npq );
665 /*
666 * Keep going east until there are no more blocks owning diagonals.
667 */
668 lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
669 }
670 /*
671 * I am done with the first row of the LCM table. Go to the next row.
672 */
673 lcmt00 -= iupp - upp + pmb; mblks--;
674 }
675 /*
676 * Loop over the remaining columns of the LCM table.
677 */
678 do
679 {
680 /*
681 * If the current block does not have diagonal elements, find the closest one in
682 * the LCM table having some.
683 */
684 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
685 {
686 while( mblks && nblks )
687 {
688 while( mblks && ( lcmt00 > upp ) )
689 { lcmt00 -= pmb; mblks--; }
690 if( lcmt00 >= low ) break;
691 while( nblks && ( lcmt00 < low ) )
692 { lcmt00 += qnb; nblks--; A += nb*inca; }
693 if( lcmt00 <= upp ) break;
694 }
695 }
696 if( !( mblks ) || !( nblks ) ) return( npq );
697 /*
698 * The current block owns diagonals. Save the current position in the LCM table.
699 * After this column has been completely taken care of, re-start from this row
700 * and the next column in the LCM table.
701 */
702 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703 lcmt = lcmt00; mblkd = mblks;
704
705 while( mblkd && ( lcmt >= low ) )
706 {
707 /*
708 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found.
709 */
710 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
711 if( lcmt >= 0 )
712 {
713 tmp1 = mbloc - lcmt; tmp1 = MAX( 0, tmp1 );
714 tmp2 = MIN( tmp1, nbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
715 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
716 }
717 else
718 {
719 tmp1 = nbloc + lcmt; tmp1 = MAX( 0, tmp1 );
720 tmp2 = MIN( tmp1, mbloc ); npq += ( tmp2 = MIN( tmp2, kb ) );
721 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
722 }
723 if( ( kb -= tmp2 ) == 0 ) return( npq );
724 /*
725 * Keep going south until there are no more blocks owning diagonals
726 */
727 lcmt -= pmb; mblkd--; B += tmp2 * incb;
728 }
729 /*
730 * I am done with this column of the LCM table. Go to the next column ...
731 */
732 lcmt00 += qnb; nblks--; A += nbloc * inca;
733 /*
734 * ... until there are no more columns.
735 */
736 } while( nblks > 0 );
737 /*
738 * Return the number of diagonals found.
739 */
740 return( npq );
741 }
742 /*
743 * End of PB_CVMpack
744 */
745 }
746