1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18
19 #ifdef __STDC__
pchemv_(F_CHAR_T UPLO,int * N,float * ALPHA,float * A,int * IA,int * JA,int * DESCA,float * X,int * IX,int * JX,int * DESCX,int * INCX,float * BETA,float * Y,int * IY,int * JY,int * DESCY,int * INCY)20 void pchemv_( F_CHAR_T UPLO, int * N, float * ALPHA,
21 float * A, int * IA, int * JA, int * DESCA,
22 float * X, int * IX, int * JX, int * DESCX, int * INCX,
23 float * BETA,
24 float * Y, int * IY, int * JY, int * DESCY, int * INCY )
25 #else
26 void pchemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX,
27 INCX, BETA, Y, IY, JY, DESCY, INCY )
28 /*
29 * .. Scalar Arguments ..
30 */
31 F_CHAR_T UPLO;
32 int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
33 * N;
34 float * ALPHA, * BETA;
35 /*
36 * .. Array Arguments ..
37 */
38 int * DESCA, * DESCX, * DESCY;
39 float * A, * X, * Y;
40 #endif
41 {
42 /*
43 * Purpose
44 * =======
45 *
46 * PCHEMV performs the matrix-vector operation
47 *
48 * sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ),
49 *
50 * where
51 *
52 * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1),
53 *
54 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56 *
57 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59 *
60 * Alpha and beta are scalars, sub( X ) and sub( Y ) are n element sub-
61 * vectors and sub( A ) is an n by n Hermitian submatrix.
62 *
63 * Notes
64 * =====
65 *
66 * A description vector is associated with each 2D block-cyclicly dis-
67 * tributed matrix. This vector stores the information required to
68 * establish the mapping between a matrix entry and its corresponding
69 * process and memory location.
70 *
71 * In the following comments, the character _ should be read as
72 * "of the distributed matrix". Let A be a generic term for any 2D
73 * block cyclicly distributed matrix. Its description vector is DESC_A:
74 *
75 * NOTATION STORED IN EXPLANATION
76 * ---------------- --------------- ------------------------------------
77 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79 * the NPROW x NPCOL BLACS process grid
80 * A is distributed over. The context
81 * itself is global, but the handle
82 * (the integer value) may vary.
83 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84 * ted matrix A, M_A >= 0.
85 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
86 * buted matrix A, N_A >= 0.
87 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88 * block of the matrix A, IMB_A > 0.
89 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90 * left block of the matrix A,
91 * INB_A > 0.
92 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93 * bute the last M_A-IMB_A rows of A,
94 * MB_A > 0.
95 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96 * bute the last N_A-INB_A columns of
97 * A, NB_A > 0.
98 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99 * row of the matrix A is distributed,
100 * NPROW > RSRC_A >= 0.
101 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102 * first column of A is distributed.
103 * NPCOL > CSRC_A >= 0.
104 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105 * array storing the local blocks of
106 * the distributed matrix A,
107 * IF( Lc( 1, N_A ) > 0 )
108 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
109 * ELSE
110 * LLD_A >= 1.
111 *
112 * Let K be the number of rows of a matrix A starting at the global in-
113 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115 * receive if these K rows were distributed over NPROW processes. If K
116 * is the number of columns of a matrix A starting at the global index
117 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119 * these K columns were distributed over NPCOL processes.
120 *
121 * The values of Lr() and Lc() may be determined via a call to the func-
122 * tion PB_Cnumroc:
123 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125 *
126 * Arguments
127 * =========
128 *
129 * UPLO (global input) CHARACTER*1
130 * On entry, UPLO specifies whether the local pieces of
131 * the array A containing the upper or lower triangular part
132 * of the Hermitian submatrix sub( A ) are to be referenced as
133 * follows:
134 *
135 * UPLO = 'U' or 'u' Only the local pieces corresponding to
136 * the upper triangular part of the
137 * Hermitian submatrix sub( A ) are to be
138 * referenced,
139 *
140 * UPLO = 'L' or 'l' Only the local pieces corresponding to
141 * the lower triangular part of the
142 * Hermitian submatrix sub( A ) are to be
143 * referenced.
144 *
145 * N (global input) INTEGER
146 * On entry, N specifies the order of the submatrix sub( A ).
147 * N must be at least zero.
148 *
149 * ALPHA (global input) COMPLEX
150 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
151 * supplied as zero then the local entries of the arrays A
152 * and X corresponding to the entries of the submatrix sub( A )
153 * and the subvector sub( X ) need not be set on input.
154 *
155 * A (local input) COMPLEX array
156 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is
157 * at least Lc( 1, JA+N-1 ). Before entry, this array contains
158 * the local entries of the matrix A.
159 * Before entry with UPLO = 'U' or 'u', this array contains
160 * the local entries of the upper triangular part of the
161 * Hermitian submatrix sub( A ), and the local entries of the
162 * strictly lower triangular of sub( A ) are not referenced.
163 * Before entry with UPLO = 'L' or 'l', this array contains
164 * the local entries of the lower triangular part of the
165 * Hermitian submatrix sub( A ), and the local entries of the
166 * strictly upper triangular of sub( A ) are not referenced.
167 * Note that the imaginary parts of the local entries corres-
168 * ponding to the diagonal elements of sub( A ) need not be
169 * set and assumed to be zero.
170 *
171 * IA (global input) INTEGER
172 * On entry, IA specifies A's global row index, which points to
173 * the beginning of the submatrix sub( A ).
174 *
175 * JA (global input) INTEGER
176 * On entry, JA specifies A's global column index, which points
177 * to the beginning of the submatrix sub( A ).
178 *
179 * DESCA (global and local input) INTEGER array
180 * On entry, DESCA is an integer array of dimension DLEN_. This
181 * is the array descriptor for the matrix A.
182 *
183 * X (local input) COMPLEX array
184 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
185 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
186 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
187 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
188 * Before entry, this array contains the local entries of the
189 * matrix X.
190 *
191 * IX (global input) INTEGER
192 * On entry, IX specifies X's global row index, which points to
193 * the beginning of the submatrix sub( X ).
194 *
195 * JX (global input) INTEGER
196 * On entry, JX specifies X's global column index, which points
197 * to the beginning of the submatrix sub( X ).
198 *
199 * DESCX (global and local input) INTEGER array
200 * On entry, DESCX is an integer array of dimension DLEN_. This
201 * is the array descriptor for the matrix X.
202 *
203 * INCX (global input) INTEGER
204 * On entry, INCX specifies the global increment for the
205 * elements of X. Only two values of INCX are supported in
206 * this version, namely 1 and M_X. INCX must not be zero.
207 *
208 * BETA (global input) COMPLEX
209 * On entry, BETA specifies the scalar beta. When BETA is
210 * supplied as zero then the local entries of the array Y
211 * corresponding to the entries of the subvector sub( Y ) need
212 * not be set on input.
213 *
214 * Y (local input/local output) COMPLEX array
215 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
216 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
217 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
218 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
219 * Before entry, this array contains the local entries of the
220 * matrix Y. On exit, sub( Y ) is overwritten by the updated
221 * subvector.
222 *
223 * IY (global input) INTEGER
224 * On entry, IY specifies Y's global row index, which points to
225 * the beginning of the submatrix sub( Y ).
226 *
227 * JY (global input) INTEGER
228 * On entry, JY specifies Y's global column index, which points
229 * to the beginning of the submatrix sub( Y ).
230 *
231 * DESCY (global and local input) INTEGER array
232 * On entry, DESCY is an integer array of dimension DLEN_. This
233 * is the array descriptor for the matrix Y.
234 *
235 * INCY (global input) INTEGER
236 * On entry, INCY specifies the global increment for the
237 * elements of Y. Only two values of INCY are supported in
238 * this version, namely 1 and M_Y. INCY must not be zero.
239 *
240 * -- Written on April 1, 1998 by
241 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
242 *
243 * ---------------------------------------------------------------------
244 */
245 /*
246 * .. Local Scalars ..
247 */
248 char UploA, * one, top;
249 int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
250 Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
251 Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum,
252 YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow,
253 ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol,
254 nprow, size, upper;
255 float * tbeta;
256 PBTYP_T * type;
257 /*
258 * .. Local Arrays ..
259 */
260 int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_],
261 YCd[DLEN_], YRd[DLEN_], Yd [DLEN_];
262 char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
263 * YR = NULL;
264 /* ..
265 * .. Executable Statements ..
266 *
267 */
268 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
269 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
270 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
271 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
272 #ifndef NO_ARGCHK
273 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
274 /*
275 * Test the input parameters
276 */
277 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
278 {
279 if( ( !upper ) && ( UploA != CLOWER ) )
280 {
281 PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA );
282 info = -1;
283 }
284 PB_Cchkmat( ctxt, "PCHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info );
285 PB_Cchkvec( ctxt, "PCHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info );
286 PB_Cchkvec( ctxt, "PCHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info );
287 }
288 if( info ) { PB_Cabort( ctxt, "PCHEMV", info ); return; }
289 #endif
290 /*
291 * Quick return if possible
292 */
293 if( ( *N == 0 ) ||
294 ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) &&
295 ( ( BETA [REAL_PART] == ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) )
296 return;
297 /*
298 * Retrieve process grid information
299 */
300 #ifdef NO_ARGCHK
301 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
302 #endif
303 /*
304 * Get type structure
305 */
306 type = PB_Cctypeset();
307 /*
308 * When alpha is zero
309 */
310 if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
311 {
312 /*
313 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
314 */
315 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
316 &Yrow, &Ycol );
317
318 if( *INCY == Yd[M_] )
319 {
320 /*
321 * sub( Y ) resides in (a) process row(s)
322 */
323 if( ( myrow == Yrow ) || ( Yrow < 0 ) )
324 {
325 /*
326 * Make sure I own some data and scale sub( Y )
327 */
328 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
329 npcol );
330 if( Ynq > 0 )
331 {
332 Yld = Yd[LLD_];
333 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
334 {
335 cset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
336 Yjj, Yld, type->size ), &Yld );
337 }
338 else
339 {
340 cscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
341 Yjj, Yld, type->size ), &Yld );
342 }
343 }
344 }
345 }
346 else
347 {
348 /*
349 * sub( Y ) resides in (a) process column(s)
350 */
351 if( ( mycol == Ycol ) || ( Ycol < 0 ) )
352 {
353 /*
354 * Make sure I own some data and scale sub( Y )
355 */
356 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
357 nprow );
358 if( Ynp > 0 )
359 {
360 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
361 {
362 cset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
363 Yjj, Yd[LLD_], type->size ), INCY );
364 }
365 else
366 {
367 cscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
368 Yjj, Yd[LLD_], type->size ), INCY );
369 }
370 }
371 }
372 }
373 return;
374 }
375 /*
376 * Compute descriptor Ad0 for sub( A )
377 */
378 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
379 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
380 /*
381 * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process
382 * columns spanned by sub( A )
383 */
384 if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 )
385 {
386 PB_CInOutV( type, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
387 Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr,
388 &YRsum, &YRpbY );
389 PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum );
390 }
391 else
392 {
393 PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
394 Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr,
395 &YCsum, &YCpbY );
396 PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum );
397 }
398 /*
399 * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
400 * sub( A )
401 */
402 if( *INCX == Xd[M_] )
403 {
404 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
405 ROW, &XR, XRd, &XRfr );
406 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd,
407 ROW, &XC, XCd, &XCfr );
408 }
409 else
410 {
411 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
412 COLUMN, &XC, XCd, &XCfr );
413 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd,
414 COLUMN, &XR, XRd, &XRfr );
415 }
416
417 one = type->one;
418 /*
419 * Local matrix-vector multiply iff I own some data
420 */
421 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
422 Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_];
423 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
424 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
425
426 if( ( Amp > 0 ) && ( Anq > 0 ) )
427 {
428 size = type->size;
429 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
430
431 XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_];
432 /*
433 * Scale YR or YC in the case sub( Y ) has been reused
434 */
435 if( YisRow )
436 {
437 /*
438 * YR resides in (a) process row(s)
439 */
440 if( !YRpbY )
441 {
442 if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) )
443 {
444 /*
445 * Make sure I own some data and scale YR
446 */
447 if( Anq > 0 )
448 {
449 if( ( tbeta[REAL_PART] == ZERO ) &&
450 ( tbeta[IMAG_PART] == ZERO ) )
451 {
452 cset_( &Anq, ((char *) tbeta), YR, &YRld );
453 }
454 else
455 {
456 cscal_( &Anq, ((char *) tbeta), YR, &YRld );
457 }
458 }
459 }
460 }
461 }
462 else
463 {
464 /*
465 * YC resides in (a) process column(s)
466 */
467 if( !YCpbY )
468 {
469 if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) )
470 {
471 /*
472 * Make sure I own some data and scale YC
473 */
474 if( Amp > 0 )
475 {
476 if( ( tbeta[REAL_PART] == ZERO ) &&
477 ( tbeta[IMAG_PART] == ZERO ) )
478 {
479 cset_( &Amp, ((char *) tbeta), YC, &ione );
480 }
481 else
482 {
483 cscal_( &Amp, ((char *) tbeta), YC, &ione );
484 }
485 }
486 }
487 }
488 }
489 /*
490 * Computational partitioning size is computed as the product of the logical
491 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
492 */
493 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
494 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
495
496 if( upper )
497 {
498 for( k = 0; k < *N; k += nb )
499 {
500 kb = *N - k; kb = MIN( kb, nb );
501 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
502 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
503 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
504 if( Akp > 0 && Anq0 > 0 )
505 {
506 cgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA),
507 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq,
508 XRld, size ), &XRld, one, YC, &ione );
509 cgemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA),
510 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one,
511 Mptr( YR, 0, Akq, YRld, size ), &YRld );
512 }
513 PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA),
514 Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld,
515 Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0,
516 YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld,
517 PB_Ctzhemv );
518 }
519 }
520 else
521 {
522 for( k = 0; k < *N; k += nb )
523 {
524 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
525 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
526 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
527 PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA),
528 Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld,
529 Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0,
530 YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld,
531 PB_Ctzhemv );
532 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
533 Amp0 = Amp - Akp;
534 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
535 if( Amp0 > 0 && Anq0 > 0 )
536 {
537 cgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
538 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0,
539 Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld,
540 size ), &ione );
541 cgemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
542 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp,
543 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld,
544 size ), &YRld );
545 }
546 }
547 }
548 }
549 if( XCfr ) free( XC );
550 if( XRfr ) free( XR );
551
552 if( YisRow )
553 {
554 /*
555 * Combine the partial column results into YC
556 */
557 if( YCsum )
558 {
559 YCd[CSRC_] = 0;
560 if( Amp > 0 )
561 {
562 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
563 Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 );
564 }
565 }
566 /*
567 * Combine the partial row results into YR
568 */
569 if( YRsum && ( Anq > 0 ) )
570 {
571 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
572 Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_],
573 mycol );
574 }
575
576 /*
577 * YR := YR + YC
578 */
579 PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one,
580 YR, 0, 0, YRd, ROW );
581 /*
582 * sub( Y ) := beta * sub( Y ) + YR (if necessary)
583 */
584 if( YRpbY )
585 {
586 PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW,
587 ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW );
588 }
589 }
590 else
591 {
592 /*
593 * Combine the partial row results into YR
594 */
595 if( YRsum )
596 {
597 YRd[RSRC_] = 0;
598 if( Anq > 0 )
599 {
600 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
601 Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0,
602 mycol );
603 }
604 }
605 /*
606 * Combine the partial column results into YC
607 */
608 if( YCsum && ( Amp > 0 ) )
609 {
610 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
611 Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow,
612 YCd[CSRC_] );
613 }
614 /*
615 * YC := YR + YC
616 */
617 PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one,
618 YC, 0, 0, YCd, COLUMN );
619 /*
620 * sub( Y ) := beta * sub( Y ) + YC (if necessary)
621 */
622 if( YCpbY )
623 {
624 PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd,
625 COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd,
626 COLUMN );
627 }
628 }
629 if( YCfr ) free( YC );
630 if( YRfr ) free( YR );
631 /*
632 * End of PCHEMV
633 */
634 }
635