1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
pchemv_(F_CHAR_T UPLO,int * N,float * ALPHA,float * A,int * IA,int * JA,int * DESCA,float * X,int * IX,int * JX,int * DESCX,int * INCX,float * BETA,float * Y,int * IY,int * JY,int * DESCY,int * INCY)20 void pchemv_( F_CHAR_T UPLO, int * N, float * ALPHA,
21               float * A, int * IA, int * JA, int * DESCA,
22               float * X, int * IX, int * JX, int * DESCX, int * INCX,
23               float * BETA,
24               float * Y, int * IY, int * JY, int * DESCY, int * INCY )
25 #else
26 void pchemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX,
27               INCX, BETA, Y, IY, JY, DESCY, INCY )
28 /*
29 *  .. Scalar Arguments ..
30 */
31    F_CHAR_T       UPLO;
32    int            * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
33                   * N;
34    float          * ALPHA, * BETA;
35 /*
36 *  .. Array Arguments ..
37 */
38    int            * DESCA, * DESCX, * DESCY;
39    float          * A, * X, * Y;
40 #endif
41 {
42 /*
43 *  Purpose
44 *  =======
45 *
46 *  PCHEMV  performs the matrix-vector operation
47 *
48 *     sub( Y ) := alpha*sub( A )*sub( X ) + beta*sub( Y ),
49 *
50 *  where
51 *
52 *     sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1),
53 *
54 *     sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55 *                      X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56 *
57 *     sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58 *                      Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59 *
60 *  Alpha and beta are scalars, sub( X ) and sub( Y ) are n element  sub-
61 *  vectors and sub( A ) is an n by n Hermitian submatrix.
62 *
63 *  Notes
64 *  =====
65 *
66 *  A description  vector  is associated with each 2D block-cyclicly dis-
67 *  tributed matrix.  This  vector  stores  the  information  required to
68 *  establish the  mapping  between a  matrix entry and its corresponding
69 *  process and memory location.
70 *
71 *  In  the  following  comments,   the character _  should  be  read  as
72 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
73 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
74 *
75 *  NOTATION         STORED IN       EXPLANATION
76 *  ---------------- --------------- ------------------------------------
77 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
79 *                                   the NPROW x NPCOL BLACS process grid
80 *                                   A  is  distributed over. The context
81 *                                   itself  is  global,  but  the handle
82 *                                   (the integer value) may vary.
83 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
84 *                                   ted matrix A, M_A >= 0.
85 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
86 *                                   buted matrix A, N_A >= 0.
87 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
88 *                                   block of the matrix A, IMB_A > 0.
89 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
90 *                                   left   block   of   the  matrix   A,
91 *                                   INB_A > 0.
92 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
93 *                                   bute the last  M_A-IMB_A  rows of A,
94 *                                   MB_A > 0.
95 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
96 *                                   bute the last  N_A-INB_A  columns of
97 *                                   A, NB_A > 0.
98 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
99 *                                   row of the matrix  A is distributed,
100 *                                   NPROW > RSRC_A >= 0.
101 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
102 *                                   first column of  A  is  distributed.
103 *                                   NPCOL > CSRC_A >= 0.
104 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
105 *                                   array  storing  the  local blocks of
106 *                                   the distributed matrix A,
107 *                                   IF( Lc( 1, N_A ) > 0 )
108 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
109 *                                   ELSE
110 *                                      LLD_A >= 1.
111 *
112 *  Let K be the number of  rows of a matrix A starting at the global in-
113 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115 *  receive if these K rows were distributed over NPROW processes.  If  K
116 *  is the number of columns of a matrix  A  starting at the global index
117 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
118 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
119 *  these K columns were distributed over NPCOL processes.
120 *
121 *  The values of Lr() and Lc() may be determined via a call to the func-
122 *  tion PB_Cnumroc:
123 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125 *
126 *  Arguments
127 *  =========
128 *
129 *  UPLO    (global input) CHARACTER*1
130 *          On  entry,   UPLO  specifies  whether  the  local  pieces  of
131 *          the array  A  containing the  upper or lower triangular  part
132 *          of the Hermitian submatrix  sub( A )  are to be referenced as
133 *          follows:
134 *
135 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
136 *                                 the   upper  triangular  part  of  the
137 *                                 Hermitian submatrix sub( A ) are to be
138 *                                 referenced,
139 *
140 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
141 *                                 the   lower  triangular  part  of  the
142 *                                 Hermitian submatrix sub( A ) are to be
143 *                                 referenced.
144 *
145 *  N       (global input) INTEGER
146 *          On entry,  N specifies the order of the  submatrix  sub( A ).
147 *          N must be at least zero.
148 *
149 *  ALPHA   (global input) COMPLEX
150 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
151 *          supplied  as  zero  then  the  local entries of the arrays  A
152 *          and X corresponding to the entries of the submatrix  sub( A )
153 *          and the subvector sub( X ) need not be set on input.
154 *
155 *  A       (local input) COMPLEX array
156 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
157 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
158 *          the local entries of the matrix A.
159 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
160 *          the  local  entries  of  the  upper  triangular  part  of the
161 *          Hermitian  submatrix  sub( A ), and  the local entries of the
162 *          strictly lower triangular of sub( A ) are not referenced.
163 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
164 *          the  local  entries  of  the  lower  triangular  part  of the
165 *          Hermitian  submatrix  sub( A ), and  the local entries of the
166 *          strictly upper triangular of sub( A ) are not referenced.
167 *          Note that the  imaginary parts  of the local entries  corres-
168 *          ponding to the  diagonal elements  of  sub( A )  need not  be
169 *          set and assumed to be zero.
170 *
171 *  IA      (global input) INTEGER
172 *          On entry, IA  specifies A's global row index, which points to
173 *          the beginning of the submatrix sub( A ).
174 *
175 *  JA      (global input) INTEGER
176 *          On entry, JA  specifies A's global column index, which points
177 *          to the beginning of the submatrix sub( A ).
178 *
179 *  DESCA   (global and local input) INTEGER array
180 *          On entry, DESCA  is an integer array of dimension DLEN_. This
181 *          is the array descriptor for the matrix A.
182 *
183 *  X       (local input) COMPLEX array
184 *          On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
185 *          is   at  least  MAX( 1, Lr( 1, IX ) )  when  INCX = M_X   and
186 *          MAX( 1, Lr( 1, IX+N-1 ) )  otherwise,  and,  Kx  is  at least
187 *          Lc( 1, JX+N-1 )  when  INCX = M_X  and Lc( 1, JX ) otherwise.
188 *          Before  entry,  this  array contains the local entries of the
189 *          matrix X.
190 *
191 *  IX      (global input) INTEGER
192 *          On entry, IX  specifies X's global row index, which points to
193 *          the beginning of the submatrix sub( X ).
194 *
195 *  JX      (global input) INTEGER
196 *          On entry, JX  specifies X's global column index, which points
197 *          to the beginning of the submatrix sub( X ).
198 *
199 *  DESCX   (global and local input) INTEGER array
200 *          On entry, DESCX  is an integer array of dimension DLEN_. This
201 *          is the array descriptor for the matrix X.
202 *
203 *  INCX    (global input) INTEGER
204 *          On entry,  INCX   specifies  the  global  increment  for  the
205 *          elements of  X.  Only two values of  INCX   are  supported in
206 *          this version, namely 1 and M_X. INCX  must not be zero.
207 *
208 *  BETA    (global input) COMPLEX
209 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
210 *          supplied  as  zero  then  the  local entries of  the array  Y
211 *          corresponding to the entries of the subvector  sub( Y )  need
212 *          not be set on input.
213 *
214 *  Y       (local input/local output) COMPLEX array
215 *          On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
216 *          is   at  least  MAX( 1, Lr( 1, IY ) )  when  INCY = M_Y   and
217 *          MAX( 1, Lr( 1, IY+N-1 ) )  otherwise,  and,  Ky  is  at least
218 *          Lc( 1, JY+N-1 )  when  INCY = M_Y  and Lc( 1, JY ) otherwise.
219 *          Before  entry,  this  array contains the local entries of the
220 *          matrix Y.  On  exit,  sub( Y )  is overwritten by the updated
221 *          subvector.
222 *
223 *  IY      (global input) INTEGER
224 *          On entry, IY  specifies Y's global row index, which points to
225 *          the beginning of the submatrix sub( Y ).
226 *
227 *  JY      (global input) INTEGER
228 *          On entry, JY  specifies Y's global column index, which points
229 *          to the beginning of the submatrix sub( Y ).
230 *
231 *  DESCY   (global and local input) INTEGER array
232 *          On entry, DESCY  is an integer array of dimension DLEN_. This
233 *          is the array descriptor for the matrix Y.
234 *
235 *  INCY    (global input) INTEGER
236 *          On entry,  INCY   specifies  the  global  increment  for  the
237 *          elements of  Y.  Only two values of  INCY   are  supported in
238 *          this version, namely 1 and M_Y. INCY  must not be zero.
239 *
240 *  -- Written on April 1, 1998 by
241 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
242 *
243 *  ---------------------------------------------------------------------
244 */
245 /*
246 *  .. Local Scalars ..
247 */
248    char           UploA, * one, top;
249    int            Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
250                   Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
251                   Xi, Xj, YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum,
252                   YRld, Ycol, Yi, Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow,
253                   ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, npcol,
254                   nprow, size, upper;
255    float          * tbeta;
256    PBTYP_T        * type;
257 /*
258 *  .. Local Arrays ..
259 */
260    int            Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_],
261                   YCd[DLEN_], YRd[DLEN_], Yd [DLEN_];
262    char           * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
263                   * YR   = NULL;
264 /* ..
265 *  .. Executable Statements ..
266 *
267 */
268    upper = ( ( UploA  = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
269    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
270    PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
271    PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
272 #ifndef NO_ARGCHK
273    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
274 /*
275 *  Test the input parameters
276 */
277    if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
278    {
279       if( ( !upper ) && ( UploA != CLOWER ) )
280       {
281          PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA );
282          info = -1;
283       }
284       PB_Cchkmat( ctxt, "PCHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad,  7, &info );
285       PB_Cchkvec( ctxt, "PCHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info );
286       PB_Cchkvec( ctxt, "PCHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info );
287    }
288    if( info ) { PB_Cabort( ctxt, "PCHEMV", info ); return; }
289 #endif
290 /*
291 *  Quick return if possible
292 */
293    if( ( *N == 0 ) ||
294        ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) &&
295          ( ( BETA [REAL_PART] ==  ONE ) && ( BETA [IMAG_PART] == ZERO ) ) ) )
296       return;
297 /*
298 *  Retrieve process grid information
299 */
300 #ifdef NO_ARGCHK
301    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
302 #endif
303 /*
304 *  Get type structure
305 */
306    type = PB_Cctypeset();
307 /*
308 *  When alpha is zero
309 */
310    if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
311    {
312 /*
313 *  Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
314 */
315       PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
316                    &Yrow, &Ycol );
317 
318       if( *INCY == Yd[M_] )
319       {
320 /*
321 *  sub( Y ) resides in (a) process row(s)
322 */
323          if( ( myrow == Yrow ) || ( Yrow < 0 ) )
324          {
325 /*
326 *  Make sure I own some data and scale sub( Y )
327 */
328             Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
329                               npcol );
330             if( Ynq > 0 )
331             {
332                Yld = Yd[LLD_];
333                if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
334                {
335                   cset_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
336                          Yjj, Yld, type->size ), &Yld );
337                }
338                else
339                {
340                   cscal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
341                           Yjj, Yld, type->size ), &Yld );
342                }
343             }
344          }
345       }
346       else
347       {
348 /*
349 *  sub( Y ) resides in (a) process column(s)
350 */
351          if( ( mycol == Ycol ) || ( Ycol < 0 ) )
352          {
353 /*
354 *  Make sure I own some data and scale sub( Y )
355 */
356             Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
357                               nprow );
358             if( Ynp > 0 )
359             {
360                if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
361                {
362                   cset_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
363                          Yjj, Yd[LLD_], type->size ), INCY );
364                }
365                else
366                {
367                   cscal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
368                           Yjj, Yd[LLD_], type->size ), INCY );
369                }
370             }
371          }
372       }
373       return;
374    }
375 /*
376 *  Compute descriptor Ad0 for sub( A )
377 */
378    PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
379                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
380 /*
381 *  Reuse sub( Y ) and/or create vectors YR in process rows and YC in process
382 *  columns spanned by sub( A )
383 */
384    if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 )
385    {
386       PB_CInOutV( type, ROW,    *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
387                   Yi, Yj, Yd, ROW,    ((char**)(&tbeta)), &YR, YRd, &YRfr,
388                   &YRsum, &YRpbY );
389       PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum );
390    }
391    else
392    {
393       PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
394                   Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr,
395                   &YCsum, &YCpbY );
396       PB_COutV( type, ROW,    INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum );
397    }
398 /*
399 *  Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
400 *  sub( A )
401 */
402    if( *INCX == Xd[M_] )
403    {
404       PB_CInV( type, NOCONJG, ROW,    *N, *N, Ad0, 1, ((char *) X), Xi, Xj,  Xd,
405                ROW, &XR, XRd, &XRfr );
406       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR,            0,  0, XRd,
407                ROW, &XC, XCd, &XCfr );
408    }
409    else
410    {
411       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,  Xd,
412                COLUMN, &XC, XCd, &XCfr );
413       PB_CInV( type, NOCONJG, ROW,    *N, *N, Ad0, 1, XC,            0,  0, XCd,
414                COLUMN, &XR, XRd, &XRfr );
415    }
416 
417    one = type->one;
418 /*
419 *  Local matrix-vector multiply iff I own some data
420 */
421    Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
422    Acol  = Ad0[CSRC_]; Arow  = Ad0[RSRC_];
423    Amp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
424    Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
425 
426    if( ( Amp > 0 ) && ( Anq > 0 ) )
427    {
428       size = type->size;
429       Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
430 
431       XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_];
432 /*
433 *  Scale YR or YC in the case sub( Y ) has been reused
434 */
435       if( YisRow )
436       {
437 /*
438 *  YR resides in (a) process row(s)
439 */
440          if( !YRpbY )
441          {
442             if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) )
443             {
444 /*
445 *  Make sure I own some data and scale YR
446 */
447                if( Anq > 0 )
448                {
449                   if( ( tbeta[REAL_PART] == ZERO ) &&
450                       ( tbeta[IMAG_PART] == ZERO ) )
451                   {
452                      cset_( &Anq, ((char *) tbeta), YR, &YRld );
453                   }
454                   else
455                   {
456                      cscal_( &Anq, ((char *) tbeta), YR, &YRld );
457                   }
458                }
459             }
460          }
461       }
462       else
463       {
464 /*
465 *  YC resides in (a) process column(s)
466 */
467          if( !YCpbY )
468          {
469             if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) )
470             {
471 /*
472 *  Make sure I own some data and scale YC
473 */
474                if( Amp > 0 )
475                {
476                   if( ( tbeta[REAL_PART] == ZERO ) &&
477                       ( tbeta[IMAG_PART] == ZERO ) )
478                   {
479                      cset_( &Amp, ((char *) tbeta), YC, &ione );
480                   }
481                   else
482                   {
483                      cscal_( &Amp, ((char *) tbeta), YC, &ione );
484                   }
485                }
486             }
487          }
488       }
489 /*
490 *  Computational partitioning size is computed as the product of the logical
491 *  value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
492 */
493       nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
494            PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
495 
496       if( upper )
497       {
498          for( k = 0; k < *N; k += nb )
499          {
500             kb   = *N - k; kb = MIN( kb, nb );
501             Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
502             Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
503             Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
504             if( Akp > 0 && Anq0 > 0 )
505             {
506                cgemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA),
507                        Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq,
508                        XRld, size ), &XRld, one, YC, &ione );
509                cgemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA),
510                        Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one,
511                        Mptr( YR, 0, Akq, YRld, size ), &YRld );
512             }
513             PB_Cpsym( type, type, LEFT, UPPER, kb, 1, ((char *) ALPHA),
514                       Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld,
515                       Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0,
516                       YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld,
517                       PB_Ctzhemv );
518          }
519       }
520       else
521       {
522          for( k = 0; k < *N; k += nb )
523          {
524             kb  = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
525             Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
526             Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
527             PB_Cpsym( type, type, LEFT, LOWER, kb, 1, ((char *) ALPHA),
528                       Aptr, k, k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld,
529                       Mptr( XR, 0, Akq, XRld, size ), XRld, Mptr( YC, Akp, 0,
530                       YCld, size ), YCld, Mptr( YR, 0, Akq, YRld, size ), YRld,
531                       PB_Ctzhemv );
532             Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
533             Amp0 = Amp - Akp;
534             Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
535             if( Amp0 > 0 && Anq0 > 0 )
536             {
537                cgemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
538                        Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0,
539                        Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld,
540                        size ), &ione );
541                cgemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
542                        Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp,
543                        0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld,
544                        size ), &YRld );
545             }
546          }
547       }
548    }
549    if( XCfr ) free( XC );
550    if( XRfr ) free( XR );
551 
552    if( YisRow )
553    {
554 /*
555 *  Combine the partial column results into YC
556 */
557       if( YCsum )
558       {
559          YCd[CSRC_] = 0;
560          if( Amp > 0 )
561          {
562             top        = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
563             Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 );
564          }
565       }
566 /*
567 *  Combine the partial row results into YR
568 */
569       if( YRsum && ( Anq > 0 ) )
570       {
571          top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
572          Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_],
573                    mycol );
574       }
575 
576 /*
577 *  YR := YR + YC
578 */
579       PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one,
580                   YR, 0, 0, YRd, ROW );
581 /*
582 *  sub( Y ) := beta * sub( Y ) + YR (if necessary)
583 */
584       if( YRpbY )
585       {
586          PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW,
587                      ((char *)BETA), ((char *) Y), Yi, Yj, Yd, ROW );
588       }
589    }
590    else
591    {
592 /*
593 *  Combine the partial row results into YR
594 */
595       if( YRsum )
596       {
597          YRd[RSRC_] = 0;
598          if( Anq > 0 )
599          {
600             top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
601             Ccgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0,
602                       mycol );
603          }
604       }
605 /*
606 *  Combine the partial column results into YC
607 */
608       if( YCsum && ( Amp > 0 ) )
609       {
610          top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
611          Ccgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow,
612                    YCd[CSRC_] );
613       }
614 /*
615 *  YC := YR + YC
616 */
617       PB_Cpaxpby( type, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one,
618                   YC, 0, 0, YCd, COLUMN );
619 /*
620 *  sub( Y ) := beta * sub( Y ) + YC (if necessary)
621 */
622       if( YCpbY )
623       {
624          PB_Cpaxpby( type, NOCONJG, *N, 1, one, YC, 0, 0, YCd,
625                      COLUMN, ((char *)BETA), ((char *) Y), Yi, Yj, Yd,
626                      COLUMN );
627       }
628    }
629    if( YCfr ) free( YC );
630    if( YRfr ) free( YR );
631 /*
632 *  End of PCHEMV
633 */
634 }
635