1 /* ---------------------------------------------------------------------
2 *
3 *  -- PBLAS routine (version 2.0) --
4 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 *     and University of California, Berkeley.
6 *     April 1, 1998
7 *
8 *  ---------------------------------------------------------------------
9 */
10 /*
11 *  Include files
12 */
13 #include "pblas.h"
14 #include "PBpblas.h"
15 #include "PBtools.h"
16 #include "PBblacs.h"
17 #include "PBblas.h"
18 
19 #ifdef __STDC__
pztrmv_(F_CHAR_T UPLO,F_CHAR_T TRANS,F_CHAR_T DIAG,int * N,double * A,int * IA,int * JA,int * DESCA,double * X,int * IX,int * JX,int * DESCX,int * INCX)20 void pztrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N,
21               double * A, int * IA, int * JA, int * DESCA,
22               double * X, int * IX, int * JX, int * DESCX,
23               int * INCX )
24 #else
25 void pztrmv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX,
26               DESCX, INCX )
27 /*
28 *  .. Scalar Arguments ..
29 */
30    F_CHAR_T       DIAG, TRANS, UPLO;
31    int            * IA, * INCX, * IX, * JA, * JX, * N;
32 /*
33 *  .. Array Arguments ..
34 */
35    int            * DESCA, * DESCX;
36    double         * A, * X;
37 #endif
38 {
39 /*
40 *  Purpose
41 *  =======
42 *
43 *  PZTRMV  performs one of the matrix-vector operations
44 *
45 *  sub( X ) := sub( A )*sub( X )   or   sub( X ) := sub( A )'*sub( X )
46 *
47 *  or
48 *
49 *  sub( X ) := conjg( sub( A )' )*sub( X ),
50 *
51 *  where
52 *
53 *     sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and,
54 *
55 *     sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
56 *                      X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X.
57 *
58 *  sub( X )  is an n element subvector and  sub( A ) is an n by n  unit,
59 *  or non-unit, upper or lower triangular submatrix.
60 *
61 *  Notes
62 *  =====
63 *
64 *  A description  vector  is associated with each 2D block-cyclicly dis-
65 *  tributed matrix.  This  vector  stores  the  information  required to
66 *  establish the  mapping  between a  matrix entry and its corresponding
67 *  process and memory location.
68 *
69 *  In  the  following  comments,   the character _  should  be  read  as
70 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
71 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
72 *
73 *  NOTATION         STORED IN       EXPLANATION
74 *  ---------------- --------------- ------------------------------------
75 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
76 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
77 *                                   the NPROW x NPCOL BLACS process grid
78 *                                   A  is  distributed over. The context
79 *                                   itself  is  global,  but  the handle
80 *                                   (the integer value) may vary.
81 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
82 *                                   ted matrix A, M_A >= 0.
83 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
84 *                                   buted matrix A, N_A >= 0.
85 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
86 *                                   block of the matrix A, IMB_A > 0.
87 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
88 *                                   left   block   of   the  matrix   A,
89 *                                   INB_A > 0.
90 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
91 *                                   bute the last  M_A-IMB_A  rows of A,
92 *                                   MB_A > 0.
93 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
94 *                                   bute the last  N_A-INB_A  columns of
95 *                                   A, NB_A > 0.
96 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
97 *                                   row of the matrix  A is distributed,
98 *                                   NPROW > RSRC_A >= 0.
99 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
100 *                                   first column of  A  is  distributed.
101 *                                   NPCOL > CSRC_A >= 0.
102 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
103 *                                   array  storing  the  local blocks of
104 *                                   the distributed matrix A,
105 *                                   IF( Lc( 1, N_A ) > 0 )
106 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
107 *                                   ELSE
108 *                                      LLD_A >= 1.
109 *
110 *  Let K be the number of  rows of a matrix A starting at the global in-
111 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
112 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
113 *  receive if these K rows were distributed over NPROW processes.  If  K
114 *  is the number of columns of a matrix  A  starting at the global index
115 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
116 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
117 *  these K columns were distributed over NPCOL processes.
118 *
119 *  The values of Lr() and Lc() may be determined via a call to the func-
120 *  tion PB_Cnumroc:
121 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
122 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
123 *
124 *  Arguments
125 *  =========
126 *
127 *  UPLO    (global input) CHARACTER*1
128 *          On entry,  UPLO  specifies whether the submatrix  sub( A ) is
129 *          an upper or lower triangular submatrix as follows:
130 *
131 *             UPLO = 'U' or 'u'   sub( A ) is an upper triangular
132 *                                 submatrix,
133 *
134 *             UPLO = 'L' or 'l'   sub( A ) is a  lower triangular
135 *                                 submatrix.
136 *
137 *  TRANS   (global input) CHARACTER*1
138 *          On entry,  TRANS  specifies the  operation to be performed as
139 *          follows:
140 *
141 *             TRANS = 'N' or 'n'   sub( X ) := sub( A )  * sub( X ).
142 *
143 *             TRANS = 'T' or 't'   sub( X ) := sub( A )' * sub( X ).
144 *
145 *             TRANS = 'C' or 'c'
146 *                         sub( X ) := conjg( sub( A )' ) * sub( X ).
147 *
148 *  DIAG    (global input) CHARACTER*1
149 *          On entry,  DIAG  specifies  whether or not  sub( A )  is unit
150 *          triangular as follows:
151 *
152 *             DIAG = 'U' or 'u'  sub( A )  is  assumed to be unit trian-
153 *                                gular,
154 *
155 *             DIAG = 'N' or 'n'  sub( A ) is not assumed to be unit tri-
156 *                                angular.
157 *
158 *  N       (global input) INTEGER
159 *          On entry,  N specifies the order of the  submatrix  sub( A ).
160 *          N must be at least zero.
161 *
162 *  A       (local input) COMPLEX*16 array
163 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
164 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
165 *          the local entries of the matrix A.
166 *          Before entry with  UPLO = 'U' or 'u', this array contains the
167 *          local entries corresponding to  the entries of the upper tri-
168 *          angular submatrix  sub( A ), and the local entries correspon-
169 *          ding to the entries of the  strictly lower triangular part of
170 *          the submatrix  sub( A )  are not referenced.
171 *          Before entry with  UPLO = 'L' or 'l', this array contains the
172 *          local entries corresponding to  the entries of the lower tri-
173 *          angular submatrix  sub( A ), and the local entries correspon-
174 *          ding to the entries of the  strictly upper triangular part of
175 *          the submatrix  sub( A )  are not referenced.
176 *          Note  that  when DIAG = 'U' or 'u', the local entries corres-
177 *          ponding to  the diagonal elements  of the submatrix  sub( A )
178 *          are not referenced either, but are assumed to be unity.
179 *
180 *  IA      (global input) INTEGER
181 *          On entry, IA  specifies A's global row index, which points to
182 *          the beginning of the submatrix sub( A ).
183 *
184 *  JA      (global input) INTEGER
185 *          On entry, JA  specifies A's global column index, which points
186 *          to the beginning of the submatrix sub( A ).
187 *
188 *  DESCA   (global and local input) INTEGER array
189 *          On entry, DESCA  is an integer array of dimension DLEN_. This
190 *          is the array descriptor for the matrix A.
191 *
192 *  X       (local input/local output) COMPLEX*16 array
193 *          On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
194 *          is   at  least  MAX( 1, Lr( 1, IX ) )  when  INCX = M_X   and
195 *          MAX( 1, Lr( 1, IX+N-1 ) )  otherwise,  and,  Kx  is  at least
196 *          Lc( 1, JX+N-1 )  when  INCX = M_X  and Lc( 1, JX ) otherwise.
197 *          Before  entry,  this array  contains the local entries of the
198 *          matrix X. On exit, sub( X ) is overwritten with the transfor-
199 *          med subvector.
200 *
201 *  IX      (global input) INTEGER
202 *          On entry, IX  specifies X's global row index, which points to
203 *          the beginning of the submatrix sub( X ).
204 *
205 *  JX      (global input) INTEGER
206 *          On entry, JX  specifies X's global column index, which points
207 *          to the beginning of the submatrix sub( X ).
208 *
209 *  DESCX   (global and local input) INTEGER array
210 *          On entry, DESCX  is an integer array of dimension DLEN_. This
211 *          is the array descriptor for the matrix X.
212 *
213 *  INCX    (global input) INTEGER
214 *          On entry,  INCX   specifies  the  global  increment  for  the
215 *          elements of  X.  Only two values of  INCX   are  supported in
216 *          this version, namely 1 and M_X. INCX  must not be zero.
217 *
218 *  -- Written on April 1, 1998 by
219 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
220 *
221 *  ---------------------------------------------------------------------
222 */
223 /*
224 *  .. Local Scalars ..
225 */
226    char           DiagA, TranOp, UploA, Xroc, * one, * tbeta, top, *zero;
227    int            Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
228                   Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xcol, Xi, Xii,
229                   Xj, Xjj, Xld, Xnp, Xnq, Xrow, YAfr, YAld, YApbY, YAsum,
230                   ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, notran,
231                   nounit, npcol, nprow, size, upper;
232    PBTYP_T        * type;
233 /*
234 *  .. Local Arrays ..
235 */
236    int            Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_];
237    char           * Aptr = NULL, * XA = NULL, * YA = NULL;
238 /* ..
239 *  .. Executable Statements ..
240 *
241 */
242    upper  = ( ( UploA  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
243    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
244    nounit = ( ( DiagA  = Mupcase( F2C_CHAR( DIAG  )[0] ) ) == CNOUNIT );
245    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
246    PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
247 #ifndef NO_ARGCHK
248 /*
249 *  Test the input parameters
250 */
251    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
252    if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
253    {
254       if( ( !upper ) && ( UploA != CLOWER ) )
255       {
256          PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal UPLO = %c\n", UploA );
257          info = -1;
258       }
259       else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
260       {
261          PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal TRANS = %c\n", TranOp );
262          info = -2;
263       }
264       else if( ( !nounit ) && ( DiagA != CUNIT ) )
265       {
266          PB_Cwarn( ctxt, __LINE__, "PZTRMV", "Illegal DIAG = %c\n", DiagA );
267          info = -3;
268       }
269       PB_Cchkmat( ctxt, "PZTRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad,  8, &info );
270       PB_Cchkvec( ctxt, "PZTRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info );
271    }
272    if( info ) { PB_Cabort( ctxt, "PZTRMV", info ); return; }
273 #endif
274 /*
275 *  Quick return if possible
276 */
277    if( *N == 0 ) return;
278 /*
279 *  Retrieve process grid information
280 */
281 #ifdef NO_ARGCHK
282    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
283 #endif
284 /*
285 *  Get type structure
286 */
287    type = PB_Cztypeset();
288    size = type->size; one  = type->one; zero = type->zero;
289 /*
290 *  Compute descriptor Ad0 for sub( A )
291 */
292    PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
293                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
294 
295    Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN );
296 
297    if( notran )
298    {
299 /*
300 *  Replicate sub( X ) in process rows spanned by sub( A ) -> XA
301 */
302       PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
303                &Xroc, &XA, XAd, &XAfr );
304 /*
305 *  Reuse sub( X ) and/or create vector YA in process columns spanned by sub( A )
306 */
307       PB_CInOutV( type, COLUMN, *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd,
308                   &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
309 /*
310 *  If sub( X ) is distributed in (a) process column(s), then zero it.
311 */
312       if( Xroc == CCOLUMN )
313       {
314 /*
315 *  Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol
316 */
317          PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow,
318                       &Xcol );
319 /*
320 *  sub( X ) resides in (a) process columns(s)
321 */
322          if( ( mycol == Xcol ) || ( Xcol < 0 ) )
323          {
324 /*
325 *  Make sure I own some data and scale sub( X )
326 */
327             Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_],
328                               nprow );
329             if( Xnp > 0 )
330             {
331                zset_( &Xnp, zero, Mptr( ((char *) X), Xii, Xjj, Xd[LLD_],
332                       size ), &ione );
333             }
334          }
335       }
336    }
337    else
338    {
339 /*
340 *  Replicate sub( X ) in process columns spanned by sub( A ) -> XA
341 */
342       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
343                &Xroc, &XA, XAd, &XAfr );
344 /*
345 *  Reuse sub( X ) and/or create vector YA in process rows spanned by sub( A )
346 */
347       PB_CInOutV( type, ROW,    *N, *N, Ad0, 1, one, ((char *) X), Xi, Xj, Xd,
348                   &Xroc, &tbeta, &YA, YAd, &YAfr, &YAsum, &YApbY );
349 /*
350 *  If sub( X ) is distributed in (a) process row(s), then zero it.
351 */
352       if( Xroc == CROW )
353       {
354 /*
355 *  Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol
356 */
357          PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, &Xrow,
358                       &Xcol );
359 /*
360 *  sub( X ) resides in (a) process row(s)
361 */
362          if( ( myrow == Xrow ) || ( Xrow < 0 ) )
363          {
364 /*
365 *  Make sure I own some data and scale sub( X )
366 */
367             Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_],
368                               npcol );
369             if( Xnq > 0 )
370             {
371                Xld = Xd[LLD_];
372                zset_( &Xnq, zero, Mptr( ((char *) X), Xii, Xjj, Xld,
373                       size ), &Xld );
374             }
375          }
376       }
377    }
378 /*
379 *  Local matrix-vector multiply iff I own some data
380 */
381    Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
382    Acol  = Ad0[CSRC_]; Arow  = Ad0[RSRC_];
383    Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
384    Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
385 
386    if( ( Amp > 0 ) && ( Anq > 0 ) )
387    {
388       Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
389 
390       XAld = XAd[LLD_]; YAld = YAd[LLD_];
391 /*
392 *  Computational partitioning size is computed as the product of the logical
393 *  value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
394 */
395       nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
396            PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
397 
398       if( upper )
399       {
400          if( notran )
401          {
402             for( k = 0; k < *N; k += nb )
403             {
404                kb   = *N - k; kb = MIN( kb, nb );
405                Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
406                Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
407                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
408                if( Akp > 0 && Anq0 > 0 )
409                {
410                   zgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald,
411                                size ), &Ald, Mptr( XA, 0, Akq, XAld, size ),
412                                &XAld, one, YA, &ione );
413                }
414                PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one,
415                          Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld,
416                          Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv );
417             }
418          }
419          else
420          {
421             for( k = 0; k < *N; k += nb )
422             {
423                kb   = *N - k; kb = MIN( kb, nb );
424                Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
425                Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
426                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
427                if( Akp > 0 && Anq0 > 0 )
428                {
429                   zgemv_( TRANS, &Akp, &Anq0, one, Mptr( Aptr, 0, Akq, Ald,
430                           size ), &Ald, XA, &ione, one, Mptr( YA, 0, Akq, YAld,
431                           size ), &YAld );
432                }
433                PB_Cptrm( type, type, LEFT, UPPER, &TranOp, &DiagA, kb, 1, one,
434                          Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld,
435                          Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv );
436             }
437          }
438       }
439       else
440       {
441          if( notran )
442          {
443             for( k = 0; k < *N; k += nb )
444             {
445                kb  = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
446                Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
447                Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
448                PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one,
449                          Aptr, k, k, Ad0, Mptr( XA, 0, Akq, XAld, size ), XAld,
450                          Mptr( YA, Akp, 0, YAld, size ), YAld, PB_Ctztrmv );
451                Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
452                Amp0 = Amp - Akp;
453                Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
454                if( Amp0 > 0 && Anq0 > 0 )
455                {
456                   zgemv_( TRANS, &Amp0, &Anq0, one,
457                           Mptr( Aptr, Akp, Akq,  Ald, size ),  &Ald,
458                           Mptr( XA,     0, Akq, XAld, size ), &XAld, one,
459                           Mptr( YA,   Akp,   0, YAld, size ), &ione );
460                }
461             }
462          }
463          else
464          {
465             for( k = 0; k < *N; k += nb )
466             {
467                kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
468                Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
469                Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
470                PB_Cptrm( type, type, LEFT, LOWER, &TranOp, &DiagA, kb, 1, one,
471                          Aptr, k, k, Ad0, Mptr( XA, Akp, 0, XAld, size ), XAld,
472                          Mptr( YA, 0, Akq, YAld, size ), YAld, PB_Ctztrmv );
473                Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
474                Amp0 = Amp - Akp;
475                Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
476                if( Amp0 > 0 && Anq0 > 0 )
477                {
478                   zgemv_( TRANS, &Amp0, &Anq0, one,
479                           Mptr( Aptr, Akp, Akq,  Ald, size ),  &Ald,
480                           Mptr( XA,   Akp,   0, XAld, size ), &ione, one,
481                           Mptr( YA,     0, Akq, YAld, size ), &YAld );
482                }
483             }
484          }
485       }
486    }
487    if( XAfr ) free( XA );
488 
489    if( notran )
490    {
491 /*
492 *  Combine the partial column results into YA
493 */
494       if( YAsum && ( Amp > 0 ) )
495       {
496          top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
497          Czgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
498                    YAd[CSRC_] );
499       }
500 /*
501 *  sub( X ) := YA (if necessary)
502 */
503       if( YApbY )
504       {
505          PB_Cpaxpby( type, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, zero,
506                      ((char *) X), Xi, Xj, Xd, &Xroc );
507       }
508    }
509    else
510    {
511 /*
512 *  Combine the partial row results into YA
513 */
514       if( YAsum && ( Anq > 0 ) )
515       {
516          top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
517          Czgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
518                    mycol );
519       }
520 /*
521 *  sub( X ) := YA (if necessary)
522 */
523       if( YApbY )
524       {
525          PB_Cpaxpby( type, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, zero,
526                      ((char *) X), Xi, Xj, Xd, &Xroc );
527       }
528    }
529    if( YAfr ) free( YA );
530 /*
531 *  End of PZTRMV
532 */
533 }
534