1 /* ---------------------------------------------------------------------
2 *
3 *  -- Mark R. Fahey
4 *     June 28, 2000
5 *
6 *  ---------------------------------------------------------------------
7 */
8 /*
9 *  Include files
10 */
11 #include "pblas.h"
12 
pcrot_(n,X,ix,jx,desc_X,incx,Y,iy,jy,desc_Y,incy,c,s)13 void pcrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s )
14 /*
15 *  Mark Fahey
16 *  June 22, 2000
17 */
18 /*
19 *  .. Scalar Arguments ..
20 */
21    int         * incx, * incy, * ix, * iy, * jx, * jy, * n;
22    float       * c;
23    complex     * s;
24 /*
25 *  ..
26 *  .. Array Arguments ..
27 */
28    int         desc_X[], desc_Y[];
29    complex     X[], Y[];
30 {
31 /*
32 *  Purpose
33 *  =======
34 *
35 *  PCROT applies a plane rotation, where the cos (C) is real and the
36 *  sin (S) is complex, and the vectors CX and CY are complex, i.e.,
37 *
38 *     [ sub( X ) ] := [  C          S  ] [ sub( X ) ]
39 *     [ sub( Y ) ] := [ -conjg(S)   C  ] [ sub( Y ) ]
40 *
41 *  where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
42 *                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
43 *
44 *        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
45 *                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y,
46 *
47 *  and where C*C + S*CONJG(S) = 1.0.
48 *
49 *  Notes
50 *  =====
51 *
52 *  Each global data object is described by an associated description
53 *  vector.  This vector stores the information required to establish
54 *  the mapping between an object element and its corresponding process
55 *  and memory location.
56 *
57 *  Let A be a generic term for any 2D block cyclicly distributed array.
58 *  Such a global array has an associated description vector DESCA.
59 *  In the following comments, the character _ should be read as
60 *  "of the global array".
61 *
62 *  NOTATION        STORED IN      EXPLANATION
63 *  --------------- -------------- --------------------------------------
64 *  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
65 *                                 DT_A = 1.
66 *  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
67 *                                 the BLACS process grid A is distribu-
68 *                                 ted over. The context itself is glo-
69 *                                 bal, but the handle (the integer
70 *                                 value) may vary.
71 *  M_A    (global) descA[ M_ ]    The number of rows in the global
72 *                                 array A.
73 *  N_A    (global) descA[ N_ ]    The number of columns in the global
74 *                                 array A.
75 *  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
76 *                                 te the rows of the array.
77 *  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
78 *                                 te the columns of the array.
79 *  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
80 *                                 row of the array A is distributed.
81 *  CSRC_A (global) descA[ CSRC_ ] The process column over which the
82 *                                 first column of the array A is
83 *                                 distributed.
84 *  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
85 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
86 *
87 *  Let K be the number of rows or columns of a distributed matrix,
88 *  and assume that its process grid has dimension p x q.
89 *  LOCr( K ) denotes the number of elements of K that a process
90 *  would receive if K were distributed over the p processes of its
91 *  process column.
92 *  Similarly, LOCc( K ) denotes the number of elements of K that a
93 *  process would receive if K were distributed over the q processes of
94 *  its process row.
95 *  The values of LOCr() and LOCc() may be determined via a call to the
96 *  ScaLAPACK tool function, NUMROC:
97 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
98 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
99 *  An upper bound for these quantities may be computed by:
100 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
101 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
102 *
103 *  Because vectors may be seen as particular matrices, a distributed
104 *  vector is considered to be a distributed matrix.
105 *
106 *  If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
107 *  process column having the first entries of sub( Y ) must also contain
108 *  the first entries of sub( X ). Moreover, the quantity
109 *  MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
110 *
111 *  If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
112 *  Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
113 *  MOD( IY-1, MB_Y ).
114 *
115 *  If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
116 *  Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
117 *  MOD( JY-1, NB_Y ).
118 *
119 *  If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
120 *  equal to MB_Y, and the process row having the first entries of
121 *  sub( Y ) must also contain the first entries of sub( X ). Moreover,
122 *  the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
123 *
124 *  Arguments
125 *  =========
126 *
127 *  N       (input) INTEGER
128 *          The number of elements in the vectors CX and CY.
129 *
130 *  X       (local input) COMPLEX array containing the local
131 *          pieces of a distributed matrix of dimension of at least
132 *              ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
133 *          This array contains the entries of the distributed vector
134 *          sub( X ).
135 *          On output, CX is overwritten with C*X + S*Y.
136 *
137 *  IX      (global input) pointer to INTEGER
138 *          The global row index of the submatrix of the distributed
139 *          matrix X to operate on.
140 *
141 *  JX      (global input) pointer to INTEGER
142 *          The global column index of the submatrix of the distributed
143 *          matrix X to operate on.
144 *
145 *  DESCX   (global and local input) INTEGER array of dimension 8.
146 *          The array descriptor of the distributed matrix X.
147 *
148 *  INCX    (global input) pointer to INTEGER
149 *          The global increment for the elements of X. Only two values
150 *          of INCX are supported in this version, namely 1 and M_X.
151 *
152 *  Y       (local input) COMPLEX array containing the local
153 *          pieces of a distributed matrix of dimension of at least
154 *              ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
155 *          This array contains the entries of the distributed vector
156 *          sub( Y ).
157 *          On output, CY is overwritten with -CONJG(S)*X + C*Y.
158 *
159 *  IY      (global input) pointer to INTEGER
160 *          The global row index of the submatrix of the distributed
161 *          matrix Y to operate on.
162 *
163 *  JY      (global input) pointer to INTEGER
164 *          The global column index of the submatrix of the distributed
165 *          matrix Y to operate on.
166 *
167 *  DESCY   (global and local input) INTEGER array of dimension 8.
168 *          The array descriptor of the distributed matrix Y.
169 *
170 *  INCY    (global input) pointer to INTEGER
171 *          The global increment for the elements of Y. Only two values
172 *          of INCY are supported in this version, namely 1 and M_Y.
173 *
174 *  C       (input) pointer to FLOAT
175 *  S       (input) pointer COMPLEX
176 *          C and S define a rotation
177 *             [  C          S  ]
178 *             [ -conjg(S)   C  ]
179 *          where C*C + S*CONJG(S) = 1.0.
180 *
181 * =====================================================================
182 *
183 *  .. Local Scalars ..
184 */
185    int         ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
186                jjy, lcm, lcmp, mycol, myrow, nn, np, np0,
187                nprow, npcol, nq, nz, ione=1, tmp1, wksz;
188    complex     xwork[1], ywork[1], zero;
189 /* ..
190 *  .. PBLAS Buffer ..
191 */
192    complex     * buff;
193 /* ..
194 *  .. External Functions ..
195 */
196    void        blacs_gridinfo_();
197    void        cgerv2d_();
198    void        cgesd2d_();
199    void        pbchkvect();
200    void        PB_Cabort();
201    char        * getpbbuf();
202    F_INTG_FCT  pbctrnv_();
203    F_INTG_FCT  crot_();
204    F_INTG_FCT  ilcm_();
205 /* ..
206 *  .. Executable Statements ..
207 *
208 *  Get grid parameters
209 */
210    ictxt = desc_X[CTXT_];
211    blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
212 /*
213 *  Test the input parameters
214 */
215    info = 0;
216    if( nprow == -1 )
217       info = -(500+CTXT_+1);
218    else
219    {
220       pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
221                  &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
222       pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
223                  &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
224 
225       if( info == 0 )
226       {
227          if( *n != 1 )
228          {
229             if( *incx == desc_X[M_] )
230             {                 /* X is distributed along a process row */
231                if( *incy == desc_Y[M_] )
232                {               /* Y is distributed over a process row */
233                   if( ( ixcol != iycol ) ||
234                       ( ( (*jx-1) % desc_X[NB_] ) !=
235                         ( (*jy-1) % desc_Y[NB_] ) ) )
236                      info = -9;
237                   else if( desc_Y[NB_] != desc_X[NB_] )
238                      info = -(1000+NB_+1);
239                }
240                else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
241                {            /* Y is distributed over a process column */
242                   if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
243                      info = -8;
244                   else if( desc_Y[MB_] != desc_X[NB_] )
245                      info = -(1000+MB_+1);
246                }
247                else
248                {
249                   info = -11;
250                }
251             }
252             else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
253             {              /* X is distributed along a process column */
254                if( *incy == desc_Y[M_] )
255                {                  /* Y is distributed over a process row */
256                   if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
257                      info = -9;
258                   else if( desc_Y[NB_] != desc_X[MB_] )
259                      info = -(1000+NB_+1);
260                }
261                else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
262                {            /* Y is distributed over a process column */
263                   if( ( ixrow != iyrow ) ||
264                       ( ( (*ix-1) % desc_X[MB_] ) !=
265                         ( (*iy-1) % desc_Y[MB_] ) ) )
266                      info = -8;
267                   else if( desc_Y[MB_] != desc_X[MB_] )
268                      info = -(1000+MB_+1);
269                }
270                else
271                {
272                   info = -11;
273                }
274             }
275             else
276             {
277                info = -6;
278             }
279          }
280          if( ictxt != desc_Y[CTXT_] )
281             info = -(1000+CTXT_+1);
282       }
283    }
284    if( info ) { PB_Cabort( ictxt, "PCROT", info ); return; }
285 /*
286    if( info )
287    {
288       pberror_( &ictxt, "PCROT", &info );
289       return;
290    }
291 */
292 /*
293 *  Quick return if possible.
294 */
295    zero.re  = ZERO;
296    zero.im  = ZERO;
297    if( *n == 0 ) return;
298 /*
299 *  rotation
300 */
301    if( *n == 1 )
302    {
303       if( ( myrow == ixrow ) && ( mycol == ixcol ) )
304       {
305          buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
306          if( ( myrow != iyrow ) || ( mycol != iycol ) )
307          {
308             cgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
309             cgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
310          }
311          else
312             *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
313          crot_( n, buff, n, ywork, n, c, s );
314          X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff;
315          if( ( myrow == iyrow ) && ( mycol == iycol ) )
316             Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork;
317       }
318       else if( ( myrow == iyrow ) && ( mycol == iycol ) )
319       {
320          cgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
321                    &ixrow, &ixcol );
322          cgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
323          crot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s );
324       }
325       return;
326    }
327 
328    if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
329    {               /* X and Y are both distributed over a process row */
330       nz = (*jx-1) % desc_Y[NB_];
331       nn = *n + nz;
332       nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
333       if( mycol == ixcol )
334          nq -= nz;
335       if( ixrow == iyrow )
336       {
337          if( myrow == ixrow )
338          {
339             crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
340                         &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
341          }
342       }
343       else
344       {
345          if( myrow == ixrow )
346          {
347             cgesd2d_( &ictxt, &ione, &nq,
348                       &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
349                       &iyrow, &mycol );
350             buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
351             cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol );
352             crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
353                         buff, &ione, c, s );
354          }
355          else if( myrow == iyrow )
356          {
357             cgesd2d_( &ictxt, &ione, &nq,
358                       &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
359                       &ixrow, &mycol );
360             buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
361             cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol );
362             crot_( &nq, buff, &ione,
363                         &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
364          }
365       }
366    }
367    else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
368             ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
369    {            /* X and Y are both distributed over a process column */
370       nz = (*ix-1) % desc_X[MB_];
371       nn = *n + nz;
372       np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
373       if( myrow == ixrow )
374          np -= nz;
375       if( ixcol == iycol )
376       {
377          if( mycol == ixcol )
378          {
379             crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
380                         &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
381          }
382       }
383       else
384       {
385          if( mycol == ixcol )
386          {
387             cgesd2d_( &ictxt, &np, &ione,
388                       &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
389                       &myrow, &iycol );
390             buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
391             cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol );
392             crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
393                         buff, &ione, c, s );
394          }
395          else if( mycol == iycol )
396          {
397             cgesd2d_( &ictxt, &np, &ione,
398                       &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
399                       &myrow, &ixcol );
400             buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
401             cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol );
402             crot_( &np, buff, &ione,
403                         &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
404          }
405       }
406    }
407    else       /* X and Y are not distributed along the same direction */
408    {
409       lcm = ilcm_( &nprow, &npcol );
410       if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
411       {                     /* X is distributed over a process column */
412          lcmp = lcm / nprow;
413          nz = (*jy-1) % desc_Y[NB_];
414          nn = *n + nz;
415          tmp1 = nn / desc_Y[MB_];
416          np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
417          np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
418          tmp1 = np0 / desc_X[MB_];
419          wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
420          wksz = np + wksz;
421 
422          buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
423 
424          if( mycol == iycol )
425             jjy -= nz;
426          if( myrow == ixrow )
427             np -= nz;
428          pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
429                    &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
430                    &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
431                    &ixrow, &ixcol, buff+np );
432          if( mycol == ixcol )
433          {
434             crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]],
435                      incx, buff, &ione, c, s );
436          }
437          pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
438                    &desc_Y[NB_], &nz, buff, &ione, &zero,
439                    &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
440                    &ixrow, &ixcol,  &iyrow, &iycol, buff+np );
441       }
442       else                  /* Y is distributed over a process column */
443       {
444          lcmp = lcm / nprow;
445          nz = (*jx-1) % desc_X[NB_];
446          nn = *n + nz;
447          tmp1 = nn / desc_X[MB_];
448          np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
449          np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
450          tmp1 = np0 / desc_Y[MB_];
451          wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
452          wksz = np + wksz;
453 
454          buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
455 
456          if( myrow == iyrow )
457             np -= nz;
458          pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
459                    &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
460                    &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
461                    &iyrow, &iycol, buff+np );
462          if( mycol == iycol )
463          {
464             crot_( &np, buff, &ione,
465                      &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
466          }
467          pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
468                    &desc_X[NB_], &nz, buff, &ione, &zero,
469                    &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
470                    &iyrow, &iycol, &ixrow, &ixcol, buff+np );
471       }
472    }
473 }
474