1 /* ---------------------------------------------------------------------
2 *
3 * -- Mark R. Fahey
4 * June 28, 2000
5 *
6 * ---------------------------------------------------------------------
7 */
8 /*
9 * Include files
10 */
11 #include "pblas.h"
12
pcrot_(n,X,ix,jx,desc_X,incx,Y,iy,jy,desc_Y,incy,c,s)13 void pcrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s )
14 /*
15 * Mark Fahey
16 * June 22, 2000
17 */
18 /*
19 * .. Scalar Arguments ..
20 */
21 int * incx, * incy, * ix, * iy, * jx, * jy, * n;
22 float * c;
23 complex * s;
24 /*
25 * ..
26 * .. Array Arguments ..
27 */
28 int desc_X[], desc_Y[];
29 complex X[], Y[];
30 {
31 /*
32 * Purpose
33 * =======
34 *
35 * PCROT applies a plane rotation, where the cos (C) is real and the
36 * sin (S) is complex, and the vectors CX and CY are complex, i.e.,
37 *
38 * [ sub( X ) ] := [ C S ] [ sub( X ) ]
39 * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ]
40 *
41 * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
42 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
43 *
44 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
45 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y,
46 *
47 * and where C*C + S*CONJG(S) = 1.0.
48 *
49 * Notes
50 * =====
51 *
52 * Each global data object is described by an associated description
53 * vector. This vector stores the information required to establish
54 * the mapping between an object element and its corresponding process
55 * and memory location.
56 *
57 * Let A be a generic term for any 2D block cyclicly distributed array.
58 * Such a global array has an associated description vector DESCA.
59 * In the following comments, the character _ should be read as
60 * "of the global array".
61 *
62 * NOTATION STORED IN EXPLANATION
63 * --------------- -------------- --------------------------------------
64 * DT_A (global) descA[ DT_ ] The descriptor type. In this case,
65 * DT_A = 1.
66 * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
67 * the BLACS process grid A is distribu-
68 * ted over. The context itself is glo-
69 * bal, but the handle (the integer
70 * value) may vary.
71 * M_A (global) descA[ M_ ] The number of rows in the global
72 * array A.
73 * N_A (global) descA[ N_ ] The number of columns in the global
74 * array A.
75 * MB_A (global) descA[ MB_ ] The blocking factor used to distribu-
76 * te the rows of the array.
77 * NB_A (global) descA[ NB_ ] The blocking factor used to distribu-
78 * te the columns of the array.
79 * RSRC_A (global) descA[ RSRC_ ] The process row over which the first
80 * row of the array A is distributed.
81 * CSRC_A (global) descA[ CSRC_ ] The process column over which the
82 * first column of the array A is
83 * distributed.
84 * LLD_A (local) descA[ LLD_ ] The leading dimension of the local
85 * array. LLD_A >= MAX(1,LOCr(M_A)).
86 *
87 * Let K be the number of rows or columns of a distributed matrix,
88 * and assume that its process grid has dimension p x q.
89 * LOCr( K ) denotes the number of elements of K that a process
90 * would receive if K were distributed over the p processes of its
91 * process column.
92 * Similarly, LOCc( K ) denotes the number of elements of K that a
93 * process would receive if K were distributed over the q processes of
94 * its process row.
95 * The values of LOCr() and LOCc() may be determined via a call to the
96 * ScaLAPACK tool function, NUMROC:
97 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
98 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
99 * An upper bound for these quantities may be computed by:
100 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
101 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
102 *
103 * Because vectors may be seen as particular matrices, a distributed
104 * vector is considered to be a distributed matrix.
105 *
106 * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
107 * process column having the first entries of sub( Y ) must also contain
108 * the first entries of sub( X ). Moreover, the quantity
109 * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
110 *
111 * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
112 * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
113 * MOD( IY-1, MB_Y ).
114 *
115 * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
116 * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
117 * MOD( JY-1, NB_Y ).
118 *
119 * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
120 * equal to MB_Y, and the process row having the first entries of
121 * sub( Y ) must also contain the first entries of sub( X ). Moreover,
122 * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
123 *
124 * Arguments
125 * =========
126 *
127 * N (input) INTEGER
128 * The number of elements in the vectors CX and CY.
129 *
130 * X (local input) COMPLEX array containing the local
131 * pieces of a distributed matrix of dimension of at least
132 * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
133 * This array contains the entries of the distributed vector
134 * sub( X ).
135 * On output, CX is overwritten with C*X + S*Y.
136 *
137 * IX (global input) pointer to INTEGER
138 * The global row index of the submatrix of the distributed
139 * matrix X to operate on.
140 *
141 * JX (global input) pointer to INTEGER
142 * The global column index of the submatrix of the distributed
143 * matrix X to operate on.
144 *
145 * DESCX (global and local input) INTEGER array of dimension 8.
146 * The array descriptor of the distributed matrix X.
147 *
148 * INCX (global input) pointer to INTEGER
149 * The global increment for the elements of X. Only two values
150 * of INCX are supported in this version, namely 1 and M_X.
151 *
152 * Y (local input) COMPLEX array containing the local
153 * pieces of a distributed matrix of dimension of at least
154 * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
155 * This array contains the entries of the distributed vector
156 * sub( Y ).
157 * On output, CY is overwritten with -CONJG(S)*X + C*Y.
158 *
159 * IY (global input) pointer to INTEGER
160 * The global row index of the submatrix of the distributed
161 * matrix Y to operate on.
162 *
163 * JY (global input) pointer to INTEGER
164 * The global column index of the submatrix of the distributed
165 * matrix Y to operate on.
166 *
167 * DESCY (global and local input) INTEGER array of dimension 8.
168 * The array descriptor of the distributed matrix Y.
169 *
170 * INCY (global input) pointer to INTEGER
171 * The global increment for the elements of Y. Only two values
172 * of INCY are supported in this version, namely 1 and M_Y.
173 *
174 * C (input) pointer to FLOAT
175 * S (input) pointer COMPLEX
176 * C and S define a rotation
177 * [ C S ]
178 * [ -conjg(S) C ]
179 * where C*C + S*CONJG(S) = 1.0.
180 *
181 * =====================================================================
182 *
183 * .. Local Scalars ..
184 */
185 int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
186 jjy, lcm, lcmp, mycol, myrow, nn, np, np0,
187 nprow, npcol, nq, nz, ione=1, tmp1, wksz;
188 complex xwork[1], ywork[1], zero;
189 /* ..
190 * .. PBLAS Buffer ..
191 */
192 complex * buff;
193 /* ..
194 * .. External Functions ..
195 */
196 void blacs_gridinfo_();
197 void cgerv2d_();
198 void cgesd2d_();
199 void pbchkvect();
200 void PB_Cabort();
201 char * getpbbuf();
202 F_INTG_FCT pbctrnv_();
203 F_INTG_FCT crot_();
204 F_INTG_FCT ilcm_();
205 /* ..
206 * .. Executable Statements ..
207 *
208 * Get grid parameters
209 */
210 ictxt = desc_X[CTXT_];
211 blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
212 /*
213 * Test the input parameters
214 */
215 info = 0;
216 if( nprow == -1 )
217 info = -(500+CTXT_+1);
218 else
219 {
220 pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
221 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
222 pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
223 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
224
225 if( info == 0 )
226 {
227 if( *n != 1 )
228 {
229 if( *incx == desc_X[M_] )
230 { /* X is distributed along a process row */
231 if( *incy == desc_Y[M_] )
232 { /* Y is distributed over a process row */
233 if( ( ixcol != iycol ) ||
234 ( ( (*jx-1) % desc_X[NB_] ) !=
235 ( (*jy-1) % desc_Y[NB_] ) ) )
236 info = -9;
237 else if( desc_Y[NB_] != desc_X[NB_] )
238 info = -(1000+NB_+1);
239 }
240 else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
241 { /* Y is distributed over a process column */
242 if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
243 info = -8;
244 else if( desc_Y[MB_] != desc_X[NB_] )
245 info = -(1000+MB_+1);
246 }
247 else
248 {
249 info = -11;
250 }
251 }
252 else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
253 { /* X is distributed along a process column */
254 if( *incy == desc_Y[M_] )
255 { /* Y is distributed over a process row */
256 if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
257 info = -9;
258 else if( desc_Y[NB_] != desc_X[MB_] )
259 info = -(1000+NB_+1);
260 }
261 else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
262 { /* Y is distributed over a process column */
263 if( ( ixrow != iyrow ) ||
264 ( ( (*ix-1) % desc_X[MB_] ) !=
265 ( (*iy-1) % desc_Y[MB_] ) ) )
266 info = -8;
267 else if( desc_Y[MB_] != desc_X[MB_] )
268 info = -(1000+MB_+1);
269 }
270 else
271 {
272 info = -11;
273 }
274 }
275 else
276 {
277 info = -6;
278 }
279 }
280 if( ictxt != desc_Y[CTXT_] )
281 info = -(1000+CTXT_+1);
282 }
283 }
284 if( info ) { PB_Cabort( ictxt, "PCROT", info ); return; }
285 /*
286 if( info )
287 {
288 pberror_( &ictxt, "PCROT", &info );
289 return;
290 }
291 */
292 /*
293 * Quick return if possible.
294 */
295 zero.re = ZERO;
296 zero.im = ZERO;
297 if( *n == 0 ) return;
298 /*
299 * rotation
300 */
301 if( *n == 1 )
302 {
303 if( ( myrow == ixrow ) && ( mycol == ixcol ) )
304 {
305 buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
306 if( ( myrow != iyrow ) || ( mycol != iycol ) )
307 {
308 cgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
309 cgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
310 }
311 else
312 *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
313 crot_( n, buff, n, ywork, n, c, s );
314 X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff;
315 if( ( myrow == iyrow ) && ( mycol == iycol ) )
316 Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork;
317 }
318 else if( ( myrow == iyrow ) && ( mycol == iycol ) )
319 {
320 cgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
321 &ixrow, &ixcol );
322 cgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
323 crot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s );
324 }
325 return;
326 }
327
328 if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
329 { /* X and Y are both distributed over a process row */
330 nz = (*jx-1) % desc_Y[NB_];
331 nn = *n + nz;
332 nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
333 if( mycol == ixcol )
334 nq -= nz;
335 if( ixrow == iyrow )
336 {
337 if( myrow == ixrow )
338 {
339 crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
340 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
341 }
342 }
343 else
344 {
345 if( myrow == ixrow )
346 {
347 cgesd2d_( &ictxt, &ione, &nq,
348 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
349 &iyrow, &mycol );
350 buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
351 cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol );
352 crot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
353 buff, &ione, c, s );
354 }
355 else if( myrow == iyrow )
356 {
357 cgesd2d_( &ictxt, &ione, &nq,
358 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
359 &ixrow, &mycol );
360 buff = (complex *)getpbbuf( "PCROT", nq*sizeof(complex) );
361 cgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol );
362 crot_( &nq, buff, &ione,
363 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
364 }
365 }
366 }
367 else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
368 ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
369 { /* X and Y are both distributed over a process column */
370 nz = (*ix-1) % desc_X[MB_];
371 nn = *n + nz;
372 np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
373 if( myrow == ixrow )
374 np -= nz;
375 if( ixcol == iycol )
376 {
377 if( mycol == ixcol )
378 {
379 crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
380 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
381 }
382 }
383 else
384 {
385 if( mycol == ixcol )
386 {
387 cgesd2d_( &ictxt, &np, &ione,
388 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
389 &myrow, &iycol );
390 buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
391 cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol );
392 crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
393 buff, &ione, c, s );
394 }
395 else if( mycol == iycol )
396 {
397 cgesd2d_( &ictxt, &np, &ione,
398 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
399 &myrow, &ixcol );
400 buff = (complex *)getpbbuf( "PCROT", np*sizeof(complex) );
401 cgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol );
402 crot_( &np, buff, &ione,
403 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
404 }
405 }
406 }
407 else /* X and Y are not distributed along the same direction */
408 {
409 lcm = ilcm_( &nprow, &npcol );
410 if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
411 { /* X is distributed over a process column */
412 lcmp = lcm / nprow;
413 nz = (*jy-1) % desc_Y[NB_];
414 nn = *n + nz;
415 tmp1 = nn / desc_Y[MB_];
416 np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
417 np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
418 tmp1 = np0 / desc_X[MB_];
419 wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
420 wksz = np + wksz;
421
422 buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
423
424 if( mycol == iycol )
425 jjy -= nz;
426 if( myrow == ixrow )
427 np -= nz;
428 pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
429 &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
430 &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
431 &ixrow, &ixcol, buff+np );
432 if( mycol == ixcol )
433 {
434 crot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]],
435 incx, buff, &ione, c, s );
436 }
437 pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
438 &desc_Y[NB_], &nz, buff, &ione, &zero,
439 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
440 &ixrow, &ixcol, &iyrow, &iycol, buff+np );
441 }
442 else /* Y is distributed over a process column */
443 {
444 lcmp = lcm / nprow;
445 nz = (*jx-1) % desc_X[NB_];
446 nn = *n + nz;
447 tmp1 = nn / desc_X[MB_];
448 np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
449 np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
450 tmp1 = np0 / desc_Y[MB_];
451 wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
452 wksz = np + wksz;
453
454 buff = (complex *)getpbbuf( "PCROT", wksz*sizeof(complex) );
455
456 if( myrow == iyrow )
457 np -= nz;
458 pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
459 &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
460 &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
461 &iyrow, &iycol, buff+np );
462 if( mycol == iycol )
463 {
464 crot_( &np, buff, &ione,
465 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
466 }
467 pbctrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
468 &desc_X[NB_], &nz, buff, &ione, &zero,
469 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
470 &iyrow, &iycol, &ixrow, &ixcol, buff+np );
471 }
472 }
473 }
474