1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 #include <stdio.h>
35 #include <memory.h>
36 #include <math.h>
37 
38 #include "globalp.c.h"
39 
40 #define max(a,b) ((a) > (b) ? (a) : (b))
41 #define min(a,b) ((a) < (b) ? (a) : (b))
42 
mxm5x(n,rowU,mapU,m,colF,mapF,iscratch,scratch)43 void mxm5x( n, rowU, mapU, m, colF, mapF, iscratch, scratch)
44      Integer *n, *mapU, *mapF,  *m, *iscratch;
45      DoublePrecision **colF, **rowU, *scratch;
46 {
47 
48 /**************************************************************
49  *
50  * Subroutine mxm5x
51  *
52    This subroutine does the matrix multiplication F <- U * F
53 
54    where U is a n x n upper trianguler matrix in packed storage format,
55    and row distributed,
56    and matrix F is a general n x m matrix distributed by columns.
57 
58    ARGUMENTS
59    ---------
60    In the following:
61 
62      INTEGER          = "pointer to Integer"
63      DOUBLE PRECISION = "pointer to DoublePrecision"
64 
65      me     = this processor's id (= mxmynd_())
66      nprocs = number of allocated processors ( = mxnprc_())
67      nvecsU = number of entries in mapU equal to me
68               (= count_list( me, mapU, n ))
69      neleU_max = maximum number of elements of U owned by any
70                  processor.
71                  (= max over i of ci_size_( i, n, mapU) )
72      nvecsF = number of entries in mapF equal to me
73               (= count_list( me, mapF, m ))
74      sDP    = sizeof( DoublePrecision )
75 
76    n ...... (input) INTEGER
77             The number of rows and columns of U, and the number
78             of rows of F.
79 
80    rowU ... (input) array of pointers to DoublePrecision,
81                     length (nvecsU)
82             The part of matrix U owned by this processer stored
83             in packed format, i.e., rowU[i] points to the diagonal
84             element of the i-th row of U
85             owned by this processor, i = 0 to nvecsU-1.
86 
87    mapU ... (input) INTEGER array, length (n)
88             The i-th row of U is
89             owned by processor mapU[i], i = 0 to n-1.
90 
91    m ...... (input) INTEGER
92             m is the number of columns in F.
93 
94    colF ... (input/output) array of pointers to DoublePrecision,
95                            length (nvecsF)
96 
97             On Entry:
98               The part of matrix F owned by this processer stored
99               in packed format by columns, i.e., colF[i] points
100               to the first element of the i-th column of F
101               owned by this processor, i = 0 to nvecsF-1.
102 
103             On Exit:
104               The result matrix U * F stored in the same manner as
105               F on entry.
106 
107    mapF ... (input) INTEGER array, length (m)
108             The i-th column of F is
109             owned by processor mapF[i], i = 0 to m-1.
110 
111    iscratch .. (workspace) INTEGER array, length ( 3*n+2*m +1 )
112 
113    scratch.... (workspace) DOUBLE PRECISION array,
114                         length ( maximum ( n+m, mxlbuf_()/sDP + 1,
115                                            (nvecsF * n + 2 * neleU_max )
116 */
117   static Integer ONE = 1;
118   Integer jndx;
119   Integer i, k, me, isize, *iptr, indx;
120   Integer nvecsU, nvecsF, ii;
121   Integer j, linfo, ll, mm, nproc;
122   Integer osize, rsize, idummy;		/* */
123 
124   Integer *mapvecF, *mapvecU;
125   Integer *iscrat, *proclist;
126   Integer *mapvec_in, me_indx, last_proc, next_proc, maxsz;
127 
128   DoublePrecision *buffer, *d_ptr, *in_buffer, *out_buffer;
129   DoublePrecision *F_ptr, *U_ptr, t;
130 
131 
132 
133   /*
134     blas calls
135     */
136 
137   extern void dcopy_(), daxpy_();
138   extern DoublePrecision ddot_();
139 
140   extern Integer mxmynd_();
141   extern void xerbla_();
142   extern void mapdif1_();
143   extern void g_exit_();
144   extern void pxerbla2_();
145   extern Integer fil_mapvec_();
146   extern void chol_pipe_bcast();
147   extern void gshellsort_();
148   extern Integer indxL();
149   extern void fil_dbl_lst();
150 
151 
152   /*
153     mxsubs calls
154     */
155 
156   extern Integer mxwrit_(),  mxread_();
157   extern Integer count_list();
158   extern Integer fil_mapvec_();
159   extern Integer ci_size_();
160   extern Integer reduce_list2();
161   extern Integer peigs_cmod_();
162 
163   i = 0;
164   me = mxmynd_();
165 
166 #ifdef DEBUG1
167   fprintf(stderr, " Entering mxm5x ********************* me = %d \n ", me );
168 #endif
169 
170   if ( m == NULL )
171     i = -4;
172   else
173     if ( n == NULL ) {
174       i = -1;
175       xerbla_( "MXM5 ", &i);
176     }
177     else if ( *n < 1 )
178       i = -1;
179     else {
180       iscrat = mapU;
181       for ( j = 0; j < *n; j++ ) {
182 	if ( iscrat == NULL ) {
183 	  i = -3;
184 	  xerbla_( "MXM5 \n", &i);
185 	}
186 	else
187 	  iscrat++;
188       }
189       iscrat = mapF;
190       for ( j = 0; j < *m; j++ ) {
191 	if ( iscrat == NULL ) {
192 	  i = -6;
193 	  xerbla_( "MXM5 \n", &i);
194 	}
195 	else
196 	  iscrat++;
197       }
198     }
199 
200   /*
201     at this point inputs are minimally acceptable
202 
203     check to see if mapU and mapF are the same set of processors
204     */
205 
206   iscrat = iscratch;
207   mapdif1_( n, mapU, m, mapF, iscrat, &j );
208 
209   /*
210    * if ( j != 0 ) {
211    *   i = -3;
212    * }
213    */
214 
215 #ifdef DEBUG1
216   fprintf (stderr, "me = %d, i = %d, n=%d, m=%d \n", me, i, *n, *m);
217   for ( j = 0; j < *n; j++ )
218     fprintf (stderr, "me = %d, mapU[%d]= %d \n", me, j, mapU[j] );
219   for ( j = 0; j < *m; j++ )
220     fprintf (stderr, "me = %d, mapF[%d]=%d, \n", me, j, mapF[j] );
221 #endif
222 
223   me = mxmynd_();
224   nvecsU = count_list ( me, mapU, n);
225   nvecsF = count_list ( me, mapF, m);
226 
227   for ( j = 0; j < nvecsU; j++ )
228     if ( rowU[j] == NULL ) {
229       linfo = -2;
230       i = min(linfo, i);
231       break;
232     }
233 
234   for ( j = 0; j < nvecsF; j++ )
235     if ( colF[j] == NULL ) {
236       i = min(i, -6);
237       break;
238     }
239 
240   /*
241 
242     g_exit_( &i, "Mapping problem or memory assignment problem MXM5X \n", mapU, n, iscratch, scratch );
243     g_exit_( &i, "Mapping problem or memory assignment problem MXM5 \n", mapF, n, iscratch, scratch );
244 
245 
246     linfo = 0;
247     ll = *n * sizeof(Integer);
248     pxerbla2_( &ll, (char *) mapU, mapU, n, iscrat, &i );
249     linfo = min(linfo, i);
250     ll = *m * sizeof(Integer);
251 
252     pxerbla2_( &ll, (char *) mapF, mapU, n, iscrat, &i);
253 
254     linfo = min(linfo, i);
255 
256     g_exit_( &linfo, "Mapping inconsistancies calling MXM5X \n", mapU, n, iscratch, scratch );
257     g_exit_( &linfo, "Mapping inconsistancies calling MXM5X \n", mapF, n, iscratch, scratch );
258     */
259 
260   me = mxmynd_();
261 
262   ll = *n;
263   mm = *m;
264 
265   iscrat = iscratch;
266   mapvecU = iscrat;
267 
268 #ifdef DEBUG
269   fprintf(stderr, " in mxm5x me = %d \n", me);
270 #endif
271 
272   nvecsU = fil_mapvec_( &me, &ll, mapU, mapvecU );
273   iscrat += nvecsU;
274   mapvecF = iscrat;
275   nvecsF = fil_mapvec_( &me, m, mapF, mapvecF );
276   iscrat += nvecsF;
277 
278 #ifdef DEBUG1
279   k = -1;
280   for ( j = 0; j < *n; j++ ){
281     if( mapU[j] == me ) {
282       k++;
283       for ( i = 0; i < *n-j; i++ )
284         fprintf (stderr, "me = %d, rowU[%d][%d] = %g \n", me, k, i, rowU[k][i] );
285     }
286   }
287   for ( j = 0; j < nvecsF; j++ )
288     for ( i = 0; i < *n; i++ )
289       fprintf (stderr, "me = %d, colF[%d][%d] = %g \n", me, j, i, colF[j][i] );
290 #endif
291 
292 
293   if ( nvecsU + nvecsF == 0 )
294     return;
295 
296   iptr = (Integer *) scratch;
297   for ( i = 0; i < ll; i++ )
298     *(iptr++) = mapU[i];
299   for ( i = 0; i < mm; i++ )
300     *(iptr++) = mapF[i];
301 
302   iptr = (Integer *) scratch;
303 
304   /*
305     nproc = reduce_list( ll+mm, iptr, iscrat);
306     */
307 
308   nproc = reduce_list2( ll+mm, iptr, iscrat);
309   proclist = iscrat;
310   iscrat += nproc;
311   mapvec_in = iscrat;
312 
313   gshellsort_( &nproc, proclist);
314 
315   indx = indxL ( me, nproc, proclist);
316   me_indx = indx;
317   indx += nproc;
318 
319   /*
320     last_proc = (indx - 1) % nproc;
321     */
322 
323   idummy = indx - 1;
324   idummy += nproc;
325   last_proc = (idummy+nproc) % nproc;
326   last_proc = proclist[last_proc];
327 
328   /*
329     next_proc = (indx + 1) % nproc;
330     */
331 
332   idummy = indx + 1;
333   idummy += nproc;
334   next_proc = (idummy + nproc ) % nproc;
335   next_proc = proclist[next_proc];
336 
337   maxsz = 0;
338   isize = 0;
339   for ( indx = 0; indx < nproc; indx++ ) {
340     isize = ci_size_( &proclist[indx], &ll, mapU);   /* the number Q vectors that I own */
341     maxsz = max( isize, maxsz );
342   }
343 
344   buffer = (DoublePrecision *) scratch;
345   indx = 0;
346   for ( jndx = 0; jndx < nvecsF; jndx++ ){
347     dcopy_( &ll, colF[jndx], &ONE, &buffer[indx], &ONE);
348     indx += ll;
349   }
350   buffer += indx;
351 
352   for ( jndx = 0; jndx < nvecsF; jndx++ ){
353     d_ptr = colF[jndx];
354     fil_dbl_lst ( ll, d_ptr, 0.0e0);
355   }
356 
357   in_buffer = buffer;
358   out_buffer = buffer + maxsz;
359 
360   /*
361     copy upper triangular matrix into memory
362     */
363 
364   d_ptr = in_buffer;
365   for ( jndx = 0; jndx < nvecsU; jndx++ ){
366     ii = mapvecU[jndx];
367     ii = ll - ii;
368     dcopy_( &ii, rowU[jndx], &ONE, d_ptr, &ONE);
369     d_ptr += ii;
370   }
371 
372   /*
373     compute local matrix multiply
374     */
375 
376   F_ptr = scratch;
377   for ( k = 0; k < nvecsF; k++ ){
378     U_ptr = in_buffer;
379     for ( jndx = 0; jndx < nvecsU; jndx++ ) {
380       i = mapvecU[jndx];
381       isize = ll - i;
382       t = ddot_( &isize, &F_ptr[i], &ONE, U_ptr , &ONE);
383       colF[k][i] = t;
384       U_ptr += isize;
385     }
386     F_ptr += ll;
387   }
388 
389   buffer     = out_buffer + maxsz;
390 
391   isize = ci_size_( &me, &ll, mapU);   /* the number Q vectors that I own */
392   osize = isize;
393 
394   me_indx += nproc;
395 
396   for ( i = 0; i < nproc-1 ; i++ ) {
397     dcopy_(&osize, in_buffer, &ONE, out_buffer, &ONE);
398     idummy = 2;
399     if ( (me_indx + nproc) % idummy ==  0 ) {
400       rsize = osize * sizeof(DoublePrecision);
401       if ( rsize != 0 )
402 	rsize = mxwrit_( out_buffer, &rsize, &next_proc, &i );
403 
404       /*
405         indx = ( me_indx -i -1) % nproc;
406         */
407       idummy = me_indx - i - 1;
408       idummy += nproc;
409       indx = (idummy + nproc ) % nproc;
410       indx = proclist[indx]; /* the current processor id number */
411       nvecsU = fil_mapvec_( &indx, &ll, mapU, mapvec_in);
412       isize = ci_size_( &indx, &ll, mapU);   /* the number Q vectors that I own */
413 
414       /*
415 	mapvec_in contains the mapvecQ for processor proclist[i])
416 	*/
417 
418       rsize = isize * sizeof(DoublePrecision);
419       if ( rsize != 0 )
420 	rsize = mxread_( in_buffer, &rsize, &last_proc, &i );
421     }
422     else {
423       /*
424         indx = ( me_indx - i -1 ) % nproc;
425         */
426       idummy = me_indx - i - 1;
427       idummy += nproc;
428       indx = (idummy + nproc) % nproc;
429       indx = proclist[indx]; /* the current processor id number */
430       nvecsU = fil_mapvec_( &indx, &ll, mapU, mapvec_in);
431       isize = ci_size_( &indx, &ll, mapU);   /* the number Q vectors that I own */
432 
433       /*
434 	mapvec_in contains the mapvecQ for processor proclist[i])
435 	*/
436 
437       rsize = isize * sizeof(DoublePrecision);
438 
439       if ( rsize != 0 )
440 	rsize = mxread_( in_buffer, &rsize, &last_proc, &i );
441 
442       rsize = osize * sizeof(DoublePrecision);
443       if ( rsize != 0 )
444 	rsize = mxwrit_( out_buffer, &rsize, &next_proc, &i );
445     }
446 
447     osize = isize;
448 
449     F_ptr = scratch;
450     for ( k = 0; k < nvecsF; k++ ){
451       U_ptr = in_buffer;
452       for ( jndx = 0; jndx < nvecsU; jndx++ ) {
453 	ii = mapvec_in[jndx];
454 	isize = ll - ii;
455 	t = ddot_( &isize, &F_ptr[ii], &ONE, U_ptr , &ONE);
456 	colF[k][ii] = t;
457 	U_ptr += isize;
458       }
459       F_ptr += ll;
460     }
461   }
462 
463   return;
464 }
465 
466