1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 #include <stdio.h>
35 
36 #include "globalp.c.h"
37 
38 #define ZERO ((DoublePrecision) 0.0e0)
39 
40 #define min(a,b) ((a) < (b) ? (a) : (b))
41 
mxm88(n,colQ,mapQ,m,colW,mapW,iwork,work,iptr)42 void mxm88 ( n, colQ, mapQ, m, colW, mapW, iwork, work, iptr)
43      Integer *n, *mapQ, *m, *mapW, *iwork;
44      DoublePrecision **colQ, **colW, *work, **iptr;
45 
46 /**************************************************************
47  *
48  * Subroutine mxm88
49  *
50    This subroutine does the matrix multiplication W <- Q * W
51 
52    where Q is a n x n symmetric matrix in packed storage format,
53    and row (or equivalently column) distributed,
54    and matrix W is a general n x m matrix distributed by columns.
55 
56    ARGUMENTS
57    ---------
58    In the following:
59 
60      INTEGER          = "pointer to Integer"
61      DOUBLE PRECISION = "pointer to DoublePrecision"
62 
63      me     = this processor's id (= mxmynd_())
64      nprocs = number of allocated processors ( = mxnprc_())
65      nvecsW = number of entries in mapW equal to me
66                   (= count_list( me, mapW, m ))
67      nvecsQ = number of entries in mapQ equal to me
68                   (= count_list( me, mapQ, n ))
69      sDP    = sizeof( DoublePrecision )
70 
71    n ...... (input) INTEGER
72             n is the dimension of the symmetric matrix Q
73 
74    colQ ... (input) array of pointers to DoublePrecision,
75                     length (nvecsQ)
76             The part of matrix Q owned by this processer stored
77             in packed format, i.e., colQ[i] points to the diagonal
78             element of the i-th column (or equivalently row) of Q
79             owned by this processor, i = 0 to nvecsQ-1.
80 
81    mapQ ... (input) INTEGER array, length (n)
82             The i-th column (or equivalently row) of Q is
83             owned by processor mapQ[i], i = 0 to n-1.
84 
85    m ...... (input) INTEGER
86             m is the number of columns in W.
87 
88    colW ... (input/output) array of pointers to DoublePrecision,
89                            length (nvecsW)
90 
91             On Entry:
92               The part of matrix W owned by this processer stored
93               in packed format by columns, i.e., colW[i] points
94               to the first element of the i-th column of W
95               owned by this processor, i = 0 to nvecsW-1.
96 
97             On Exit:
98               The result matrix Q * W stored in the same manner as
99               W on entry.
100 
101    mapW ... (input) INTEGER array, length (m)
102             The i-th column of W is
103             owned by processor mapW[i], i = 0 to m-1.
104 
105 
106    iwork .. (workspace) INTEGER array, length ( 2*(n+m)+nvecsW+nvecsQ )
107 
108    work ... (workspace) DOUBLE PRECISION array,
109                         length ( maximum ( (nvecsW+1)*n, mxlbuf_()/sDP + 1 )
110 
111    iptr ... (workspace ) array of pointers to DoublePrecision,
112                          length (nvecsW)
113 
114 */
115 
116 {
117 
118   static Integer IONE = 1, MSGTYP = 51;
119 
120   Integer ll, k, i, *iscrat;
121   Integer isize, me;
122   Integer nvecsW, nvecsQ, j, iQ;
123   Integer *mapvecQ, *mapvecW, *proclist, nprocs, npro, nele;
124   Integer linfo;
125 
126   DoublePrecision t;
127   DoublePrecision *buffer, scl;
128   DoublePrecision *ptr, *ptr1;
129   DoublePrecision  **Wptr;
130 
131   /*
132     blas call
133     */
134 
135   extern DoublePrecision ddot_();
136   extern void pxerbla2_();
137   extern void daxpy_();
138   extern void dcopy_();
139   extern void zero_out();
140 
141   extern void chol_pipe_bcast();
142   extern Integer fil_mapvec_();
143   extern Integer count_list();
144   extern Integer reduce_list2();
145 
146   extern Integer mxwrit_(), mxread_();
147   extern Integer menode_(), mxmynd_(), mxnprc_();
148 
149   extern void xerbla_();
150   extern void g_exit_();
151   extern void bbcast00();
152 
153   i = 0;
154   me = mxmynd_();
155 
156   if ( m == NULL ) {
157     i = -4;
158     xerbla_( "MXM8 ", &i);
159   }
160 
161   if ( n == NULL ) {
162     i = -1;
163     xerbla_( "MXM8 ", &i);
164   }
165 
166 
167   if ( *n < 1 ) {
168     i = -1;
169     xerbla_( "MXM8 ", &i);
170   }
171 
172   iscrat = mapQ;
173   for ( j = 0; j < *n; j++ ) {
174     if ( iscrat++ == NULL ) {
175       i = -3;
176       xerbla_( "MXM8 \n", &i);
177     }
178   }
179   iscrat = NULL;
180 
181   iscrat = mapW;
182   for ( j = 0; j < *m; j++ ) {
183     if ( iscrat++ == NULL ) {
184       i = -6;
185       xerbla_( "MXM8 \n", &i);
186     }
187   }
188   iscrat = NULL;
189 
190   /*
191     at this point inputs are minimally acceptable
192 
193     check to see if mapQ and mapW are the same set of processors
194     */
195 
196   iscrat = iwork;
197   j = 0;
198 
199 /*
200  * mapdif1_( n, mapQ, m, mapW, iscrat, &j );
201  *
202  * if ( j != 0 ) {
203  *   i = -3;
204  * }
205  *
206  */
207   me = mxmynd_();
208   nvecsQ = count_list( me, mapQ, n);
209   nvecsW = count_list( me, mapW, m);
210 
211   for ( j = 0; j < nvecsQ; j++ )
212     if ( colQ[j] == NULL ) {
213       linfo = -2;
214       i = min(linfo, i);
215       break;
216     }
217 
218   for ( j = 0; j < nvecsW; j++ )
219     if ( colW[j] == NULL ) {
220       i = min(i, -6);
221       break;
222     }
223 
224   g_exit_( &i, "Mapping problem or memory assignment problem in MXM8 \n", mapQ, n, iwork, work );
225 
226   linfo = 0;
227   j = *n * sizeof(Integer);
228   pxerbla2_( &j , (char *) mapQ, mapQ, n, iscrat, &i );
229   linfo = min(linfo, i);
230   j = *m * sizeof(Integer);;
231   pxerbla2_( &j, (char *) mapW, mapQ, n, iscrat, &i);
232 
233   linfo = min(linfo, i);
234   g_exit_( &linfo, "Mapping inconsistancies calling MXM8 \n", mapQ, n, iwork, work );
235 
236   ll = *n;
237 
238   /*
239     should perform global sync to check for errors
240     */
241 
242   me = mxmynd_();
243 /*
244  * npro = mxnprc_();
245  */
246   npro = *m + *n;
247 
248   iscrat = iwork;
249 
250   proclist = iscrat;
251   for ( i = 0; i < *m; i++ )
252      iscrat[npro+i] = mapW[i];
253 
254   for ( i = 0; i < *n; i++ )
255      iscrat[npro + *m + i] = mapQ[i];
256 
257   nele = *n + *m;
258   nprocs = reduce_list2( nele, &iscrat[npro], proclist );
259   iscrat += nprocs;
260 
261   mapvecQ = iscrat;
262   nvecsQ = fil_mapvec_( &me, &ll, mapQ, mapvecQ );
263   iscrat += nvecsQ;
264 
265   mapvecW = iscrat;
266   nvecsW = fil_mapvec_( &me, m, mapW, mapvecW);
267   iscrat += nvecsW;
268 
269   if (( nvecsQ == 0 ) && ( nvecsW == 0 ))
270     return; /* no work */
271 
272   k = 0;
273   Wptr = iptr;
274   ptr = work;
275   for ( i = 0; i < nvecsW; i++ ) {
276     Wptr[i] = ptr;
277     dcopy_( &ll, colW[i], &IONE, ptr, &IONE );
278     ptr += ll;
279   }
280 
281   buffer = ptr;
282 
283   /*
284     zero out W
285     */
286 
287   for ( i = 0; i < nvecsW; i++ ) {
288     zero_out ( ll, colW[i] );
289   }
290 
291   iQ = 0;
292   for ( i = 0; i < ll; i++ ) {
293     /*
294       copy the column for passing
295       */
296     if ( mapQ[i] == me ) {
297       isize = ll - i;
298       dcopy_( &isize, colQ[iQ], &IONE, buffer, &IONE );
299 
300       /*
301 	broadcast to all processors holding W columns
302 	*/
303 
304       isize *= sizeof(DoublePrecision);
305 /*
306  *    chol_pipe_bcast( buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist, iscrat);
307  */
308       bbcast00((char *) buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist);
309 
310       iQ++;
311     }
312     else {
313       isize = (ll - i )* sizeof(DoublePrecision);
314 /*
315  *     chol_pipe_bcast( buffer, isize, MSGTYP + i , mapQ[i], nprocs, proclist, iscrat);
316  */
317       bbcast00((char *) buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist);
318     }
319 
320     /*
321       the buffer contains the column Q[i:n-1, i]
322       */
323 
324     for ( k = 0; k < nvecsW; k++ ) {
325       isize = ll - i;
326       ptr = Wptr[k] + i;
327       t = *ptr;
328       scl = ddot_( &isize, ptr, &IONE, buffer, &IONE);
329       ptr1 = colW[k] + i;
330       *ptr1 += scl;
331       isize += (-1);
332       daxpy_( &isize, &t, buffer+1 , &IONE, ptr1+1, &IONE);
333     }
334   }
335 
336   return;
337 }
338