1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 #include <stdio.h>
35
36 #include "globalp.c.h"
37
38 #define ZERO ((DoublePrecision) 0.0e0)
39
40 #define min(a,b) ((a) < (b) ? (a) : (b))
41
mxm88(n,colQ,mapQ,m,colW,mapW,iwork,work,iptr)42 void mxm88 ( n, colQ, mapQ, m, colW, mapW, iwork, work, iptr)
43 Integer *n, *mapQ, *m, *mapW, *iwork;
44 DoublePrecision **colQ, **colW, *work, **iptr;
45
46 /**************************************************************
47 *
48 * Subroutine mxm88
49 *
50 This subroutine does the matrix multiplication W <- Q * W
51
52 where Q is a n x n symmetric matrix in packed storage format,
53 and row (or equivalently column) distributed,
54 and matrix W is a general n x m matrix distributed by columns.
55
56 ARGUMENTS
57 ---------
58 In the following:
59
60 INTEGER = "pointer to Integer"
61 DOUBLE PRECISION = "pointer to DoublePrecision"
62
63 me = this processor's id (= mxmynd_())
64 nprocs = number of allocated processors ( = mxnprc_())
65 nvecsW = number of entries in mapW equal to me
66 (= count_list( me, mapW, m ))
67 nvecsQ = number of entries in mapQ equal to me
68 (= count_list( me, mapQ, n ))
69 sDP = sizeof( DoublePrecision )
70
71 n ...... (input) INTEGER
72 n is the dimension of the symmetric matrix Q
73
74 colQ ... (input) array of pointers to DoublePrecision,
75 length (nvecsQ)
76 The part of matrix Q owned by this processer stored
77 in packed format, i.e., colQ[i] points to the diagonal
78 element of the i-th column (or equivalently row) of Q
79 owned by this processor, i = 0 to nvecsQ-1.
80
81 mapQ ... (input) INTEGER array, length (n)
82 The i-th column (or equivalently row) of Q is
83 owned by processor mapQ[i], i = 0 to n-1.
84
85 m ...... (input) INTEGER
86 m is the number of columns in W.
87
88 colW ... (input/output) array of pointers to DoublePrecision,
89 length (nvecsW)
90
91 On Entry:
92 The part of matrix W owned by this processer stored
93 in packed format by columns, i.e., colW[i] points
94 to the first element of the i-th column of W
95 owned by this processor, i = 0 to nvecsW-1.
96
97 On Exit:
98 The result matrix Q * W stored in the same manner as
99 W on entry.
100
101 mapW ... (input) INTEGER array, length (m)
102 The i-th column of W is
103 owned by processor mapW[i], i = 0 to m-1.
104
105
106 iwork .. (workspace) INTEGER array, length ( 2*(n+m)+nvecsW+nvecsQ )
107
108 work ... (workspace) DOUBLE PRECISION array,
109 length ( maximum ( (nvecsW+1)*n, mxlbuf_()/sDP + 1 )
110
111 iptr ... (workspace ) array of pointers to DoublePrecision,
112 length (nvecsW)
113
114 */
115
116 {
117
118 static Integer IONE = 1, MSGTYP = 51;
119
120 Integer ll, k, i, *iscrat;
121 Integer isize, me;
122 Integer nvecsW, nvecsQ, j, iQ;
123 Integer *mapvecQ, *mapvecW, *proclist, nprocs, npro, nele;
124 Integer linfo;
125
126 DoublePrecision t;
127 DoublePrecision *buffer, scl;
128 DoublePrecision *ptr, *ptr1;
129 DoublePrecision **Wptr;
130
131 /*
132 blas call
133 */
134
135 extern DoublePrecision ddot_();
136 extern void pxerbla2_();
137 extern void daxpy_();
138 extern void dcopy_();
139 extern void zero_out();
140
141 extern void chol_pipe_bcast();
142 extern Integer fil_mapvec_();
143 extern Integer count_list();
144 extern Integer reduce_list2();
145
146 extern Integer mxwrit_(), mxread_();
147 extern Integer menode_(), mxmynd_(), mxnprc_();
148
149 extern void xerbla_();
150 extern void g_exit_();
151 extern void bbcast00();
152
153 i = 0;
154 me = mxmynd_();
155
156 if ( m == NULL ) {
157 i = -4;
158 xerbla_( "MXM8 ", &i);
159 }
160
161 if ( n == NULL ) {
162 i = -1;
163 xerbla_( "MXM8 ", &i);
164 }
165
166
167 if ( *n < 1 ) {
168 i = -1;
169 xerbla_( "MXM8 ", &i);
170 }
171
172 iscrat = mapQ;
173 for ( j = 0; j < *n; j++ ) {
174 if ( iscrat++ == NULL ) {
175 i = -3;
176 xerbla_( "MXM8 \n", &i);
177 }
178 }
179 iscrat = NULL;
180
181 iscrat = mapW;
182 for ( j = 0; j < *m; j++ ) {
183 if ( iscrat++ == NULL ) {
184 i = -6;
185 xerbla_( "MXM8 \n", &i);
186 }
187 }
188 iscrat = NULL;
189
190 /*
191 at this point inputs are minimally acceptable
192
193 check to see if mapQ and mapW are the same set of processors
194 */
195
196 iscrat = iwork;
197 j = 0;
198
199 /*
200 * mapdif1_( n, mapQ, m, mapW, iscrat, &j );
201 *
202 * if ( j != 0 ) {
203 * i = -3;
204 * }
205 *
206 */
207 me = mxmynd_();
208 nvecsQ = count_list( me, mapQ, n);
209 nvecsW = count_list( me, mapW, m);
210
211 for ( j = 0; j < nvecsQ; j++ )
212 if ( colQ[j] == NULL ) {
213 linfo = -2;
214 i = min(linfo, i);
215 break;
216 }
217
218 for ( j = 0; j < nvecsW; j++ )
219 if ( colW[j] == NULL ) {
220 i = min(i, -6);
221 break;
222 }
223
224 g_exit_( &i, "Mapping problem or memory assignment problem in MXM8 \n", mapQ, n, iwork, work );
225
226 linfo = 0;
227 j = *n * sizeof(Integer);
228 pxerbla2_( &j , (char *) mapQ, mapQ, n, iscrat, &i );
229 linfo = min(linfo, i);
230 j = *m * sizeof(Integer);;
231 pxerbla2_( &j, (char *) mapW, mapQ, n, iscrat, &i);
232
233 linfo = min(linfo, i);
234 g_exit_( &linfo, "Mapping inconsistancies calling MXM8 \n", mapQ, n, iwork, work );
235
236 ll = *n;
237
238 /*
239 should perform global sync to check for errors
240 */
241
242 me = mxmynd_();
243 /*
244 * npro = mxnprc_();
245 */
246 npro = *m + *n;
247
248 iscrat = iwork;
249
250 proclist = iscrat;
251 for ( i = 0; i < *m; i++ )
252 iscrat[npro+i] = mapW[i];
253
254 for ( i = 0; i < *n; i++ )
255 iscrat[npro + *m + i] = mapQ[i];
256
257 nele = *n + *m;
258 nprocs = reduce_list2( nele, &iscrat[npro], proclist );
259 iscrat += nprocs;
260
261 mapvecQ = iscrat;
262 nvecsQ = fil_mapvec_( &me, &ll, mapQ, mapvecQ );
263 iscrat += nvecsQ;
264
265 mapvecW = iscrat;
266 nvecsW = fil_mapvec_( &me, m, mapW, mapvecW);
267 iscrat += nvecsW;
268
269 if (( nvecsQ == 0 ) && ( nvecsW == 0 ))
270 return; /* no work */
271
272 k = 0;
273 Wptr = iptr;
274 ptr = work;
275 for ( i = 0; i < nvecsW; i++ ) {
276 Wptr[i] = ptr;
277 dcopy_( &ll, colW[i], &IONE, ptr, &IONE );
278 ptr += ll;
279 }
280
281 buffer = ptr;
282
283 /*
284 zero out W
285 */
286
287 for ( i = 0; i < nvecsW; i++ ) {
288 zero_out ( ll, colW[i] );
289 }
290
291 iQ = 0;
292 for ( i = 0; i < ll; i++ ) {
293 /*
294 copy the column for passing
295 */
296 if ( mapQ[i] == me ) {
297 isize = ll - i;
298 dcopy_( &isize, colQ[iQ], &IONE, buffer, &IONE );
299
300 /*
301 broadcast to all processors holding W columns
302 */
303
304 isize *= sizeof(DoublePrecision);
305 /*
306 * chol_pipe_bcast( buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist, iscrat);
307 */
308 bbcast00((char *) buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist);
309
310 iQ++;
311 }
312 else {
313 isize = (ll - i )* sizeof(DoublePrecision);
314 /*
315 * chol_pipe_bcast( buffer, isize, MSGTYP + i , mapQ[i], nprocs, proclist, iscrat);
316 */
317 bbcast00((char *) buffer, isize, MSGTYP + i, mapQ[i], nprocs, proclist);
318 }
319
320 /*
321 the buffer contains the column Q[i:n-1, i]
322 */
323
324 for ( k = 0; k < nvecsW; k++ ) {
325 isize = ll - i;
326 ptr = Wptr[k] + i;
327 t = *ptr;
328 scl = ddot_( &isize, ptr, &IONE, buffer, &IONE);
329 ptr1 = colW[k] + i;
330 *ptr1 += scl;
331 isize += (-1);
332 daxpy_( &isize, &t, buffer+1 , &IONE, ptr1+1, &IONE);
333 }
334 }
335
336 return;
337 }
338