1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 #include <stdio.h>
35 #include <memory.h>
36 #include <math.h>
37
38 #include "globalp.c.h"
39
40 #define max(a,b) ((a) > (b) ? (a) : (b))
41 #define min(a,b) ((a) < (b) ? (a) : (b))
42
mxm5x(n,rowU,mapU,m,colF,mapF,iscratch,scratch)43 void mxm5x( n, rowU, mapU, m, colF, mapF, iscratch, scratch)
44 Integer *n, *mapU, *mapF, *m, *iscratch;
45 DoublePrecision **colF, **rowU, *scratch;
46 {
47
48 /**************************************************************
49 *
50 * Subroutine mxm5x
51 *
52 This subroutine does the matrix multiplication F <- U * F
53
54 where U is a n x n upper trianguler matrix in packed storage format,
55 and row distributed,
56 and matrix F is a general n x m matrix distributed by columns.
57
58 ARGUMENTS
59 ---------
60 In the following:
61
62 INTEGER = "pointer to Integer"
63 DOUBLE PRECISION = "pointer to DoublePrecision"
64
65 me = this processor's id (= mxmynd_())
66 nprocs = number of allocated processors ( = mxnprc_())
67 nvecsU = number of entries in mapU equal to me
68 (= count_list( me, mapU, n ))
69 neleU_max = maximum number of elements of U owned by any
70 processor.
71 (= max over i of ci_size_( i, n, mapU) )
72 nvecsF = number of entries in mapF equal to me
73 (= count_list( me, mapF, m ))
74 sDP = sizeof( DoublePrecision )
75
76 n ...... (input) INTEGER
77 The number of rows and columns of U, and the number
78 of rows of F.
79
80 rowU ... (input) array of pointers to DoublePrecision,
81 length (nvecsU)
82 The part of matrix U owned by this processer stored
83 in packed format, i.e., rowU[i] points to the diagonal
84 element of the i-th row of U
85 owned by this processor, i = 0 to nvecsU-1.
86
87 mapU ... (input) INTEGER array, length (n)
88 The i-th row of U is
89 owned by processor mapU[i], i = 0 to n-1.
90
91 m ...... (input) INTEGER
92 m is the number of columns in F.
93
94 colF ... (input/output) array of pointers to DoublePrecision,
95 length (nvecsF)
96
97 On Entry:
98 The part of matrix F owned by this processer stored
99 in packed format by columns, i.e., colF[i] points
100 to the first element of the i-th column of F
101 owned by this processor, i = 0 to nvecsF-1.
102
103 On Exit:
104 The result matrix U * F stored in the same manner as
105 F on entry.
106
107 mapF ... (input) INTEGER array, length (m)
108 The i-th column of F is
109 owned by processor mapF[i], i = 0 to m-1.
110
111 iscratch .. (workspace) INTEGER array, length ( 3*n+2*m +1 )
112
113 scratch.... (workspace) DOUBLE PRECISION array,
114 length ( maximum ( n+m, mxlbuf_()/sDP + 1,
115 (nvecsF * n + 2 * neleU_max )
116 */
117 static Integer ONE = 1;
118 Integer jndx;
119 Integer i, k, me, isize, *iptr, indx;
120 Integer nvecsU, nvecsF, ii;
121 Integer j, linfo, ll, mm, nproc;
122 Integer osize, rsize, idummy; /* */
123
124 Integer *mapvecF, *mapvecU;
125 Integer *iscrat, *proclist;
126 Integer *mapvec_in, me_indx, last_proc, next_proc, maxsz;
127
128 DoublePrecision *buffer, *d_ptr, *in_buffer, *out_buffer;
129 DoublePrecision *F_ptr, *U_ptr, t;
130
131
132
133 /*
134 blas calls
135 */
136
137 extern void dcopy_(), daxpy_();
138 extern DoublePrecision ddot_();
139
140 extern Integer mxmynd_();
141 extern void xerbla_();
142 extern void mapdif1_();
143 extern void g_exit_();
144 extern void pxerbla2_();
145 extern Integer fil_mapvec_();
146 extern void chol_pipe_bcast();
147 extern void gshellsort_();
148 extern Integer indxL();
149 extern void fil_dbl_lst();
150
151
152 /*
153 mxsubs calls
154 */
155
156 extern Integer mxwrit_(), mxread_();
157 extern Integer count_list();
158 extern Integer fil_mapvec_();
159 extern Integer ci_size_();
160 extern Integer reduce_list2();
161 extern Integer peigs_cmod_();
162
163 i = 0;
164 me = mxmynd_();
165
166 #ifdef DEBUG1
167 fprintf(stderr, " Entering mxm5x ********************* me = %d \n ", me );
168 #endif
169
170 if ( m == NULL )
171 i = -4;
172 else
173 if ( n == NULL ) {
174 i = -1;
175 xerbla_( "MXM5 ", &i);
176 }
177 else if ( *n < 1 )
178 i = -1;
179 else {
180 iscrat = mapU;
181 for ( j = 0; j < *n; j++ ) {
182 if ( iscrat == NULL ) {
183 i = -3;
184 xerbla_( "MXM5 \n", &i);
185 }
186 else
187 iscrat++;
188 }
189 iscrat = mapF;
190 for ( j = 0; j < *m; j++ ) {
191 if ( iscrat == NULL ) {
192 i = -6;
193 xerbla_( "MXM5 \n", &i);
194 }
195 else
196 iscrat++;
197 }
198 }
199
200 /*
201 at this point inputs are minimally acceptable
202
203 check to see if mapU and mapF are the same set of processors
204 */
205
206 iscrat = iscratch;
207 mapdif1_( n, mapU, m, mapF, iscrat, &j );
208
209 /*
210 * if ( j != 0 ) {
211 * i = -3;
212 * }
213 */
214
215 #ifdef DEBUG1
216 fprintf (stderr, "me = %d, i = %d, n=%d, m=%d \n", me, i, *n, *m);
217 for ( j = 0; j < *n; j++ )
218 fprintf (stderr, "me = %d, mapU[%d]= %d \n", me, j, mapU[j] );
219 for ( j = 0; j < *m; j++ )
220 fprintf (stderr, "me = %d, mapF[%d]=%d, \n", me, j, mapF[j] );
221 #endif
222
223 me = mxmynd_();
224 nvecsU = count_list ( me, mapU, n);
225 nvecsF = count_list ( me, mapF, m);
226
227 for ( j = 0; j < nvecsU; j++ )
228 if ( rowU[j] == NULL ) {
229 linfo = -2;
230 i = min(linfo, i);
231 break;
232 }
233
234 for ( j = 0; j < nvecsF; j++ )
235 if ( colF[j] == NULL ) {
236 i = min(i, -6);
237 break;
238 }
239
240 /*
241
242 g_exit_( &i, "Mapping problem or memory assignment problem MXM5X \n", mapU, n, iscratch, scratch );
243 g_exit_( &i, "Mapping problem or memory assignment problem MXM5 \n", mapF, n, iscratch, scratch );
244
245
246 linfo = 0;
247 ll = *n * sizeof(Integer);
248 pxerbla2_( &ll, (char *) mapU, mapU, n, iscrat, &i );
249 linfo = min(linfo, i);
250 ll = *m * sizeof(Integer);
251
252 pxerbla2_( &ll, (char *) mapF, mapU, n, iscrat, &i);
253
254 linfo = min(linfo, i);
255
256 g_exit_( &linfo, "Mapping inconsistancies calling MXM5X \n", mapU, n, iscratch, scratch );
257 g_exit_( &linfo, "Mapping inconsistancies calling MXM5X \n", mapF, n, iscratch, scratch );
258 */
259
260 me = mxmynd_();
261
262 ll = *n;
263 mm = *m;
264
265 iscrat = iscratch;
266 mapvecU = iscrat;
267
268 #ifdef DEBUG
269 fprintf(stderr, " in mxm5x me = %d \n", me);
270 #endif
271
272 nvecsU = fil_mapvec_( &me, &ll, mapU, mapvecU );
273 iscrat += nvecsU;
274 mapvecF = iscrat;
275 nvecsF = fil_mapvec_( &me, m, mapF, mapvecF );
276 iscrat += nvecsF;
277
278 #ifdef DEBUG1
279 k = -1;
280 for ( j = 0; j < *n; j++ ){
281 if( mapU[j] == me ) {
282 k++;
283 for ( i = 0; i < *n-j; i++ )
284 fprintf (stderr, "me = %d, rowU[%d][%d] = %g \n", me, k, i, rowU[k][i] );
285 }
286 }
287 for ( j = 0; j < nvecsF; j++ )
288 for ( i = 0; i < *n; i++ )
289 fprintf (stderr, "me = %d, colF[%d][%d] = %g \n", me, j, i, colF[j][i] );
290 #endif
291
292
293 if ( nvecsU + nvecsF == 0 )
294 return;
295
296 iptr = (Integer *) scratch;
297 for ( i = 0; i < ll; i++ )
298 *(iptr++) = mapU[i];
299 for ( i = 0; i < mm; i++ )
300 *(iptr++) = mapF[i];
301
302 iptr = (Integer *) scratch;
303
304 /*
305 nproc = reduce_list( ll+mm, iptr, iscrat);
306 */
307
308 nproc = reduce_list2( ll+mm, iptr, iscrat);
309 proclist = iscrat;
310 iscrat += nproc;
311 mapvec_in = iscrat;
312
313 gshellsort_( &nproc, proclist);
314
315 indx = indxL ( me, nproc, proclist);
316 me_indx = indx;
317 indx += nproc;
318
319 /*
320 last_proc = (indx - 1) % nproc;
321 */
322
323 idummy = indx - 1;
324 idummy += nproc;
325 last_proc = (idummy+nproc) % nproc;
326 last_proc = proclist[last_proc];
327
328 /*
329 next_proc = (indx + 1) % nproc;
330 */
331
332 idummy = indx + 1;
333 idummy += nproc;
334 next_proc = (idummy + nproc ) % nproc;
335 next_proc = proclist[next_proc];
336
337 maxsz = 0;
338 isize = 0;
339 for ( indx = 0; indx < nproc; indx++ ) {
340 isize = ci_size_( &proclist[indx], &ll, mapU); /* the number Q vectors that I own */
341 maxsz = max( isize, maxsz );
342 }
343
344 buffer = (DoublePrecision *) scratch;
345 indx = 0;
346 for ( jndx = 0; jndx < nvecsF; jndx++ ){
347 dcopy_( &ll, colF[jndx], &ONE, &buffer[indx], &ONE);
348 indx += ll;
349 }
350 buffer += indx;
351
352 for ( jndx = 0; jndx < nvecsF; jndx++ ){
353 d_ptr = colF[jndx];
354 fil_dbl_lst ( ll, d_ptr, 0.0e0);
355 }
356
357 in_buffer = buffer;
358 out_buffer = buffer + maxsz;
359
360 /*
361 copy upper triangular matrix into memory
362 */
363
364 d_ptr = in_buffer;
365 for ( jndx = 0; jndx < nvecsU; jndx++ ){
366 ii = mapvecU[jndx];
367 ii = ll - ii;
368 dcopy_( &ii, rowU[jndx], &ONE, d_ptr, &ONE);
369 d_ptr += ii;
370 }
371
372 /*
373 compute local matrix multiply
374 */
375
376 F_ptr = scratch;
377 for ( k = 0; k < nvecsF; k++ ){
378 U_ptr = in_buffer;
379 for ( jndx = 0; jndx < nvecsU; jndx++ ) {
380 i = mapvecU[jndx];
381 isize = ll - i;
382 t = ddot_( &isize, &F_ptr[i], &ONE, U_ptr , &ONE);
383 colF[k][i] = t;
384 U_ptr += isize;
385 }
386 F_ptr += ll;
387 }
388
389 buffer = out_buffer + maxsz;
390
391 isize = ci_size_( &me, &ll, mapU); /* the number Q vectors that I own */
392 osize = isize;
393
394 me_indx += nproc;
395
396 for ( i = 0; i < nproc-1 ; i++ ) {
397 dcopy_(&osize, in_buffer, &ONE, out_buffer, &ONE);
398 idummy = 2;
399 if ( (me_indx + nproc) % idummy == 0 ) {
400 rsize = osize * sizeof(DoublePrecision);
401 if ( rsize != 0 )
402 rsize = mxwrit_( out_buffer, &rsize, &next_proc, &i );
403
404 /*
405 indx = ( me_indx -i -1) % nproc;
406 */
407 idummy = me_indx - i - 1;
408 idummy += nproc;
409 indx = (idummy + nproc ) % nproc;
410 indx = proclist[indx]; /* the current processor id number */
411 nvecsU = fil_mapvec_( &indx, &ll, mapU, mapvec_in);
412 isize = ci_size_( &indx, &ll, mapU); /* the number Q vectors that I own */
413
414 /*
415 mapvec_in contains the mapvecQ for processor proclist[i])
416 */
417
418 rsize = isize * sizeof(DoublePrecision);
419 if ( rsize != 0 )
420 rsize = mxread_( in_buffer, &rsize, &last_proc, &i );
421 }
422 else {
423 /*
424 indx = ( me_indx - i -1 ) % nproc;
425 */
426 idummy = me_indx - i - 1;
427 idummy += nproc;
428 indx = (idummy + nproc) % nproc;
429 indx = proclist[indx]; /* the current processor id number */
430 nvecsU = fil_mapvec_( &indx, &ll, mapU, mapvec_in);
431 isize = ci_size_( &indx, &ll, mapU); /* the number Q vectors that I own */
432
433 /*
434 mapvec_in contains the mapvecQ for processor proclist[i])
435 */
436
437 rsize = isize * sizeof(DoublePrecision);
438
439 if ( rsize != 0 )
440 rsize = mxread_( in_buffer, &rsize, &last_proc, &i );
441
442 rsize = osize * sizeof(DoublePrecision);
443 if ( rsize != 0 )
444 rsize = mxwrit_( out_buffer, &rsize, &next_proc, &i );
445 }
446
447 osize = isize;
448
449 F_ptr = scratch;
450 for ( k = 0; k < nvecsF; k++ ){
451 U_ptr = in_buffer;
452 for ( jndx = 0; jndx < nvecsU; jndx++ ) {
453 ii = mapvec_in[jndx];
454 isize = ll - ii;
455 t = ddot_( &isize, &F_ptr[ii], &ONE, U_ptr , &ONE);
456 colF[k][ii] = t;
457 U_ptr += isize;
458 }
459 F_ptr += ll;
460 }
461 }
462
463 return;
464 }
465
466