1 /*
2 $Id$
3 *======================================================================
4 *
5 * DISCLAIMER
6 *
7 * This material was prepared as an account of work sponsored by an
8 * agency of the United States Government. Neither the United States
9 * Government nor the United States Department of Energy, nor Battelle,
10 * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11 * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12 * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13 * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14 * INFRINGE PRIVATELY OWNED RIGHTS.
15 *
16 * ACKNOWLEDGMENT
17 *
18 * This software and its documentation were produced with Government
19 * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20 * States Department of Energy. The Government retains a paid-up
21 * non-exclusive, irrevocable worldwide license to reproduce, prepare
22 * derivative works, perform publicly and display publicly by or for the
23 * Government, including the right to distribute to other Government
24 * contractors.
25 *
26 *======================================================================
27 *
28 * -- PEIGS routine (version 2.1) --
29 * Pacific Northwest Laboratory
30 * July 28, 1995
31 *
32 *======================================================================
33 */
34 #include <stdio.h>
35 #include <math.h>
36
37 #include "globalp.c.h"
38
choleski(n,vecA,mapA,iscratch,scratch,info)39 void choleski( n, vecA, mapA, iscratch, scratch, info )
40
41 Integer *n, mapA[], iscratch[], *info;
42
43 DoublePrecision scratch[];
44
45 DoublePrecision **vecA;
46 {
47
48 /*
49 * ==================================================================
50 *
51 *
52 * The original code is due to Mike Heath. George Fann
53 * bastardized it for our code. We thank Mike
54 * for his generosity. Any bugs and problems are introduced
55 * by us.
56 *
57 *
58 * Currently, this procedure only handles the case where the
59 * lower triangular part of A is distributed to processors by columns.
60 *
61 *
62 * Purpose
63 * =======
64 *
65 * choleski computes the Cholesky factorization of a real symmetric
66 * positive definite matrix A in packed storage format.
67 *
68 * The factorization has the form
69 * A = L * L'
70 * where L is lower triangular. On output the matrix A is overwritten by
71 * the Choleski factor L.
72 *
73 *
74 * Arguments
75 * =========
76 *
77 * In the following let:
78 *
79 * me = The id of this processor
80 *
81 * nvecsA = The number of columns of A owned by this processor.
82 * In particular, nvecsA = count_list(me, mapA, n).
83 *
84 * nprocsA = Number of distinct processor id's in "mapA". In
85 * particular, nprocs = reduce_list( n, mapA, iscratch )
86 *
87 * n (input) (Integer *) scaler
88 * The order of the matrix A. n >= 0.
89 *
90 * vecA (input/output) (DoublePrecision **) array, length (nvecsA)
91 * On entry, the portion of the lower triangular part of the
92 * symmetric matrix A owned by this processor.
93 *
94 * vecA[i] points to an array containing the lower triangular
95 * part of the i-th column of A which is owned by this
96 * processor, i = 0 to nvecsA.
97 *
98 * On exit, if *info = 0, the factor L from the Cholesky
99 * factorization A = L*L^(T).
100 *
101 * mapA (input) (Integer *) array, length (*n)
102 * mapA[i] = the id of the processor which owns column i of A,
103 * i = 0 to *n - 1.
104 *
105 * iscratch (integer workspace) (Integer *) array, length (3 n + 2)
106 *
107 * scratch (DoublePrecision workspace) (DoublePrecision *) array,
108 * length ( MAX{ n+1, mxlbuf_() / sizeof(DoublePrecision) + 1})
109 *
110 * info (output) (Integer *) scaler
111 * = 0: successful exit
112 * < 0: if info = -k, the k-th argument had an illegal value
113 * > 0: if info = k, 1<= k <= n, the leading minor of order
114 * k is not positive definite, and the factorization
115 * could not be completed.
116 *
117 *
118 *
119 * ==================================================================
120 */
121
122 /*
123 * Local Variables
124 */
125
126 char msg[30];
127 Integer linfo,k, i;
128 Integer me, nprocs, nvecsA;
129 Integer *myvecsA, *iwork;
130
131
132 /*
133 * External Procedures
134 */
135
136 extern Integer mxmynd_(), mxnprc_();
137 extern Integer count_list(), fil_mapvec_(), reduce_list(), reduce_list2();
138 extern void bbcast00();
139
140 void sub_chol0() ;
141 extern char *strcpy();
142
143 /*
144 * ---------------------------------------------------------------
145 * Executable Statements
146 * ---------------------------------------------------------------
147 */
148
149 /*
150 * Quick Return if possible.
151 */
152
153 strcpy ( msg, "Error in Choleski\n");
154
155 linfo = 0;
156 if ( info == NULL ) {
157 linfo = -6;
158 xerbla_( "Choleski \n", &linfo);
159 return;
160 }
161
162 *info = 0;
163
164 if ( n == NULL ) {
165 linfo = -1;
166 xerbla_( "Choleski \n", &linfo);
167 return;
168 }
169
170 if ( *n < 1) {
171 linfo = -1;
172 xerbla_( "Choleski \n", &linfo);
173 return;
174 }
175
176 /*
177 * Get this processor's id, and the number of allocated nodes.
178 */
179
180
181 me = mxmynd_();
182 nprocs = mxnprc_();
183
184 iwork = mapA;
185 for ( k = 0; k < *n; k++ ) {
186 if ( iwork++ == NULL ) {
187 linfo = -2;
188 xerbla_ ( "Choleski \n", &linfo);
189 return;
190 }
191 }
192
193 /*
194 * Test the input map array
195 */
196
197 for ( k = 0; k < *n; k++ ) {
198 i = mapA[k];
199 if (( i < 0 ) || ( i > nprocs - 1 )) {
200 linfo = -3;
201 xerbla_ ( "Choleski \n", &linfo);
202 return;
203 }
204 }
205
206 *iscratch = *n;
207 iwork = iscratch + 1;
208 for ( k = 0; k < *n; k++ )
209 *(iwork++) = mapA[k];
210
211 linfo = 0;
212 k = (*n + 1)*sizeof(Integer);
213 iwork = iscratch + *n + 1;
214 pxerbla2_ ( &k, (char *) iscratch, mapA, n, iwork, &linfo );
215
216 g_exit_ ( &linfo, "CHOLESKI: Mapping inconsistancies.\n", mapA, n, iwork, scratch);
217
218 /*
219 * Partition workspace.
220 */
221
222 myvecsA = iscratch;
223 nvecsA = fil_mapvec_( &me, n, mapA, myvecsA );
224 iwork = iscratch + nvecsA;
225
226 if (nvecsA < 1 ){
227 *info = -50;
228 return;
229 }
230
231 /*
232 * Factor matrix.
233 */
234
235 sub_chol0( me, *n, vecA, mapA, nvecsA, myvecsA, iwork, scratch, info );
236
237 return;
238 }
239
240
sub_chol0(me,n,col,map,ncols,mycols,i_scratch,scratch,info)241 void sub_chol0( me, n, col, map, ncols, mycols, i_scratch, scratch, info )
242 Integer me, n, ncols;
243 Integer *map, *mycols, *i_scratch, *info;
244 DoublePrecision *scratch;
245 DoublePrecision **col;
246 {
247
248 /*
249 * ==================================================================
250 *
251 * Submatrix-Cholesky factorization.
252 * Uses fan-out communication without send-ahead.
253 *
254 * The original code is due to Mike Heath. George Fann
255 * bastardized it for our code.
256 *
257 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
258 * This procedure does no input error checking and should only be
259 * called via "choleski".
260 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
261 *
262 * Workspace requirements:
263 * -----------------------
264 *
265 * Variable Type Length
266 * ----------- ---------- ------
267 * i_scratch (Integer *) 2*n+1
268 *
269 * scratch (DoublePrecision *) n+1
270 *
271 *
272 * On exit:
273 * --------
274 *
275 * *info = 0 means factorization finished successfully
276 * *info = k 1 <= k <= n, means the leading minor of order k is
277 * not positive definite, and the factorization could not
278 * be completed.
279 *
280 * ==================================================================
281 */
282
283 /*
284 * Local Variables
285 */
286
287 static Integer IONE = 1;
288 static DoublePrecision ZERO = 0.0e0, ONE = 1.0e0;
289
290 Integer i, j, k, m, indx, num_procs;
291 DoublePrecision t, *q;
292 long length, root;
293
294 /*
295 * External Procedures
296 */
297
298 extern void dscal_(), daxpy_(), dcopy_();
299 extern void chol_pipe_bcast();
300 extern Integer reduce_list2();
301 extern void bbcast00();
302
303 /*
304 extern void iCC_bcast(), iCC_work_init();
305 */
306
307 /*
308 * Intrinsic Procedures
309 */
310
311 /*
312 * ---------------------------------------------------------------
313 * Executable Statements
314 * ---------------------------------------------------------------
315 */
316
317 *info = 0;
318
319
320 #ifdef DEBUG1
321 for ( j = 0; j < n; j++ )
322 fprintf(stderr, " me = %d j = %d map %d \n", me, j, map[j]);
323 #endif
324
325
326 num_procs = reduce_list2( n, map, i_scratch );
327
328 #ifdef DEBUG1
329 for ( j = 0; j < num_procs; j++ )
330 fprintf(stderr, " me = %d j = %d map %d num_procs = %d \n", me, j,
331 map[j], num_procs);
332 #endif
333
334
335 j = 0;
336
337 /*
338 #ifdef Intel
339 i_random(num_procs, i_scratch, i_scratch+num_procs+1);
340 #endif
341 */
342
343 /*
344 iCC_work_init(( unsigned long ) n);
345 */
346
347 for ( k = 0; k < n; k++ )
348 {
349 m = n - k;
350 if ( map[ k ] == me )
351 {
352 /*
353 * Check for postive definiteness and scale column k.
354 */
355
356 if ( *col[ j ] <= ZERO ){
357 scratch[ m ] = ONE;
358 }
359 else
360 {
361 t = ONE/sqrt( *col[ j ] );
362 dscal_( &m, &t, col[ j ], &IONE );
363 dcopy_( &m, col[ j ], &IONE, scratch, &IONE );
364 j++;
365 scratch[ m ] = ZERO;
366 }
367 }
368
369 /*
370 * Distribute new column k, and info.
371 */
372
373 i = (m+1) * sizeof(DoublePrecision);
374 length = (long) i;
375 root = (long) map[k];
376
377 if ( k > -1 ) {
378 /*
379 iCC_bcast( scratch, length, root );
380 */
381 bbcast00((char *) scratch, i, k, map[k], num_procs, i_scratch );
382 }
383 else
384 chol_pipe_bcast( (char *) scratch, i, k, map[ k ], n - k, &map[ k ], i_scratch );
385
386 if ( scratch[ m ] != ZERO )
387 {
388 *info = k + 1;
389 break;
390 }
391
392 /*
393 * Update the rest of the matrix.
394 */
395
396 for ( i = j; i < ncols; i++ )
397 {
398 indx = mycols[ i ];
399 q = scratch + indx - k;
400 m = n - indx;
401 t = -(*q);
402 daxpy_( &m, &t, q, &IONE, col[ i ], &IONE );
403 }
404 }
405
406 /*
407 * Global check of info. Just pipeline info from owner of column,
408 * n-1, who always knows the correct final value of info, to the
409 * rest of the processors in "map".
410 */
411
412 /*
413 chol_pipe_bcast( info, sizeof(Integer), n+1, map[ n-1 ], n, map, i_scratch );
414 */
415
416 num_procs = reduce_list2( n, map, i_scratch );
417 #ifdef MESH
418 i_random(num_procs, i_scratch, i_scratch+num_procs+1);
419 #endif
420 bbcast00( (char *) info, sizeof(Integer), n+1, map[n-1], num_procs, i_scratch );
421
422 /*
423 length = (long) sizeof(Integer);
424 root = (long) map[0];
425 iCC_bcast( info, length, root );
426 */
427
428 return;
429 }
430
431
432