1 /*
2  $Id$
3  *======================================================================
4  *
5  * DISCLAIMER
6  *
7  * This material was prepared as an account of work sponsored by an
8  * agency of the United States Government.  Neither the United States
9  * Government nor the United States Department of Energy, nor Battelle,
10  * nor any of their employees, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
11  * ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
12  * COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
13  * SOFTWARE, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
14  * INFRINGE PRIVATELY OWNED RIGHTS.
15  *
16  * ACKNOWLEDGMENT
17  *
18  * This software and its documentation were produced with Government
19  * support under Contract Number DE-AC06-76RLO-1830 awarded by the United
20  * States Department of Energy.  The Government retains a paid-up
21  * non-exclusive, irrevocable worldwide license to reproduce, prepare
22  * derivative works, perform publicly and display publicly by or for the
23  * Government, including the right to distribute to other Government
24  * contractors.
25  *
26  *======================================================================
27  *
28  *  -- PEIGS  routine (version 2.1) --
29  *     Pacific Northwest Laboratory
30  *     July 28, 1995
31  *
32  *======================================================================
33  */
34 #include <stdio.h>
35 #include <math.h>
36 
37 #include "globalp.c.h"
38 
choleski(n,vecA,mapA,iscratch,scratch,info)39 void choleski( n, vecA, mapA, iscratch, scratch, info )
40 
41      Integer            *n, mapA[], iscratch[], *info;
42 
43      DoublePrecision         scratch[];
44 
45      DoublePrecision        **vecA;
46 {
47 
48   /*
49    *  ==================================================================
50    *
51    *
52    *  The original code is due to Mike Heath.  George Fann
53    *  bastardized it for our code.  We thank Mike
54    *  for his generosity.  Any bugs and problems are introduced
55    *  by us.
56    *
57    *
58    *  Currently, this procedure only handles the case where the
59    *  lower triangular part of A is distributed to processors by columns.
60    *
61    *
62    *  Purpose
63    *  =======
64    *
65    *  choleski computes the Cholesky factorization of a real symmetric
66    *  positive definite matrix A in packed storage format.
67    *
68    *  The factorization has the form
69    *     A = L  * L'
70    *  where L is lower triangular.  On output the matrix A is overwritten by
71    *   the Choleski factor L.
72    *
73    *
74    *  Arguments
75    *  =========
76    *
77    *  In the following let:
78    *
79    *     me     = The id of this processor
80    *
81    *     nvecsA = The number of columns of A owned by this processor.
82    *              In particular, nvecsA = count_list(me, mapA, n).
83    *
84    *     nprocsA = Number of distinct processor id's in "mapA".  In
85    *               particular, nprocs = reduce_list( n, mapA, iscratch )
86    *
87    *  n       (input) (Integer *) scaler
88    *          The order of the matrix A.  n >= 0.
89    *
90    *  vecA    (input/output) (DoublePrecision **) array, length (nvecsA)
91    *          On entry, the portion of the lower triangular part of the
92    *          symmetric matrix A owned by this processor.
93    *
94    *          vecA[i] points to an array containing the lower triangular
95    *          part of the i-th column of A which is owned by this
96    *          processor, i = 0 to nvecsA.
97    *
98    *          On exit, if *info = 0, the factor L from the Cholesky
99    *          factorization A = L*L^(T).
100    *
101    *  mapA    (input) (Integer *) array, length (*n)
102    *          mapA[i] = the id of the processor which owns column i of A,
103    *          i = 0 to *n - 1.
104    *
105    *  iscratch  (integer workspace) (Integer *) array, length (3 n + 2)
106    *
107    *  scratch   (DoublePrecision workspace) (DoublePrecision *) array,
108    *            length ( MAX{ n+1, mxlbuf_() / sizeof(DoublePrecision) + 1})
109    *
110    *  info      (output) (Integer *) scaler
111    *            = 0: successful exit
112    *            < 0: if info = -k, the k-th argument had an illegal value
113    *            > 0: if info = k, 1<= k <= n, the leading minor of order
114    *                 k is not positive definite, and the factorization
115    *                 could not be completed.
116    *
117    *
118    *
119    *  ==================================================================
120    */
121 
122   /*
123    *  Local Variables
124    */
125 
126   char            msg[30];
127   Integer             linfo,k, i;
128   Integer             me, nprocs, nvecsA;
129   Integer            *myvecsA, *iwork;
130 
131 
132   /*
133    *  External Procedures
134    */
135 
136   extern Integer      mxmynd_(), mxnprc_();
137   extern Integer      count_list(), fil_mapvec_(), reduce_list(), reduce_list2();
138   extern void bbcast00();
139 
140   void     sub_chol0() ;
141   extern char    *strcpy();
142 
143   /*
144    *  ---------------------------------------------------------------
145    *                      Executable Statements
146    *  ---------------------------------------------------------------
147    */
148 
149   /*
150    *  Quick Return if possible.
151    */
152 
153   strcpy ( msg, "Error in Choleski\n");
154 
155   linfo = 0;
156   if ( info == NULL ) {
157     linfo = -6;
158     xerbla_( "Choleski \n", &linfo);
159     return;
160   }
161 
162   *info = 0;
163 
164   if ( n == NULL ) {
165     linfo = -1;
166     xerbla_( "Choleski \n", &linfo);
167     return;
168   }
169 
170  if ( *n < 1) {
171     linfo = -1;
172     xerbla_( "Choleski \n", &linfo);
173     return;
174   }
175 
176   /*
177    *  Get this processor's id, and the number of allocated nodes.
178    */
179 
180 
181   me     = mxmynd_();
182   nprocs = mxnprc_();
183 
184   iwork = mapA;
185   for ( k = 0; k < *n; k++ ) {
186     if ( iwork++ == NULL ) {
187       linfo = -2;
188       xerbla_ ( "Choleski \n", &linfo);
189       return;
190     }
191   }
192 
193   /*
194    *     Test the input map array
195    */
196 
197   for ( k = 0; k < *n; k++ ) {
198     i = mapA[k];
199     if (( i  < 0 ) ||   ( i > nprocs - 1 )) {
200       linfo = -3;
201       xerbla_ ( "Choleski \n", &linfo);
202       return;
203     }
204   }
205 
206   *iscratch = *n;
207   iwork = iscratch + 1;
208   for ( k = 0; k < *n; k++ )
209     *(iwork++) = mapA[k];
210 
211   linfo = 0;
212   k = (*n + 1)*sizeof(Integer);
213   iwork = iscratch + *n + 1;
214   pxerbla2_ ( &k, (char *) iscratch, mapA, n, iwork, &linfo );
215 
216   g_exit_ ( &linfo, "CHOLESKI: Mapping inconsistancies.\n", mapA, n, iwork, scratch);
217 
218   /*
219    *  Partition workspace.
220    */
221 
222   myvecsA = iscratch;
223   nvecsA = fil_mapvec_( &me, n, mapA, myvecsA );
224   iwork   = iscratch + nvecsA;
225 
226   if (nvecsA < 1 ){
227     *info = -50;
228      return;
229   }
230 
231   /*
232    *  Factor matrix.
233    */
234 
235   sub_chol0( me, *n, vecA, mapA, nvecsA, myvecsA, iwork, scratch, info );
236 
237   return;
238 }
239 
240 
sub_chol0(me,n,col,map,ncols,mycols,i_scratch,scratch,info)241 void sub_chol0( me, n, col, map, ncols, mycols, i_scratch, scratch, info )
242      Integer             me, n, ncols;
243      Integer            *map, *mycols, *i_scratch, *info;
244      DoublePrecision         *scratch;
245      DoublePrecision        **col;
246 {
247 
248   /*
249    *  ==================================================================
250    *
251    *  Submatrix-Cholesky factorization.
252    *  Uses fan-out communication without send-ahead.
253    *
254    *  The original code is due to Mike Heath.  George Fann
255    *  bastardized it for our code.
256    *
257    *  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
258    *  This procedure does no input error checking and should only be
259    *  called via "choleski".
260    *  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
261    *
262    *  Workspace requirements:
263    *  -----------------------
264    *
265    *     Variable       Type       Length
266    *    -----------  ----------    ------
267    *    i_scratch     (Integer *)   2*n+1
268    *
269    *    scratch       (DoublePrecision *)    n+1
270    *
271    *
272    *  On exit:
273    *  --------
274    *
275    *    *info = 0  means factorization finished successfully
276    *    *info = k  1 <= k <= n, means the leading minor of order k is
277    *               not positive definite, and the factorization could not
278    *               be completed.
279    *
280    *  ==================================================================
281    */
282 
283   /*
284     *  Local Variables
285     */
286 
287    static Integer      IONE = 1;
288    static DoublePrecision   ZERO = 0.0e0, ONE = 1.0e0;
289 
290    Integer             i, j, k, m, indx, num_procs;
291    DoublePrecision          t, *q;
292    long length, root;
293 
294    /*
295     *  External Procedures
296     */
297 
298    extern void     dscal_(), daxpy_(), dcopy_();
299    extern void     chol_pipe_bcast();
300    extern Integer  reduce_list2();
301    extern void bbcast00();
302 
303 /*
304    extern void iCC_bcast(), iCC_work_init();
305 */
306 
307    /*
308     *  Intrinsic Procedures
309     */
310 
311    /*
312     *  ---------------------------------------------------------------
313     *                      Executable Statements
314     *  ---------------------------------------------------------------
315     */
316 
317   *info = 0;
318 
319 
320 #ifdef DEBUG1
321    for ( j = 0; j < n; j++ )
322      fprintf(stderr, " me = %d j = %d map %d \n", me, j, map[j]);
323 #endif
324 
325 
326    num_procs = reduce_list2( n, map, i_scratch );
327 
328 #ifdef DEBUG1
329    for ( j = 0; j < num_procs; j++ )
330      fprintf(stderr, " me = %d j = %d map %d num_procs = %d \n", me, j,
331 	     map[j], num_procs);
332 #endif
333 
334 
335    j = 0;
336 
337    /*
338      #ifdef Intel
339      i_random(num_procs, i_scratch, i_scratch+num_procs+1);
340      #endif
341      */
342 
343    /*
344      iCC_work_init(( unsigned long ) n);
345      */
346 
347    for ( k = 0; k < n; k++ )
348      {
349        m = n - k;
350        if ( map[ k ] == me )
351 	 {
352 	   /*
353 	    *  Check for postive definiteness and scale column k.
354 	    */
355 
356 	   if ( *col[ j ] <= ZERO ){
357 	     scratch[ m ] = ONE;
358 	   }
359 	   else
360 	     {
361 	       t = ONE/sqrt( *col[ j ] );
362 	       dscal_( &m, &t, col[ j ], &IONE );
363 	       dcopy_( &m, col[ j ], &IONE, scratch, &IONE );
364 	       j++;
365 	       scratch[ m ] = ZERO;
366 	     }
367 	 }
368 
369        /*
370 	*  Distribute new column k, and info.
371 	*/
372 
373        i = (m+1) * sizeof(DoublePrecision);
374        length = (long) i;
375        root = (long) map[k];
376 
377        if ( k > -1 ) {
378 	 /*
379 	   iCC_bcast( scratch, length, root );
380 	   */
381 	 bbcast00((char *) scratch, i, k, map[k], num_procs, i_scratch );
382        }
383        else
384 	 chol_pipe_bcast( (char *) scratch, i, k, map[ k ], n - k, &map[ k ], i_scratch );
385 
386        if ( scratch[ m ] != ZERO )
387 	 {
388 	   *info = k + 1;
389 	   break;
390 	 }
391 
392        /*
393 	*  Update the rest of the matrix.
394 	*/
395 
396        for ( i = j; i < ncols; i++ )
397 	 {
398 	   indx = mycols[ i ];
399 	   q    = scratch + indx - k;
400 	   m    = n - indx;
401 	   t    = -(*q);
402 	   daxpy_( &m, &t, q, &IONE, col[ i ], &IONE );
403 	 }
404      }
405 
406    /*
407     *  Global check of info.  Just pipeline info from owner of column,
408     *  n-1, who always knows the correct final value of info, to the
409     *  rest of the processors in "map".
410     */
411 
412    /*
413      chol_pipe_bcast( info, sizeof(Integer), n+1, map[ n-1 ], n, map, i_scratch );
414      */
415 
416    num_procs = reduce_list2( n, map, i_scratch );
417 #ifdef MESH
418    i_random(num_procs, i_scratch, i_scratch+num_procs+1);
419 #endif
420    bbcast00( (char *) info, sizeof(Integer), n+1, map[n-1], num_procs, i_scratch );
421 
422    /*
423      length = (long) sizeof(Integer);
424      root = (long) map[0];
425      iCC_bcast( info, length, root );
426      */
427 
428    return;
429  }
430 
431 
432