1 #include "ICFS.h"
2 #include <cmath>
3 #include <algorithm>
4 
5 #ifdef USE_OPENMP
6 #include <omp.h>
7 #endif
8 
9 #include "ICFS.h"
10 
11 /*****************************************************************
12 
13 COPYRIGHT NOTIFICATION
14 
15 This program discloses material protectable under copyright laws of
16 the United States. Permission to copy and modify this software and its
17 documentation for internal research use is hereby granted, provided
18 that this notice is retained thereon and on all copies or modifications.
19 The University of Chicago makes no representations as to the suitability
20 and operability of this software for any purpose.
21 It is provided "as is" without express or implied warranty.
22 
23 Use of this software for commercial purposes is expressly prohibited
24 without contacting
25 
26 Jorge J. More'
27 Mathematics and Computer Science Division
28 Argonne National Laboratory
29 9700 S. Cass Ave.
30 Argonne, Illinois 60439-4844
31 e-mail: more@mcs.anl.gov
32 
33 Argonne National Laboratory with facilities in the states of
34 Illinois and Idaho, is owned by The United States Government, and
35 operated by the University of Chicago under provision of a contract
36 with the Department of Energy.
37 
38 *****************************************************************/
39 
dicfs_(int * n,int * nnz,double * a,double * adiag,int * acol_ptr__,int * arow_ind__,double * l,double * ldiag,int * lcol_ptr__,int * lrow_ind__,int * p,double * alpha,int * iwa,double * wa1,double * wa2)40 /* Subroutine */int dicfs_(int *n, int *nnz, double *a, double *adiag,
41                                                    int *acol_ptr__, int *arow_ind__, double *l, double *ldiag,
42                                                    int *lcol_ptr__, int * lrow_ind__, int *p, double *alpha, int *iwa,
43                                                    double * wa1, double *wa2)
44 {
45         /* System generated locals */
46         int i__1, i__2;
47         double d__1/*, d__2, d__3*/;
48 
49         /* Local variables */
50         static int i__, j, k, nb;
51         static int info;
52         static double alphas;
53 
54         /*     ********* */
55 
56         /*     Subroutine dicfs */
57 
58         /*     Given a symmetric matrix A in compressed column storage, this */
59         /*     subroutine computes an incomplete Cholesky factor of A + alpha*D, */
60         /*     where alpha is a shift and D is the diagonal matrix with entries */
61         /*     set to the l2 norms of the columns of A. */
62 
63         /*     The subroutine statement is */
64 
65         /*       subroutine dicfs(n,nnz,a,adiag,acol_ptr,arow_ind, */
66         /*                        l,ldiag,lcol_ptr,lrow_ind, */
67         /*                        p,alpha,iwa,wa1,wa2) */
68 
69         /*     where */
70 
71         /*       n is an int variable. */
72         /*         On entry n is the order of A. */
73         /*         On exit n is unchanged. */
74 
75         /*       nnz is an int variable. */
76         /*         On entry nnz is the number of nonzeros in the strict lower */
77         /*            triangular part of A. */
78         /*         On exit nnz is unchanged. */
79 
80         /*       a is a double precision array of dimension nnz. */
81         /*         On entry a must contain the strict lower triangular part */
82         /*            of A in compressed column storage. */
83         /*         On exit a is unchanged. */
84 
85         /*       adiag is a double precision array of dimension n. */
86         /*         On entry adiag must contain the diagonal elements of A. */
87         /*         On exit adiag is unchanged. */
88 
89         /*       acol_ptr is an int array of dimension n + 1. */
90         /*         On entry acol_ptr must contain pointers to the columns of A. */
91         /*            The nonzeros in column j of A must be in positions */
92         /*            acol_ptr(j), ... , acol_ptr(j+1) - 1. */
93         /*         On exit acol_ptr is unchanged. */
94 
95         /*       arow_ind is an int array of dimension nnz. */
96         /*         On entry arow_ind must contain row indices for the strict */
97         /*            lower triangular part of A in compressed column storage. */
98         /*         On exit arow_ind is unchanged. */
99 
100         /*       l is a double precision array of dimension nnz+n*p. */
101         /*         On entry l need not be specified. */
102         /*         On exit l contains the strict lower triangular part */
103         /*            of L in compressed column storage. */
104 
105         /*       ldiag is a double precision array of dimension n. */
106         /*         On entry ldiag need not be specified. */
107         /*         On exit ldiag contains the diagonal elements of L. */
108 
109         /*       lcol_ptr is an int array of dimension n + 1. */
110         /*         On entry lcol_ptr need not be specified. */
111         /*         On exit lcol_ptr contains pointers to the columns of L. */
112         /*            The nonzeros in column j of L are in the */
113         /*            lcol_ptr(j), ... , lcol_ptr(j+1) - 1 positions of l. */
114 
115         /*       lrow_ind is an int array of dimension nnz+n*p. */
116         /*         On entry lrow_ind need not be specified. */
117         /*         On exit lrow_ind contains row indices for the strict lower */
118         /*            triangular part of L in compressed column storage. */
119 
120         /*       p is an int variable. */
121         /*         On entry p specifes the amount of memory available for the */
122         /*            incomplete Cholesky factorization. */
123         /*         On exit p is unchanged. */
124 
125         /*       alpha is a double precision variable. */
126         /*         On entry alpha is the initial guess of the shift. */
127         /*         On exit alpha is final shift */
128 
129         /*       iwa is an int work array of dimesnion 3*n. */
130 
131         /*       wa1 is a double precision work array of dimension n. */
132 
133         /*       wa2 is a double precision work array of dimension n. */
134 
135         /*     Subprograms called */
136 
137         /*         MINPACK-2  ......  dicf */
138 
139         /*     MINPACK-2 Project. October 1998. */
140         /*     Argonne National Laboratory. */
141         /*     Chih-Jen Lin and Jorge J. More'. */
142 
143         /*     ********** */
144         /*     Compute the l2 norms of the columns of A. */
145         /* Parameter adjustments */
146         --wa2;
147         --wa1;
148         --iwa;
149         --lcol_ptr__;
150         --ldiag;
151         --acol_ptr__;
152         --adiag;
153         --arow_ind__;
154         --a;
155         --lrow_ind__;
156         --l;
157 
158         /* Function Body */
159         i__1 = *n;
160 #ifdef USE_OPENMP
161 #pragma omp parallel for private(i__)
162 #endif
163         for (i__ = 1; i__ <= i__1; ++i__)
164         {
165                 /* Computing 2nd power */
166                 wa1[i__] = adiag[i__] * adiag[i__];
167         }
168         i__1 = *n;
169         for (j = 1; j <= i__1; ++j)
170         {
171                 i__2 = acol_ptr__[j + 1] - 1;
172                 for (i__ = acol_ptr__[j]; i__ <= i__2; ++i__)
173                 {
174                         k = arow_ind__[i__];
175                         /* Computing 2nd power */
176                         d__1 = a[i__] * a[i__];
177                         wa1[j] += d__1;
178                         wa1[k] += d__1;
179                 }
180         }
181         i__1 = *n;
182 #ifdef USE_OPENMP
183 #pragma omp parallel for private(j)
184 #endif
185         for (j = 1; j <= i__1; ++j)
186         {
187                 wa1[j] = std::sqrt(wa1[j]);
188         }
189         /*     Compute the scaling matrix D. */
190         i__1 = *n;
191         std::fill(&wa2[1], &wa2[i__1 + 1], 1.0);
192 #ifdef USE_OPENMP
193 #pragma omp parallel for private(i__)
194 #endif
195         for (i__ = 1; i__ <= i__1; ++i__)
196         {
197                 if (wa1[i__] > 0.)
198                 {
199                         wa2[i__] = 1. / std::sqrt(wa1[i__]);
200                 }
201         }
202         /*     Determine a lower bound for the step. */
203         if (*alpha <= 0.)
204         {
205                 alphas = .001;
206         }
207         else
208         {
209                 alphas = *alpha;
210         }
211         /*     Compute the initial shift. */
212         *alpha = 0.;
213         i__1 = *n;
214         for (i__ = 1; i__ <= i__1; ++i__)
215         {
216                 if (adiag[i__] == 0.)
217                 {
218                         *alpha = alphas;
219                 }
220                 else
221                 {
222                         *alpha = std::max<double>(*alpha, -adiag[i__] * (wa2[i__]
223                         * wa2[i__]));
224                 }
225         }
226         if (*alpha > 0.)
227         {
228                 *alpha = std::max<double>(*alpha, alphas);
229         }
230         /*     Search for an acceptable shift. During the search we decrease */
231         /*     the lower bound alphas until we Determine a lower bound that */
232         /*     is not acceptable. We then increase the shift. */
233         /*     The lower bound is decreased by nbfactor at most nbmax times. */
234         nb = 1;
235         for(;;)
236         {
237                 /*        Copy the sparsity structure of A into L. */
238                 i__1 = *n + 1;
239                 std::copy(&acol_ptr__[1], &acol_ptr__[i__1 + 1], &lcol_ptr__[1]);
240                 i__1 = *nnz;
241                 std::copy(&arow_ind__[1], &arow_ind__[i__1 + 1], &lrow_ind__[1]);
242                 /*        Scale A and store in the lower triangular matrix L. */
243                 i__1 = *n;
244 #ifdef USE_OPENMP
245 #pragma omp parallel for private(j)
246 #endif
247                 for (j = 1; j <= i__1; ++j)
248                 {
249                         /* Computing 2nd power */
250                         ldiag[j] = adiag[j] * (wa2[j] * wa2[j]) + *alpha;
251                 }
252                 i__1 = *n;
253 #ifdef USE_OPENMP
254 #pragma omp parallel for private(j)
255 #endif
256                 for (j = 1; j <= i__1; ++j)
257                 {
258 #ifdef USE_OPENMP
259 #pragma omp parallel for private(i__)
260 #endif
261                         for (i__ = acol_ptr__[j]; i__ <= acol_ptr__[j + 1] - 1; ++i__)
262                         {
263                                 l[i__] = a[i__] * wa2[j] * wa2[arow_ind__[i__]];
264                         }
265                 }
266                 /*        Attempt an incomplete factorization. */
267                 dicf_(n, nnz, &l[1], &ldiag[1], &lcol_ptr__[1], &lrow_ind__[1], p,
268                         &info, &iwa[1], &iwa[*n + 1], &iwa[(*n << 1) + 1], &wa1[1]);
269                 /*        If the factorization exists, then test for termination. */
270                 /*        Otherwise increment the shift. */
271                 if (info >= 0)
272                 {
273                         /*           If the shift is at the lower bound, reduce the shift. */
274                         /*           Otherwise undo the scaling of L and exit. */
275                         if (*alpha == alphas && nb < 3)
276                         {
277                                 alphas /= 512.;
278                                 *alpha = alphas;
279                                 ++nb;
280                         }
281                         else
282                         {
283                                 i__1 = *n;
284 #ifdef USE_OPENMP
285 #pragma omp parallel for private(i__)
286 #endif
287                                 for (i__ = 1; i__ <= i__1; ++i__)
288                                 {
289                                         ldiag[i__] /= wa2[i__];
290                                 }
291                                 i__1 = lcol_ptr__[*n + 1] - 1;
292 #ifdef USE_OPENMP
293 #pragma omp parallel for private(j)
294 #endif
295                                 for (j = 1; j <= i__1; ++j)
296                                 {
297                                         l[j] /= wa2[lrow_ind__[j]];
298                                 }
299                                 return 0;
300                         }
301                 }
302                 else
303                 {
304                         /* Computing std::max<double> */
305                         d__1 = *alpha * 2.;
306                         *alpha = std::max<double>(d__1, alphas);
307                 }
308         }
309         return 0;
310 } /* dicfs_ */
311 
312 /* Subroutine */
dicf_(int * n,int * nnz,double * a,double * diag,int * col_ptr__,int * row_ind__,int * p,int * info,int * indr,int * indf,int * list,double * w)313 int dicf_(int *n, int *nnz, double *a, double *diag, int *col_ptr__,
314                   int *row_ind__, int *p, int *info, int *indr, int *indf, int *list,
315                   double *w)
316 {
317         /* System generated locals */
318         int i__1, i__2;
319         //double d__1;
320 
321         /* Local variables */
322         static int i__, j, k, ip, np, iej, iek, mlj, nlj, isj, kth, isk;
323         static double lval;
324         static int newk;
325         static int newiej, newisj;
326 
327         /*     ********* */
328 
329         /*     Subroutine dicf */
330 
331         /*     Given a sparse symmetric matrix A in compressed row storage, */
332         /*     this subroutine computes an incomplete Cholesky factorization. */
333 
334         /*     Implementation of dicf is based on the Jones-Plassmann code. */
335         /*     Arrays indf and list define the data structure. */
336         /*     At the beginning of the computation of the j-th column, */
337 
338         /*       For k < j, indf(k) is the index of A for the first */
339         /*       nonzero l(i,k) in the k-th column with i >= j. */
340 
341         /*       For k < j, list(i) is a pointer to a linked list of column */
342         /*       indices k with i = row_ind(indf(k)). */
343 
344         /*     For the computation of the j-th column, the array indr records */
345         /*     the row indices. Hence, if nlj is the number of nonzeros in the */
346         /*     j-th column, then indr(1),...,indr(nlj) are the row indices. */
347         /*     Also, for i > j, indf(i) marks the row indices in the j-th */
348         /*     column so that indf(i) = 1 if l(i,j) is not zero. */
349 
350         /*     The subroutine statement is */
351 
352         /*       subroutine dicf(n,nnz,a,diag,col_ptr,row_ind,p,info, */
353         /*                       indr,indf,list,w) */
354 
355         /*     where */
356 
357         /*       n is an int variable. */
358         /*         On entry n is the order of A. */
359         /*         On exit n is unchanged. */
360 
361         /*       nnz is an int variable. */
362         /*         On entry nnz is the number of nonzeros in the strict lower */
363         /*            triangular part of A. */
364         /*         On exit nnz is unchanged. */
365 
366         /*       a is a double precision array of dimension nnz+n*p. */
367         /*         On entry the first nnz entries of a must contain the strict */
368         /*            lower triangular part of A in compressed column storage. */
369         /*         On exit a contains the strict lower triangular part */
370         /*            of L in compressed column storage. */
371 
372         /*       diag is a double precision array of dimension n. */
373         /*         On entry diag must contain the diagonal elements of A. */
374         /*         On exit diag contains the diagonal elements of L. */
375 
376         /*       col_ptr is an int array of dimension n + 1. */
377         /*         On entry col_ptr must contain pointers to the columns of A. */
378         /*            The nonzeros in column j of A must be in positions */
379         /*            col_ptr(j), ... , col_ptr(j+1) - 1. */
380         /*         On exit col_ptr contains pointers to the columns of L. */
381         /*            The nonzeros in column j of L are in the */
382         /*            col_ptr(j), ... , col_ptr(j+1) - 1 positions of l. */
383 
384         /*       row_ind is an int array of dimension nnz+n*p. */
385         /*         On entry row_ind must contain row indices for the strict */
386         /*            lower triangular part of A in compressed column storage. */
387         /*         On exit row_ind contains row indices for the strict lower */
388         /*            triangular part of L in compressed column storage. */
389 
390         /*       p is an int variable. */
391         /*         On entry p specifes the amount of memory available for the */
392         /*            incomplete Cholesky factorization. */
393         /*         On exit p is unchanged. */
394 
395         /*       info is an int variable. */
396         /*         On entry info need not be specified. */
397         /*         On exit info = 0 if the factorization succeeds, and */
398         /*            info < 0 if the -info pivot is not positive. */
399 
400         /*       indr is an int work array of dimension n. */
401 
402         /*       indf is an int work array of dimension n. */
403 
404         /*       list is an int work array of dimension n. */
405 
406         /*       w is a double precision work array of dimension n. */
407 
408         /*     Subprograms called */
409 
410         /*       MINPACK-2  ......  dsel2, ihsort, insort */
411 
412         /*       Level 1 BLAS  ...  daxpy, dcopy, ddot, dnrm2 */
413 
414         /*     MINPACK-2 Project. May 1998. */
415         /*     Argonne National Laboratory. */
416         /*     Chih-Jen Lin and Jorge J. More'. */
417 
418         /*     ********** */
419         /* Parameter adjustments */
420         --w;
421         --list;
422         --indf;
423         --indr;
424         --col_ptr__;
425         --diag;
426         --a;
427         --row_ind__;
428 
429         /* Function Body */
430         *info = 0;
431         i__1 = *n;
432         std::fill(&indf[1], &indf[i__1 + 1], 0);
433         std::fill(&list[1], &list[i__1 + 1], 0);
434         /*     Make room for L by moving A to the last n*p positions in a. */
435         np = *n * *p;
436         i__1 = *n + 1;
437 #ifdef USE_OPENMP
438 #pragma omp parallel for private(j)
439 #endif
440         for (j = 1; j <= i__1; ++j)
441         {
442                 col_ptr__[j] += np;
443         }
444         for (j = *nnz; j >= 1; --j)
445         {
446                 row_ind__[np + j] = row_ind__[j];
447                 a[np + j] = a[j];
448         }
449         /*     Compute the incomplete Cholesky factorization. */
450         isj = col_ptr__[1];
451         col_ptr__[1] = 1;
452         i__1 = *n;
453         for (j = 1; j <= i__1; ++j)
454         {
455                 /*        Load column j into the array w. The first and last elements */
456                 /*        of the j-th column of A are a(isj) and a(iej). */
457                 nlj = 0;
458                 iej = col_ptr__[j + 1] - 1;
459                 i__2 = iej;
460                 for (ip = isj; ip <= i__2; ++ip)
461                 {
462                         i__ = row_ind__[ip];
463                         w[i__] = a[ip];
464                         ++nlj;
465                         indr[nlj] = i__;
466                         indf[i__] = 1;
467                 }
468                 /*        Exit if the current pivot is not positive. */
469                 if (diag[j] <= 0.)
470                 {
471                         *info = -j;
472                         return 0;
473                 }
474                 diag[j] = std::sqrt(diag[j]);
475                 /*        Update column j using the previous columns. */
476                 k = list[j];
477                 while (k != 0)
478                 {
479                         isk = indf[k];
480                         iek = col_ptr__[k + 1] - 1;
481                         /*           Set lval to l(j,k). */
482                         lval = a[isk];
483                         /*           Update indf and list. */
484                         newk = list[k];
485                         ++isk;
486                         if (isk < iek)
487                         {
488                                 indf[k] = isk;
489                                 list[k] = list[row_ind__[isk]];
490                                 list[row_ind__[isk]] = k;
491                         }
492                         k = newk;
493                         /*           Compute the update a(i,i) <- a(i,j) - l(i,k)*l(j,k). */
494                         /*           In this loop we pick up l(i,k) for k < j and i > j. */
495                         i__2 = iek;
496                         for (ip = isk; ip <= i__2; ++ip)
497                         {
498                                 i__ = row_ind__[ip];
499                                 if (indf[i__] != 0)
500                                 {
501                                         w[i__] -= lval * a[ip];
502                                 }
503                                 else
504                                 {
505                                         indf[i__] = 1;
506                                         ++nlj;
507                                         indr[nlj] = i__;
508                                         w[i__] = -lval * a[ip];
509                                 }
510                         }
511                 }
512                 /*        Compute the j-th column of L. */
513                 i__2 = nlj;
514 #ifdef USE_OPENMP
515 #pragma omp parallel for private(k)
516 #endif
517                 for (k = 1; k <= i__2; ++k)
518                 {
519                         w[indr[k]] /= diag[j];
520                 }
521                 /*        Set mlj to the number of nonzeros to be retained. */
522                 /* Computing min */
523                 i__2 = iej - isj + 1 + *p;
524                 mlj = std::min<int>(i__2, nlj);
525                 kth = nlj - mlj + 1;
526                 if (nlj >= 1)
527                 {
528                         /*           Determine the kth smallest elements in the current */
529                         /*           column, and hence, the largest mlj elements. */
530                         dsel2_(&nlj, &w[1], &indr[1], &kth);
531                         /*           Sort the row indices of the selected elements. Insertion */
532                         /*           sort is used for small arrays, and heap sort for larger */
533                         /*           arrays. The sorting of the row indices is required so that */
534                         /*           we can retrieve l(i,k) with i > k from indf(k). */
535                         if (mlj <= 20)
536                         {
537                                 insort_(&mlj, &indr[kth]);
538                         }
539                         else
540                         {
541                                 ihsort_(&mlj, &indr[kth]);
542                         }
543                 }
544                 /*        Store the largest elements in L. The first and last elements */
545                 /*        of the j-th column of L are a(newisj) and a(newiej). */
546                 newisj = col_ptr__[j];
547                 newiej = newisj + mlj - 1;
548                 i__2 = newiej;
549 #ifdef USE_OPENMP
550 #pragma omp parallel for private(k)
551 #endif
552                 for (k = newisj; k <= i__2; ++k)
553                 {
554                         a[k] = w[indr[k - newisj + kth]];
555                         row_ind__[k] = indr[k - newisj + kth];
556                 }
557                 /*        Update the diagonal elements. */
558                 i__2 = nlj;
559 #ifdef USE_OPENMP
560 #pragma omp parallel for private(k)
561 #endif
562                 for (k = kth; k <= i__2; ++k)
563                 {
564                         /* Computing 2nd power */
565                         diag[indr[k]] -= w[indr[k]] * w[indr[k]];
566                 }
567                 /*        Update indf and list for the j-th column. */
568                 if (newisj < newiej)
569                 {
570                         indf[j] = newisj;
571                         list[j] = list[row_ind__[newisj]];
572                         list[row_ind__[newisj]] = j;
573                 }
574                 /*        Clear out elements j+1,...,n of the array indf. */
575                 i__2 = nlj;
576 #ifdef USE_OPENMP
577 #pragma omp parallel for private(k)
578 #endif
579                 for (k = 1; k <= i__2; ++k)
580                 {
581                         indf[indr[k]] = 0;
582                 }
583                 /*        Update isj and col_ptr. */
584                 isj = col_ptr__[j + 1];
585                 col_ptr__[j + 1] = newiej + 1;
586         }
587         return 0;
588 } /* dicf_ */
589 
590 /* Subroutine */
dsel2_(int * n,double * x,int * keys,int * k)591 int dsel2_(int *n, double *x, int *keys, int *k)
592 {
593         /* System generated locals */
594         int i__1;
595         //double d__1, d__2;
596 
597         /* Local variables */
598         static int i__, l, m, p, u, p1, p2, p3, lc, lp;
599 
600         /*     ********** */
601 
602         /*     Subroutine dsel2 */
603 
604         /*     Given an array x, this subroutine permutes the elements of the */
605         /*     array keys so that */
606 
607         /*       abs(x(keys(i))) <= abs(x(keys(k))),  1 <= i <= k, */
608         /*       abs(x(keys(k))) <= abs(x(keys(i))),  k <= i <= n. */
609 
610         /*     In other words, the smallest k elements of x in absolute value are */
611         /*     x(keys(i)), i = 1,...,k, and x(keys(k)) is the kth smallest element. */
612 
613         /*     The subroutine statement is */
614 
615         /*       subroutine dsel2(n,x,keys,k) */
616 
617         /*     where */
618 
619         /*       n is an int variable. */
620         /*         On entry n is the number of keys. */
621         /*         On exit n is unchanged. */
622 
623         /*       x is a double precision array of length *. */
624         /*         On entry x is the array to be sorted. */
625         /*         On exit x is unchanged. */
626 
627         /*       keys is an int array of length n. */
628         /*         On entry keys is the array of indices for x. */
629         /*         On exit keys is permuted so that the smallest k elements */
630         /*            of x in absolute value are x(keys(i)), i = 1,...,k, and */
631         /*            x(keys(k)) is the kth smallest element. */
632 
633         /*       k is an int. */
634         /*         On entry k specifes the kth largest element. */
635         /*         On exit k is unchanged. */
636 
637         /*     MINPACK-2 Project. March 1998. */
638         /*     Argonne National Laboratory. */
639         /*     William D. Kastak, Chih-Jen Lin, and Jorge J. More'. */
640 
641         /*     Revised October 1999. Length of x was incorrectly set to n. */
642 
643         /*     ********** */
644         /* Parameter adjustments */
645         --keys;
646         --x;
647 
648         /* Function Body */
649         if (*n <= 1 || *k <= 0 || *k > *n)
650         {
651                 return 0;
652         }
653         u = *n;
654         l = 1;
655         lc = *n;
656         lp = *n << 1;
657         /*     Start of iteration loop. */
658         while (l < u)
659         {
660                 /*        Choose the partition as the median of the elements in */
661                 /*        positions l+s*(u-l) for s = 0, 0.25, 0.5, 0.75, 1. */
662                 /*        Move the partition element into position l. */
663                 p1 = (u + l * 3) / 4;
664                 p2 = (u + l) / 2;
665                 p3 = (u * 3 + l) / 4;
666                 /*        Order the elements in positions l and p1. */
667                 if (std::fabs(x[keys[l]]) > std::fabs(x[keys[p1]]))
668                 {
669                         std::swap(keys[l], keys[p1]);
670                 }
671                 /*        Order the elements in positions p2 and p3. */
672                 if (std::fabs(x[keys[p2]]) > std::fabs(x[keys[p3]]))
673                 {
674                         std::swap(keys[p2], keys[p3]);
675                 }
676                 /*        Swap the larger of the elements in positions p1 */
677                 /*        and p3, with the element in position u, and reorder */
678                 /*        the first two pairs of elements as necessary. */
679                 if (std::fabs(x[keys[p3]]) > std::fabs(x[keys[p1]]))
680                 {
681                         std::swap(keys[p3], keys[u]);
682                         if (std::fabs(x[keys[p2]]) > std::fabs(x[keys[p3]]))
683                         {
684                                 std::swap(keys[p2], keys[p3]);
685                         }
686                 }
687                 else
688                 {
689                         std::swap(keys[p1], keys[u]);
690                         if (std::fabs(x[keys[l]]) > std::fabs(x[keys[p1]]))
691                         {
692                                 std::swap(keys[l], keys[p1]);
693                         }
694                 }
695                 /*        If we define a(i) = abs(x(keys(i)) for i = 1,..., n, we have */
696                 /*        permuted keys so that */
697 
698                 /*          a(l) <= a(p1), a(p2) <= a(p3), std::max<double>(a(p1),a(p3)) <= a(u). */
699 
700                 /*        Find the third largest element of the four remaining */
701                 /*        elements (the median), and place in position l. */
702                 if (std::fabs(x[keys[p1]]) > std::fabs(x[keys[p3]]))
703                 {
704                         if (std::fabs(x[keys[l]]) <= std::fabs(x[keys[p3]]))
705                         {
706                                 std::swap(keys[l], keys[p3]);
707                         }
708                 }
709                 else
710                 {
711                         if (std::fabs(x[keys[p2]]) <= std::fabs(x[keys[p1]]))
712                         {
713                                 std::swap(keys[l], keys[p1]);
714                         }
715                         else
716                         {
717                                 std::swap(keys[l], keys[p2]);
718                         }
719                 }
720                 /*        Partition the array about the element in position l. */
721                 m = l;
722                 i__1 = u;
723                 for (i__ = l + 1; i__ <= i__1; ++i__)
724                 {
725                         if (std::fabs(x[keys[i__]]) < std::fabs(x[keys[l]]))
726                         {
727                                 ++m;
728                                 std::swap(keys[m], keys[i__]);
729                         }
730                 }
731                 /*        Move the partition element into position m. */
732                 std::swap(keys[l], keys[m]);
733                 /*        Adjust the values of l and u. */
734                 if (*k >= m)
735                 {
736                         l = m + 1;
737                 }
738                 if (*k <= m)
739                 {
740                         u = m - 1;
741                 }
742                 /*        Check for multiple medians if the length of the subarray */
743                 /*        has not decreased by 1/3 after two consecutive iterations. */
744                 if ((u - l) * 3 > lp << 1 && *k > m)
745                 {
746                         /*           Partition the remaining elements into those elements */
747                         /*           equal to x(m), and those greater than x(m). Adjust */
748                         /*           the values of l and u. */
749                         p = m;
750                         i__1 = u;
751                         for (i__ = m + 1; i__ <= i__1; ++i__)
752                         {
753                                 if (std::fabs(x[keys[i__]]) == std::fabs(x[keys[m]]))
754                                 {
755                                         ++p;
756                                         std::swap(keys[p], keys[i__]);
757                                 }
758                         }
759                         l = p + 1;
760                         if (*k <= p)
761                         {
762                                 u = p - 1;
763                         }
764                 }
765                 /*        Update the length indicators for the subarray. */
766                 lp = lc;
767                 lc = u - l;
768         }
769         return 0;
770 } /* dsel2_ */
771 
772 /* Subroutine */
insort_(int * n,int * keys)773 int insort_(int *n, int *keys)
774 {
775         /* System generated locals */
776         int i__1;
777 
778         /* Local variables */
779         static int i__, j, ind;
780 
781         /*     ********** */
782 
783         /*     Subroutine insort */
784 
785         /*     Given an int array keys of length n, this subroutine uses */
786         /*     an insertion sort to sort the keys in increasing order. */
787 
788         /*     The subroutine statement is */
789 
790         /*       subroutine insort(n,keys) */
791 
792         /*     where */
793 
794         /*       n is an int variable. */
795         /*         On entry n is the number of keys. */
796         /*         On exit n is unchanged. */
797 
798         /*       keys is an int array of length n. */
799         /*         On entry keys is the array to be sorted. */
800         /*         On exit keys is permuted to increasing order. */
801 
802         /*     MINPACK-2 Project. March 1998. */
803         /*     Argonne National Laboratory. */
804         /*     Chih-Jen Lin and Jorge J. More'. */
805 
806         /*     ********** */
807         /* Parameter adjustments */
808         --keys;
809 
810         /* Function Body */
811         i__1 = *n;
812         for (j = 2; j <= i__1; ++j)
813         {
814                 ind = keys[j];
815                 i__ = j - 1;
816                 while (i__ > 0 && keys[i__] > ind)
817                 {
818                         keys[i__ + 1] = keys[i__];
819                         --i__;
820                 }
821                 keys[i__ + 1] = ind;
822         }
823         return 0;
824 } /* insort_ */
825 
826 /* Subroutine */
ihsort_(int * n,int * keys)827 int ihsort_(int *n, int *keys)
828 {
829         static int k, m, x, mid, lheap, rheap;
830 
831         /*     ********** */
832 
833         /*     Subroutine ihsort */
834 
835         /*     Given an int array keys of length n, this subroutine uses */
836         /*     a heap sort to sort the keys in increasing order. */
837 
838         /*     This subroutine is a minor modification of code written by */
839         /*     Mark Jones and Paul Plassmann. */
840 
841         /*     The subroutine statement is */
842 
843         /*       subroutine ihsort(n,keys) */
844 
845         /*     where */
846 
847         /*       n is an int variable. */
848         /*         On entry n is the number of keys. */
849         /*         On exit n is unchanged. */
850 
851         /*       keys is an int array of length n. */
852         /*         On entry keys is the array to be sorted. */
853         /*         On exit keys is permuted to increasing order. */
854 
855         /*     MINPACK-2 Project. March 1998. */
856         /*     Argonne National Laboratory. */
857         /*     Chih-Jen Lin and Jorge J. More'. */
858 
859         /*     ********** */
860         /* Parameter adjustments */
861         --keys;
862 
863         /* Function Body */
864         if (*n <= 1)
865         {
866                 return 0;
867         }
868         /*     Build the heap. */
869         mid = *n / 2;
870         for (k = mid; k >= 1; --k)
871         {
872                 x = keys[k];
873                 lheap = k;
874                 rheap = *n;
875                 m = lheap << 1;
876                 while (m <= rheap)
877                 {
878                         if (m < rheap)
879                         {
880                                 if (keys[m] < keys[m + 1])
881                                 {
882                                         ++m;
883                                 }
884                         }
885                         if (x >= keys[m])
886                         {
887                                 m = rheap + 1;
888                         }
889                         else
890                         {
891                                 keys[lheap] = keys[m];
892                                 lheap = m;
893                                 m = lheap << 1;
894                         }
895                 }
896                 keys[lheap] = x;
897         }
898         /*     Sort the heap. */
899         for (k = *n; k >= 2; --k)
900         {
901                 x = keys[k];
902                 keys[k] = keys[1];
903                 lheap = 1;
904                 rheap = k - 1;
905                 m = 2;
906                 while (m <= rheap)
907                 {
908                         if (m < rheap)
909                         {
910                                 if (keys[m] < keys[m + 1])
911                                 {
912                                         ++m;
913                                 }
914                         }
915                         if (x >= keys[m])
916                         {
917                                 m = rheap + 1;
918                         }
919                         else
920                         {
921                                 keys[lheap] = keys[m];
922                                 lheap = m;
923                                 m = lheap << 1;
924                         }
925                 }
926                 keys[lheap] = x;
927         }
928         return 0;
929 } /* ihsort_ */
930 
931 /* Subroutine */
dstrsol_(int * n,double * l,double * ldiag,int * jptr,int * indr,double * r__,char * task)932 int dstrsol_(int *n, double *l, double *ldiag, int *jptr, int *indr,
933                          double *r__, char *task)
934 {
935         /* System generated locals */
936         int i__1, i__2;
937 
938         /* Local variables */
939         static int j, k;
940         static double temp;
941 
942         /*     ********** */
943 
944         /*     Subroutine dstrsol */
945 
946         /*     This subroutine solves the triangular systems L*x = r or L'*x = r. */
947 
948         /*     The subroutine statement is */
949 
950         /*       subroutine dstrsol(n,l,ldiag,jptr,indr,r,task) */
951 
952         /*     where */
953 
954         /*       n is an int variable. */
955         /*         On entry n is the order of L. */
956         /*         On exit n is unchanged. */
957 
958         /*       l is a double precision array of dimension *. */
959         /*         On entry l must contain the nonzeros in the strict lower */
960         /*            triangular part of L in compressed column storage. */
961         /*         On exit l is unchanged. */
962 
963         /*       ldiag is a double precision array of dimension n. */
964         /*         On entry ldiag must contain the diagonal elements of L. */
965         /*         On exit ldiag is unchanged. */
966 
967         /*       jptr is an int array of dimension n + 1. */
968         /*         On entry jptr must contain pointers to the columns of A. */
969         /*            The nonzeros in column j of A must be in positions */
970         /*            jptr(j), ... , jptr(j+1) - 1. */
971         /*         On exit jptr is unchanged. */
972 
973         /*       indr is an int array of dimension *. */
974         /*         On entry indr must contain row indices for the strict */
975         /*            lower triangular part of L in compressed column storage. */
976         /*         On exit indr is unchanged. */
977 
978         /*       r is a double precision array of dimension n. */
979         /*         On entry r must contain the vector r. */
980         /*         On exit r contains the solution vector x. */
981 
982         /*       task is a character variable of length 60. */
983         /*         On entry */
984         /*            task(1:1) = 'N' if we need to solve L*x = r */
985         /*            task(1:1) = 'T' if we need to solve L'*x = r */
986         /*         On exit task is unchanged. */
987 
988         /*     MINPACK-2 Project. May 1998. */
989         /*     Argonne National Laboratory. */
990 
991         /*     ********** */
992         /*     Solve L*x =r and store the result in r. */
993         /* Parameter adjustments */
994         --r__;
995         --jptr;
996         --ldiag;
997         --l;
998         --indr;
999 
1000         /* Function Body */
1001         if (*(unsigned char *) task == 'N')
1002         {
1003                 i__1 = *n;
1004                 for (j = 1; j <= i__1; ++j)
1005                 {
1006                         temp = r__[j] / ldiag[j];
1007                         i__2 = jptr[j + 1] - 1;
1008 #ifdef USE_OPENMP
1009 #pragma omp parallel for private(k)
1010 #endif
1011                         for (k = jptr[j]; k <= i__2; ++k)
1012                         {
1013                                 r__[indr[k]] -= l[k] * temp;
1014                         }
1015                         r__[j] = temp;
1016                 }
1017                 return 0;
1018         }
1019         /*     Solve L'*x =r and store the result in r. */
1020         if (*(unsigned char *) task == 'T')
1021         {
1022                 r__[*n] /= ldiag[*n];
1023                 for (j = *n - 1; j >= 1; --j)
1024                 {
1025                         temp = 0.;
1026                         i__1 = jptr[j + 1] - 1;
1027 #ifdef USE_OPENMP
1028 #pragma omp parallel for private(k) reduction(+:temp)
1029 #endif
1030                         for (k = jptr[j]; k <= i__1; ++k)
1031                         {
1032                                 temp += l[k] * r__[indr[k]];
1033                         }
1034                         r__[j] = (r__[j] - temp) / ldiag[j];
1035                 }
1036                 return 0;
1037         }
1038         return 0;
1039 } /* dstrsol_ */
1040 
1041