1 #include "ICFS.h"
2 #include <cmath>
3 #include <algorithm>
4
5 #ifdef USE_OPENMP
6 #include <omp.h>
7 #endif
8
9 #include "ICFS.h"
10
11 /*****************************************************************
12
13 COPYRIGHT NOTIFICATION
14
15 This program discloses material protectable under copyright laws of
16 the United States. Permission to copy and modify this software and its
17 documentation for internal research use is hereby granted, provided
18 that this notice is retained thereon and on all copies or modifications.
19 The University of Chicago makes no representations as to the suitability
20 and operability of this software for any purpose.
21 It is provided "as is" without express or implied warranty.
22
23 Use of this software for commercial purposes is expressly prohibited
24 without contacting
25
26 Jorge J. More'
27 Mathematics and Computer Science Division
28 Argonne National Laboratory
29 9700 S. Cass Ave.
30 Argonne, Illinois 60439-4844
31 e-mail: more@mcs.anl.gov
32
33 Argonne National Laboratory with facilities in the states of
34 Illinois and Idaho, is owned by The United States Government, and
35 operated by the University of Chicago under provision of a contract
36 with the Department of Energy.
37
38 *****************************************************************/
39
dicfs_(int * n,int * nnz,double * a,double * adiag,int * acol_ptr__,int * arow_ind__,double * l,double * ldiag,int * lcol_ptr__,int * lrow_ind__,int * p,double * alpha,int * iwa,double * wa1,double * wa2)40 /* Subroutine */int dicfs_(int *n, int *nnz, double *a, double *adiag,
41 int *acol_ptr__, int *arow_ind__, double *l, double *ldiag,
42 int *lcol_ptr__, int * lrow_ind__, int *p, double *alpha, int *iwa,
43 double * wa1, double *wa2)
44 {
45 /* System generated locals */
46 int i__1, i__2;
47 double d__1/*, d__2, d__3*/;
48
49 /* Local variables */
50 static int i__, j, k, nb;
51 static int info;
52 static double alphas;
53
54 /* ********* */
55
56 /* Subroutine dicfs */
57
58 /* Given a symmetric matrix A in compressed column storage, this */
59 /* subroutine computes an incomplete Cholesky factor of A + alpha*D, */
60 /* where alpha is a shift and D is the diagonal matrix with entries */
61 /* set to the l2 norms of the columns of A. */
62
63 /* The subroutine statement is */
64
65 /* subroutine dicfs(n,nnz,a,adiag,acol_ptr,arow_ind, */
66 /* l,ldiag,lcol_ptr,lrow_ind, */
67 /* p,alpha,iwa,wa1,wa2) */
68
69 /* where */
70
71 /* n is an int variable. */
72 /* On entry n is the order of A. */
73 /* On exit n is unchanged. */
74
75 /* nnz is an int variable. */
76 /* On entry nnz is the number of nonzeros in the strict lower */
77 /* triangular part of A. */
78 /* On exit nnz is unchanged. */
79
80 /* a is a double precision array of dimension nnz. */
81 /* On entry a must contain the strict lower triangular part */
82 /* of A in compressed column storage. */
83 /* On exit a is unchanged. */
84
85 /* adiag is a double precision array of dimension n. */
86 /* On entry adiag must contain the diagonal elements of A. */
87 /* On exit adiag is unchanged. */
88
89 /* acol_ptr is an int array of dimension n + 1. */
90 /* On entry acol_ptr must contain pointers to the columns of A. */
91 /* The nonzeros in column j of A must be in positions */
92 /* acol_ptr(j), ... , acol_ptr(j+1) - 1. */
93 /* On exit acol_ptr is unchanged. */
94
95 /* arow_ind is an int array of dimension nnz. */
96 /* On entry arow_ind must contain row indices for the strict */
97 /* lower triangular part of A in compressed column storage. */
98 /* On exit arow_ind is unchanged. */
99
100 /* l is a double precision array of dimension nnz+n*p. */
101 /* On entry l need not be specified. */
102 /* On exit l contains the strict lower triangular part */
103 /* of L in compressed column storage. */
104
105 /* ldiag is a double precision array of dimension n. */
106 /* On entry ldiag need not be specified. */
107 /* On exit ldiag contains the diagonal elements of L. */
108
109 /* lcol_ptr is an int array of dimension n + 1. */
110 /* On entry lcol_ptr need not be specified. */
111 /* On exit lcol_ptr contains pointers to the columns of L. */
112 /* The nonzeros in column j of L are in the */
113 /* lcol_ptr(j), ... , lcol_ptr(j+1) - 1 positions of l. */
114
115 /* lrow_ind is an int array of dimension nnz+n*p. */
116 /* On entry lrow_ind need not be specified. */
117 /* On exit lrow_ind contains row indices for the strict lower */
118 /* triangular part of L in compressed column storage. */
119
120 /* p is an int variable. */
121 /* On entry p specifes the amount of memory available for the */
122 /* incomplete Cholesky factorization. */
123 /* On exit p is unchanged. */
124
125 /* alpha is a double precision variable. */
126 /* On entry alpha is the initial guess of the shift. */
127 /* On exit alpha is final shift */
128
129 /* iwa is an int work array of dimesnion 3*n. */
130
131 /* wa1 is a double precision work array of dimension n. */
132
133 /* wa2 is a double precision work array of dimension n. */
134
135 /* Subprograms called */
136
137 /* MINPACK-2 ...... dicf */
138
139 /* MINPACK-2 Project. October 1998. */
140 /* Argonne National Laboratory. */
141 /* Chih-Jen Lin and Jorge J. More'. */
142
143 /* ********** */
144 /* Compute the l2 norms of the columns of A. */
145 /* Parameter adjustments */
146 --wa2;
147 --wa1;
148 --iwa;
149 --lcol_ptr__;
150 --ldiag;
151 --acol_ptr__;
152 --adiag;
153 --arow_ind__;
154 --a;
155 --lrow_ind__;
156 --l;
157
158 /* Function Body */
159 i__1 = *n;
160 #ifdef USE_OPENMP
161 #pragma omp parallel for private(i__)
162 #endif
163 for (i__ = 1; i__ <= i__1; ++i__)
164 {
165 /* Computing 2nd power */
166 wa1[i__] = adiag[i__] * adiag[i__];
167 }
168 i__1 = *n;
169 for (j = 1; j <= i__1; ++j)
170 {
171 i__2 = acol_ptr__[j + 1] - 1;
172 for (i__ = acol_ptr__[j]; i__ <= i__2; ++i__)
173 {
174 k = arow_ind__[i__];
175 /* Computing 2nd power */
176 d__1 = a[i__] * a[i__];
177 wa1[j] += d__1;
178 wa1[k] += d__1;
179 }
180 }
181 i__1 = *n;
182 #ifdef USE_OPENMP
183 #pragma omp parallel for private(j)
184 #endif
185 for (j = 1; j <= i__1; ++j)
186 {
187 wa1[j] = std::sqrt(wa1[j]);
188 }
189 /* Compute the scaling matrix D. */
190 i__1 = *n;
191 std::fill(&wa2[1], &wa2[i__1 + 1], 1.0);
192 #ifdef USE_OPENMP
193 #pragma omp parallel for private(i__)
194 #endif
195 for (i__ = 1; i__ <= i__1; ++i__)
196 {
197 if (wa1[i__] > 0.)
198 {
199 wa2[i__] = 1. / std::sqrt(wa1[i__]);
200 }
201 }
202 /* Determine a lower bound for the step. */
203 if (*alpha <= 0.)
204 {
205 alphas = .001;
206 }
207 else
208 {
209 alphas = *alpha;
210 }
211 /* Compute the initial shift. */
212 *alpha = 0.;
213 i__1 = *n;
214 for (i__ = 1; i__ <= i__1; ++i__)
215 {
216 if (adiag[i__] == 0.)
217 {
218 *alpha = alphas;
219 }
220 else
221 {
222 *alpha = std::max<double>(*alpha, -adiag[i__] * (wa2[i__]
223 * wa2[i__]));
224 }
225 }
226 if (*alpha > 0.)
227 {
228 *alpha = std::max<double>(*alpha, alphas);
229 }
230 /* Search for an acceptable shift. During the search we decrease */
231 /* the lower bound alphas until we Determine a lower bound that */
232 /* is not acceptable. We then increase the shift. */
233 /* The lower bound is decreased by nbfactor at most nbmax times. */
234 nb = 1;
235 for(;;)
236 {
237 /* Copy the sparsity structure of A into L. */
238 i__1 = *n + 1;
239 std::copy(&acol_ptr__[1], &acol_ptr__[i__1 + 1], &lcol_ptr__[1]);
240 i__1 = *nnz;
241 std::copy(&arow_ind__[1], &arow_ind__[i__1 + 1], &lrow_ind__[1]);
242 /* Scale A and store in the lower triangular matrix L. */
243 i__1 = *n;
244 #ifdef USE_OPENMP
245 #pragma omp parallel for private(j)
246 #endif
247 for (j = 1; j <= i__1; ++j)
248 {
249 /* Computing 2nd power */
250 ldiag[j] = adiag[j] * (wa2[j] * wa2[j]) + *alpha;
251 }
252 i__1 = *n;
253 #ifdef USE_OPENMP
254 #pragma omp parallel for private(j)
255 #endif
256 for (j = 1; j <= i__1; ++j)
257 {
258 #ifdef USE_OPENMP
259 #pragma omp parallel for private(i__)
260 #endif
261 for (i__ = acol_ptr__[j]; i__ <= acol_ptr__[j + 1] - 1; ++i__)
262 {
263 l[i__] = a[i__] * wa2[j] * wa2[arow_ind__[i__]];
264 }
265 }
266 /* Attempt an incomplete factorization. */
267 dicf_(n, nnz, &l[1], &ldiag[1], &lcol_ptr__[1], &lrow_ind__[1], p,
268 &info, &iwa[1], &iwa[*n + 1], &iwa[(*n << 1) + 1], &wa1[1]);
269 /* If the factorization exists, then test for termination. */
270 /* Otherwise increment the shift. */
271 if (info >= 0)
272 {
273 /* If the shift is at the lower bound, reduce the shift. */
274 /* Otherwise undo the scaling of L and exit. */
275 if (*alpha == alphas && nb < 3)
276 {
277 alphas /= 512.;
278 *alpha = alphas;
279 ++nb;
280 }
281 else
282 {
283 i__1 = *n;
284 #ifdef USE_OPENMP
285 #pragma omp parallel for private(i__)
286 #endif
287 for (i__ = 1; i__ <= i__1; ++i__)
288 {
289 ldiag[i__] /= wa2[i__];
290 }
291 i__1 = lcol_ptr__[*n + 1] - 1;
292 #ifdef USE_OPENMP
293 #pragma omp parallel for private(j)
294 #endif
295 for (j = 1; j <= i__1; ++j)
296 {
297 l[j] /= wa2[lrow_ind__[j]];
298 }
299 return 0;
300 }
301 }
302 else
303 {
304 /* Computing std::max<double> */
305 d__1 = *alpha * 2.;
306 *alpha = std::max<double>(d__1, alphas);
307 }
308 }
309 return 0;
310 } /* dicfs_ */
311
312 /* Subroutine */
dicf_(int * n,int * nnz,double * a,double * diag,int * col_ptr__,int * row_ind__,int * p,int * info,int * indr,int * indf,int * list,double * w)313 int dicf_(int *n, int *nnz, double *a, double *diag, int *col_ptr__,
314 int *row_ind__, int *p, int *info, int *indr, int *indf, int *list,
315 double *w)
316 {
317 /* System generated locals */
318 int i__1, i__2;
319 //double d__1;
320
321 /* Local variables */
322 static int i__, j, k, ip, np, iej, iek, mlj, nlj, isj, kth, isk;
323 static double lval;
324 static int newk;
325 static int newiej, newisj;
326
327 /* ********* */
328
329 /* Subroutine dicf */
330
331 /* Given a sparse symmetric matrix A in compressed row storage, */
332 /* this subroutine computes an incomplete Cholesky factorization. */
333
334 /* Implementation of dicf is based on the Jones-Plassmann code. */
335 /* Arrays indf and list define the data structure. */
336 /* At the beginning of the computation of the j-th column, */
337
338 /* For k < j, indf(k) is the index of A for the first */
339 /* nonzero l(i,k) in the k-th column with i >= j. */
340
341 /* For k < j, list(i) is a pointer to a linked list of column */
342 /* indices k with i = row_ind(indf(k)). */
343
344 /* For the computation of the j-th column, the array indr records */
345 /* the row indices. Hence, if nlj is the number of nonzeros in the */
346 /* j-th column, then indr(1),...,indr(nlj) are the row indices. */
347 /* Also, for i > j, indf(i) marks the row indices in the j-th */
348 /* column so that indf(i) = 1 if l(i,j) is not zero. */
349
350 /* The subroutine statement is */
351
352 /* subroutine dicf(n,nnz,a,diag,col_ptr,row_ind,p,info, */
353 /* indr,indf,list,w) */
354
355 /* where */
356
357 /* n is an int variable. */
358 /* On entry n is the order of A. */
359 /* On exit n is unchanged. */
360
361 /* nnz is an int variable. */
362 /* On entry nnz is the number of nonzeros in the strict lower */
363 /* triangular part of A. */
364 /* On exit nnz is unchanged. */
365
366 /* a is a double precision array of dimension nnz+n*p. */
367 /* On entry the first nnz entries of a must contain the strict */
368 /* lower triangular part of A in compressed column storage. */
369 /* On exit a contains the strict lower triangular part */
370 /* of L in compressed column storage. */
371
372 /* diag is a double precision array of dimension n. */
373 /* On entry diag must contain the diagonal elements of A. */
374 /* On exit diag contains the diagonal elements of L. */
375
376 /* col_ptr is an int array of dimension n + 1. */
377 /* On entry col_ptr must contain pointers to the columns of A. */
378 /* The nonzeros in column j of A must be in positions */
379 /* col_ptr(j), ... , col_ptr(j+1) - 1. */
380 /* On exit col_ptr contains pointers to the columns of L. */
381 /* The nonzeros in column j of L are in the */
382 /* col_ptr(j), ... , col_ptr(j+1) - 1 positions of l. */
383
384 /* row_ind is an int array of dimension nnz+n*p. */
385 /* On entry row_ind must contain row indices for the strict */
386 /* lower triangular part of A in compressed column storage. */
387 /* On exit row_ind contains row indices for the strict lower */
388 /* triangular part of L in compressed column storage. */
389
390 /* p is an int variable. */
391 /* On entry p specifes the amount of memory available for the */
392 /* incomplete Cholesky factorization. */
393 /* On exit p is unchanged. */
394
395 /* info is an int variable. */
396 /* On entry info need not be specified. */
397 /* On exit info = 0 if the factorization succeeds, and */
398 /* info < 0 if the -info pivot is not positive. */
399
400 /* indr is an int work array of dimension n. */
401
402 /* indf is an int work array of dimension n. */
403
404 /* list is an int work array of dimension n. */
405
406 /* w is a double precision work array of dimension n. */
407
408 /* Subprograms called */
409
410 /* MINPACK-2 ...... dsel2, ihsort, insort */
411
412 /* Level 1 BLAS ... daxpy, dcopy, ddot, dnrm2 */
413
414 /* MINPACK-2 Project. May 1998. */
415 /* Argonne National Laboratory. */
416 /* Chih-Jen Lin and Jorge J. More'. */
417
418 /* ********** */
419 /* Parameter adjustments */
420 --w;
421 --list;
422 --indf;
423 --indr;
424 --col_ptr__;
425 --diag;
426 --a;
427 --row_ind__;
428
429 /* Function Body */
430 *info = 0;
431 i__1 = *n;
432 std::fill(&indf[1], &indf[i__1 + 1], 0);
433 std::fill(&list[1], &list[i__1 + 1], 0);
434 /* Make room for L by moving A to the last n*p positions in a. */
435 np = *n * *p;
436 i__1 = *n + 1;
437 #ifdef USE_OPENMP
438 #pragma omp parallel for private(j)
439 #endif
440 for (j = 1; j <= i__1; ++j)
441 {
442 col_ptr__[j] += np;
443 }
444 for (j = *nnz; j >= 1; --j)
445 {
446 row_ind__[np + j] = row_ind__[j];
447 a[np + j] = a[j];
448 }
449 /* Compute the incomplete Cholesky factorization. */
450 isj = col_ptr__[1];
451 col_ptr__[1] = 1;
452 i__1 = *n;
453 for (j = 1; j <= i__1; ++j)
454 {
455 /* Load column j into the array w. The first and last elements */
456 /* of the j-th column of A are a(isj) and a(iej). */
457 nlj = 0;
458 iej = col_ptr__[j + 1] - 1;
459 i__2 = iej;
460 for (ip = isj; ip <= i__2; ++ip)
461 {
462 i__ = row_ind__[ip];
463 w[i__] = a[ip];
464 ++nlj;
465 indr[nlj] = i__;
466 indf[i__] = 1;
467 }
468 /* Exit if the current pivot is not positive. */
469 if (diag[j] <= 0.)
470 {
471 *info = -j;
472 return 0;
473 }
474 diag[j] = std::sqrt(diag[j]);
475 /* Update column j using the previous columns. */
476 k = list[j];
477 while (k != 0)
478 {
479 isk = indf[k];
480 iek = col_ptr__[k + 1] - 1;
481 /* Set lval to l(j,k). */
482 lval = a[isk];
483 /* Update indf and list. */
484 newk = list[k];
485 ++isk;
486 if (isk < iek)
487 {
488 indf[k] = isk;
489 list[k] = list[row_ind__[isk]];
490 list[row_ind__[isk]] = k;
491 }
492 k = newk;
493 /* Compute the update a(i,i) <- a(i,j) - l(i,k)*l(j,k). */
494 /* In this loop we pick up l(i,k) for k < j and i > j. */
495 i__2 = iek;
496 for (ip = isk; ip <= i__2; ++ip)
497 {
498 i__ = row_ind__[ip];
499 if (indf[i__] != 0)
500 {
501 w[i__] -= lval * a[ip];
502 }
503 else
504 {
505 indf[i__] = 1;
506 ++nlj;
507 indr[nlj] = i__;
508 w[i__] = -lval * a[ip];
509 }
510 }
511 }
512 /* Compute the j-th column of L. */
513 i__2 = nlj;
514 #ifdef USE_OPENMP
515 #pragma omp parallel for private(k)
516 #endif
517 for (k = 1; k <= i__2; ++k)
518 {
519 w[indr[k]] /= diag[j];
520 }
521 /* Set mlj to the number of nonzeros to be retained. */
522 /* Computing min */
523 i__2 = iej - isj + 1 + *p;
524 mlj = std::min<int>(i__2, nlj);
525 kth = nlj - mlj + 1;
526 if (nlj >= 1)
527 {
528 /* Determine the kth smallest elements in the current */
529 /* column, and hence, the largest mlj elements. */
530 dsel2_(&nlj, &w[1], &indr[1], &kth);
531 /* Sort the row indices of the selected elements. Insertion */
532 /* sort is used for small arrays, and heap sort for larger */
533 /* arrays. The sorting of the row indices is required so that */
534 /* we can retrieve l(i,k) with i > k from indf(k). */
535 if (mlj <= 20)
536 {
537 insort_(&mlj, &indr[kth]);
538 }
539 else
540 {
541 ihsort_(&mlj, &indr[kth]);
542 }
543 }
544 /* Store the largest elements in L. The first and last elements */
545 /* of the j-th column of L are a(newisj) and a(newiej). */
546 newisj = col_ptr__[j];
547 newiej = newisj + mlj - 1;
548 i__2 = newiej;
549 #ifdef USE_OPENMP
550 #pragma omp parallel for private(k)
551 #endif
552 for (k = newisj; k <= i__2; ++k)
553 {
554 a[k] = w[indr[k - newisj + kth]];
555 row_ind__[k] = indr[k - newisj + kth];
556 }
557 /* Update the diagonal elements. */
558 i__2 = nlj;
559 #ifdef USE_OPENMP
560 #pragma omp parallel for private(k)
561 #endif
562 for (k = kth; k <= i__2; ++k)
563 {
564 /* Computing 2nd power */
565 diag[indr[k]] -= w[indr[k]] * w[indr[k]];
566 }
567 /* Update indf and list for the j-th column. */
568 if (newisj < newiej)
569 {
570 indf[j] = newisj;
571 list[j] = list[row_ind__[newisj]];
572 list[row_ind__[newisj]] = j;
573 }
574 /* Clear out elements j+1,...,n of the array indf. */
575 i__2 = nlj;
576 #ifdef USE_OPENMP
577 #pragma omp parallel for private(k)
578 #endif
579 for (k = 1; k <= i__2; ++k)
580 {
581 indf[indr[k]] = 0;
582 }
583 /* Update isj and col_ptr. */
584 isj = col_ptr__[j + 1];
585 col_ptr__[j + 1] = newiej + 1;
586 }
587 return 0;
588 } /* dicf_ */
589
590 /* Subroutine */
dsel2_(int * n,double * x,int * keys,int * k)591 int dsel2_(int *n, double *x, int *keys, int *k)
592 {
593 /* System generated locals */
594 int i__1;
595 //double d__1, d__2;
596
597 /* Local variables */
598 static int i__, l, m, p, u, p1, p2, p3, lc, lp;
599
600 /* ********** */
601
602 /* Subroutine dsel2 */
603
604 /* Given an array x, this subroutine permutes the elements of the */
605 /* array keys so that */
606
607 /* abs(x(keys(i))) <= abs(x(keys(k))), 1 <= i <= k, */
608 /* abs(x(keys(k))) <= abs(x(keys(i))), k <= i <= n. */
609
610 /* In other words, the smallest k elements of x in absolute value are */
611 /* x(keys(i)), i = 1,...,k, and x(keys(k)) is the kth smallest element. */
612
613 /* The subroutine statement is */
614
615 /* subroutine dsel2(n,x,keys,k) */
616
617 /* where */
618
619 /* n is an int variable. */
620 /* On entry n is the number of keys. */
621 /* On exit n is unchanged. */
622
623 /* x is a double precision array of length *. */
624 /* On entry x is the array to be sorted. */
625 /* On exit x is unchanged. */
626
627 /* keys is an int array of length n. */
628 /* On entry keys is the array of indices for x. */
629 /* On exit keys is permuted so that the smallest k elements */
630 /* of x in absolute value are x(keys(i)), i = 1,...,k, and */
631 /* x(keys(k)) is the kth smallest element. */
632
633 /* k is an int. */
634 /* On entry k specifes the kth largest element. */
635 /* On exit k is unchanged. */
636
637 /* MINPACK-2 Project. March 1998. */
638 /* Argonne National Laboratory. */
639 /* William D. Kastak, Chih-Jen Lin, and Jorge J. More'. */
640
641 /* Revised October 1999. Length of x was incorrectly set to n. */
642
643 /* ********** */
644 /* Parameter adjustments */
645 --keys;
646 --x;
647
648 /* Function Body */
649 if (*n <= 1 || *k <= 0 || *k > *n)
650 {
651 return 0;
652 }
653 u = *n;
654 l = 1;
655 lc = *n;
656 lp = *n << 1;
657 /* Start of iteration loop. */
658 while (l < u)
659 {
660 /* Choose the partition as the median of the elements in */
661 /* positions l+s*(u-l) for s = 0, 0.25, 0.5, 0.75, 1. */
662 /* Move the partition element into position l. */
663 p1 = (u + l * 3) / 4;
664 p2 = (u + l) / 2;
665 p3 = (u * 3 + l) / 4;
666 /* Order the elements in positions l and p1. */
667 if (std::fabs(x[keys[l]]) > std::fabs(x[keys[p1]]))
668 {
669 std::swap(keys[l], keys[p1]);
670 }
671 /* Order the elements in positions p2 and p3. */
672 if (std::fabs(x[keys[p2]]) > std::fabs(x[keys[p3]]))
673 {
674 std::swap(keys[p2], keys[p3]);
675 }
676 /* Swap the larger of the elements in positions p1 */
677 /* and p3, with the element in position u, and reorder */
678 /* the first two pairs of elements as necessary. */
679 if (std::fabs(x[keys[p3]]) > std::fabs(x[keys[p1]]))
680 {
681 std::swap(keys[p3], keys[u]);
682 if (std::fabs(x[keys[p2]]) > std::fabs(x[keys[p3]]))
683 {
684 std::swap(keys[p2], keys[p3]);
685 }
686 }
687 else
688 {
689 std::swap(keys[p1], keys[u]);
690 if (std::fabs(x[keys[l]]) > std::fabs(x[keys[p1]]))
691 {
692 std::swap(keys[l], keys[p1]);
693 }
694 }
695 /* If we define a(i) = abs(x(keys(i)) for i = 1,..., n, we have */
696 /* permuted keys so that */
697
698 /* a(l) <= a(p1), a(p2) <= a(p3), std::max<double>(a(p1),a(p3)) <= a(u). */
699
700 /* Find the third largest element of the four remaining */
701 /* elements (the median), and place in position l. */
702 if (std::fabs(x[keys[p1]]) > std::fabs(x[keys[p3]]))
703 {
704 if (std::fabs(x[keys[l]]) <= std::fabs(x[keys[p3]]))
705 {
706 std::swap(keys[l], keys[p3]);
707 }
708 }
709 else
710 {
711 if (std::fabs(x[keys[p2]]) <= std::fabs(x[keys[p1]]))
712 {
713 std::swap(keys[l], keys[p1]);
714 }
715 else
716 {
717 std::swap(keys[l], keys[p2]);
718 }
719 }
720 /* Partition the array about the element in position l. */
721 m = l;
722 i__1 = u;
723 for (i__ = l + 1; i__ <= i__1; ++i__)
724 {
725 if (std::fabs(x[keys[i__]]) < std::fabs(x[keys[l]]))
726 {
727 ++m;
728 std::swap(keys[m], keys[i__]);
729 }
730 }
731 /* Move the partition element into position m. */
732 std::swap(keys[l], keys[m]);
733 /* Adjust the values of l and u. */
734 if (*k >= m)
735 {
736 l = m + 1;
737 }
738 if (*k <= m)
739 {
740 u = m - 1;
741 }
742 /* Check for multiple medians if the length of the subarray */
743 /* has not decreased by 1/3 after two consecutive iterations. */
744 if ((u - l) * 3 > lp << 1 && *k > m)
745 {
746 /* Partition the remaining elements into those elements */
747 /* equal to x(m), and those greater than x(m). Adjust */
748 /* the values of l and u. */
749 p = m;
750 i__1 = u;
751 for (i__ = m + 1; i__ <= i__1; ++i__)
752 {
753 if (std::fabs(x[keys[i__]]) == std::fabs(x[keys[m]]))
754 {
755 ++p;
756 std::swap(keys[p], keys[i__]);
757 }
758 }
759 l = p + 1;
760 if (*k <= p)
761 {
762 u = p - 1;
763 }
764 }
765 /* Update the length indicators for the subarray. */
766 lp = lc;
767 lc = u - l;
768 }
769 return 0;
770 } /* dsel2_ */
771
772 /* Subroutine */
insort_(int * n,int * keys)773 int insort_(int *n, int *keys)
774 {
775 /* System generated locals */
776 int i__1;
777
778 /* Local variables */
779 static int i__, j, ind;
780
781 /* ********** */
782
783 /* Subroutine insort */
784
785 /* Given an int array keys of length n, this subroutine uses */
786 /* an insertion sort to sort the keys in increasing order. */
787
788 /* The subroutine statement is */
789
790 /* subroutine insort(n,keys) */
791
792 /* where */
793
794 /* n is an int variable. */
795 /* On entry n is the number of keys. */
796 /* On exit n is unchanged. */
797
798 /* keys is an int array of length n. */
799 /* On entry keys is the array to be sorted. */
800 /* On exit keys is permuted to increasing order. */
801
802 /* MINPACK-2 Project. March 1998. */
803 /* Argonne National Laboratory. */
804 /* Chih-Jen Lin and Jorge J. More'. */
805
806 /* ********** */
807 /* Parameter adjustments */
808 --keys;
809
810 /* Function Body */
811 i__1 = *n;
812 for (j = 2; j <= i__1; ++j)
813 {
814 ind = keys[j];
815 i__ = j - 1;
816 while (i__ > 0 && keys[i__] > ind)
817 {
818 keys[i__ + 1] = keys[i__];
819 --i__;
820 }
821 keys[i__ + 1] = ind;
822 }
823 return 0;
824 } /* insort_ */
825
826 /* Subroutine */
ihsort_(int * n,int * keys)827 int ihsort_(int *n, int *keys)
828 {
829 static int k, m, x, mid, lheap, rheap;
830
831 /* ********** */
832
833 /* Subroutine ihsort */
834
835 /* Given an int array keys of length n, this subroutine uses */
836 /* a heap sort to sort the keys in increasing order. */
837
838 /* This subroutine is a minor modification of code written by */
839 /* Mark Jones and Paul Plassmann. */
840
841 /* The subroutine statement is */
842
843 /* subroutine ihsort(n,keys) */
844
845 /* where */
846
847 /* n is an int variable. */
848 /* On entry n is the number of keys. */
849 /* On exit n is unchanged. */
850
851 /* keys is an int array of length n. */
852 /* On entry keys is the array to be sorted. */
853 /* On exit keys is permuted to increasing order. */
854
855 /* MINPACK-2 Project. March 1998. */
856 /* Argonne National Laboratory. */
857 /* Chih-Jen Lin and Jorge J. More'. */
858
859 /* ********** */
860 /* Parameter adjustments */
861 --keys;
862
863 /* Function Body */
864 if (*n <= 1)
865 {
866 return 0;
867 }
868 /* Build the heap. */
869 mid = *n / 2;
870 for (k = mid; k >= 1; --k)
871 {
872 x = keys[k];
873 lheap = k;
874 rheap = *n;
875 m = lheap << 1;
876 while (m <= rheap)
877 {
878 if (m < rheap)
879 {
880 if (keys[m] < keys[m + 1])
881 {
882 ++m;
883 }
884 }
885 if (x >= keys[m])
886 {
887 m = rheap + 1;
888 }
889 else
890 {
891 keys[lheap] = keys[m];
892 lheap = m;
893 m = lheap << 1;
894 }
895 }
896 keys[lheap] = x;
897 }
898 /* Sort the heap. */
899 for (k = *n; k >= 2; --k)
900 {
901 x = keys[k];
902 keys[k] = keys[1];
903 lheap = 1;
904 rheap = k - 1;
905 m = 2;
906 while (m <= rheap)
907 {
908 if (m < rheap)
909 {
910 if (keys[m] < keys[m + 1])
911 {
912 ++m;
913 }
914 }
915 if (x >= keys[m])
916 {
917 m = rheap + 1;
918 }
919 else
920 {
921 keys[lheap] = keys[m];
922 lheap = m;
923 m = lheap << 1;
924 }
925 }
926 keys[lheap] = x;
927 }
928 return 0;
929 } /* ihsort_ */
930
931 /* Subroutine */
dstrsol_(int * n,double * l,double * ldiag,int * jptr,int * indr,double * r__,char * task)932 int dstrsol_(int *n, double *l, double *ldiag, int *jptr, int *indr,
933 double *r__, char *task)
934 {
935 /* System generated locals */
936 int i__1, i__2;
937
938 /* Local variables */
939 static int j, k;
940 static double temp;
941
942 /* ********** */
943
944 /* Subroutine dstrsol */
945
946 /* This subroutine solves the triangular systems L*x = r or L'*x = r. */
947
948 /* The subroutine statement is */
949
950 /* subroutine dstrsol(n,l,ldiag,jptr,indr,r,task) */
951
952 /* where */
953
954 /* n is an int variable. */
955 /* On entry n is the order of L. */
956 /* On exit n is unchanged. */
957
958 /* l is a double precision array of dimension *. */
959 /* On entry l must contain the nonzeros in the strict lower */
960 /* triangular part of L in compressed column storage. */
961 /* On exit l is unchanged. */
962
963 /* ldiag is a double precision array of dimension n. */
964 /* On entry ldiag must contain the diagonal elements of L. */
965 /* On exit ldiag is unchanged. */
966
967 /* jptr is an int array of dimension n + 1. */
968 /* On entry jptr must contain pointers to the columns of A. */
969 /* The nonzeros in column j of A must be in positions */
970 /* jptr(j), ... , jptr(j+1) - 1. */
971 /* On exit jptr is unchanged. */
972
973 /* indr is an int array of dimension *. */
974 /* On entry indr must contain row indices for the strict */
975 /* lower triangular part of L in compressed column storage. */
976 /* On exit indr is unchanged. */
977
978 /* r is a double precision array of dimension n. */
979 /* On entry r must contain the vector r. */
980 /* On exit r contains the solution vector x. */
981
982 /* task is a character variable of length 60. */
983 /* On entry */
984 /* task(1:1) = 'N' if we need to solve L*x = r */
985 /* task(1:1) = 'T' if we need to solve L'*x = r */
986 /* On exit task is unchanged. */
987
988 /* MINPACK-2 Project. May 1998. */
989 /* Argonne National Laboratory. */
990
991 /* ********** */
992 /* Solve L*x =r and store the result in r. */
993 /* Parameter adjustments */
994 --r__;
995 --jptr;
996 --ldiag;
997 --l;
998 --indr;
999
1000 /* Function Body */
1001 if (*(unsigned char *) task == 'N')
1002 {
1003 i__1 = *n;
1004 for (j = 1; j <= i__1; ++j)
1005 {
1006 temp = r__[j] / ldiag[j];
1007 i__2 = jptr[j + 1] - 1;
1008 #ifdef USE_OPENMP
1009 #pragma omp parallel for private(k)
1010 #endif
1011 for (k = jptr[j]; k <= i__2; ++k)
1012 {
1013 r__[indr[k]] -= l[k] * temp;
1014 }
1015 r__[j] = temp;
1016 }
1017 return 0;
1018 }
1019 /* Solve L'*x =r and store the result in r. */
1020 if (*(unsigned char *) task == 'T')
1021 {
1022 r__[*n] /= ldiag[*n];
1023 for (j = *n - 1; j >= 1; --j)
1024 {
1025 temp = 0.;
1026 i__1 = jptr[j + 1] - 1;
1027 #ifdef USE_OPENMP
1028 #pragma omp parallel for private(k) reduction(+:temp)
1029 #endif
1030 for (k = jptr[j]; k <= i__1; ++k)
1031 {
1032 temp += l[k] * r__[indr[k]];
1033 }
1034 r__[j] = (r__[j] - temp) / ldiag[j];
1035 }
1036 return 0;
1037 }
1038 return 0;
1039 } /* dstrsol_ */
1040
1041