1 #include "splines/oskar_dierckx_fpsphe.h"
2 #include "splines/oskar_dierckx_fpback.h"
3 #include "splines/oskar_dierckx_fpdisc.h"
4 #include "splines/oskar_dierckx_fporde.h"
5 #include "splines/oskar_dierckx_fprank.h"
6 #include "splines/oskar_dierckx_fpbspl.h"
7 #include "splines/oskar_dierckx_fprota.h"
8 #include "splines/oskar_dierckx_fpgivs.h"
9 #include "splines/oskar_dierckx_fprpsp.h"
10 #include "splines/oskar_dierckx_fprati.h"
11 #include <math.h>
12 
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16 
oskar_dierckx_fpsphe(int iopt,int m,const double * theta,const double * phi,const double * r,const double * w,double s,int ntest,int npest,double eta,double tol,int maxit,int ncc,int * nt,double * tt,int * np,double * tp,double * c,double * fp,double * sup,double * fpint,double * coord,double * f,double * ff,double * row,double * coco,double * cosi,double * a,double * q,double * bt,double * bp,double * spt,double * spp,double * h,int * index,int * nummer,double * wrk,int lwrk,int * ier)17 void oskar_dierckx_fpsphe(int iopt, int m, const double* theta,
18         const double* phi, const double* r, const double* w, double s,
19         int ntest, int npest, double eta, double tol, int maxit, int ncc,
20         int* nt, double* tt, int* np, double* tp, double* c, double* fp,
21         double* sup, double* fpint, double* coord, double* f, double* ff,
22         double* row, double* coco, double* cosi, double* a, double* q,
23         double* bt, double* bp, double* spt, double* spp, double* h,
24         int* index, int* nummer, double* wrk, int lwrk, int* ier)
25 {
26     /* System generated locals */
27     int a_dim1 = 0, a_offset = 0, q_dim1 = 0, q_offset = 0;
28     int bt_dim1 = 0, bt_offset = 0, bp_dim1 = 0, bp_offset = 0;
29     double r1 = 0.0;
30 
31     /* Local variables */
32     int i = 0, j = 0, l = 0, i1 = 0, i2 = 0, i3 = 0, j1 = 0, j2 = 0;
33     int l1 = 0, l2 = 0, l3 = 0, l4 = 0, la = 0, ii = 0, ij = 0, il = 0, in = 0;
34     int lf = 0, lh = 0, ll = 0, lp = 0, lt = 0, np4 = 0, nt4 = 0, nt6 = 0;
35     int jlt = 0, npp = 0, num = 0, nrr = 0;
36     int ntt = 0, ich1 = 0, ich3 = 0, num1 = 0, ncof = 0, nreg = 0, rank = 0;
37     int iter = 0, irot = 0, jrot = 0;
38     int iband = 0, ncoff = 0, nrint = 0;
39     int iband1 = 0, lwest = 0, iband3 = 0, iband4 = 0;
40     double p = 0.0, c1 = 0.0, d1 = 0.0, d2 = 0.0, f1 = 0.0, f2 = 0.0, f3 = 0.0;
41     double p1 = 0.0, p2 = 0.0, p3 = 0.0, aa = 0.0, cn = 0.0, co = 0.0;
42     double fn = 0.0, ri = 0.0, si = 0.0;
43     double wi = 0.0, rn = 0.0, sq = 0.0, acc = 0.0, arg = 0.0;
44     double hti = 0.0, htj = 0.0, eps = 0.0, piv = 0.0, fac1 = 0.0, fac2 = 0.0;
45     double facc = 0.0, facs = 0.0, dmax = 0.0, fpms = 0.0, pinv = 0.0;
46     double sigma = 0.0, fpmax = 0.0, store = 0.0, pi = 0.0, pi2 = 0.0;
47     double ht[4], hp[4];
48 
49     /* Parameter adjustments */
50     --nummer;
51     spp -= (1 + m);
52     spt -= (1 + m);
53     --w;
54     --r;
55     --phi;
56     --theta;
57     bt_dim1 = ntest;
58     bt_offset = 1 + bt_dim1;
59     bt -= bt_offset;
60     --tt;
61     bp_dim1 = npest;
62     bp_offset = 1 + bp_dim1;
63     bp -= bp_offset;
64     --cosi;
65     --coco;
66     --row;
67     --tp;
68     --h;
69     --ff;
70     --c;
71     q_dim1 = ncc;
72     q_offset = 1 + q_dim1;
73     q -= q_offset;
74     a_dim1 = ncc;
75     a_offset = 1 + a_dim1;
76     a -= a_offset;
77     --f;
78     --coord;
79     --fpint;
80     --index;
81     --wrk;
82 
83     /* Function Body */
84     pi = 4.0 * atan(1.0);
85     pi2 = pi + pi;
86     eps = sqrt(eta);
87     /*  calculation of acc, the absolute tolerance for the root of f(p)=s. */
88     acc = tol * s;
89     if (iopt < 0) {
90         goto L70;
91     }
92     if (iopt != 0 && s < *sup)
93     {
94         if (*np - 11 >= 0) {
95             goto L70;
96         } else {
97             goto L60;
98         }
99     }
100 
101     /*  if iopt=0 we begin by computing the weighted least-squares polynomial
102      *  of the form
103      *     s(theta,phi) = c1*f1(theta) + cn*fn(theta)
104      *  where f1(theta) and fn(theta) are the cubic polynomials satisfying
105      *     f1(0) = 1, f1(pi) = f1'(0) = f1'(pi) = 0 ; fn(theta) = 1-f1(theta).
106      *  the corresponding weighted sum of squared residuals gives the upper
107      *  bound sup for the smoothing factor s. */
108     *sup = 0.0;
109     d1 = 0.0;
110     d2 = 0.0;
111     c1 = 0.0;
112     cn = 0.0;
113     fac1 = pi * (1.0 + 0.5);
114     fac2 = (1.0 + 1.0) / (pi * (pi * pi));
115     aa = 0.0;
116     for (i = 1; i <= m; ++i)
117     {
118         wi = w[i];
119         ri = r[i] * wi;
120         arg = theta[i];
121         fn = fac2 * arg * arg * (fac1 - arg);
122         f1 = (1.0 - fn) * wi;
123         fn *= wi;
124         if (fn != 0.0)
125         {
126             oskar_dierckx_fpgivs(fn, &d1, &co, &si);
127             oskar_dierckx_fprota(co, si, &f1, &aa);
128             oskar_dierckx_fprota(co, si, &ri, &cn);
129         }
130         if (f1 != 0.0)
131         {
132             oskar_dierckx_fpgivs(f1, &d2, &co, &si);
133             oskar_dierckx_fprota(co, si, &ri, &c1);
134         }
135         *sup += ri * ri;
136     }
137     if (d2 != 0.0) c1 /= d2;
138     if (d1 != 0.0) cn = (cn - aa * c1) / d1;
139     /*  find the b-spline representation of this least-squares polynomial */
140     *nt = 8;
141     *np = 8;
142     for (i = 1; i <= 4; ++i)
143     {
144         c[i] = c1;
145         c[i + 4] = c1;
146         c[i + 8] = cn;
147         c[i + 12] = cn;
148         tt[i] = 0.0;
149         tt[i + 4] = pi;
150         tp[i] = 0.0;
151         tp[i + 4] = pi2;
152     }
153     *fp = *sup;
154     /*  test whether the least-squares polynomial is an acceptable solution */
155     fpms = *sup - s;
156     if (fpms < acc)
157     {
158         *ier = -2;
159         return;
160     }
161     /*  test whether we cannot further increase the number of knots. */
162 L60:
163     if (npest < 11 || ntest < 9)
164     {
165         *ier = 1;
166         return;
167     }
168     /*  find the initial set of interior knots of the spherical spline in
169      *  case iopt = 0. */
170     *np = 11;
171     tp[5] = pi * 0.5;
172     tp[6] = pi;
173     tp[7] = tp[5] + pi;
174     *nt = 9;
175     tt[5] = tp[5];
176 L70:
177     /*
178      *  part 1 : computation of least-squares spherical splines.
179      *  ********************************************************
180      *  if iopt < 0 we compute the least-squares spherical spline according
181      *  to the given set of knots.
182      *  if iopt >=0 we compute least-squares spherical splines with increas-
183      *  ing numbers of knots until the corresponding sum f(p=inf)<=s.
184      *  the initial set of knots then depends on the value of iopt:
185      *    if iopt=0 we start with one interior knot in the theta-direction
186      *              (pi/2) and three in the phi-direction (pi/2,pi,3*pi/2).
187      *    if iopt>0 we start with the set of knots found at the last call
188      *              of the routine. */
189 
190     /*  main loop for the different sets of knots. m is a safe upper bound
191      *  for the number of trials. */
192     for (iter = 1; iter <= m; ++iter)
193     {
194         /*  find the position of the additional knots which are needed for
195          *  the b-spline representation of s(theta,phi). */
196         l1 = 4;
197         l2 = l1;
198         l3 = *np - 3;
199         l4 = l3;
200         tp[l2] = 0.0;
201         tp[l3] = pi2;
202         for (i = 1; i <= 3; ++i)
203         {
204             ++l1;
205             --l2;
206             ++l3;
207             --l4;
208             tp[l2] = tp[l4] - pi2;
209             tp[l3] = tp[l1] + pi2;
210         }
211         l = *nt;
212         for (i = 1; i <= 4; ++i)
213         {
214             tt[i] = 0.0;
215             tt[l] = pi;
216             --l;
217         }
218         /*  find nrint, the total number of knot intervals and nreg, the number
219          *  of panels in which the approximation domain is subdivided by the
220          *  intersection of knots. */
221         ntt = *nt - 7;
222         npp = *np - 7;
223         nrr = npp / 2;
224         nrint = ntt + npp;
225         nreg = ntt * npp;
226         /*  arrange the data points according to the panel they belong to. */
227         oskar_dierckx_fporde(&theta[1], &phi[1], m, 3, 3, &tt[1], *nt,
228                 &tp[1], *np, &nummer[1], &index[1], nreg);
229         /*  find the b-spline coefficients coco and cosi of the cubic spline
230          *  approximations sc(phi) and ss(phi) for cos(phi) and sin(phi). */
231         for (i = 1; i <= npp; ++i)
232         {
233             coco[i] = 0.0;
234             cosi[i] = 0.0;
235             for (j = 1; j <= npp; ++j)
236             {
237                 a[i + j * a_dim1] = 0.0;
238             }
239         }
240         /*  the coefficients coco and cosi are obtained from the conditions
241          *  sc(tp(i))=cos(tp(i)),resp. ss(tp(i))=sin(tp(i)),i=4,5,...np-4. */
242         for (i = 1; i <= npp; ++i)
243         {
244             l2 = i + 3;
245             arg = tp[l2];
246             oskar_dierckx_fpbspl_d(&tp[1], 3, arg, l2, hp);
247             for (j = 1; j <= npp; ++j)
248             {
249                 row[j] = 0.0;
250             }
251             ll = i;
252             for (j = 1; j <= 3; ++j)
253             {
254                 if (ll > npp) ll = 1;
255                 row[ll] += hp[j - 1];
256                 ++ll;
257             }
258             facc = cos(arg);
259             facs = sin(arg);
260             for (j = 1; j <= npp; ++j)
261             {
262                 piv = row[j];
263                 if (piv == 0.0) continue;
264                 oskar_dierckx_fpgivs(piv, &a[j + a_dim1], &co, &si);
265                 oskar_dierckx_fprota(co, si, &facc, &coco[j]);
266                 oskar_dierckx_fprota(co, si, &facs, &cosi[j]);
267                 if (j == npp) break;
268                 j1 = j + 1;
269                 i2 = 1;
270                 for (l = j1; l <= npp; ++l)
271                 {
272                     ++i2;
273                     oskar_dierckx_fprota(co, si, &row[l], &a[j + i2 * a_dim1]);
274                 }
275             }
276         }
277         oskar_dierckx_fpback(&a[a_offset], &coco[1], npp, npp, &coco[1], ncc);
278         oskar_dierckx_fpback(&a[a_offset], &cosi[1], npp, npp, &cosi[1], ncc);
279         /*  find ncof, the dimension of the spherical spline and ncoff, the
280          *  number of coefficients in the standard b-spline representation. */
281         nt4 = *nt - 4;
282         np4 = *np - 4;
283         ncoff = nt4 * np4;
284         ncof = npp * (ntt - 1) + 6;
285         /*  find the bandwidth of the observation matrix a. */
286         iband = npp << 2;
287         if (ntt == 4)
288         {
289             iband = (npp + 1) * 3;
290         }
291         else if (ntt < 4)
292         {
293             iband = ncof;
294         }
295         iband1 = iband - 1;
296         /*  initialize the observation matrix a. */
297         for (i = 1; i <= ncof; ++i)
298         {
299             f[i] = 0.0;
300             for (j = 1; j <= iband; ++j)
301             {
302                 a[i + j * a_dim1] = 0.0;
303             }
304         }
305         /*  initialize the sum of squared residuals. */
306         *fp = 0.0;
307         /*  fetch the data points in the new order. main loop for the
308          *  different panels. */
309         for (num = 1; num <= nreg; ++num)
310         {
311             /*  fix certain constants for the current panel; jrot records the
312              *  column number of the first non-zero element in a row of the
313              *  observation matrix according to a data point of the panel. */
314             num1 = num - 1;
315             lt = num1 / npp;
316             l1 = lt + 4;
317             lp = num1 - lt * npp + 1;
318             l2 = lp + 3;
319             ++lt;
320             jrot = (lt > 2) ? (lt - 3) * npp + 3 : 0;
321             /*  test whether there are still data points in the
322              *  current panel. */
323             in = index[num];
324             while (in != 0)
325             {
326                 /*  fetch a new data point. */
327                 wi = w[in];
328                 ri = r[in] * wi;
329                 /*  evaluate for the theta-direction, the 4 non-zero b-splines
330                  *  at theta(in) */
331                 oskar_dierckx_fpbspl_d(&tt[1], 3, theta[in], l1, ht);
332                 /*  evaluate for the phi-direction, the 4 non-zero b-splines
333                  *  at phi(in) */
334                 oskar_dierckx_fpbspl_d(&tp[1], 3, phi[in], l2, hp);
335                 /*  store the value of these b-splines in spt and spp resp. */
336                 for (i = 1; i <= 4; ++i)
337                 {
338                     spp[in + i * m] = hp[i - 1];
339                     spt[in + i * m] = ht[i - 1];
340                 }
341                 /*  initialize the new row of observation matrix. */
342                 for (i = 1; i <= iband; ++i)
343                 {
344                     h[i] = 0.0;
345                 }
346                 /*  calculate the non-zero elements of the new row by making
347                  *  the cross products of the non-zero b-splines in theta- and
348                  *  phi-direction and by taking into account the conditions of
349                  *  the spherical splines. */
350                 for (i = 1; i <= npp; ++i)
351                 {
352                     row[i] = 0.0;
353                 }
354                 /*  take into account the condition (3) of the spherical
355                  *  splines. */
356                 ll = lp;
357                 for (i = 1; i <= 4; ++i)
358                 {
359                     if (ll > npp) ll = 1;
360                     row[ll] += hp[i - 1];
361                     ++ll;
362                 }
363                 /*  take into account the other conditions of the spherical
364                  *  splines. */
365                 if (!(lt > 2 && lt < ntt - 1))
366                 {
367                     facc = 0.0;
368                     facs = 0.0;
369                     for (i = 1; i <= npp; ++i)
370                     {
371                         facc += row[i] * coco[i];
372                         facs += row[i] * cosi[i];
373                     }
374                 }
375                 /*  fill in the non-zero elements of the new row. */
376                 j1 = 0;
377                 for (j = 1; j <= 4; ++j)
378                 {
379                     jlt = j + lt;
380                     htj = ht[j - 1];
381                     if (!(jlt > 2 && jlt <= nt4))
382                     {
383                         ++j1;
384                         h[j1] += htj;
385                         continue;
386                     }
387                     if (!(jlt == 3 || jlt == nt4))
388                     {
389                         for (i = 1; i <= npp; ++i)
390                         {
391                             ++j1;
392                             h[j1] = row[i] * htj;
393                         }
394                         continue;
395                     }
396                     if (jlt != 3)
397                     {
398                         h[j1 + 1] = facc * htj;
399                         h[j1 + 2] = facs * htj;
400                         h[j1 + 3] = htj;
401                         j1 += 2;
402                         continue;
403                     }
404                     h[1] += htj;
405                     h[2] = facc * htj;
406                     h[3] = facs * htj;
407                     j1 = 3;
408                 }
409                 for (i = 1; i <= iband; ++i)
410                 {
411                     h[i] *= wi;
412                 }
413                 /*  rotate the row into triangle by givens transformations. */
414                 irot = jrot;
415                 for (i = 1; i <= iband; ++i)
416                 {
417                     ++irot;
418                     piv = h[i];
419                     if (piv == 0.0) continue;
420                     /*  calculate the parameters of the givens transformation. */
421                     oskar_dierckx_fpgivs(piv, &a[irot + a_dim1], &co, &si);
422                     /*  apply that transformation to the right hand side. */
423                     oskar_dierckx_fprota(co, si, &ri, &f[irot]);
424                     if (i == iband) break;
425                     /*  apply that transformation to the left hand side. */
426                     i2 = 1;
427                     i3 = i + 1;
428                     for (j = i3; j <= iband; ++j)
429                     {
430                         ++i2;
431                         oskar_dierckx_fprota(co, si, &h[j],
432                                 &a[irot + i2 * a_dim1]);
433                     }
434                 }
435                 /*  add the contribution of the row to the sum of squares of
436                  *  residual right hand sides. */
437                 *fp += ri * ri;
438                 /*  find the number of the next data point in the panel. */
439                 in = nummer[in];
440             }
441         }
442         /*  find dmax, the maximum value for the diagonal elements in the
443          *  reduced triangle. */
444         dmax = 0.0;
445         for (i = 1; i <= ncof; ++i)
446         {
447             if (a[i + a_dim1] <= dmax) continue;
448             dmax = a[i + a_dim1];
449         }
450         /*  check whether the observation matrix is rank deficient. */
451         sigma = eps * dmax;
452         for (i = 1; i <= ncof; ++i)
453         {
454             if (a[i + a_dim1] <= sigma)
455             {
456                 i = -1;
457                 break;
458             }
459         }
460         if (i != -1)
461         {
462             /*  backward substitution in case of full rank. */
463             oskar_dierckx_fpback(&a[a_offset], &f[1], ncof, iband, &c[1], ncc);
464             rank = ncof;
465             for (i = 1; i <= ncof; ++i)
466             {
467                 q[i + q_dim1] = a[i + a_dim1] / dmax;
468             }
469         }
470         else
471         {
472             /*  in case of rank deficiency, find the minimum norm solution. */
473             lwest = ncof * iband + ncof + iband;
474             if (lwrk < lwest)
475             {
476                 *ier = lwest;
477                 return;
478             }
479             lf = 1;
480             lh = lf + ncof;
481             la = lh + iband;
482             for (i = 1; i <= ncof; ++i)
483             {
484                 ff[i] = f[i];
485                 for (j = 1; j <= iband; ++j)
486                 {
487                     q[i + j * q_dim1] = a[i + j * a_dim1];
488                 }
489             }
490             oskar_dierckx_fprank(&q[q_offset], &ff[1], ncof, iband, ncc,
491                     sigma, &c[1], &sq, &rank, &wrk[la], &wrk[lf], &wrk[lh]);
492             for (i = 1; i <= ncof; ++i)
493             {
494                 q[i + q_dim1] /= dmax;
495             }
496             /*  add to the sum of squared residuals, the contribution of
497              *  reducing the rank. */
498             *fp += sq;
499         }
500         /*  find the coefficients in the standard b-spline representation of
501          *  the spherical spline. */
502         oskar_dierckx_fprpsp(*nt, *np, &coco[1], &cosi[1], &c[1], &ff[1],
503                 ncoff);
504         /*  test whether the least-squares spline is an acceptable solution. */
505         if (iopt < 0) {
506             if (*fp <= 0.0) {
507                 goto L970;
508             } else {
509                 goto L980;
510             }
511         }
512         fpms = *fp - s;
513         if (fabs(fpms) <= acc) {
514             if (*fp <= 0.0) {
515                 goto L970;
516             } else {
517                 goto L980;
518             }
519         }
520         /*  if f(p=inf) < s, accept the choice of knots. */
521         if (fpms < 0.0) break; /* Go to part 2. */
522         /*  test whether we cannot further increase the number of knots. */
523         if (ncof > m)
524         {
525             *ier = 4;
526             return;
527         }
528         /*  search where to add a new knot.
529          *  find for each interval the sum of squared residuals fpint for the
530          *  data points having the coordinate belonging to that knot interval.
531          *  calculate also coord which is the same sum, weighted by the
532          *  position of the data points considered. */
533         for (i = 1; i <= nrint; ++i)
534         {
535             fpint[i] = 0.0;
536             coord[i] = 0.0;
537         }
538         for (num = 1; num <= nreg; ++num)
539         {
540             num1 = num - 1;
541             lt = num1 / npp;
542             l1 = lt + 1;
543             lp = num1 - lt * npp;
544             l2 = lp + 1 + ntt;
545             jrot = lt * np4 + lp;
546             in = index[num];
547             while (in != 0)
548             {
549                 store = 0.0;
550                 i1 = jrot;
551                 for (i = 1; i <= 4; ++i)
552                 {
553                     hti = spt[in + i * m];
554                     j1 = i1;
555                     for (j = 1; j <= 4; ++j)
556                     {
557                         ++j1;
558                         store += hti * spp[in + j * m] * c[j1];
559                     }
560                     i1 += np4;
561                 }
562                 r1 = w[in] * (r[in] - store);
563                 store = r1 * r1;
564                 fpint[l1] += store;
565                 coord[l1] += store * theta[in];
566                 fpint[l2] += store;
567                 coord[l2] += store * phi[in];
568                 in = nummer[in];
569             }
570         }
571         /*  find the interval for which fpint is maximal on the condition that
572          *  there still can be added a knot. */
573         l1 = 1;
574         l2 = nrint;
575         if (ntest < *nt + 1) l1 = ntt + 1;
576         if (npest < *np + 2) l2 = ntt;
577         /*  test whether we cannot further increase the number of knots. */
578         if (l1 > l2)
579         {
580             *ier = 1;
581             return;
582         }
583         for (;;)
584         {
585             fpmax = 0.0;
586             l = 0;
587             for (i = l1; i <= l2; ++i)
588             {
589                 if (fpmax >= fpint[i]) continue;
590                 l = i;
591                 fpmax = fpint[i];
592             }
593             if (l == 0)
594             {
595                 *ier = 5;
596                 return;
597             }
598             /*  calculate the position of the new knot. */
599             arg = coord[l] / fpint[l];
600             /*  test in what direction the new knot is going to be added. */
601             if (l > ntt)
602             {
603                 /*  addition in the phi-direction */
604                 l4 = l + 4 - ntt;
605                 if (!(arg < pi))
606                 {
607                     arg -= pi;
608                     l4 -= nrr;
609                 }
610                 fpint[l] = 0.0;
611                 fac1 = tp[l4] - arg;
612                 fac2 = arg - tp[l4 - 1];
613                 if (fac1 > 10.0 * fac2 || fac2 > 10.0 * fac1) continue;
614                 ll = nrr + 4;
615                 j = ll;
616                 for (i = l4; i <= ll; ++i)
617                 {
618                     tp[j + 1] = tp[j];
619                     --j;
620                 }
621                 tp[l4] = arg;
622                 *np += 2;
623                 ++nrr;
624                 for (i = 5; i <= ll; ++i)
625                 {
626                     j = i + nrr;
627                     tp[j] = tp[i] + pi;
628                 }
629             }
630             else
631             {
632                 /*  addition in the theta-direction */
633                 l4 = l + 4;
634                 fpint[l] = 0.0;
635                 fac1 = tt[l4] - arg;
636                 fac2 = arg - tt[l4 - 1];
637                 if (fac1 > 10.0 * fac2 || fac2 > 10.0 * fac1) continue;
638                 j = *nt;
639                 for (i = l4; i <= *nt; ++i)
640                 {
641                     tt[j + 1] = tt[j];
642                     --j;
643                 }
644                 tt[l4] = arg;
645                 ++(*nt);
646             }
647             break;
648         }
649         /*  restart the computations with the new set of knots. */
650     }
651 
652     /*
653      * part 2: determination of the smoothing spherical spline.
654      * ********************************************************
655      * we have determined the number of knots and their position. we now
656      * compute the coefficients of the smoothing spline sp(theta,phi).
657      * the observation matrix a is extended by the rows of a matrix, expres-
658      * sing that sp(theta,phi) must be a constant function in the variable
659      * phi and a cubic polynomial in the variable theta. the corresponding
660      * weights of these additional rows are set to 1/(p). iteratively
661      * we than have to determine the value of p such that f(p) = sum((w(i)*
662      * (r(i)-sp(theta(i),phi(i))))**2)  be = s.
663      * we already know that the least-squares polynomial corresponds to p=0,
664      * and that the least-squares spherical spline corresponds to p=infin.
665      * the iteration process makes use of rational interpolation. since f(p)
666      * is a convex and strictly decreasing function of p, it can be
667      * approximated by a rational function of the form r(p) = (u*p+v)/(p+w).
668      * three values of p (p1,p2,p3) with corresponding values of f(p) (f1=
669      * f(p1)-s,f2=f(p2)-s,f3=f(p3)-s) are used to calculate the new value
670      * of p such that r(p)=s. convergence is guaranteed by taking f1>0,f3<0.
671      */
672 
673     /*  evaluate the discontinuity jumps of the 3rd order derivative of
674      *  the b-splines at the knots tt(l),l=5,...,nt-4. */
675     oskar_dierckx_fpdisc(&tt[1], *nt, 5, &bt[bt_offset], ntest);
676     /*  evaluate the discontinuity jumps of the 3rd order derivative of
677      *  the b-splines at the knots tp(l),l=5,...,np-4. */
678     oskar_dierckx_fpdisc(&tp[1], *np, 5, &bp[bp_offset], npest);
679     /*  initial value for p. */
680     p1 = 0.0;
681     f1 = *sup - s;
682     p3 = -1.0;
683     f3 = fpms;
684     p = 0.0;
685     for (i = 1; i <= ncof; ++i)
686     {
687         p += a[i + a_dim1];
688     }
689     rn = (double) ncof;
690     p = rn / p;
691     /*  find the bandwidth of the extended observation matrix. */
692     iband4 = (ntt <= 4) ? ncof : iband + 3;
693     iband3 = iband4 - 1;
694     ich1 = 0;
695     ich3 = 0;
696     /*  iteration process to find the root of f(p)=s. */
697     for (iter = 1; iter <= maxit; ++iter)
698     {
699         pinv = 1.0 / p;
700         /*  store the triangularized observation matrix into q. */
701         for (i = 1; i <= ncof; ++i)
702         {
703             ff[i] = f[i];
704             for (j = 1; j <= iband4; ++j)
705             {
706                 q[i + j * q_dim1] = 0.0;
707             }
708             for (j = 1; j <= iband; ++j)
709             {
710                 q[i + j * q_dim1] = a[i + j * a_dim1];
711             }
712         }
713         /*  extend the observation matrix with the rows of a matrix, expressing
714          *  that for theta=cst. sp(theta,phi) must be a constant function. */
715         nt6 = *nt - 6;
716         for (i = 5; i <= np4; ++i)
717         {
718             ii = i - 4;
719             for (l = 1; l <= npp; ++l)
720             {
721                 row[l] = 0.0;
722             }
723             ll = ii;
724             for (l = 1; l <= 5; ++l)
725             {
726                 if (ll > npp) ll = 1;
727                 row[ll] += bp[ii + l * bp_dim1];
728                 ++ll;
729             }
730             facc = 0.0;
731             facs = 0.0;
732             for (l = 1; l <= npp; ++l)
733             {
734                 facc += row[l] * coco[l];
735                 facs += row[l] * cosi[l];
736             }
737             for (j = 1; j <= nt6; ++j)
738             {
739                 /*  initialize the new row. */
740                 for (l = 1; l <= iband; ++l)
741                 {
742                     h[l] = 0.0;
743                 }
744                 /*  fill in the non-zero elements of the row. jrot records the
745                  *  column number of the first non-zero element in the row. */
746                 jrot = (j - 2) * npp + 4;
747                 if (j > 1 && j < nt6)
748                 {
749                     for (l = 1; l <= npp; ++l)
750                     {
751                         h[l] = row[l];
752                     }
753                 }
754                 else
755                 {
756                     h[1] = facc;
757                     h[2] = facs;
758                     if (j == 1) jrot = 2;
759                 }
760                 for (l = 1; l <= iband; ++l)
761                 {
762                     h[l] *= pinv;
763                 }
764                 ri = 0.0;
765                 /*  rotate the new row into triangle by givens transformations. */
766                 for (irot = jrot; irot <= ncof; ++irot)
767                 {
768                     piv = h[1];
769                     i2 = (iband1 < ncof - irot) ? iband1 : ncof - irot;
770                     if (piv == 0.0)
771                     {
772                         if (i2 <= 0) break;
773                     }
774                     else
775                     {
776                         /*  calculate the parameters of the givens
777                          *  transformation. */
778                         oskar_dierckx_fpgivs(piv, &q[irot + q_dim1], &co, &si);
779                         /*  apply givens transformation to right hand side. */
780                         oskar_dierckx_fprota(co, si, &ri, &ff[irot]);
781                         if (i2 == 0) break;
782                         /*  apply givens transformation to left hand side. */
783                         for (l = 1; l <= i2; ++l)
784                         {
785                             l1 = l + 1;
786                             oskar_dierckx_fprota(co, si, &h[l1],
787                                     &q[irot + l1 * q_dim1]);
788                         }
789                     }
790                     for (l = 1; l <= i2; ++l)
791                     {
792                         h[l] = h[l + 1];
793                     }
794                     h[i2 + 1] = 0.0;
795                 }
796             }
797         }
798         /*  extend the observation matrix with the rows of a matrix expressing
799          *  that for phi=cst. sp(theta,phi) must be a cubic polynomial. */
800         for (i = 5; i <= nt4; ++i)
801         {
802             ii = i - 4;
803             for (j = 1; j <= npp; ++j)
804             {
805                 /*  initialize the new row */
806                 for (l = 1; l <= iband4; ++l)
807                 {
808                     h[l] = 0.0;
809                 }
810                 /*  fill in the non-zero elements of the row. jrot records the
811                  *  column number of the first non-zero element in the row. */
812                 j1 = 1;
813                 for (l = 1; l <= 5; ++l)
814                 {
815                     il = ii + l;
816                     ij = npp;
817                     if (il == 3 || il == nt4)
818                     {
819                         j1 = j1 + 3 - j;
820                         j2 = j1 - 2;
821                         ij = 0;
822                         if (il == 3)
823                         {
824                             j1 = 1;
825                             j2 = 2;
826                             ij = j + 2;
827                         }
828                         h[j2] = bt[ii + l * bt_dim1] * coco[j];
829                         h[j2 + 1] = bt[ii + l * bt_dim1] * cosi[j];
830                     }
831                     h[j1] += bt[ii + l * bt_dim1];
832                     j1 += ij;
833                 }
834                 for (l = 1; l <= iband4; ++l)
835                 {
836                     h[l] *= pinv;
837                 }
838                 ri = 0.0;
839                 jrot = 1;
840                 if (ii > 2) jrot = j + 3 + (ii - 3) * npp;
841                 /*  rotate the new row into triangle by givens transformations. */
842                 for (irot = jrot; irot <= ncof; ++irot)
843                 {
844                     piv = h[1];
845                     i2 = (iband3 < ncof - irot) ? iband3 : ncof - irot;
846                     if (piv == 0.0)
847                     {
848                         if (i2 <= 0) break;
849                     }
850                     else
851                     {
852                         /*  calculate the parameters of the givens
853                          *  transformation. */
854                         oskar_dierckx_fpgivs(piv, &q[irot + q_dim1], &co, &si);
855                         /*  apply givens transformation to right hand side. */
856                         oskar_dierckx_fprota(co, si, &ri, &ff[irot]);
857                         if (i2 == 0) break;
858                         /*  apply givens transformation to left hand side. */
859                         for (l = 1; l <= i2; ++l)
860                         {
861                             l1 = l + 1;
862                             oskar_dierckx_fprota(co, si, &h[l1],
863                                     &q[irot + l1 * q_dim1]);
864                         }
865                     }
866                     for (l = 1; l <= i2; ++l)
867                     {
868                         h[l] = h[l + 1];
869                     }
870                     h[i2 + 1] = 0.0;
871                 }
872             }
873         }
874         /*  find dmax, the maximum value for the diagonal elements in the
875          *  reduced triangle. */
876         dmax = 0.0;
877         for (i = 1; i <= ncof; ++i)
878         {
879             if (q[i + q_dim1] <= dmax) continue;
880             dmax = q[i + q_dim1];
881         }
882         /*  check whether the matrix is rank deficient. */
883         sigma = eps * dmax;
884         for (i = 1; i <= ncof; ++i)
885         {
886             if (q[i + q_dim1] <= sigma)
887             {
888                 i = -1;
889                 break;
890             }
891         }
892         if (i != -1)
893         {
894             /*  backward substitution in case of full rank. */
895             oskar_dierckx_fpback(&q[q_offset], &ff[1], ncof, iband4,
896                     &c[1], ncc);
897             rank = ncof;
898         }
899         else
900         {
901             /*  in case of rank deficiency, find the minimum norm solution. */
902             lwest = ncof * iband4 + ncof + iband4;
903             if (lwrk < lwest)
904             {
905                 *ier = lwest;
906                 return;
907             }
908             lf = 1;
909             lh = lf + ncof;
910             la = lh + iband4;
911             oskar_dierckx_fprank(&q[q_offset], &ff[1], ncof, iband4, ncc,
912                     sigma, &c[1], &sq, &rank, &wrk[la], &wrk[lf], &wrk[lh]);
913         }
914         for (i = 1; i <= ncof; ++i)
915         {
916             q[i + q_dim1] /= dmax;
917         }
918         /*  find the coefficients in the standard b-spline representation of
919          *  the spherical spline. */
920         oskar_dierckx_fprpsp(*nt, *np, &coco[1], &cosi[1], &c[1], &ff[1],
921                 ncoff);
922         /*  compute f(p). */
923         *fp = 0.0;
924         for (num = 1; num <= nreg; ++num)
925         {
926             num1 = num - 1;
927             lt = num1 / npp;
928             lp = num1 - lt * npp;
929             jrot = lt * np4 + lp;
930             in = index[num];
931             while (in != 0)
932             {
933                 store = 0.0;
934                 i1 = jrot;
935                 for (i = 1; i <= 4; ++i)
936                 {
937                     hti = spt[in + i * m];
938                     j1 = i1;
939                     for (j = 1; j <= 4; ++j)
940                     {
941                         ++j1;
942                         store += hti * spp[in + j * m] * c[j1];
943                     }
944                     i1 += np4;
945                 }
946                 r1 = w[in] * (r[in] - store);
947                 *fp += r1 * r1;
948                 in = nummer[in];
949             }
950         }
951         /*  test whether the approximation sp(theta,phi) is an acceptable
952          *  solution */
953         fpms = *fp - s;
954         if (fabs(fpms) <= acc) {
955             goto L980;
956         }
957         /*  test whether the maximum allowable number of iterations has been
958          *  reached. */
959         if (iter == maxit)
960         {
961             *ier = 3;
962             return;
963         }
964         /*  carry out one more step of the iteration process. */
965         p2 = p;
966         f2 = fpms;
967         if (ich3 == 0)
968         {
969             if (! (f2 - f3 > acc))
970             {
971                 /*  our initial choice of p is too large. */
972                 p3 = p2;
973                 f3 = f2;
974                 p *= 0.04;
975                 if (p <= p1) p = p1 * 0.9 + p2 * 0.1;
976                 continue;
977             }
978             if (f2 < 0.0) ich3 = 1;
979         }
980         if (ich1 == 0)
981         {
982             if (! (f1 - f2 > acc))
983             {
984                 /*  our initial choice of p is too small */
985                 p1 = p2;
986                 f1 = f2;
987                 p /= 0.04;
988                 if (p3 < 0.0) continue;
989                 if (p >= p3) p = p2 * 0.1 + p3 * 0.9;
990                 continue;
991             }
992             if (f2 > 0.0) ich1 = 1;
993         }
994 
995         /*  test whether the iteration process proceeds as theoretically
996          *  expected. */
997         if (f2 >= f1 || f2 <= f3)
998         {
999             *ier = 2;
1000             return;
1001         }
1002         /*  find the new value of p. */
1003         p = oskar_dierckx_fprati(&p1, &f1, p2, f2, &p3, &f3);
1004     }
1005 L970:
1006     *ier = -1;
1007     *fp = 0.0;
1008 L980:
1009     if (ncof != rank) *ier = -rank;
1010 }
1011 
1012 #ifdef __cplusplus
1013 }
1014 #endif
1015