1* research!alice!wsc Mon Dec 30 16:55 EST 1985
2* W. S. Cleveland
3* ATT Bell Laboratories
4* Murray Hill NJ 07974
5*
6* outline of this file:
7*    lines 1-72   introduction
8*        73-177   documentation for lowess
9*       178-238   ratfor version of lowess
10*       239-301   documentation for lowest
11*       302-350   ratfor version of lowest
12*       351-end   test driver and fortran version of lowess and lowest
13*
14*   a multivariate version is available by "send lowess from a"
15*
16*              COMPUTER PROGRAMS FOR LOCALLY WEIGHTED REGRESSION
17*
18*             This package consists  of  two  FORTRAN  programs  for
19*        smoothing    scatterplots   by   robust   locally   weighted
20*        regression, or lowess.   The  principal  routine  is  LOWESS
21*        which   computes   the  smoothed  values  using  the  method
22*        described in The Elements of Graphing Data, by William S.
23*        Cleveland    (Wadsworth,    555 Morego   Street,   Monterey,
24*        California 93940).
25*
26*             LOWESS calls a support routine, LOWEST, the code for
27*        which is included. LOWESS also calls a routine  SORT,  which
28*        the user must provide.
29*
30*             To reduce the computations, LOWESS  requires  that  the
31*        arrays  X  and  Y,  which  are  the  horizontal and vertical
32*        coordinates, respectively, of the scatterplot, be such  that
33*        X  is  sorted  from  smallest  to  largest.   The  user must
34*        therefore use another sort routine which will sort X  and  Y
35*        according  to X.
36*             To summarize the scatterplot, YS,  the  fitted  values,
37*        should  be  plotted  against X.   No  graphics  routines are
38*        available in the package and must be supplied by the user.
39*
40*             The FORTRAN code for the routines LOWESS and LOWEST has
41*        been   generated   from   higher   level   RATFOR   programs
42*        (B. W. Kernighan, ``RATFOR:  A Preprocessor for  a  Rational
43*        Fortran,''  Software Practice and Experience, Vol. 5 (1975),
44*        which are also included.
45*
46*             The following are data and output from LOWESS that  can
47*        be  used  to check your implementation of the routines.  The
48*        notation (10)v means 10 values of v.
49*
50*
51*
52*
53*        X values:
54*          1  2  3  4  5  (10)6  8  10  12  14  50
55*
56*        Y values:
57*           18  2  15  6  10  4  16  11  7  3  14  17  20  12  9  13  1  8  5  19
58*
59*
60*        YS values with F = .25, NSTEPS = 0, DELTA = 0.0
61*         13.659  11.145  8.701  9.722  10.000  (10)11.300  13.000  6.440  5.596
62*           5.456  18.998
63*
64*        YS values with F = .25, NSTEPS = 0 ,  DELTA = 3.0
65*          13.659  12.347  11.034  9.722  10.511  (10)11.300  13.000  6.440  5.596
66*            5.456  18.998
67*
68*        YS values with F = .25, NSTEPS = 2, DELTA = 0.0
69*          14.811  12.115  8.984  9.676  10.000  (10)11.346  13.000  6.734  5.744
70*            5.415  18.998
71*
72*
73*
74*
75*                                   LOWESS
76*
77*
78*
79*        Calling sequence
80*
81*        CALL LOWESS(X,Y,N,F,NSTEPS,DELTA,YS,RW,RES)
82*
83*        Purpose
84*
85*        LOWESS computes the smooth of a scatterplot of Y  against  X
86*        using  robust  locally  weighted regression.  Fitted values,
87*        YS, are computed at each of the  values  of  the  horizontal
88*        axis in X.
89*
90*        Argument description
91*
92*              X = Input; abscissas of the points on the
93*                  scatterplot; the values in X must be ordered
94*                  from smallest to largest.
95*              Y = Input; ordinates of the points on the
96*                  scatterplot.
97*              N = Input; dimension of X,Y,YS,RW, and RES.
98*              F = Input; specifies the amount of smoothing; F is
99*                  the fraction of points used to compute each
100*                  fitted value; as F increases the smoothed values
101*                  become smoother; choosing F in the range .2 to
102*                  idea which value to use, try F = .5.
103*         NSTEPS = Input; the number of iterations in the robust
104*                  fit; if NSTEPS = 0, the nonrobust fit is
105*                  returned; setting NSTEPS equal to 2 should serve
106*                  most purposes.
107*          DELTA = input; nonnegative parameter which may be used
108*                  to save computations; if N is less than 100, set
109*                  DELTA equal to 0.0; if N is greater than 100 you
110*                  should find out how DELTA works by reading the
111*                  additional instructions section.
112*             YS = Output; fitted values; YS(I) is the fitted value
113*                  at X(I); to summarize the scatterplot, YS(I)
114*                  should be plotted against X(I).
115*             RW = Output; robustness weights; RW(I) is the weight
116*                  given to the point (X(I),Y(I)); if NSTEPS = 0,
117*                  RW is not used.
118*            RES = Output; residuals; RES(I) = Y(I)-YS(I).
119*
120*
121*        Other programs called
122*
123*               LOWEST
124*               SORT
125*
126*        Additional instructions
127*
128*        DELTA can be used to save computations.   Very  roughly  the
129*        algorithm  is  this:   on the initial fit and on each of the
130*        NSTEPS iterations locally weighted regression fitted  values
131*        are computed at points in X which are spaced, roughly, DELTA
132*        apart; then the fitted values at the  remaining  points  are
133*        computed  using  linear  interpolation.   The  first locally
134*        weighted regression (l.w.r.) computation is carried  out  at
135*        X(1)  and  the  last  is  carried  out at X(N).  Suppose the
136*        l.w.r. computation is carried out at  X(I).   If  X(I+1)  is
137*        greater  than  or  equal  to  X(I)+DELTA,  the  next  l.w.r.
138*        computation is carried out at X(I+1).   If  X(I+1)  is  less
139*        than X(I)+DELTA, the next l.w.r.  computation is carried out
140*        at the largest X(J) which is greater than or equal  to  X(I)
141*        but  is not greater than X(I)+DELTA.  Then the fitted values
142*        for X(K) between X(I)  and  X(J),  if  there  are  any,  are
143*        computed  by  linear  interpolation  of the fitted values at
144*        X(I) and X(J).  If N is less than 100 then DELTA can be  set
145*        to  0.0  since  the  computation time will not be too great.
146*        For larger N it is typically not necessary to carry out  the
147*        l.w.r.  computation for all points, so that much computation
148*        time can be saved by taking DELTA to be  greater  than  0.0.
149*        If  DELTA =  Range  (X)/k  then,  if  the  values  in X were
150*        uniformly  scattered  over  the  range,  the   full   l.w.r.
151*        computation  would be carried out at approximately k points.
152*        Taking k to be 50 often works well.
153*
154*        Method
155*
156*        The fitted values are computed by using the nearest neighbor
157*        routine  and  robust locally weighted regression of degree 1
158*        with the tricube weight function.  A few additional features
159*        have  been  added.  Suppose r is FN truncated to an integer.
160*        Let  h  be  the  distance  to  the  r-th  nearest   neighbor
161*        from X(I).   All  points within h of X(I) are used.  Thus if
162*        the r-th nearest neighbor is exactly the  same  distance  as
163*        other  points,  more  than r points can possibly be used for
164*        the smooth at  X(I).   There  are  two  cases  where  robust
165*        locally  weighted regression of degree 0 is actually used at
166*        X(I).  One case occurs when  h  is  0.0.   The  second  case
167*        occurs  when  the  weighted  standard error of the X(I) with
168*        respect to the weights w(j) is  less  than  .001  times  the
169*        range  of the X(I), where w(j) is the weight assigned to the
170*        j-th point of X (the tricube  weight  times  the  robustness
171*        weight)  divided by the sum of all of the weights.  Finally,
172*        if the w(j) are all zero for the smooth at X(I), the  fitted
173*        value is taken to be Y(I).
174*
175*
176*
177*
178*  subroutine lowess(x,y,n,f,nsteps,delta,ys,rw,res)
179*  real x(n),y(n),ys(n),rw(n),res(n)
180*  logical ok
181*  if (n<2){ ys(1) = y(1); return }
182*  ns = max0(min0(ifix(f*float(n)),n),2)  # at least two, at most n points
183*  for(iter=1; iter<=nsteps+1; iter=iter+1){      # robustness iterations
184*         nleft = 1; nright = ns
185*         last = 0        # index of prev estimated point
186*         i = 1   # index of current point
187*         repeat{
188*                 while(nright<n){
189*  # move nleft, nright to right if radius decreases
190*                         d1 = x(i)-x(nleft)
191*                         d2 = x(nright+1)-x(i)
192*  # if d1<=d2 with x(nright+1)==x(nright), lowest fixes
193*                         if (d1<=d2) break
194*  # radius will not decrease by move right
195*                         nleft = nleft+1
196*                         nright = nright+1
197*                         }
198*                 call lowest(x,y,n,x(i),ys(i),nleft,nright,res,iter>1,rw,ok)
199*  # fitted value at x(i)
200*                 if (!ok) ys(i) = y(i)
201*  # all weights zero - copy over value (all rw==0)
202*                 if (last<i-1) { # skipped points -- interpolate
203*                         denom = x(i)-x(last)    # non-zero - proof?
204*                         for(j=last+1; j<i; j=j+1){
205*                                 alpha = (x(j)-x(last))/denom
206*                                 ys(j) = alpha*ys(i)+(1.0-alpha)*ys(last)
207*                                 }
208*                         }
209*                 last = i        # last point actually estimated
210*                 cut = x(last)+delta     # x coord of close points
211*                 for(i=last+1; i<=n; i=i+1){     # find close points
212*                         if (x(i)>cut) break     # i one beyond last pt within cut
213*                         if(x(i)==x(last)){      # exact match in x
214*                                 ys(i) = ys(last)
215*                                 last = i
216*                                 }
217*                         }
218*                 i=max0(last+1,i-1)
219*  # back 1 point so interpolation within delta, but always go forward
220*                 } until(last>=n)
221*         do i = 1,n      # residuals
222*                 res(i) = y(i)-ys(i)
223*         if (iter>nsteps) break  # compute robustness weights except last time
224*         do i = 1,n
225*                 rw(i) = abs(res(i))
226*         call sort(rw,n)
227*         m1 = 1+n/2; m2 = n-m1+1
228*         cmad = 3.0*(rw(m1)+rw(m2))      # 6 median abs resid
229*         c9 = .999*cmad; c1 = .001*cmad
230*         do i = 1,n {
231*                 r = abs(res(i))
232*                 if(r<=c1) rw(i)=1.      # near 0, avoid underflow
233*                 else if(r>c9) rw(i)=0.  # near 1, avoid underflow
234*                 else rw(i) = (1.0-(r/cmad)**2)**2
235*                 }
236*         }
237*  return
238*  end
239*
240*
241*
242*
243*                                   LOWEST
244*
245*
246*
247*        Calling sequence
248*
249*        CALL LOWEST(X,Y,N,XS,YS,NLEFT,NRIGHT,W,USERW,RW,OK)
250*
251*        Purpose
252*
253*        LOWEST is a support routine for LOWESS and  ordinarily  will
254*        not  be  called  by  the  user.   The  fitted  value, YS, is
255*        computed  at  the  value,  XS,  of  the   horizontal   axis.
256*        Robustness  weights,  RW,  can  be employed in computing the
257*        fit.
258*
259*        Argument description
260*
261*
262*              X = Input; abscissas of the points on the
263*                  scatterplot; the values in X must be ordered
264*                  from smallest to largest.
265*              Y = Input; ordinates of the points on the
266*                  scatterplot.
267*              N = Input; dimension of X,Y,W, and RW.
268*             XS = Input; value of the horizontal axis at which the
269*                  smooth is computed.
270*             YS = Output; fitted value at XS.
271*          NLEFT = Input; index of the first point which should be
272*                  considered in computing the fitted value.
273*         NRIGHT = Input; index of the last point which should be
274*                  considered in computing the fitted value.
275*              W = Output; W(I) is the weight for Y(I) used in the
276*                  expression for YS, which is the sum from
277*                  I = NLEFT to NRIGHT of W(I)*Y(I); W(I) is
278*                  defined only at locations NLEFT to NRIGHT.
279*          USERW = Input; logical variable; if USERW is .TRUE., a
280*                  robust fit is carried out using the weights in
281*                  RW; if USERW is .FALSE., the values in RW are
282*                  not used.
283*             RW = Input; robustness weights.
284*             OK = Output; logical variable; if the weights for the
285*                  smooth are all 0.0, the fitted value, YS, is not
286*                  computed and OK is set equal to .FALSE.; if the
287*                  fitted value is computed OK is set equal to
288*
289*
290*        Method
291*
292*        The smooth at XS is computed using (robust) locally weighted
293*        regression of degree 1.  The tricube weight function is used
294*        with h equal to the maximum of XS-X(NLEFT) and X(NRIGHT)-XS.
295*        Two  cases  where  the  program  reverts to locally weighted
296*        regression of degree 0 are described  in  the  documentation
297*        for LOWESS.
298*
299*
300*
301*
302*  subroutine lowest(x,y,n,xs,ys,nleft,nright,w,userw,rw,ok)
303*  real x(n),y(n),w(n),rw(n)
304*  logical userw,ok
305*  range = x(n)-x(1)
306*  h = amax1(xs-x(nleft),x(nright)-xs)
307*  h9 = .999*h
308*  h1 = .001*h
309*  a = 0.0        # sum of weights
310*  for(j=nleft; j<=n; j=j+1){     # compute weights (pick up all ties on right)
311*         w(j)=0.
312*         r = abs(x(j)-xs)
313*         if (r<=h9) {    # small enough for non-zero weight
314*                 if (r>h1) w(j) = (1.0-(r/h)**3)**3
315*                 else      w(j) = 1.
316*                 if (userw) w(j) = rw(j)*w(j)
317*                 a = a+w(j)
318*                 }
319*         else if(x(j)>xs)break   # get out at first zero wt on right
320*         }
321*  nrt=j-1        # rightmost pt (may be greater than nright because of ties)
322*  if (a<=0.0) ok = FALSE
323*  else { # weighted least squares
324*         ok = TRUE
325*         do j = nleft,nrt
326*                 w(j) = w(j)/a   # make sum of w(j) == 1
327*         if (h>0.) {     # use linear fit
328*                 a = 0.0
329*                 do j = nleft,nrt
330*                         a = a+w(j)*x(j) # weighted center of x values
331*                 b = xs-a
332*                 c = 0.0
333*                 do j = nleft,nrt
334*                         c = c+w(j)*(x(j)-a)**2
335*                 if(sqrt(c)>.001*range) {
336*  # points are spread out enough to compute slope
337*                         b = b/c
338*                         do j = nleft,nrt
339*                                 w(j) = w(j)*(1.0+b*(x(j)-a))
340*                         }
341*                 }
342*         ys = 0.0
343*         do j = nleft,nrt
344*                 ys = ys+w(j)*y(j)
345*         }
346*  return
347*  end
348*
349*
350*
351c  test driver for lowess
352c  for expected output, see introduction
353      real x(20), y(20), ys(20), rw(20), res(20)
354      data x /1,2,3,4,5,10*6,8,10,12,14,50/
355      data y /18,2,15,6,10,4,16,11,7,3,14,17,20,12,9,13,1,8,5,19/
356      call lowess(x,y,20,.25,0,0.,ys,rw,res)
357      write(6,*) ys
358      call lowess(x,y,20,.25,0,3.,ys,rw,res)
359      write(6,*) ys
360      call lowess(x,y,20,.25,2,0.,ys,rw,res)
361      write(6,*) ys
362      end
363c**************************************************************
364c  Fortran output from ratfor
365c
366      subroutine lowess(x, y, n, f, nsteps, delta, ys, rw, res)
367      integer n
368      integer nsteps
369      real x(n), y(n), f, delta, ys(n), rw(n)
370      real res(n)
371      integer nright, min0, max0, i, j, ifix
372      integer iter, last, m1, m2, ns, nleft
373      real abs, cut, cmad, r, d1, d2
374      real c1, c9, alpha, denom, float
375      logical ok
376      if (n .ge. 2) goto 1
377         ys(1) = y(1)
378         return
379c at least two, at most n points
380   1  ns = max0(min0(ifix(f*float(n)), n), 2)
381      iter = 1
382         goto  3
383   2     iter = iter+1
384   3     if (iter .gt. nsteps+1) goto  22
385c robustness iterations
386         nleft = 1
387         nright = ns
388c index of prev estimated point
389         last = 0
390c index of current point
391         i = 1
392   4        if (nright .ge. n) goto  5
393c move nleft, nright to right if radius decreases
394               d1 = x(i)-x(nleft)
395c if d1<=d2 with x(nright+1)==x(nright), lowest fixes
396               d2 = x(nright+1)-x(i)
397               if (d1 .le. d2) goto  5
398c radius will not decrease by move right
399               nleft = nleft+1
400               nright = nright+1
401               goto  4
402c fitted value at x(i)
403   5        call lowest(x, y, n, x(i), ys(i), nleft, nright, res, iter
404     +     .gt. 1, rw, ok)
405            if (.not. ok) ys(i) = y(i)
406c all weights zero - copy over value (all rw==0)
407            if (last .ge. i-1) goto 9
408               denom = x(i)-x(last)
409c skipped points -- interpolate
410c non-zero - proof?
411               j = last+1
412                  goto  7
413   6              j = j+1
414   7              if (j .ge. i) goto  8
415                  alpha = (x(j)-x(last))/denom
416                  ys(j) = alpha*ys(i)+(1.0-alpha)*ys(last)
417                  goto  6
418   8           continue
419c last point actually estimated
420   9        last = i
421c x coord of close points
422            cut = x(last)+delta
423            i = last+1
424               goto  11
425  10           i = i+1
426  11           if (i .gt. n) goto  13
427c find close points
428               if (x(i) .gt. cut) goto  13
429c i one beyond last pt within cut
430               if (x(i) .ne. x(last)) goto 12
431                  ys(i) = ys(last)
432c exact match in x
433                  last = i
434  12           continue
435               goto  10
436c back 1 point so interpolation within delta, but always go forward
437  13        i = max0(last+1, i-1)
438  14        if (last .lt. n) goto  4
439c residuals
440         do  15 i = 1, n
441            res(i) = y(i)-ys(i)
442  15        continue
443         if (iter .gt. nsteps) goto  22
444c compute robustness weights except last time
445         do  16 i = 1, n
446            rw(i) = abs(res(i))
447  16        continue
448         call sort(rw, n)
449         m1 = n/2+1
450         m2 = n-m1+1
451c 6 median abs resid
452         cmad = 3.0*(rw(m1)+rw(m2))
453         c9 = .999*cmad
454         c1 = .001*cmad
455         do  21 i = 1, n
456            r = abs(res(i))
457            if (r .gt. c1) goto 17
458               rw(i) = 1.
459c near 0, avoid underflow
460               goto  20
461  17           if (r .le. c9) goto 18
462                  rw(i) = 0.
463c near 1, avoid underflow
464                  goto  19
465  18              rw(i) = (1.0-(r/cmad)**2)**2
466  19        continue
467  20        continue
468  21        continue
469         goto  2
470  22  return
471      end
472      subroutine lowest(x, y, n, xs, ys, nleft, nright, w, userw
473     +, rw, ok)
474      integer n
475      integer nleft, nright
476      real x(n), y(n), xs, ys, w(n), rw(n)
477      logical userw, ok
478      integer nrt, j
479      real abs, a, b, c, h, r
480      real h1, sqrt, h9, amax1, range
481      range = x(n)-x(1)
482      h = amax1(xs-x(nleft), x(nright)-xs)
483      h9 = .999*h
484      h1 = .001*h
485c sum of weights
486      a = 0.0
487      j = nleft
488         goto  2
489   1     j = j+1
490   2     if (j .gt. n) goto  7
491c compute weights (pick up all ties on right)
492         w(j) = 0.
493         r = abs(x(j)-xs)
494         if (r .gt. h9) goto 5
495            if (r .le. h1) goto 3
496               w(j) = (1.0-(r/h)**3)**3
497c small enough for non-zero weight
498               goto  4
499   3           w(j) = 1.
500   4        if (userw) w(j) = rw(j)*w(j)
501            a = a+w(j)
502            goto  6
503   5        if (x(j) .gt. xs) goto  7
504c get out at first zero wt on right
505   6     continue
506         goto  1
507c rightmost pt (may be greater than nright because of ties)
508   7  nrt = j-1
509      if (a .gt. 0.0) goto 8
510         ok = .false.
511         goto  16
512   8     ok = .true.
513c weighted least squares
514         do  9 j = nleft, nrt
515c make sum of w(j) == 1
516            w(j) = w(j)/a
517   9        continue
518         if (h .le. 0.) goto 14
519            a = 0.0
520c use linear fit
521            do  10 j = nleft, nrt
522c weighted center of x values
523               a = a+w(j)*x(j)
524  10           continue
525            b = xs-a
526            c = 0.0
527            do  11 j = nleft, nrt
528               c = c+w(j)*(x(j)-a)**2
529  11           continue
530            if (sqrt(c) .le. .001*range) goto 13
531               b = b/c
532c points are spread out enough to compute slope
533               do  12 j = nleft, nrt
534                  w(j) = w(j)*(b*(x(j)-a)+1.0)
535  12              continue
536  13        continue
537  14     ys = 0.0
538         do  15 j = nleft, nrt
539            ys = ys+w(j)*y(j)
540  15        continue
541  16  return
542      end
543