1cc -*- mode: fortran; kept-new-versions: 25; kept-old-versions: 20 -*-
2cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
3cc  rrcov : Scalable Robust Estimators with High Breakdown Point
4cc
5cc  This program is free software; you can redistribute it and/or modify
6cc  it under the terms of the GNU General Public License as published by
7cc  the Free Software Foundation; either version 2 of the License, or
8cc  (at your option) any later version.
9cc
10cc  This program is distributed in the hope that it will be useful,
11cc  but WITHOUT ANY WARRANTY; without even the implied warranty of
12cc  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13cc  GNU General Public License for more details.
14cc
15cc  You should have received a copy of the GNU General Public License
16cc  along with this program; if not, write to the Free Software
17cc  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18cc
19cc  I would like to thank Peter Rousseeuw and Katrien van Driessen for
20cc  providing the initial code of this function.
21cc
22cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
23cc
24cc   Computes the MCD estimator of multivariate location and scatter.
25cc   This estimator is given by the subset of h observations for which
26cc   the determinant of their covariance matrix is minimal. The MCD
27cc   location estimate is then the mean of those h points, and the MCD
28cc   scatter estimate is their covariance matrix. This value of h may be
29cc   chosen by the user; its default value is roughly n/2.
30cc
31cc   The MCD estimator was first introduced in:
32cc
33cc      Rousseeuw, P.J. (1984), "Least Median of Squares Regression,"
34cc      Journal of the American Statistical Association, Vol. 79,
35cc      pp. 871-881. [See page 877.]
36cc
37cc   The MCD is a robust estimator in the sense that the estimates are
38cc   not unduly influenced by outliers in the data, even if there
39cc   are many outliers. Its robustness was proved in:
40cc
41cc      Rousseeuw, P.J. (1985), "Multivariate Estimation with High
42cc      Breakdown Point," in Mathematical Statistics and Applications,
43cc      edited by  W. Grossmann, G. Pflug, I. Vincze, and W. Wertz.
44cc      Dordrecht: Reidel Publishing Company, pp. 283-297.
45cc
46cc      Rousseeuw, P.J. and Leroy, A.M. (1987), Robust Regression and
47cc      Outlier Detection, Wiley-Interscience, New York. [Chapter 7]
48cc
49cc   The program also computes the distance of each observation
50cc   from the center (location) of the data, relative to the shape
51cc   (scatter) of the data:
52cc
53cc   * Using the classical estimates yields the Mahalanobis distance
54cc     MD(i). Often, outlying points fail to have a large Mahalanobis
55cc     distance because of the masking effect.
56cc
57cc   * Using the MCD estimates yields a robust distance RD(i).
58cc     These distances allow us to easily identify the outliers.
59cc
60cc   For applications of robust distances in a regression context see:
61cc
62cc      Rousseeuw, P.J. and van Zomeren, B.C. (1990), "Unmasking
63cc      Multivariate Outliers and Leverage Points," Journal of the
64cc      American Statistical Association, Vol. 85, 633-639.
65cc
66cc   There also a diagnostic plot is given to distinguish between
67cc   regular observations, vertical outliers, good leverage points,
68cc   and bad leverage points.
69cc
70cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
71cc
72cc   The new FAST_MCD algorithm introduced here is due to
73cc
74cc      Rousseeuw, P.J. and Van Driessen, K. (1997), "A Fast
75cc      Algorithm for the Minimum Covariance Determinant
76cc      Estimator," in preparation.
77cc
78cc   The algorithm works as follows:
79cc
80cc      The dataset contains n cases, and nvar variables are used.
81cc      Let n_0 := 2 * nmini (== 600 by default).
82cc      When n <  n_0, the algorithm will analyze the dataset as a whole,
83cc      when n >= n_0, the algorithm will use several subdatasets.
84cc
85cc   1. n < n_0 : When the dataset is analyzed as a whole, a trial
86cc      subsample of nvar+1 cases is taken, of which the mean and
87cc      covariance matrix is calculated. The h cases with smallest
88cc      relative distances are used to calculate the next mean and
89cc      covariance matrix, and this cycle is repeated k1 times.
90cc      [For small n we can consider all subsets of nvar+1 out of n, else
91cc      the algorithm draws 500 random subsets.]
92cc      Afterwards, the best 10 solutions (covariance matrices and
93cc      corresponding means) are used as starting values for the final
94cc      iterations. These iterations stop when two subsequent determinants
95cc      become equal. (At most k3 iteration steps are taken.)
96cc      The solution with smallest determinant is retained.
97cc
98cc   2. n > n_0 --- more than n_0 = 2*nmini cases: The algorithm
99cc      does part of the calculations on (at most) kmini nonoverlapping
100cc      subdatasets, of (roughly) nmini cases.
101cc
102cc      Stage 1: For each trial subsample in each subdataset,
103cc      k1 iterations are carried out in that subdataset.
104cc      For each subdataset, the 10 best solutions are stored.
105cc
106cc      Stage 2 considers the union of the subdatasets, called the
107cc      merged set. (If n is large, the merged set is a proper subset of
108cc      the entire dataset.) In this merged set, each of the 'best
109cc      solutions' of stage 1 are used as starting values for k2
110cc      iterations. Also here, the 10 best solutions are stored.
111cc
112cc      Stage 3 depends on n, the total number of cases in the
113cc      dataset. If n <= 5000, all 10 preliminary solutions are iterated
114cc      k3 times. If n > 5000, only the best preliminary
115cc      solution is iterated, and the number of iterations decreases to 1
116cc      according to n*nvar. (If n*nvar <= 100,000 we iterate k3 times,
117cc      whereas for n*nvar > 1,000,000 we take only one iteration step.)
118cc
119cc   An important advantage of the algorithm FAST_MCD is that it allows
120cc   for exact fit situations, where more than h observations lie on
121cc   a hyperplane. Then the program still yields the MCD location and
122cc   scatter matrix, the latter being singular (as it should be), as
123cc   well as the equation of the hyperplane.
124cc
125cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
126cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
127
128      subroutine rffastmcd(dat, n,nvar, nhalff, krep, nmini,kmini, ! 7
129     *     initcov, initmean, inbest, det,                         ! 11
130     *     weight, fit, coeff, kount, adcov,                       ! 16
131     *     temp, index1, index2, indexx, nmahad, ndist, am, am2,   ! 24
132     *     slutn, med, mad, sd, means, bmeans, w, fv1, fv2,        ! 33
133     *     rec, sscp1, cova1, corr1, cinv1, cova2, cinv2, z,       ! 41
134     *     cstock, mstock, c1stock, m1stock, dath,                 ! 46
135     *     cutoff, chimed, i_trace)                                ! 49 args
136
137cc      VT::10.10.2005 - a DATA operator was used for computing the
138cc              median and the 0.975 quantile of the chisq distribution
139cc              with nvar degrees of freedom. Since now we have no
140cc              restriction on the number of variables, these will be
141cc              passed as parameters - cutoff and chimed
142
143      implicit none
144
145cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
146c
147c  ALGORITHM PARAMETERS:
148c
149c      The number of iteration steps in stages 1,2 and 3 can be changed
150c      by adapting the parameters k1, k2, and k3.
151
152      integer k1,k2,k3, int_max
153      parameter (k1 =  2 )
154      parameter (k2 =  2 )
155      parameter (k3 = 100)
156c int_max: easily recognized, slightly smaller than 2147483647 = .Machine$integer.max
157      parameter (int_max = 2146666666)
158c Arguments
159      integer n,nvar ! (n, p)
160      integer nhalff ! == quan := h(alpha) >= n/2 "n half"
161      integer krep   ! krep == nsamp
162c  krep := the total number of trial subsamples
163c          to be drawn when n exceeds 2*nmini;
164c          krep = 0  :<==>  "exact"  <==>  all possible subsamples
165c      was hardcoded krep := 500; now an *argument*
166      integer kmini ! the maximal number of subdatasets   and
167      integer nmini ! their minimal size
168
169      double precision dat(n,nvar)
170      double precision initcov(nvar*nvar), initmean(nvar)
171      integer inbest(nhalff)
172      double precision det
173      integer weight(n), fit
174      double precision coeff(kmini,nvar)
175      integer kount
176      double precision adcov(nvar*nvar)
177
178      integer temp(n)
179      integer index1(n), index2(n), indexx(n)
180      double precision nmahad(n), ndist(n)
181      double precision am(n), am2(n), slutn(n)
182
183      double precision med(nvar), mad(nvar), sd(nvar), means(nvar),
184     *     bmeans(nvar), w(nvar), fv1(nvar), fv2(nvar)
185
186      double precision rec(nvar+1),
187     *     sscp1((nvar+1)*(nvar+1)), corr1(nvar*nvar),
188     *     cova1(nvar*nvar), cinv1(nvar*nvar),
189     *     cova2(nvar*nvar), cinv2(nvar*nvar),
190     *     z(nvar*nvar)
191
192      double precision cstock(10,nvar*nvar), mstock(10,nvar),
193     *     c1stock(10*kmini, nvar*nvar),
194     *     m1stock(10*kmini, nvar*nvar),
195     *     dath(nmini*kmini, nvar)
196
197      double precision cutoff, chimed
198      integer i_trace
199
200      integer l2i
201c Functions from ./rf-common.f :
202      integer replow
203      integer rfncomb
204      double precision rffindq
205
206c ------------------------------------------------------------------
207
208c Variables
209      integer i,ii,iii, ix, j,jj,jjj, k,kk,kkk,kstep,
210     *     l,lll, m,mm,minigr,
211     *     nn, ngroup,nhalf,nrep,nsel, nv_2
212      double precision bstd, deti,detimin1,dist,dist2, eps,
213     *     object, qorder, t
214
215c km10, nmaxi: now *variable* as nmini
216      integer km10, nmaxi,
217     *     ierr,matz,pnsel, tottimes, step,
218     *     flag(10*kmini), mini(kmini),
219     *     subdat(2, nmini*kmini)
220      double precision mcdndex(10,2,kmini)
221c     subndex: vector of indices;
222c     length(subndex) = maximal value of n_j := mini(j) {j in 1:ngroup} below; n0 := nmini
223c     mini(j) = n1 or n1+1,  where  n0 <= n1 < n_max := max_j n_j <= n1+1 <= 1+ (3 n0 - 1)/2 = (3 n0 + 1)/2
224c     ==> see vignette ../vignettes/fastMcd-kmini.Rnw
225      integer subndex((3*nmini + 1)/ 2)
226      double precision med1,med2, percen, pivot,rfmahad,medi2
227      logical all,part,fine,final,class
228c     -Wall (false alarm):
229      all = .true.
230      part= .false.
231c     Consistency correction now happens in R
232
233      if(i_trace .ge. 2) then
234         call pr1mcd(i_trace, n, nvar, nhalff, krep, nmini, kmini)
235      endif
236
237      call rndstart
238C     -------- == GetRNGstate() in C
239
240      nrep = krep
241      kstep = k1
242      medi2 = 0
243
244cc  From here on, the sample size n is known.
245cc  Some initializations can be made. First of all, h (= the number of
246cc  observations on which the MCD is based) is given by the integer variable
247cc  nhalff.
248cc  If nhalff equals n, the MCD is the classical covariance matrix.
249cc  The logical value class indicates this situation.
250cc  The variable jbreak is the breakdown point of the MCD estimator
251cc  based on nhalff observations, whereas jdefaul = (n+nvar+1)/2
252cc  would be the optimal value of nhalff, with maximal breakdown point.
253cc  The variable percen is the corresponding percentage (MM: rather "fraction").
254cc
255c     unused    jbreak=rfnbreak(nhalff,n,nvar)
256
257      percen = dble(nhalff) / n ! the fraction, also called  'alpha'
258
259      if(nvar.lt.5) then
260         eps=1.0D-12
261      else
262         if(nvar.ge.5.and.nvar.le.8) then
263            eps=1.0D-14
264         else
265            eps=1.0D-16
266         endif
267      endif
268
269      class = (nhalff .ge. n)
270      if(class) goto 9500 ! compute *only* the classical estimate
271
272      if(nvar.eq.1) then
273         do jj=1,n
274            ndist(jj)=dat(jj,1)
275         end do
276         call rfshsort(ndist,n)
277cc. consistency correction now happens in R code
278cc.       nquant=min(int(real(((nhalff*1.D0/n)-0.5D0)*40))+1,11)
279cc.       factor=faclts(nquant)
280cc.       call rfmcduni(ndist,n,nhalff,slutn,bstd,am,am2, factor,
281         call rfmcduni(ndist,n,nhalff,slutn,bstd,am,am2, 1.d0,
282     *        n-nhalff+1)
283         initmean(1)=slutn(1)
284         adcov(1)=bstd
285         initcov(1)=bstd
286         goto 9999
287      endif
288
289cc  p >= 2   in the following
290cc  ------
291c These are "constants" given the arguments:
292      nmaxi = nmini*kmini
293      km10 = 10*kmini
294      nv_2 = nvar*nvar
295
296cc  Some initializations:
297cc    matz = auxiliary variable for the subroutine rs, indicating whether
298cc           or not eigenvectors are calculated
299cc    nsel = number of variables + 1
300cc    ngroup = number of subdatasets, is in {1,2,.., kmini}
301cc    part = logical value, true if the dataset is split up
302cc    fine = logical value, becomes true when the subsets are merged
303cc    final = logical value, to indicate the final stage of the algorithm
304cc    all = logical value, true if all (p+1)-subsets out n of should be drawn;
305cc          always true for (very) small n, but also when krep=0 (special value)
306cc    subdat = matrix with a first row containing indices of observations
307cc             and a second row indicating the corresponding subdataset
308cc
309      matz=1
310      nsel=nvar+1
311      ngroup=1
312      fine=.false.
313      final=.false.
314      do i=1,nmaxi
315         subdat(1,i)=int_max
316         subdat(2,i)=int_max
317      end do
318
319cc  Determine whether the dataset needs to be divided into subdatasets
320cc  or can be treated as a whole. The subroutine rfrdraw constructs
321cc  nonoverlapping subdatasets, with uniform distribution of the case numbers.
322cc  For small n, the number of trial subsamples is determined.
323
324c     part := Shall we partition the data into sub-datasets / "groups"?
325      part = (krep.gt.0 .and. n .ge. (2*nmini))
326      all = .not. part
327      if(part) then
328         do i=1,kmini
329            mini(i)=0
330         end do
331         kstep=k1
332         ngroup = n / nmini ! =: k = n % nmini (integer division)
333         if(ngroup .lt. kmini) then
334c          we distribute n evenly into ngroup subdatasets, of size
335            mm = n / ngroup ! =: n_0 =  n % k ==> rest r = n - k*N = n-k*n_0
336c          The rest r in {0,..,k-1} gives one extra obs. in the last r groups, i.e.,
337c          group numbers j > jj := k - r :
338            ii = n - ngroup*mm ! =: r
339            jj = ngroup - ii   ! = k - r
340            do j =  1,jj
341               mini(j) = mm
342            end do
343            do j =  jj+1,ngroup
344               mini(j) = mm +1
345            end do
346            minigr = ngroup*mm + ii
347         else !  ngroup = k := floor(n/nmini) >= kmini =: k_0 :
348            ngroup = kmini
349            do j=1,kmini
350               mini(j)=nmini
351            end do
352            minigr = kmini*nmini
353         end if
354
355         nhalf = int(mini(1)*percen)
356         nrep = krep / ngroup ! integer division
357         if(i_trace .ge. 2)
358     +        call prp1mcd (n,ngroup,minigr,nhalf,nrep, mini)
359         call rfrdraw(subdat,n,minigr,mini,ngroup,kmini)
360      else
361c  "not part" : not partitioning; either  krep == 0  or   n <= 2*nmini-1 ( = 599 by default)
362         minigr=n
363         nhalf=nhalff
364         kstep=k1
365         if(krep.eq.0 .or. n.le.replow(nsel)) then
366c             use all combinations; happens iff  nsel = nvar+1 = p+1 <= 6
367            nrep = rfncomb(nsel,n)
368         else
369            nrep=krep
370            all = .false.
371         endif
372      endif
373c     seed=iseed
374
375c     above: prp1mcd (n,ngroup, minigr, nhalf,nrep, mini)
376      if(i_trace .ge. 2)
377     1     call pr2mcd(l2i(part), l2i(all),
378     2                 kstep, ngroup, minigr, nhalf, nrep)
379
380cc
381cc  Some more initializations:
382cc    m1stock = matrix containing the means of the ngroup*10 best estimates
383cc              obtained in the subdatasets.
384cc    c1stock = matrix containing the covariance matrices of the ngroup*10
385cc              best estimates obtained in the subdatasets.
386cc    mstock = matrix containing the means of the ten best estimates
387cc             obtained after merging the subdatasets and iterating from
388cc             their best estimates.
389cc    cstock = matrix containing the covariance matrices of the ten best
390cc             estimates obtained after merging the subdatasets
391cc             and iterating from their best estimates.
392cc    means = mean vector
393cc    bmeans = initial MCD location estimate
394cc    sd = standard deviation vector
395cc    nmahad = vector of mahalanobis distances
396cc    ndist = vector of general (possibly robust) distances
397cc    inbest = best solution vector
398cc    index1 = index vector of subsample observations
399cc    index2 = index vector of ordered mahalanobis distances
400cc    indexx = temporary index vector, parallel to index1, used when
401cc              generating all possible subsamples
402cc    temp  = auxiliary vector
403cc    flag = vector with components indicating the occurrence of a
404cc           singular intermediate MCD estimate.
405cc
406      do j=1,nvar
407         do k=1,10
408            mstock(k,j)=1234567.D0
409            do kk=1,kmini
410               m1stock((kk-1)*10+k,j)=1234567.D0
411            end do
412            do i=1,nvar
413               do kk=1,kmini
414                  c1stock((kk-1)*10+k,(j-1)*nvar+i)=1234567.D0
415               end do
416               cstock(k,(j-1)*nvar+i)=1234567.D0
417            end do
418         end do
419         means(j)=0.D0
420         bmeans(j)=0.D0
421         sd(j)=0.D0
422      end do
423
424      do j=1,n
425         nmahad(j)=0.D0
426         ndist(j)=0.D0
427         index1(j)=int_max
428         index2(j)=int_max
429         indexx(j)=int_max
430         temp(j)=int_max
431      end do
432      do j=1,km10
433         flag(j)=1
434      end do
435
436
437 9500 continue
438c==== ********* Compute the classical estimates **************
439c
440      call rfcovinit(sscp1,nvar+1,nvar+1)
441      do i=1,n
442         do j=1,nvar
443            rec(j)=dat(i,j)
444         end do
445         call rfadmit(rec,nvar,sscp1)
446      end do
447      call rfcovar(n,nvar,sscp1,cova1,means,sd)
448      do j=1,nvar
449         if(sd(j).eq.0.D0)      goto 5001
450      end do
451
452      call rfcovcopy(cova1,cinv1,nvar,nvar)
453      det= 0.
454      do j=1,nvar
455         pivot=cinv1((j-1)*nvar+j)
456         det=det + log(pivot)
457         if(pivot.lt.eps)       goto 5001
458
459         call rfcovsweep(cinv1,nvar,j)
460      end do
461      call rfcorrel(nvar,cova1,corr1,sd)
462
463c     if just classical estimate, we are done
464      if(class)         goto 9999
465
466      goto 5002
467
468c     singularity '1' (exact fit := 1) :
469 5001 continue
470      call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
471      call rfdis(dat,z,ndist,n,nvar,n,nvar,means)
472      call rfexact(kount,n,ndist, nvar,
473     *     sscp1,rec,dat, cova1,means,sd,weight)
474      call rfcovcopy(cova1,initcov,nvar,nvar)
475      call rfcovcopy(means,initmean,nvar,1)
476      do j=1,nvar
477         coeff(1,j)=z(j)
478      end do
479      fit=1
480      goto 9999
481
482
483 5002 continue
484cc
485cc Compute and store classical Mahalanobis distances.
486cc
487      do j=1,n
488         do i=1,nvar
489            rec(i)=dat(j,i)
490         end do
491         nmahad(j)=rfmahad(rec,nvar,means,cinv1)
492      end do
493
494
495cc ******* Compute the MCD estimates ************** ----------------------------
496
497cc Main loop: inspects the subsamples.
498cc   Every time the sscp of the subsample is placed in sscp1,
499cc   its covariance matrix in cova1, and its inverse in cinv1 .
500cc   The minimum covariance determinant matrix is placed in cova2,
501cc   and its inverse in cinv2.
502cc   The robust distances are placed in ndist.
503cc
504c    tottimes := counting the total number of iteration steps in the main loop
505cc
506cc   The algorithm returns here twice when the dataset is divided
507cc   at the beginning of the program. According to the situation,
508cc   new initializations are made.
509c  fine  == TRUE : <==> We are in the second stage, where the subdatasets are merged,
510c  final == TRUE : <==> We are in the last stage, when the whole dataset is considered
511c     			In the last stage, the number of iterations 'nrep'
512c                       is determined according to the total number of observations and the dimension.
513      tottimes=0
514
515 5555 object=10.D25
516
517      if(.not. part .or. final) then
518         nn=n
519      else if (fine) then !->  part  &  fine  &  .not. final
520         nn=minigr
521      else !->  part - "phase 1" (.not. fine  & .not. final)
522         nn=-1
523      endif
524      if(i_trace .ge. 2)  ! " Main loop, phase[%s]: ... "
525     1     call pr3mcd(l2i(part), l2i(fine), l2i(final),
526     2                 nrep, nn, nsel, nhalf, kstep,  nmini, kmini)
527
528      if(fine .or.(.not.part.and.final)) then
529         nrep = 10
530c        ----   == hardcoded
531         nsel = nhalf
532         kstep = k2
533         if (final) then ! "final": stage 3 --
534            nhalf=nhalff
535            ngroup=1
536c           ksteps := k3 (= 100) unless n*p is "large" where
537c	    ksteps jumps down to at most 10 <<- "discontinuous!" FIXME
538            if (n*nvar .le.100000) then
539               kstep=k3  ! = 100 ("hardcoded default")
540            else if (n*nvar .gt.100000 .and. n*nvar .le.200000) then
541               kstep=10
542            else if (n*nvar .gt.200000 .and. n*nvar .le.300000) then
543               kstep=9
544            else if (n*nvar .gt.300000 .and. n*nvar .le.400000) then
545               kstep=8
546            else if (n*nvar .gt.400000 .and. n*nvar .le.500000) then
547               kstep=7
548            else if (n*nvar .gt.500000 .and. n*nvar .le.600000) then
549               kstep=6
550            else if (n*nvar .gt.600000 .and. n*nvar .le.700000) then
551               kstep=5
552            else if (n*nvar .gt.700000 .and. n*nvar .le.800000) then
553               kstep=4
554            else if (n*nvar .gt.800000 .and. n*nvar .le.900000) then
555               kstep=3
556            else if (n*nvar .gt.900000 .and. n*nvar .le.1000000) then
557               kstep=2
558            else ! n*p > 1e6
559               kstep=1
560            endif
561            if (n.gt.5000) then
562               nrep=1
563            endif
564         else
565            nhalf=int(minigr*percen)
566         endif
567      endif
568
569      do i=1,nsel-1
570         index1(i)=i
571         indexx(i)=i
572      end do
573      index1(nsel)=nsel-1
574      indexx(nsel)=nsel-1
575cc
576cc  Initialization of the matrices to store partial results. For the
577cc  first stage of the algorithm, the currently best covariance matrices and
578cc  means are stored in the matrices c1stock and m1stock initialized earlier.
579cc  The corresponding objective values and the number of the trial subset
580cc  are stored in the matrix mcdndex.
581cc  For the second stage of the algorithm or for small datasets, only the
582cc  currently best objective values are stored in the same matrix mcdndex
583cc  and the corresponding covariance matrices and mean vectors are stored in
584cc  the matrices cstock and mstock initialized earlier.
585cc
586      if(.not. final) then
587         do i=1,10
588            do j=1,ngroup
589               mcdndex(i,1,j)=10.D25
590               mcdndex(i,2,j)=10.D25
591            end do
592         end do
593      endif
594      if(.not.fine .and. .not.final) then !-- first phase
595         do j=1,nvar
596            do i=1,n
597               am (i)=dat(i,j)
598               am2(i)=dat(i,j)
599            end do
600            if(2*n/2 .eq. n) then
601               med1=rffindq(am, n, n/2,   index2)
602               med2=rffindq(am2,n,(n+2)/2,index2)
603               med(j)=(med1+med2)/2
604            else
605               med(j)=rffindq(am,n,(n+1)/2,index2)
606            endif
607            do i=1,n
608               ndist(i)=dabs(dat(i,j)-med(j))
609            end do
610            mad(j)=rffindq(ndist,n,nhalff,index2)
611            if(mad(j)-0.D0 .lt. eps) then
612               do k=1,j-1
613                  do i=1,n
614                     dat(i,k)=dat(i,k)*mad(k)+med(k)
615                  end do
616               end do
617               call rfcovinit(sscp1,nvar+1,nvar+1)
618               do k=1,nsel
619                  do m=1,nvar
620                     rec(m)=dat(index2(k),m)
621                  end do
622                  call rfadmit(rec,nvar,sscp1)
623               end do
624               call rfcovar(nsel,nvar,sscp1,cova1,means,sd)
625               call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
626
627C       VT::15.11.2014, fixing array overrun, found by MM
628C       The following code expects that z (the plane coefficients)
629C       are all zeros with 1 in the position of the variable with MAD=0
630C       If not, tries to find it.
631C
632            if(.FALSE.) then
633               if(z(j).ne.1) then
634                  do kk=1,nvar
635                     if(z(kk*nvar+j).eq.1) then
636                        do l=1,nvar
637                           z(l)=z(kk*nvar+l)
638                        end do
639                        goto 76 ! break
640                     endif
641                  end do
642               endif
643 76            continue
644            else
645C       Instead of this, we set all coefficients to 0 and the one of
646C       variable j to 1. The exactfit code will be set 3 and will be
647C       handled respectively by the R code.
648               do kk=1,nvar
649                 z(kk) = 0
650               end do
651               z(j) = 1
652            end if
653
654               call rfdis(dat,z,ndist,n,nvar,n,nvar,means)
655               call rfexact(kount,n,ndist, nvar,
656     *              sscp1,rec,dat, cova1,means,sd,weight)
657               call rfcovcopy(cova1,initcov,nvar,nvar)
658               call rfcovcopy(means,initmean,nvar,1)
659               do jjj=1,nvar
660                  coeff(1,jjj)=z(jjj)
661               end do
662               fit=3
663               goto 9999
664            endif
665            do i=1,n
666               dat(i,j)=(dat(i,j)-med(j))/mad(j)
667            end do
668         end do
669      endif
670cc
671cc  The matrix dath contains the observations to be used in the
672cc  algorithm. In the first stage of the split-up procedure dath contains
673cc  nmini objects, corresponding to the original observations, with the index
674cc  of the processed group in the array subdat. For the second stage, the
675cc  data points of all the subdatasets are merged in dath.
676cc  The variable kount indicates the occurrence of a singular subsample leading
677cc  to the corresponding plane. In some situations the variable kount counts
678cc  the number of observations on that plane.
679cc
680      if (fine .and. .not. final) then
681         do j=1,minigr
682            do k=1,nvar
683               dath(j,k)=dat(subdat(1,j),k)
684            end do
685         end do
686      endif
687      kount=0
688
689c---- For-Loop over groups  - - - - - - - - - - - - - - - - - - - - -
690      do 1111 ii= 1,ngroup
691         if(.not.fine) kount=0
692         if(part .and. .not. fine) then
693            nn=mini(ii)
694            kk=0
695            do j=1,minigr
696               if(subdat(2,j).eq.ii) then
697                  kk=kk+1
698                  subndex(kk)=subdat(1,j)
699               endif
700            end do
701            do j=1,mini(ii)
702               do k=1,nvar
703                  dath(j,k)=dat(subndex(j),k)
704               end do
705            end do
706         endif
707
708         if(i_trace .ge. 3) call prgrmcd(ii, nn, i_trace)
709         do i=1,nn
710            index2(i)=i
711         end do
712
713cc  The number of trial subsamples is represented by nrep, which depends
714cc  on the data situation.
715cc  When all (p+1)-subsets out of n can be drawn, the subroutine rfgenpn
716cc  is used. Otherwise, random subsamples are drawn by the routine
717cc  rfrangen. The trial subsamples are put in the array index1. The
718cc  same thing happens for large datasets, except that the number of
719cc  observations is nmini instead of n.
720cc
721cc  When a trial subsample is singular, the algorithm counts the number of
722cc  observations that lie on the hyperplane corresponding to this sample.
723cc  If, for small datasets, this number is larger than nhalff, the program
724cc  stops (exact fit) and gives the mean and the covariance matrix
725cc  of the observations on the hyperplane, together with the equation
726cc  of the hyperplane.
727cc  For large datasets, the algorithm first checks whether there are more
728cc  than nhalff observations on the hyperplane. If this is the case, the
729cc  program stops for the same reason of exact fit and gives the covariance
730cc  matrix and mean of the observations on the hyperplane. If not, the
731cc  algorithm counts the number of observations that lie on the hyperplane.
732cc  When this number is smaller than the current nhalf in the subdataset, these
733cc  observations are extended to nhalf observations by adding those
734cc  observations that have smallest orthogonal distances to the hyperplane
735cc  and the algorithm continues.
736cc  When larger, the coefficients of the hyperplane are stored in the matrix
737cc  m1stock for use as starting value in the next stage, and the flag of this
738cc  estimate gets the value zero.
739cc
740cc  In the second stage of the algorithm, when the subdatasets are merged,
741cc  the array index2 contains the indices of the observations
742cc  corresponding to the nhalf observations with minimal relative distances
743cc  with respect to the best estimates of the first stage.
744cc  When the estimate of the first stage is a hyperplane, the algorithm
745cc  investigates whether there are more than the current nhalf observations of
746cc  the merged subdataset on that hyperplane. If so, the coefficients of the
747cc  hyperplane are again stored, now in the matrix mstock, for the final
748cc  stage of the algorithm.
749cc  If not, the observations on the hyperplane are extended to nhalf
750cc  observations by adding the observations in the merged dataset with
751cc  smallest orthogonal distances to that hyperplane.
752cc  For small datasets or for larger datasets with n <= nmaxi := nmini*kmini,
753cc  the algorithm already stops when one solution becomes singular,
754cc  since we then have an exact fit.
755cc
756cc  In the third stage, the covariance matrices and means of the best
757cc  solutions of the second stage are used as starting values.
758cc  Again, when a solution becomes singular, the subroutine 'exact'
759cc  determines the hyperplane through at least nhalff observations and stops
760cc  because of the exact fit.
761cc
762cc  When the program stops because of an exact fit, the covariance matrix and
763cc  mean of the observations on the hyperplane will always be given.
764cc
765C        VT::27.10.2014 - an issue with nsamp="exact" fixed:
766         do ix=1,n
767             indexx(ix)=index1(ix)
768         end do
769
770         do 1000 i=1,nrep
771            pnsel=nsel
772            tottimes=tottimes+1
773            if(i_trace .ge. 4) call pr4mcd(i)
774            call rchkusr() ! <- allow user interrupt
775            deti= -1.d300
776            detimin1=deti
777            step=0
778            call rfcovinit(sscp1,nvar+1,nvar+1)
779            if((part.and..not.fine).or.(.not.part.and..not.final)) then
780               if(part) then
781                  call rfrangen(mini(ii),nsel,index1)
782               else if(all) then
783                  call rfgenpn(n,nsel,indexx)
784                  do ix=1,n
785                     index1(ix)=indexx(ix)
786                  end do
787               else
788                  call rfrangen(n,nsel,index1)
789               endif
790            endif
791cc
792cc  The covariance matrix and mean of the initial subsamples are
793cc  calculated with the subroutine covar and represented by
794cc  the variables cova1 and means.
795cc
796cc  In the following stages of the algorithm, the covariance matrices and means
797cc  used as starting values are already stored in the matrices c1stock
798cc  and m1stock (for the second stage), and in the matrices cstock and mstock
799cc  (for the third stage).
800cc
801cc  The inverse cinv1 of the covariance matrix is calculated by the
802cc  subroutine rfcovsweep, together with its determinant det.
803c
804c Repeat
805 9550       call rfcovinit(sscp1,nvar+1,nvar+1)
806            if(.not.fine.and.part) then
807               do j=1,pnsel
808                  do m=1,nvar
809                     rec(m)=dath(index1(j),m)
810                  end do
811                  call rfadmit(rec,nvar,sscp1)
812               end do
813               call rfcovar(pnsel,nvar,sscp1,cova1,means,sd)
814            endif
815            if(.not.part.and..not.final) then
816               do j=1,pnsel
817                  do m=1,nvar
818                     rec(m)=dat(index1(j),m)
819                  end do
820                  call rfadmit(rec,nvar,sscp1)
821               end do
822               call rfcovar(pnsel,nvar,sscp1,cova1,means,sd)
823            endif
824            if (final) then
825               if(mstock(i,1) .ne. 1234567.D0) then
826                  do jj=1,nvar
827                     means(jj)=mstock(i,jj)
828                     do kk=1,nvar
829                        cova1((jj-1)*nvar+kk)=cstock(i,(jj-1)*nvar+kk)
830                     end do
831                  end do
832               else
833                  goto 1111
834               endif
835               if(flag(i).eq.0) then
836                  qorder=1.D0
837                  do jjj=1,nvar
838                     z(jjj)=coeff(1,jjj)
839                  end do
840                  call rfdis(dat,z,ndist,n,nvar,nn,nvar, means)
841                  dist2=rffindq(ndist,nn,nhalf,index2)
842                  goto 9555
843               endif
844            endif
845            if (fine .and. .not.final) then
846               if(m1stock((ii-1)*10+i,1) .ne. 1234567.D0) then
847                  do jj=1,nvar
848                     means(jj)=m1stock((ii-1)*10+i,jj)
849                     do kk=1,nvar
850                        cova1((jj-1)*nvar+kk)=c1stock((ii-1)*10+i,
851     *                       (jj-1)*nvar+kk)
852                     end do
853                  end do
854               else
855                  goto 1111
856               endif
857               if(flag((ii-1)*10+i).eq.0) then
858                  qorder=1.D0
859                  do jjj=1,nvar
860                     z(jjj)=coeff(ii,jjj)
861                  end do
862                  call rfdis(dath,z,ndist,nmaxi,nvar,nn,nvar, means)
863                  call rfshsort(ndist,nn)
864                  qorder=ndist(nhalf)
865                  if(dabs(qorder-0.D0).lt.10.D-8 .and. kount.eq.0
866     *                 .and. n.gt.nmaxi) then
867                     kount=nhalf
868                     do kkk=nhalf+1,nn
869                        if(dabs(ndist(kkk)-0.D0).lt.10.D-8) then
870                           kount=kount+1
871                        endif
872                     end do
873                     flag(1)=0
874                     do kkk=1,nvar
875                        coeff(1,kkk)=z(kkk)
876                     end do
877                     call rfstore2(nvar,cstock,mstock,nv_2,
878     *                    kmini,cova1,means,i,mcdndex,kount)
879                     kount=1
880                     goto 1000
881                  else if(dabs(qorder-0.D0).lt.10.D-8 .and.
882     *                    kount.ne.0 .and. n.gt.nmaxi) then
883                     goto 1000
884                  else
885                     flag(1)=1
886                     dist2=rffindq(ndist,nn,nhalf,index2)
887                     goto 9555
888                  endif
889               endif
890            endif
891            call rfcovcopy(cova1,cinv1,nvar,nvar)
892            det=0.
893            do 200 j=1,nvar
894               pivot=cinv1((j-1)*nvar+j)
895               det=det + log(pivot)
896               if(pivot.lt.eps) then
897                  call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
898                  qorder=1.D0
899                  if(.not.part.or.final) then
900                     call rfdis(dat,z,ndist,n,nvar,nn,nvar,means)
901                  else
902                     call rfdis(dath,z,ndist,nmaxi,nvar,nn,nvar,means)
903                  endif
904                  call rfshsort(ndist,nn)
905                  qorder=ndist(nhalf)
906                  if(dabs(qorder-0.D0).lt. 10.D-8 .and. .not.part) then
907                     call transfo(cova1,means,dat,med,mad,nvar,n)
908                     call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
909                     call rfdis(dat,z,ndist,n,nvar,nn,nvar,means)
910                     call rfexact(kount,n,ndist, nvar,
911     *                    sscp1,rec,dat, cova1,means,sd,weight)
912                     call rfcovcopy(cova1,initcov,nvar,nvar)
913                     call rfcovcopy(means,initmean,nvar,1)
914                     do jjj=1,nvar
915                        coeff(1,jjj)=z(jjj)
916                     end do
917                     fit=2
918                     goto 9999
919                  else if(dabs(qorder-0.D0).lt. 10.D-8 .and. part .and.
920     *                    kount.eq.0) then
921                     call rfdis(dat,z,ndist,n,nvar,n,nvar, means)
922                     call rfshsort(ndist,n)
923                     if(dabs(ndist(nhalff)-0.D0).lt.10.D-8) then
924                        call transfo(cova1,means,dat,med,mad,nvar,n)
925                        call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
926                        call rfdis(dat,z,ndist,n,nvar,nn,nvar,means)
927                        call rfexact(kount,n,ndist, nvar,sscp1,
928     *                       rec,dat, cova1,means,sd,weight)
929                        call rfcovcopy(cova1,initcov,nvar,nvar)
930                        call rfcovcopy(means,initmean,nvar,1)
931                        do jjj=1,nvar
932                           coeff(1,jjj)=z(jjj)
933                        end do
934                        fit=2
935                        goto 9999
936                     endif
937                     call rfdis(dath,z,ndist,nmaxi,nvar,nn,nvar, means)
938                     call rfshsort(ndist,nn)
939                     kount=nhalf
940                     do kkk=nhalf+1,nn
941                        if(dabs(ndist(kkk)-0.D0) .lt. 10.D-8) then
942                           kount=kount+1
943                        endif
944                     end do
945                     flag((ii-1)*10+1)=0
946                     do kkk=1,nvar
947                        coeff(ii,kkk)=z(kkk)
948                     end do
949                     call rfstore1(nvar,c1stock,m1stock,nv_2,
950     *                    kmini,cova1,means,i,km10,ii,mcdndex, kount)
951                     kount=1
952                     goto 1000
953                  else if(dabs(qorder-0.D0).lt. 10.D-8 .and. part .and.
954     *                    kount.ne.0) then
955                     goto 1000
956                  else
957C
958C                 VT::27.10.2014 - an issue with nsamp="exact" fixed:
959C
960C                 Add one more observation and return to recompute the
961C                 covariance. In case of complete enumeration, when all
962C                 p+1 subsamples are generated, the array 'index1' must
963C                 be preserved around label 9550).
964C
965                     if(i_trace .ge. 2)
966     *                call intpr('Singularity -> extended subsample: ',
967     *                    -1,index1,nsel)
968
969                     call rfishsort(index1,pnsel)
970                     call prdraw(index1,pnsel, nn)
971                     pnsel=pnsel+1
972                     goto 9550
973c                    --------- until
974                  endif
975               endif
976               call rfcovsweep(cinv1,nvar,j)
977 200        continue
978cc
979cc  Mahalanobis distances are computed with the subroutine rfmahad
980cc  and stored in the array ndist.
981cc  The k-th order statistic of the mahalanobis distances is stored
982cc  in dist2. The array index2 containes the indices of the
983cc  corresponding observations.
984cc
985            do j=1,nn
986               if(.not.part.or.final) then
987                  do mm=1,nvar
988                     rec(mm)=dat(j,mm)
989                  end do
990               else
991                  do mm=1,nvar
992                     rec(mm)=dath(j,mm)
993                  end do
994               endif
995               t=rfmahad(rec,nvar,means,cinv1)
996               ndist(j)=t
997            end do
998            dist2=rffindq(ndist,nn,nhalf,index2)
999cc
1000cc  The variable kstep represents the number of iterations of the current stage (1,2, or 3),
1001cc  i.e., the situation of the program, kstep = k1, k2, or k3.  Within each
1002cc  iteration the mean and covariance matrix of nhalf observations are
1003cc  calculated. The nhalf smallest corresponding mahalanobis distances
1004cc  determine the subset for the next iteration.
1005cc  The best subset for the whole data is stored in the array inbest.
1006cc  The iteration stops when two subsequent determinants become equal.
1007cc
1008 9555       do 400 step=1,kstep
1009               tottimes=tottimes+1
1010               if(i_trace .ge. 4) call pr5mcd(step, tottimes)
1011               call rchkusr() ! <- allow user interrupt
1012               call rfcovinit(sscp1,nvar+1,nvar+1)
1013               do j=1,nhalf
1014                  temp(j)=index2(j)
1015               end do
1016               call rfishsort(temp,nhalf)
1017               do j=1,nhalf
1018                  if(.not.part.or.final) then
1019                     do mm=1,nvar
1020                        rec(mm)=dat(temp(j),mm)
1021                     end do
1022                  else
1023                     do mm=1,nvar
1024                        rec(mm)=dath(temp(j),mm)
1025                     end do
1026                  endif
1027                  call rfadmit(rec,nvar,sscp1)
1028               end do
1029               call rfcovar(nhalf,nvar,sscp1,cova1,means,sd)
1030               call rfcovcopy(cova1,cinv1,nvar,nvar)
1031               det= 0.
1032               do 600 j=1,nvar
1033                  pivot=cinv1((j-1)*nvar+j)
1034                  det=det + log(pivot)
1035                  if(pivot.lt.eps) then
1036                     if(final .or. .not.part .or.
1037     *                  (fine.and. .not.final .and. n .le. nmaxi)) then
1038                        call transfo(cova1,means,dat,med,mad,nvar,n)
1039                        call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
1040                        if(final.or..not.part) then
1041                           call rfdis(dath,z,ndist,n, nvar,nn,
1042     *                          nvar,means)
1043                        else
1044                           call rfdis(dath,z,ndist,nmaxi,nvar,nn,
1045     *                          nvar,means)
1046                        endif
1047                        call rfexact(kount,n,ndist,nvar,sscp1,
1048     *                       rec,dat, cova1,means,sd,weight)
1049                        call rfcovcopy(cova1,initcov,nvar,nvar)
1050                        call rfcovcopy(means,initmean,nvar,1)
1051                        do jjj=1,nvar
1052                           coeff(1,jjj)=z(jjj)
1053                        end do
1054                        fit=2
1055                        goto 9999
1056                     endif
1057                     if(part.and..not.fine.and.kount.eq.0) then
1058                        call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
1059                        call rfdis(dat,z,ndist,n,nvar,n,nvar, means)
1060                        call rfshsort(ndist,n)
1061                        if(dabs(ndist(nhalff)-0.D0).lt.10.D-8) then
1062                           call transfo(cova1,means,dat,med,mad,nvar,n)
1063                           call rs(nvar,nvar,cova1,w,matz,z,
1064     *                          fv1,fv2,ierr)
1065                           call rfdis(dat,z,ndist,n,nvar,n,nvar,means)
1066                           call rfexact(kount,n,ndist,nvar,sscp1,
1067     *                          rec,dat, cova1,means,sd,weight)
1068                           call rfcovcopy(cova1,initcov,nvar,nvar)
1069                           call rfcovcopy(means,initmean,nvar,1)
1070                           do jjj=1,nvar
1071                              coeff(1,jjj)=z(jjj)
1072                           end do
1073                           fit=2
1074                           goto 9999
1075                        endif
1076                        call rfdis(dath,z,ndist,nmaxi,nvar,nn,
1077     *                       nvar,means)
1078                        call rfshsort(ndist,nn)
1079                        kount=nhalf
1080                        do,kkk=nhalf+1,nn
1081                           if(dabs(ndist(kkk)-0.D0).lt.10.D-8) then
1082                              kount=kount+1
1083                           endif
1084                        end do
1085                        flag((ii-1)*10+1)=0
1086                        do kkk=1,nvar
1087                           coeff(ii,kkk)=z(kkk)
1088                        end do
1089                        call rfstore1(nvar,c1stock,m1stock,nv_2,
1090     *                       kmini,cova1,means,i,km10,ii,mcdndex, kount)
1091                        kount=1
1092                        goto 1000
1093                     else
1094                        if(part.and..not.fine.and.kount.ne.0) then
1095                           goto 1000
1096                        endif
1097                     endif
1098                     if(fine.and..not.final.and.kount.eq.0) then
1099                        call rs(nvar,nvar,cova1,w,matz,z,fv1,fv2,ierr)
1100                        call rfdis(dat,z,ndist,n,nvar,n,nvar, means)
1101                        call rfshsort(ndist,n)
1102                        if(dabs(ndist(nhalff)-0.D0).lt.10.D-8) then
1103                           call transfo(cova1,means,dat,med,mad,nvar,n)
1104                           call rs(nvar,nvar,cova1,w,matz,z,
1105     *                          fv1,fv2,ierr)
1106                           call rfdis(dat,z,ndist,n,nvar,n,nvar,means)
1107                           call rfexact(kount,n,ndist,nvar,sscp1,
1108     *                          rec,dat, cova1,means,sd,weight)
1109                           call rfcovcopy(cova1,initcov,nvar,nvar)
1110                           call rfcovcopy(means,initmean,nvar,1)
1111                           do jjj=1,nvar
1112                              coeff(1,jjj)=z(jjj)
1113                           end do
1114                           fit=2
1115                           goto 9999
1116                        endif
1117                        call rfdis(dath,z,ndist,nmaxi,nvar,nn,
1118     *                       nvar,means)
1119
1120                        call rfshsort(ndist,nn)
1121                        kount=nhalf
1122                        do kkk=nhalf+1,nn
1123                           if(dabs(ndist(kkk)-0.D0).lt.10.D-8) then
1124                              kount=kount+1
1125                           endif
1126                        end do
1127                        flag(1)=0
1128                        do kkk=1,nvar
1129                           coeff(1,kkk)=z(kkk)
1130                        end do
1131                        call rfstore2(nvar,cstock,mstock,nv_2,
1132     *                       kmini,cova1,means,i,mcdndex,kount)
1133                        kount=1
1134                        goto 1000
1135                     else
1136                        if(fine.and..not.final.and.kount.ne.0) then
1137                           goto 1000
1138                        endif
1139                     endif
1140                  endif
1141                  call rfcovsweep(cinv1,nvar,j)
1142 600           continue
1143
1144               if(step.ge.2 .and. det.eq.detimin1) then
1145                  goto 5000
1146               endif
1147               detimin1=deti
1148               deti=det
1149               do j=1,nn
1150                  if(.not.part.or.final) then
1151                     do mm=1,nvar
1152                        rec(mm)=dat(j,mm)
1153                     end do
1154                  else
1155                     do mm=1,nvar
1156                        rec(mm)=dath(j,mm)
1157                     end do
1158                  endif
1159                  t=rfmahad(rec,nvar,means,cinv1)
1160                  ndist(j)=t
1161               end do
1162               dist2=rffindq(ndist,nn,nhalf,index2)
1163               dist=dsqrt(dist2)
1164               if(final .and. ((i.eq.1 .and. step.eq.1 .and. .not.fine)
1165     *             .or. det .lt. object)) then
1166                  medi2=rffindq(ndist,nn,int(n/2),index1)
1167                  object=det
1168                  do jjj=1,nhalf
1169                     inbest(jjj)=index2(jjj)
1170                  end do
1171                  call rfcovcopy(cova1,cova2,nvar,nvar)
1172                  call rfcovcopy(cinv1,cinv2,nvar,nvar)
1173                  call rfcovcopy(means,bmeans,nvar,1)
1174               endif
1175 400        continue
1176            if(i_trace .ge. 4) call println()
1177
1178cc  After each iteration, it has to be checked whether the new solution
1179cc  is better than some previous one and therefore needs to be stored. This
1180cc  isn't necessary in the third stage of the algorithm, where only the best
1181cc  solution is kept.
1182
1183 5000       if(.not. final) then
1184               if(part .and. .not. fine) then
1185                  iii=ii
1186               else
1187                  iii=1
1188               endif
1189c     At the end of the algorithm, only the ten
1190c     best solutions need to be stored.
1191
1192cc  For each data group :
1193cc    If the objective function is lower than the largest value in the
1194cc    matrix mcdndex :
1195cc    A distinction is made between different stages of the algorithm:
1196cc      * At the first stage of the split-up situation:
1197cc        -If the new objective value did not yet occur in mcdndex
1198cc         its value and corresponding covariance matrix and mean are
1199cc         stored at the right place in the matrices mcdndex, c1stock and
1200cc         m1stock, and other values are shifted to their new position
1201cc         in these arrays.
1202cc        -If the new objective value already occurs in mcdndex, a
1203cc         comparison is made between the new mean vector and covariance matrix
1204cc         and those estimates with the same determinant.
1205cc         When for an equal determinant, the mean vector or covariance matrix
1206cc         do not correspond, both of them are kept in the matrices mcdndex
1207cc         and nbest.
1208cc      * In the second stage of the algorithm, the covariances and means
1209cc        are stored :
1210cc        - If the new objective value did not yet occur
1211cc          in the matrix mcdndex, it is inserted by shifting the greater
1212cc          determinants upwards and doing the same in the arrays mstock
1213cc          and cstock.
1214cc        - If the new objective value already occurs in the array mcdndex,
1215cc          it is compared with all solutions with the same determinant.
1216cc          In the case of an equality, the means and covariances
1217cc          are compared to determine whether or not to insert the
1218cc          new solution.
1219cc    Otherwise nothing happens. When a singularity occurs,
1220cc    the determinant in the matrix mcdndex is zero and the
1221cc    corresponding flag is zero too, so the search in the arrays mcdndex,
1222cc    m1stock, c1stock, mstock and cstock is done on the rows with flag one.
1223cc
1224
1225               if( flag((iii-1)*10+1).eq.1) then
1226                  lll=1
1227               else
1228                  lll=2
1229               endif
1230               do j=lll,10
1231                  if (det .le. mcdndex(j,2,iii)) then
1232                     if(det.ne.mcdndex(j,2,iii)) then
1233                        if(.not.fine.and.part)                  goto 203
1234                        goto 205
1235                     else
1236                        do kkk=j,10
1237                           if(det.eq.mcdndex(kkk,2,iii)) then
1238                              do jjj=1,nvar
1239                                 if(part.and..not.fine) then
1240                                    if(means(jjj) .ne.
1241     *                                 m1stock((iii-1)*10+ kkk,jjj))
1242     *                                   goto 203
1243                                 else
1244                                    if(means(jjj).ne.mstock(kkk,jjj))
1245     *                                   goto 205
1246                                 endif
1247                              end do
1248                              do jjj=1,nvar*nvar
1249                                 if(part.and..not.fine) then
1250                                    if(cova1(jjj) .ne.
1251     *                                   c1stock((iii-1)*10+ kkk,jjj))
1252     *                                   goto 203
1253                                 else
1254                                    if(cova1(jjj).ne.cstock(kkk,jjj))
1255     *                                   goto 205
1256                                 endif
1257                              end do
1258                           endif
1259                        end do  ! kkk
1260                     endif
1261                     goto 1000
1262
1263c---
1264 203                 do k=10,j+1,-1
1265                        do kk=1,nvar*nvar
1266                           c1stock((iii-1)*10+k,kk)=
1267     *                          c1stock((iii-1)*10+k-1,kk)
1268                        end do
1269                        do kk=1,nvar
1270                           m1stock((iii-1)*10+k,kk)=
1271     *                          m1stock((iii-1)*10+k-1,kk)
1272                        end do
1273                        mcdndex(k,1,iii)=mcdndex(k-1,1,iii)
1274                        mcdndex(k,2,iii)=mcdndex(k-1,2,iii)
1275                     end do
1276
1277                     do kk=1,nvar
1278                        do kkk=1,nvar
1279                           c1stock((iii-1)*10+j,(kk-1)*nvar+kkk)=
1280     *                          cova1((kk-1)*nvar+kkk)
1281                           m1stock((iii-1)*10+j,kk)=means(kk)
1282                        end do
1283                     end do
1284                     mcdndex(j,1,iii)=i
1285                     mcdndex(j,2,iii)=det
1286                     goto 1000
1287c---
1288 205                 do k=10,j+1,-1
1289                        do kk=1,nvar*nvar
1290                           cstock(k,kk)= cstock(k-1,kk)
1291                        end do
1292
1293                        do kk=1,nvar
1294                           mstock(k,kk)= mstock(k-1,kk)
1295                        end do
1296
1297                        mcdndex(k,1,iii)=mcdndex(k-1,1,iii)
1298                        mcdndex(k,2,iii)=mcdndex(k-1,2,iii)
1299                     end do
1300                     do kk=1,nvar
1301                        do kkk=1,nvar
1302                           cstock(j,(kk-1)*nvar+kkk)=
1303     *                          cova1((kk-1)*nvar+kkk)
1304                           mstock(j,kk)=means(kk)
1305                        end do
1306                     end do
1307                     mcdndex(j,1,iii)=i
1308                     mcdndex(j,2,iii)=det
1309                     goto 1000
1310
1311                  endif
1312               end do  ! j
1313
1314            endif
1315c     (not final)
1316
1317 1000    continue !end{ i = 1..nrep }
1318 1111 continue
1319c---- - - - - - end [ For (ii = 1 .. ngroup) ]  - - - - - - - - -
1320
1321cc  Determine whether the algorithm needs to be run again or not.
1322cc
1323      if(part .and. .not. fine) then
1324         fine= .true.
1325         goto 5555
1326      else if(.not. final .and. ((part.and.fine).or. .not.part)) then
1327         final= .true.
1328         goto 5555
1329      endif
1330
1331cc******** end { Main Loop } ************** --------------------------------
1332
1333
1334c MM: 'temp' is thrown away in calling R code:
1335c      do j=1,nhalf
1336c         temp(j)=inbest(j)
1337c      end do
1338c      call rfishsort(temp,nhalf)
1339
1340      do j=1,nvar
1341         means(j)=bmeans(j)*mad(j)+med(j)
1342      end do
1343      call rfcovcopy(means,initmean,nvar,1)
1344
1345      do i=1,nvar
1346         do j=1,nvar
1347            cova1((i-1)*nvar+j)=cova2((i-1)*nvar+j)*mad(i)*mad(j)
1348         end do
1349      end do
1350      call rfcovcopy(cova1,initcov,nvar,nvar)
1351      det=object
1352      do j=1,nvar
1353         det=det + 2*log(mad(j))
1354      end do
1355
1356
1357cc      VT::chimed is passed now as a parameter
1358cc        call rfcovmult(cova1,nvar,nvar,medi2/chimed(nvar))
1359cc        call rfcovmult(cova2,nvar,nvar,medi2/chimed(nvar))
1360cc        call rfcovmult(cinv2,nvar,nvar,1.D0/(medi2/chimed(nvar)))
1361
1362      medi2 = medi2/chimed
1363      call rfcovmult(cova1, nvar,nvar, medi2)
1364      call rfcovmult(cova2, nvar,nvar, medi2)
1365      call rfcovmult(cinv2, nvar,nvar, 1.D0/medi2)
1366      call rfcovcopy(cova1, adcov,nvar,nvar)
1367cc
1368cc      The MCD location is in bmeans.
1369cc      The MCD scatter matrix is in cova2,
1370cc      and its inverse in cinv2.
1371cc
1372cc      For every observation we compute its MCD distance
1373cc      and compare it to a cutoff value.
1374cc
1375      call rfcovinit(sscp1,nvar+1,nvar+1)
1376
1377      do i=1,n
1378         do mm=1,nvar
1379            rec(mm)=dat(i,mm)
1380         end do
1381         dist2=rfmahad(rec,nvar,bmeans,cinv2)
1382         if(dist2.le.cutoff) then
1383            weight(i)=1
1384         else
1385            weight(i)=0
1386         endif
1387      end do
1388
1389      call transfo(cova2,bmeans,dat,med,mad,nvar,n)
1390      goto 9999
1391cc ******************************************************************
1392
1393 9999 continue
1394      if(i_trace .ge. 2) call pr9mcd(tottimes)
1395      call rndend
1396C     ------ == PutRNGstate() in C
1397      return
1398      end
1399ccccc end {rffastmcd}
1400ccccc
1401ccccc
1402
1403
1404c --- Auxiliary just to pass Fortran 'logical' as 'integer' to C;   int(.) does *not* work
1405      integer function l2i(logi)
1406      implicit none
1407      logical logi
1408      if(logi) then
1409         l2i = 1
1410      else
1411         l2i = 0
1412      endif
1413      return
1414      end
1415
1416ccccc
1417ccccc
1418      subroutine rfexact(kount,nn,ndist, nvar,sscp1,
1419     *     rec,dat, cova1,means,sd,weight)
1420cc
1421cc Determines how many objects lie on the hyperplane with equation
1422cc z(1,1)*(x_i1 - means_1)+ ... + z(p,1)* (x_ip - means_p) = 0
1423cc and computes their mean and their covariance matrix.
1424cc
1425      double precision ndist(nn)
1426      double precision sscp1(nvar+1,nvar+1)
1427      double precision rec(nvar+1)
1428      double precision dat(nn,nvar)
1429      double precision cova1(nvar,nvar)
1430      double precision means(nvar), sd(nvar)
1431      integer weight(nn)
1432
1433      call rfcovinit(sscp1,nvar+1,nvar+1)
1434      kount=0
1435      do kk=1,nn
1436         if(dabs(ndist(kk)-0.D0).lt.10.D-8) then
1437            kount=kount+1
1438            weight(kk)=1
1439            do j=1,nvar
1440               rec(j)=dat(kk,j)
1441            end do
1442            call rfadmit(rec,nvar,sscp1)
1443         else
1444            weight(kk)=0
1445         endif
1446      end do
1447      call rfcovar(kount,nvar,sscp1,cova1,means,sd)
1448      return
1449      end
1450ccccc
1451ccccc
1452      subroutine transfo(cova,means,dat,med,mad,nvar,n)
1453cc
1454      implicit none
1455      integer n, nvar
1456      double precision dat(n,nvar), cova(nvar,nvar)
1457      double precision means(nvar), med(nvar), mad(nvar)
1458      integer i,j,k
1459      do j=1,nvar
1460        means(j)=means(j)*mad(j)+med(j)
1461        do k=1,nvar
1462           cova(j,k)=cova(j,k)*mad(j)*mad(k)
1463        end do
1464        do i=1,n
1465           dat(i,j)=dat(i,j)*mad(j)+med(j)
1466        end do
1467      end do
1468
1469      return
1470      end
1471ccccc
1472ccccc
1473      subroutine rfcovmult(a,n1,n2,fac)
1474cc
1475cc  Multiplies the matrix a by the real factor fac.
1476cc
1477      double precision a(n1,n2)
1478      double precision fac
1479cc
1480      do i=1,n1
1481        do j=1,n2
1482          a(i,j)=a(i,j)*fac
1483        end do
1484      end do
1485      return
1486      end
1487ccccc
1488ccccc
1489      subroutine rfadmit(rec,nvar,sscp)
1490cc
1491cc  Updates the sscp matrix with the additional case rec.
1492cc
1493      double precision rec(nvar)
1494      double precision sscp(nvar+1,nvar+1)
1495cc
1496      sscp(1,1)=sscp(1,1)+1.D0
1497      do j=1,nvar
1498        sscp(1,j+1)=sscp(1,j+1)+rec(j)
1499        sscp(j+1,1)=sscp(1,j+1)
1500      end do
1501      do i=1,nvar
1502        do j=1,nvar
1503          sscp(i+1,j+1)=sscp(i+1,j+1)+rec(i)*rec(j)
1504        end do
1505      end do
1506      return
1507      end
1508ccccc
1509ccccc
1510      subroutine rfcovar(n,nvar, sscp,cova, means,sd)
1511cc
1512cc  Computes the classical mean and covariance matrix.
1513cc
1514      implicit none
1515      integer n,nvar, i,j
1516      double precision sscp(nvar+1,nvar+1), cova(nvar,nvar)
1517      double precision means(nvar), sd(nvar), f
1518
1519      do i=1,nvar
1520        means(i)=sscp(1,i+1)
1521        sd(i)=sscp(i+1,i+1)
1522        f=(sd(i)-means(i)*means(i)/n)/(n-1)
1523        if(f.gt.0.D0) then
1524          sd(i)=dsqrt(f)
1525        else
1526          sd(i)=0.D0
1527        endif
1528        means(i)=means(i)/n
1529      end do
1530      do i=1,nvar
1531        do j=1,nvar
1532          cova(i,j)=sscp(i+1,j+1)
1533        end do
1534      end do
1535      do i=1,nvar
1536        do j=1,nvar
1537          cova(i,j)=cova(i,j)-n*means(i)*means(j)
1538          cova(i,j)=cova(i,j)/(n-1)
1539        end do
1540      end do
1541      return
1542      end
1543ccccc
1544ccccc
1545      subroutine rfcorrel(nvar,a,b,sd)
1546cc
1547cc  Transforms the scatter matrix a to the correlation matrix b: <==> R's  cov2cor(.)
1548cc
1549      implicit none
1550      integer nvar
1551      double precision a(nvar,nvar), b(nvar,nvar), sd(nvar)
1552      integer j,i
1553
1554      do j=1,nvar
1555         sd(j)=1/sqrt(a(j,j))
1556      end do
1557      do i=1,nvar
1558         do j=1,nvar
1559            if(i.eq.j) then
1560               b(i,j)=1.0
1561            else
1562               b(i,j)=a(i,j)*sd(i)*sd(j)
1563            endif
1564         end do
1565      end do
1566      return
1567      end
1568
1569      subroutine prdraw(a,pnsel, nn)
1570
1571      implicit none
1572      integer nn, a(nn), pnsel
1573c
1574      double precision unifrnd
1575      integer jndex, nrand, i,j
1576
1577      jndex=pnsel
1578c     OLD       nrand=int(uniran(seed)*(nn-jndex))+1
1579      nrand=int(unifrnd() * (nn-jndex))+1
1580C     if(nrand .gt. nn-jndex) then
1581C     call intpr(
1582C     1          '** prdraw(): correcting nrand > nn-jndex; nrand=',
1583C     2          -1, nrand, 1)
1584C     nrand=nn-jndex
1585C     endif
1586
1587      jndex=jndex+1
1588      a(jndex)=nrand+jndex-1
1589      do i=1,jndex-1
1590         if(a(i).gt.nrand+i-1) then
1591            do j=jndex,i+1,-1
1592               a(j)=a(j-1)
1593            end do
1594            a(i)=nrand+i-1
1595            goto 10
1596c           ------- break
1597         endif
1598      end do
1599 10   continue
1600      return
1601      end
1602ccccc
1603ccccc
1604      double precision function rfmahad(rec,nvar,means,sigma)
1605cc
1606cc  Computes a Mahalanobis-type distance.
1607cc
1608      double precision rec(nvar), means(nvar), sigma(nvar,nvar), t
1609
1610      t = 0.
1611      do j=1,nvar
1612         do k=1,nvar
1613            t = t + (rec(j)-means(j))*(rec(k)-means(k))*sigma(j,k)
1614         end do
1615      end do
1616      rfmahad=t
1617      return
1618      end
1619ccccc
1620ccccc
1621      subroutine rfdis(da,z,ndist,nm,nv,nn,nvar, means)
1622cc
1623cc Computes the distance between the objects of da and a hyperplane with
1624cc equation z(1,1)*(x_i1 - means_1) + ... + z(p,1)*(x_ip - means_p) = 0
1625cc
1626      double precision da(nm,nv)
1627      double precision z(nvar,nvar)
1628      double precision ndist(nn)
1629      double precision means(nvar)
1630
1631      do i=1,nn
1632         ndist(i)=0
1633         do j=1,nvar
1634            ndist(i)=z(j,1)*(da(i,j)-means(j))+ndist(i)
1635         end do
1636         ndist(i)=dabs(ndist(i))
1637      end do
1638      return
1639      end
1640ccccc
1641ccccc
1642      subroutine rfstore2(nvar,cstock,mstock,nv_2,
1643     *     kmini,cova1,means,i,mcdndex,kount)
1644cc
1645cc  Stores the coefficients of a hyperplane
1646cc  z(1,1)*(x_i1 - means_1) + ... +  z(p,1)*(x_ip - means_p) = 0
1647cc  into the first row of the matrix mstock, and shifts the other
1648cc  elements of the arrays mstock and cstock.
1649cc
1650      double precision cstock(10, nv_2), mstock(10, nvar)
1651      double precision mcdndex(10, 2, kmini)
1652      double precision cova1(nvar,nvar), means(nvar)
1653
1654      do k=10,2,-1
1655         do kk=1,nvar*nvar
1656            cstock(k,kk)= cstock(k-1,kk)
1657         end do
1658         do kk=1,nvar
1659            mstock(k,kk)= mstock(k-1,kk)
1660         end do
1661         mcdndex(k,1,1)=mcdndex(k-1,1,1)
1662         mcdndex(k,2,1)=mcdndex(k-1,2,1)
1663      end do
1664      do kk=1,nvar
1665         mstock(1,kk)=means(kk)
1666         do jj=1,nvar
1667            cstock(1,(kk-1)*nvar+jj)=cova1(kk,jj)
1668         end do
1669      end do
1670      mcdndex(1,1,1)=i
1671      mcdndex(1,2,1)=kount
1672      return
1673      end
1674ccccc
1675ccccc
1676      subroutine rfstore1(nvar,c1stock,m1stock,nv_2,
1677     *     kmini,cova1,means,i,km10,ii,mcdndex,kount)
1678
1679      double precision c1stock(km10, nv_2), m1stock(km10, nvar)
1680      double precision mcdndex(10,2,kmini)
1681      double precision cova1(nvar,nvar), means(nvar)
1682
1683      do k=10,2,-1
1684         do kk=1,nvar*nvar
1685            c1stock((ii-1)*10+k,kk)=
1686     *           c1stock((ii-1)*10+k-1,kk)
1687         end do
1688         do kk=1,nvar
1689            m1stock((ii-1)*10+k,kk)=
1690     *           m1stock((ii-1)*10+k-1,kk)
1691         end do
1692         mcdndex(k,1,ii)=mcdndex(k-1,1,ii)
1693         mcdndex(k,2,ii)=mcdndex(k-1,2,ii)
1694      end do
1695      do kk=1,nvar
1696         m1stock((ii-1)*10+1,kk)=means(kk)
1697         do jj=1,nvar
1698            c1stock((ii-1)*10+1,(kk-1)*nvar+jj)=
1699     *           cova1(kk,jj)
1700         end do
1701      end do
1702      mcdndex(1,1,ii)=i
1703      mcdndex(1,2,ii)=kount
1704      return
1705      end
1706
1707
1708CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
1709ccccc
1710ccccc
1711      subroutine rfcovinit(a,n1,n2)
1712cc
1713cc  Initializes the matrix a by filling it with zeroes.
1714cc
1715      double precision a(n1,n2)
1716cc
1717      do i=1,n1
1718        do j=1,n2
1719           a(i,j)=0.D0
1720        end do
1721      end do
1722      return
1723      end
1724ccccc
1725ccccc
1726      subroutine rfcovsweep(a,nvar,k)
1727cc
1728      double precision a(nvar,nvar)
1729      double precision b, d
1730cc
1731      d=a(k,k)
1732      do j=1,nvar
1733         a(k,j)=a(k,j)/d
1734      end do
1735      do i=1,nvar
1736         if(i.ne.k) then
1737            b=a(i,k)
1738            do j=1,nvar
1739               a(i,j)=a(i,j)-b*a(k,j)
1740            end do
1741            a(i,k) = -b/d
1742         endif
1743      end do
1744      a(k,k)=1/d
1745      return
1746      end
1747ccccc
1748