1C++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
2C                                                            C
3C  HIERARCHICAL CLUSTERING using (user-specified) criterion. C
4C                                                            C
5C  Parameters:                                               C
6C                                                            C
7C  N                 the number of points being clustered    C
8C  DISS(LEN)         dissimilarities in lower half diagonal  C
9C                    storage; LEN = N.N-1/2,                 C
10C  IOPT              clustering criterion to be used,        C
11C  IA, IB, CRIT      history of agglomerations; dimensions   C
12C                    N, first N-1 locations only used,       C
13C  MEMBR, NN, DISNN  vectors of length N, used to store      C
14C                    cluster cardinalities, current nearest  C
15C                    neighbour, and the dissimilarity assoc. C
16C                    with the latter.                        C
17C                    MEMBR must be initialized by R to the   C
18C                    default of  rep(1, N)                   C
19C  FLAG              boolean indicator of agglomerable obj./ C
20C                    clusters.                               C
21C                                                            C
22C  F. Murtagh, ESA/ESO/STECF, Garching, February 1986.       C
23C  Modifications for R: Ross Ihaka, Dec 1996                 C
24C                       Fritz Leisch, Jun 2000               C
25C  all vars declared:   Martin Maechler, Apr 2001            C
26C                                                            C
27c- R Bug PR#4195 fixed "along" qclust.c, given in the report C
28C- Testing: --> "hclust" in ../../../../tests/reg-tests-1b.R C
29C  "ward.D2" (iOpt = 8): Martin Maechler, Mar 2014           C
30C                                                            C
31C  FLAG not passed as arg to avoid possible                  C
32C     C/Fortran inconsistency, May 2019                      C
33C------------------------------------------------------------C
34      SUBROUTINE HCLUST(N,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN, DISS)
35c Args
36      INTEGER N, LEN, IOPT
37      INTEGER IA(N),IB(N), NN(N)
38      LOGICAL FLAG(N), isWard
39      DOUBLE PRECISION CRIT(N), MEMBR(N),DISS(LEN), DISNN(N)
40c Var
41      INTEGER IM, JJ, JM, I, NCL, J, IND, I2, J2, K, IND1, IND2
42      DOUBLE PRECISION INF, DMIN, D12
43c External function
44      INTEGER IOFFST
45c
46c     was 1D+20
47      DATA INF/1.D+300/
48c
49c     unnecessary initialization of im jj jm to keep g77 -Wall happy
50c
51      IM = 0
52      JJ = 0
53      JM = 0
54C
55C  Initializations
56C
57      DO I=1,N
58C        We do not initialize MEMBR in order to be able to restart the
59C        algorithm from a cut.
60C        MEMBR(I)=1.
61         FLAG(I)=.TRUE.
62      end do
63      NCL=N
64
65      IF (iOpt .eq. 8) THEN ! Ward "D2" ---> using *squared* distances
66         do I=1,LEN
67            DISS(I)=DISS(I)*DISS(I)
68         end do
69      ENDIF
70
71C
72C  Carry out an agglomeration - first create list of NNs
73C  Note NN and DISNN are the nearest neighbour and its distance
74C  TO THE RIGHT of I.
75C
76      DO I=1,N-1
77         DMIN=INF
78         DO J=I+1,N
79            IND=IOFFST(N,I,J)
80            IF (DMIN .GT. DISS(IND)) THEN
81               DMIN=DISS(IND)
82               JM=J
83            end if
84         end do
85         NN(I)=JM
86         DISNN(I)=DMIN
87      end do
88
89C-- Repeat -------------------------------------------------------
90  400 CONTINUE
91
92C     Next, determine least diss. using list of NNs
93      DMIN=INF
94      DO I=1,N-1
95         IF (FLAG(I) .AND. DISNN(I) .LT. DMIN) THEN
96            DMIN=DISNN(I)
97            IM=I
98            JM=NN(I)
99         end if
100      end do
101      NCL=NCL-1
102C
103C  This allows an agglomeration to be carried out.
104C
105      I2=MIN0(IM,JM)
106      J2=MAX0(IM,JM)
107      IA(N-NCL)=I2
108      IB(N-NCL)=J2
109C     WARD'S "D1", or "D2" MINIMUM VARIANCE METHOD -- iOpt = 1 or 8.
110      isWard = (iOpt .eq. 1 .or. iOpt .eq. 8)
111      IF (iOpt .eq. 8) DMIN = dsqrt(DMIN)
112      CRIT(N-NCL)=DMIN
113      FLAG(J2)=.FALSE.
114C
115C  Update dissimilarities from new cluster.
116C
117      DMIN=INF
118      DO K=1,N
119         IF (FLAG(K) .AND. K .NE. I2) THEN
120            IF (I2.LT.K) THEN
121               IND1=IOFFST(N,I2,K)
122            ELSE
123               IND1=IOFFST(N,K,I2)
124            ENDIF
125            IF (J2.LT.K) THEN
126               IND2=IOFFST(N,J2,K)
127            ELSE
128               IND2=IOFFST(N,K,J2)
129            ENDIF
130            D12=DISS(IOFFST(N,I2,J2))
131C
132C     WARD'S "D1", or "D2" MINIMUM VARIANCE METHOD - IOPT=8.
133            IF (isWard) THEN
134               DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+
135     X              (MEMBR(J2)+MEMBR(K))*DISS(IND2) - MEMBR(K)*D12
136               DISS(IND1)=DISS(IND1) / (MEMBR(I2)+MEMBR(J2)+MEMBR(K))
137C
138C     SINGLE LINK METHOD - IOPT=2.
139            ELSEIF (IOPT.EQ.2) THEN
140               DISS(IND1)=MIN(DISS(IND1),DISS(IND2))
141C
142C     COMPLETE LINK METHOD - IOPT=3.
143            ELSEIF (IOPT.EQ.3) THEN
144               DISS(IND1)=MAX(DISS(IND1),DISS(IND2))
145C
146C     AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4.
147            ELSEIF (IOPT.EQ.4) THEN
148               DISS(IND1)= (MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2))
149     X                      / (MEMBR(I2)+MEMBR(J2))
150C
151C     MCQUITTY'S METHOD - IOPT=5.
152            ELSEIF (IOPT.EQ.5) THEN
153               DISS(IND1)=(DISS(IND1)+DISS(IND2)) / 2
154C
155C     MEDIAN (GOWER'S) METHOD aka "Weighted Centroid" - IOPT=6.
156            ELSEIF (IOPT.EQ.6) THEN
157               DISS(IND1)= ((DISS(IND1)+DISS(IND2)) - D12/2) / 2
158C
159C     Unweighted CENTROID METHOD - IOPT=7.
160            ELSEIF (IOPT.EQ.7) THEN
161               DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)-
162     X              MEMBR(I2)*MEMBR(J2)*D12/(MEMBR(I2)+MEMBR(J2)))/
163     X              (MEMBR(I2)+MEMBR(J2))
164            ENDIF
165
166C
167            IF (I2 .lt. K) THEN
168               IF (DISS(IND1) .LT. DMIN) THEN
169                  DMIN=DISS(IND1)
170                  JJ=K
171               ENDIF
172            else  ! i2 > k
173c	     FIX: the rest of the else clause is a fix by JB to ensure
174c            correct nearest neighbours are found when a non-monotone
175c            clustering method (e.g. the centroid methods) are used
176               if(DISS(IND1) .lt. DISNN(K)) then ! find nearest neighbour of i2
177                  DISNN(K) = DISS(IND1)
178                  NN(K) = I2
179               end if
180            ENDIF
181         ENDIF
182      END DO
183      MEMBR(I2)=MEMBR(I2)+MEMBR(J2)
184      DISNN(I2)=DMIN
185      NN(I2)=JJ
186C
187C  Update list of NNs insofar as this is required.
188C
189      DO I=1,N-1
190         IF (FLAG(I) .AND.
191     X        ((NN(I).EQ.I2) .OR. (NN(I).EQ.J2))) THEN
192C     (Redetermine NN of I:)
193            DMIN=INF
194            DO J=I+1,N
195               if (FLAG(J)) then
196                  IND=IOFFST(N,I,J)
197                  if (DISS(IND) .lt. DMIN) then
198                     DMIN=DISS(IND)
199                     JJ=J
200                  end if
201               end if
202            end do
203            NN(I)=JJ
204            DISNN(I)=DMIN
205         end if
206      end do
207C
208C  Repeat previous steps until N-1 agglomerations carried out.
209C
210      IF (NCL.GT.1) GOTO 400
211C
212C
213      RETURN
214      END
215C     of HCLUST()
216C
217C
218      INTEGER FUNCTION IOFFST(N,I,J)
219C  Map row I and column J of upper half diagonal symmetric matrix
220C  onto vector.
221      INTEGER N,I,J
222C  Use 64-bit integers for temporaries to avoid integer overflow
223C  This could use SELECTED_INT_KIND(R=18), instead
224      INTEGER(KIND=8) N8,I8,J8
225      N8=N
226      I8=I
227      J8=J
228C  Result is known to fit in 31 bits, so INT() is safe
229C  and supresses compiler warning.
230      IOFFST=INT(J8+(I8-1)*N8-(I8*(I8+1))/2)
231      RETURN
232      END
233
234C+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C
235C                                                               C
236C  Given a HIERARCHIC CLUSTERING, described as a sequence of    C
237C  agglomerations, prepare the seq. of aggloms. and "horiz."    C
238C  order of objects for plotting the dendrogram using S routine C
239C  'plclust'.                                                   C
240C                                                               C
241C  Parameters:                                                  C
242C                                                               C
243C  IA, IB:       vectors of dimension N defining the agglomer-  C
244C                 ations.                                       C
245C  IIA, IIB:     used to store IA and IB values differently     C
246C                (in form needed for S command 'plclust'        C
247C  IORDER:       "horiz." order of objects for dendrogram       C
248C                                                               C
249C  F. Murtagh, ESA/ESO/STECF, Garching, June 1991               C
250C                                                               C
251C  HISTORY                                                      C
252C                                                               C
253C  Adapted from routine HCASS, which additionally determines    C
254C   cluster assignments at all levels, at extra comput. expense C
255C                                                               C
256C---------------------------------------------------------------C
257      SUBROUTINE HCASS2(N,IA,IB,IORDER,IIA,IIB)
258c Args
259      INTEGER N,IA(N),IB(N),IORDER(N),IIA(N),IIB(N)
260c Var
261      INTEGER I, J, K, K1, K2, LOC
262C
263C     Following bit is to get seq. of merges into format acceptable to plclust
264C     I coded clusters as lowest seq. no. of constituents; S's 'hclust' codes
265C     singletons as -ve numbers, and non-singletons with their seq. nos.
266C
267      do I=1,N
268         IIA(I)=IA(I)
269         IIB(I)=IB(I)
270      end do
271      do I=1,N-2
272C        In the following, smallest (+ve or -ve) seq. no. wanted
273         K=MIN(IA(I),IB(I))
274         do J=I+1, N-1
275            IF(IA(J).EQ.K) IIA(J)=-I
276            IF(IB(J).EQ.K) IIB(J)=-I
277         end do
278      end do
279      do I=1,N-1
280         IIA(I)=-IIA(I)
281         IIB(I)=-IIB(I)
282      end do
283      do I=1,N-1
284         IF (IIA(I).GT.0 .AND. IIB(I).LT.0) THEN
285            K = IIA(I)
286            IIA(I) = IIB(I)
287            IIB(I) = K
288         ENDIF
289         IF (IIA(I).GT.0 .AND. IIB(I).GT.0) THEN
290            K1 = MIN(IIA(I),IIB(I))
291            K2 = MAX(IIA(I),IIB(I))
292            IIA(I) = K1
293            IIB(I) = K2
294         ENDIF
295      end do
296C
297C
298C     NEW PART FOR 'ORDER'
299C
300      IORDER(1) = IIA(N-1)
301      IORDER(2) = IIB(N-1)
302      LOC=2
303      DO I=N-2,1,-1
304         DO J=1,LOC
305            IF(IORDER(J).EQ.I) THEN
306C      REPLACE IORDER(J) WITH IIA(I) AND IIB(I)
307               IORDER(J)=IIA(I)
308               IF (J.EQ.LOC) THEN
309                  LOC=LOC+1
310                  IORDER(LOC)=IIB(I)
311               else
312                  LOC=LOC+1
313                  do K=LOC,J+2,-1
314                     IORDER(K)=IORDER(K-1)
315                  end do
316                  IORDER(J+1)=IIB(I)
317               end if
318               GOTO 171
319            ENDIF
320         end do
321C     SHOULD NEVER REACH HERE
322 171     CONTINUE
323      end do
324C
325C
326      do I=1,N
327         IORDER(I) = -IORDER(I)
328      end do
329C
330C
331      RETURN
332      END
333