1C++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C 2C C 3C HIERARCHICAL CLUSTERING using (user-specified) criterion. C 4C C 5C Parameters: C 6C C 7C N the number of points being clustered C 8C DISS(LEN) dissimilarities in lower half diagonal C 9C storage; LEN = N.N-1/2, C 10C IOPT clustering criterion to be used, C 11C IA, IB, CRIT history of agglomerations; dimensions C 12C N, first N-1 locations only used, C 13C MEMBR, NN, DISNN vectors of length N, used to store C 14C cluster cardinalities, current nearest C 15C neighbour, and the dissimilarity assoc. C 16C with the latter. C 17C MEMBR must be initialized by R to the C 18C default of rep(1, N) C 19C FLAG boolean indicator of agglomerable obj./ C 20C clusters. C 21C C 22C F. Murtagh, ESA/ESO/STECF, Garching, February 1986. C 23C Modifications for R: Ross Ihaka, Dec 1996 C 24C Fritz Leisch, Jun 2000 C 25C all vars declared: Martin Maechler, Apr 2001 C 26C C 27c- R Bug PR#4195 fixed "along" qclust.c, given in the report C 28C- Testing: --> "hclust" in ../../../../tests/reg-tests-1b.R C 29C "ward.D2" (iOpt = 8): Martin Maechler, Mar 2014 C 30C C 31C FLAG not passed as arg to avoid possible C 32C C/Fortran inconsistency, May 2019 C 33C------------------------------------------------------------C 34 SUBROUTINE HCLUST(N,LEN,IOPT,IA,IB,CRIT,MEMBR,NN,DISNN, DISS) 35c Args 36 INTEGER N, LEN, IOPT 37 INTEGER IA(N),IB(N), NN(N) 38 LOGICAL FLAG(N), isWard 39 DOUBLE PRECISION CRIT(N), MEMBR(N),DISS(LEN), DISNN(N) 40c Var 41 INTEGER IM, JJ, JM, I, NCL, J, IND, I2, J2, K, IND1, IND2 42 DOUBLE PRECISION INF, DMIN, D12 43c External function 44 INTEGER IOFFST 45c 46c was 1D+20 47 DATA INF/1.D+300/ 48c 49c unnecessary initialization of im jj jm to keep g77 -Wall happy 50c 51 IM = 0 52 JJ = 0 53 JM = 0 54C 55C Initializations 56C 57 DO I=1,N 58C We do not initialize MEMBR in order to be able to restart the 59C algorithm from a cut. 60C MEMBR(I)=1. 61 FLAG(I)=.TRUE. 62 end do 63 NCL=N 64 65 IF (iOpt .eq. 8) THEN ! Ward "D2" ---> using *squared* distances 66 do I=1,LEN 67 DISS(I)=DISS(I)*DISS(I) 68 end do 69 ENDIF 70 71C 72C Carry out an agglomeration - first create list of NNs 73C Note NN and DISNN are the nearest neighbour and its distance 74C TO THE RIGHT of I. 75C 76 DO I=1,N-1 77 DMIN=INF 78 DO J=I+1,N 79 IND=IOFFST(N,I,J) 80 IF (DMIN .GT. DISS(IND)) THEN 81 DMIN=DISS(IND) 82 JM=J 83 end if 84 end do 85 NN(I)=JM 86 DISNN(I)=DMIN 87 end do 88 89C-- Repeat ------------------------------------------------------- 90 400 CONTINUE 91 92C Next, determine least diss. using list of NNs 93 DMIN=INF 94 DO I=1,N-1 95 IF (FLAG(I) .AND. DISNN(I) .LT. DMIN) THEN 96 DMIN=DISNN(I) 97 IM=I 98 JM=NN(I) 99 end if 100 end do 101 NCL=NCL-1 102C 103C This allows an agglomeration to be carried out. 104C 105 I2=MIN0(IM,JM) 106 J2=MAX0(IM,JM) 107 IA(N-NCL)=I2 108 IB(N-NCL)=J2 109C WARD'S "D1", or "D2" MINIMUM VARIANCE METHOD -- iOpt = 1 or 8. 110 isWard = (iOpt .eq. 1 .or. iOpt .eq. 8) 111 IF (iOpt .eq. 8) DMIN = dsqrt(DMIN) 112 CRIT(N-NCL)=DMIN 113 FLAG(J2)=.FALSE. 114C 115C Update dissimilarities from new cluster. 116C 117 DMIN=INF 118 DO K=1,N 119 IF (FLAG(K) .AND. K .NE. I2) THEN 120 IF (I2.LT.K) THEN 121 IND1=IOFFST(N,I2,K) 122 ELSE 123 IND1=IOFFST(N,K,I2) 124 ENDIF 125 IF (J2.LT.K) THEN 126 IND2=IOFFST(N,J2,K) 127 ELSE 128 IND2=IOFFST(N,K,J2) 129 ENDIF 130 D12=DISS(IOFFST(N,I2,J2)) 131C 132C WARD'S "D1", or "D2" MINIMUM VARIANCE METHOD - IOPT=8. 133 IF (isWard) THEN 134 DISS(IND1)=(MEMBR(I2)+MEMBR(K))*DISS(IND1)+ 135 X (MEMBR(J2)+MEMBR(K))*DISS(IND2) - MEMBR(K)*D12 136 DISS(IND1)=DISS(IND1) / (MEMBR(I2)+MEMBR(J2)+MEMBR(K)) 137C 138C SINGLE LINK METHOD - IOPT=2. 139 ELSEIF (IOPT.EQ.2) THEN 140 DISS(IND1)=MIN(DISS(IND1),DISS(IND2)) 141C 142C COMPLETE LINK METHOD - IOPT=3. 143 ELSEIF (IOPT.EQ.3) THEN 144 DISS(IND1)=MAX(DISS(IND1),DISS(IND2)) 145C 146C AVERAGE LINK (OR GROUP AVERAGE) METHOD - IOPT=4. 147 ELSEIF (IOPT.EQ.4) THEN 148 DISS(IND1)= (MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)) 149 X / (MEMBR(I2)+MEMBR(J2)) 150C 151C MCQUITTY'S METHOD - IOPT=5. 152 ELSEIF (IOPT.EQ.5) THEN 153 DISS(IND1)=(DISS(IND1)+DISS(IND2)) / 2 154C 155C MEDIAN (GOWER'S) METHOD aka "Weighted Centroid" - IOPT=6. 156 ELSEIF (IOPT.EQ.6) THEN 157 DISS(IND1)= ((DISS(IND1)+DISS(IND2)) - D12/2) / 2 158C 159C Unweighted CENTROID METHOD - IOPT=7. 160 ELSEIF (IOPT.EQ.7) THEN 161 DISS(IND1)=(MEMBR(I2)*DISS(IND1)+MEMBR(J2)*DISS(IND2)- 162 X MEMBR(I2)*MEMBR(J2)*D12/(MEMBR(I2)+MEMBR(J2)))/ 163 X (MEMBR(I2)+MEMBR(J2)) 164 ENDIF 165 166C 167 IF (I2 .lt. K) THEN 168 IF (DISS(IND1) .LT. DMIN) THEN 169 DMIN=DISS(IND1) 170 JJ=K 171 ENDIF 172 else ! i2 > k 173c FIX: the rest of the else clause is a fix by JB to ensure 174c correct nearest neighbours are found when a non-monotone 175c clustering method (e.g. the centroid methods) are used 176 if(DISS(IND1) .lt. DISNN(K)) then ! find nearest neighbour of i2 177 DISNN(K) = DISS(IND1) 178 NN(K) = I2 179 end if 180 ENDIF 181 ENDIF 182 END DO 183 MEMBR(I2)=MEMBR(I2)+MEMBR(J2) 184 DISNN(I2)=DMIN 185 NN(I2)=JJ 186C 187C Update list of NNs insofar as this is required. 188C 189 DO I=1,N-1 190 IF (FLAG(I) .AND. 191 X ((NN(I).EQ.I2) .OR. (NN(I).EQ.J2))) THEN 192C (Redetermine NN of I:) 193 DMIN=INF 194 DO J=I+1,N 195 if (FLAG(J)) then 196 IND=IOFFST(N,I,J) 197 if (DISS(IND) .lt. DMIN) then 198 DMIN=DISS(IND) 199 JJ=J 200 end if 201 end if 202 end do 203 NN(I)=JJ 204 DISNN(I)=DMIN 205 end if 206 end do 207C 208C Repeat previous steps until N-1 agglomerations carried out. 209C 210 IF (NCL.GT.1) GOTO 400 211C 212C 213 RETURN 214 END 215C of HCLUST() 216C 217C 218 INTEGER FUNCTION IOFFST(N,I,J) 219C Map row I and column J of upper half diagonal symmetric matrix 220C onto vector. 221 INTEGER N,I,J 222C Use 64-bit integers for temporaries to avoid integer overflow 223C This could use SELECTED_INT_KIND(R=18), instead 224 INTEGER(KIND=8) N8,I8,J8 225 N8=N 226 I8=I 227 J8=J 228C Result is known to fit in 31 bits, so INT() is safe 229C and supresses compiler warning. 230 IOFFST=INT(J8+(I8-1)*N8-(I8*(I8+1))/2) 231 RETURN 232 END 233 234C+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++C 235C C 236C Given a HIERARCHIC CLUSTERING, described as a sequence of C 237C agglomerations, prepare the seq. of aggloms. and "horiz." C 238C order of objects for plotting the dendrogram using S routine C 239C 'plclust'. C 240C C 241C Parameters: C 242C C 243C IA, IB: vectors of dimension N defining the agglomer- C 244C ations. C 245C IIA, IIB: used to store IA and IB values differently C 246C (in form needed for S command 'plclust' C 247C IORDER: "horiz." order of objects for dendrogram C 248C C 249C F. Murtagh, ESA/ESO/STECF, Garching, June 1991 C 250C C 251C HISTORY C 252C C 253C Adapted from routine HCASS, which additionally determines C 254C cluster assignments at all levels, at extra comput. expense C 255C C 256C---------------------------------------------------------------C 257 SUBROUTINE HCASS2(N,IA,IB,IORDER,IIA,IIB) 258c Args 259 INTEGER N,IA(N),IB(N),IORDER(N),IIA(N),IIB(N) 260c Var 261 INTEGER I, J, K, K1, K2, LOC 262C 263C Following bit is to get seq. of merges into format acceptable to plclust 264C I coded clusters as lowest seq. no. of constituents; S's 'hclust' codes 265C singletons as -ve numbers, and non-singletons with their seq. nos. 266C 267 do I=1,N 268 IIA(I)=IA(I) 269 IIB(I)=IB(I) 270 end do 271 do I=1,N-2 272C In the following, smallest (+ve or -ve) seq. no. wanted 273 K=MIN(IA(I),IB(I)) 274 do J=I+1, N-1 275 IF(IA(J).EQ.K) IIA(J)=-I 276 IF(IB(J).EQ.K) IIB(J)=-I 277 end do 278 end do 279 do I=1,N-1 280 IIA(I)=-IIA(I) 281 IIB(I)=-IIB(I) 282 end do 283 do I=1,N-1 284 IF (IIA(I).GT.0 .AND. IIB(I).LT.0) THEN 285 K = IIA(I) 286 IIA(I) = IIB(I) 287 IIB(I) = K 288 ENDIF 289 IF (IIA(I).GT.0 .AND. IIB(I).GT.0) THEN 290 K1 = MIN(IIA(I),IIB(I)) 291 K2 = MAX(IIA(I),IIB(I)) 292 IIA(I) = K1 293 IIB(I) = K2 294 ENDIF 295 end do 296C 297C 298C NEW PART FOR 'ORDER' 299C 300 IORDER(1) = IIA(N-1) 301 IORDER(2) = IIB(N-1) 302 LOC=2 303 DO I=N-2,1,-1 304 DO J=1,LOC 305 IF(IORDER(J).EQ.I) THEN 306C REPLACE IORDER(J) WITH IIA(I) AND IIB(I) 307 IORDER(J)=IIA(I) 308 IF (J.EQ.LOC) THEN 309 LOC=LOC+1 310 IORDER(LOC)=IIB(I) 311 else 312 LOC=LOC+1 313 do K=LOC,J+2,-1 314 IORDER(K)=IORDER(K-1) 315 end do 316 IORDER(J+1)=IIB(I) 317 end if 318 GOTO 171 319 ENDIF 320 end do 321C SHOULD NEVER REACH HERE 322 171 CONTINUE 323 end do 324C 325C 326 do I=1,N 327 IORDER(I) = -IORDER(I) 328 end do 329C 330C 331 RETURN 332 END 333