1 SUBROUTINE LINK(MM, M, N, A, P, IWORK, WORK, OUNIT) 2C 3C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 4C 5C PURPOSE 6C ------- 7C 8C CONSTRUCTS AND PRINTS TREE OF CLUSTERS OBTAINED BY ADDING CASES 9C IN SUCCESSION TO MINIMIZE THE SUM OF THE LINKING DISTANCES 10C 11C DESCRIPTION 12C ----------- 13C 14C 1. THE TREE CONSTRUCTED IS A SET OF NODES WITH THE LINK TO EACH 15C NODE'S ANCESTOR. THE TREE STARTS WITH ONE OBJECT AND OTHER 16C OBJECTS ARE ADDED BY MINIMIZING THE SUM OF THE TOTAL LINK 17C DISTANCES. THE OUTPUT IS WRITTEN ON FORTRAN UNIT OUNIT AND IS 18C TWO COLUMNS OF NUMBERS, THE FIRST IS THE NODES AND THE SECOND 19C IS THE NODE THAT IS ITS DIRECT ANCESTOR. THE TREE CAN BE 20C RECOVERED BY CONNECTING EACH NODE TO ITS ANCESTOR NODE, 21C REMEMBERING THAT THE FIRST M NODES ARE THE CASES. 22C 23C 2. THREE AMALGAMATION AND DISTANCE MEASURES ARE AVAILABLE AND 24C SPECIFIED BY THE PARAMETER P. SETTING P=0 AMALGAMATES THREE 25C CLUSTERS BY GIVING THE RESULTANT CLUSTER THE VALUES 26C CORRESPONDING TO THE MEDIAN OF THE THREE. THE DISTANCE BETWEEN 27C THE CLUSTERS IS THE PROPORTION OF THE NON- MATCHING VALUES OF 28C THE CLUSTERS. SETTING P=1 AMALGAMATES CLUSTERS TO THE MEDIAN 29C OF THE THREE, BUT USES THE ABSOLUTE VALUE OF THE DIFFERENCES OF 30C THE VALUES FOR THE DISTANCE. SETTING P=2 GIVES THE RESULTING 31C CLUSTER THE MEAN VALUES OF THE AMALGAMATED CLUSTERS. IT USES 32C THE EUCLIDEAN DISTANCES AS THE DISTANCE FUNCTION. 33C 34C INPUT PARAMETERS 35C ---------------- 36C 37C MM INTEGER SCALAR (UNCHANGED ON OUTPUT). 38C THE FIRST DIMENSION OF THE MATRIX A. MUST BE AT LEAST 2*M. 39C 40C M INTEGER SCALAR (UNCHANGED ON OUTPUT). 41C THE NUMBER OF CASES. 42C 43C N INTEGER SCALAR (UNCHANGED ON OUTPUT). 44C THE NUMBER OF VARIABLES. 45C 46C A REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND 47C DIMENSION MUST BE AT LEAST N (DESTROYED DURING EXECUTION). 48C THE MATRIX OF DATA VALUES ARE STORED IN THE FIRST M ROWS AND 49C THE SECOND M ROWS ARE USED AS WORK SPACE. 50C 51C P INTEGER SCALAR (UNCHANGED ON OUTPUT). 52C PARAMETER SPECIFYING TYPE OF AMALGAMATION AND METHOD FOR 53C DETERMINING DISTANCES BETWEEN CLUSTERS. 54C 55C P = 2 NEW CLUSTERS WILL BE FORMED BY THE MEANS OF THE 56C AMALGAMATED CLUSTERS AND WILL USE EUCLIDEAN 57C DISTANCES BETWEEN CLUSTERS 58C P = 1 NEW CLUSTERS WILL BE FORMED BY THE MEDIANS OF THE 59C AMALGAMATED CLUSTERS AND WILL USE THE ABSOLUTE 60C LINEAR DISTANCE BETWEEN CLUSTERS. 61C P = 0 NEW CLUSTERS WILL BE FORMED BY THE MEDIANS OF THE 62C AMALGAMATED CLUSTERS AND THE DISTANCE BETWEEN 63C CLUSTERS WILL BE THE PROPORTION OF NON-MATCHING 64C VALUES. 65C 66C IWORK INTEGER VECTOR DIMENSIONED AT LEAST 2*M. 67C WORK VECTOR. 68C 69C WORK REAL VECTOR DIMENSIONED AT LEAST 2*M. 70C WORK VECTOR. 71C 72C OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT). 73C UNIT NUMBER FOR OUTPUT. 74C 75C REFERENCE 76C --------- 77C 78C HARTIGAN, J. A. (1975). CLUSTERING ALGORITHMS, JOHN WILEY & 79C SONS, INC., NEW YORK. PAGES 233-248. 80C 81C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 82C 83 INTEGER P, OUNIT 84 DIMENSION IWORK(*), A(MM,*), WORK(*) 85C 86C INITIALIZE TREE 87C 88 K=M*2 89 DO 10 I=1,K 90 10 IWORK(I)=0 91C 92C FIND CLOSEST PAIR OF OBJECTS 93C 94 DM=R1MACH(2) 95 IM=1 96 JM=2 97 DO 20 I=1,M 98 DO 20 J=I,M 99 IF(J.NE.I) THEN 100 CALL MIDDLE(N,A(I,1),A(J,1),A(I,1),MM,P,A(K,1),D) 101 CALL MIDDLE(N,A(I,1),A(J,1),A(J,1),MM,P,A(K,1),D1) 102 D3=D+D1 103 IF(D3.LT.DM) THEN 104 IM=I 105 JM=J 106 DM=D3 107 ENDIF 108 ENDIF 109 20 CONTINUE 110 IWORK(JM)=JM 111 IWORK(IM)=JM 112C 113C FIND DISTANCES TO INITIAL LINK 114C 115 DO 30 L=1,M 116 IF(IWORK(L).LE.0) THEN 117 CALL MIDDLE(N,A(IM,1),A(JM,1),A(L,1),MM,P,A(K,1),WORK(L)) 118 IWORK(L)=-IM 119 ENDIF 120 30 CONTINUE 121C 122C CONSTRUCT CLUSTERS 123C 124 KK=K-2 125 DO 60 I=M+1,KK 126 DM=R1MACH(2) 127 I1=2 128 I2=2 129 I3=3 130C 131C FIND BEST OBJECT TO ADD TO TREE 132C 133 DO 40 L=1,M 134 IF(IWORK(L).LE.0) THEN 135 IF(WORK(L).LT.DM) THEN 136 I1=-IWORK(L) 137 I2=IWORK(I1) 138 I3=L 139 DM=WORK(L) 140 ENDIF 141 ENDIF 142 40 CONTINUE 143 CALL MIDDLE(N,A(I1,1),A(I2,1),A(I3,1),MM,P,A(I,1),D) 144 IWORK(I3)=I 145 IWORK(I)=I2 146 IWORK(I1)=I 147C 148C UPDATE DISTANCE ARRAY 149C 150 DO 50 L=1,M 151 IF(IWORK(L).LE.0) THEN 152 CALL MIDDLE(N,A(I1,1),A(I,1),A(L,1),MM,P,A(K,1),D1) 153 CALL MIDDLE(N,A(I2,1),A(I,1),A(L,1),MM,P,A(K,1),D2) 154 CALL MIDDLE(N,A(I3,1),A(I,1),A(L,1),MM,P,A(K,1),D3) 155 IF(I1.EQ.-IWORK(L)) WORK(L)=R1MACH(2) 156 IF(D1.LT.WORK(L)) THEN 157 IWORK(L)=-I1 158 WORK(L)=D1 159 ENDIF 160 IF(D2.LT.WORK(L)) THEN 161 IWORK(L)=-I 162 WORK(L)=D2 163 ENDIF 164 IF(D3.LT.WORK(L)) THEN 165 IWORK(L)=-I3 166 WORK(L)=D3 167 ENDIF 168 ENDIF 169 50 CONTINUE 170 60 CONTINUE 171C 172C OUTPUT 173C 174 IF (OUNIT .GT. 0) THEN 175 WRITE(OUNIT,1) 176 1 FORMAT('1') 177 WRITE(OUNIT,2) (I,IWORK(I),I=1,K) 178 2 FORMAT(1X, 2I4) 179 ENDIF 180 RETURN 181 END 182