SUBROUTINE LINK(MM, M, N, A, P, IWORK, WORK, OUNIT) C C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> C C PURPOSE C ------- C C CONSTRUCTS AND PRINTS TREE OF CLUSTERS OBTAINED BY ADDING CASES C IN SUCCESSION TO MINIMIZE THE SUM OF THE LINKING DISTANCES C C DESCRIPTION C ----------- C C 1. THE TREE CONSTRUCTED IS A SET OF NODES WITH THE LINK TO EACH C NODE'S ANCESTOR. THE TREE STARTS WITH ONE OBJECT AND OTHER C OBJECTS ARE ADDED BY MINIMIZING THE SUM OF THE TOTAL LINK C DISTANCES. THE OUTPUT IS WRITTEN ON FORTRAN UNIT OUNIT AND IS C TWO COLUMNS OF NUMBERS, THE FIRST IS THE NODES AND THE SECOND C IS THE NODE THAT IS ITS DIRECT ANCESTOR. THE TREE CAN BE C RECOVERED BY CONNECTING EACH NODE TO ITS ANCESTOR NODE, C REMEMBERING THAT THE FIRST M NODES ARE THE CASES. C C 2. THREE AMALGAMATION AND DISTANCE MEASURES ARE AVAILABLE AND C SPECIFIED BY THE PARAMETER P. SETTING P=0 AMALGAMATES THREE C CLUSTERS BY GIVING THE RESULTANT CLUSTER THE VALUES C CORRESPONDING TO THE MEDIAN OF THE THREE. THE DISTANCE BETWEEN C THE CLUSTERS IS THE PROPORTION OF THE NON- MATCHING VALUES OF C THE CLUSTERS. SETTING P=1 AMALGAMATES CLUSTERS TO THE MEDIAN C OF THE THREE, BUT USES THE ABSOLUTE VALUE OF THE DIFFERENCES OF C THE VALUES FOR THE DISTANCE. SETTING P=2 GIVES THE RESULTING C CLUSTER THE MEAN VALUES OF THE AMALGAMATED CLUSTERS. IT USES C THE EUCLIDEAN DISTANCES AS THE DISTANCE FUNCTION. C C INPUT PARAMETERS C ---------------- C C MM INTEGER SCALAR (UNCHANGED ON OUTPUT). C THE FIRST DIMENSION OF THE MATRIX A. MUST BE AT LEAST 2*M. C C M INTEGER SCALAR (UNCHANGED ON OUTPUT). C THE NUMBER OF CASES. C C N INTEGER SCALAR (UNCHANGED ON OUTPUT). C THE NUMBER OF VARIABLES. C C A REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND C DIMENSION MUST BE AT LEAST N (DESTROYED DURING EXECUTION). C THE MATRIX OF DATA VALUES ARE STORED IN THE FIRST M ROWS AND C THE SECOND M ROWS ARE USED AS WORK SPACE. C C P INTEGER SCALAR (UNCHANGED ON OUTPUT). C PARAMETER SPECIFYING TYPE OF AMALGAMATION AND METHOD FOR C DETERMINING DISTANCES BETWEEN CLUSTERS. C C P = 2 NEW CLUSTERS WILL BE FORMED BY THE MEANS OF THE C AMALGAMATED CLUSTERS AND WILL USE EUCLIDEAN C DISTANCES BETWEEN CLUSTERS C P = 1 NEW CLUSTERS WILL BE FORMED BY THE MEDIANS OF THE C AMALGAMATED CLUSTERS AND WILL USE THE ABSOLUTE C LINEAR DISTANCE BETWEEN CLUSTERS. C P = 0 NEW CLUSTERS WILL BE FORMED BY THE MEDIANS OF THE C AMALGAMATED CLUSTERS AND THE DISTANCE BETWEEN C CLUSTERS WILL BE THE PROPORTION OF NON-MATCHING C VALUES. C C IWORK INTEGER VECTOR DIMENSIONED AT LEAST 2*M. C WORK VECTOR. C C WORK REAL VECTOR DIMENSIONED AT LEAST 2*M. C WORK VECTOR. C C OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT). C UNIT NUMBER FOR OUTPUT. C C REFERENCE C --------- C C HARTIGAN, J. A. (1975). CLUSTERING ALGORITHMS, JOHN WILEY & C SONS, INC., NEW YORK. PAGES 233-248. C C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> C INTEGER P, OUNIT DIMENSION IWORK(*), A(MM,*), WORK(*) C C INITIALIZE TREE C K=M*2 DO 10 I=1,K 10 IWORK(I)=0 C C FIND CLOSEST PAIR OF OBJECTS C DM=R1MACH(2) IM=1 JM=2 DO 20 I=1,M DO 20 J=I,M IF(J.NE.I) THEN CALL MIDDLE(N,A(I,1),A(J,1),A(I,1),MM,P,A(K,1),D) CALL MIDDLE(N,A(I,1),A(J,1),A(J,1),MM,P,A(K,1),D1) D3=D+D1 IF(D3.LT.DM) THEN IM=I JM=J DM=D3 ENDIF ENDIF 20 CONTINUE IWORK(JM)=JM IWORK(IM)=JM C C FIND DISTANCES TO INITIAL LINK C DO 30 L=1,M IF(IWORK(L).LE.0) THEN CALL MIDDLE(N,A(IM,1),A(JM,1),A(L,1),MM,P,A(K,1),WORK(L)) IWORK(L)=-IM ENDIF 30 CONTINUE C C CONSTRUCT CLUSTERS C KK=K-2 DO 60 I=M+1,KK DM=R1MACH(2) I1=2 I2=2 I3=3 C C FIND BEST OBJECT TO ADD TO TREE C DO 40 L=1,M IF(IWORK(L).LE.0) THEN IF(WORK(L).LT.DM) THEN I1=-IWORK(L) I2=IWORK(I1) I3=L DM=WORK(L) ENDIF ENDIF 40 CONTINUE CALL MIDDLE(N,A(I1,1),A(I2,1),A(I3,1),MM,P,A(I,1),D) IWORK(I3)=I IWORK(I)=I2 IWORK(I1)=I C C UPDATE DISTANCE ARRAY C DO 50 L=1,M IF(IWORK(L).LE.0) THEN CALL MIDDLE(N,A(I1,1),A(I,1),A(L,1),MM,P,A(K,1),D1) CALL MIDDLE(N,A(I2,1),A(I,1),A(L,1),MM,P,A(K,1),D2) CALL MIDDLE(N,A(I3,1),A(I,1),A(L,1),MM,P,A(K,1),D3) IF(I1.EQ.-IWORK(L)) WORK(L)=R1MACH(2) IF(D1.LT.WORK(L)) THEN IWORK(L)=-I1 WORK(L)=D1 ENDIF IF(D2.LT.WORK(L)) THEN IWORK(L)=-I WORK(L)=D2 ENDIF IF(D3.LT.WORK(L)) THEN IWORK(L)=-I3 WORK(L)=D3 ENDIF ENDIF 50 CONTINUE 60 CONTINUE C C OUTPUT C IF (OUNIT .GT. 0) THEN WRITE(OUNIT,1) 1 FORMAT('1') WRITE(OUNIT,2) (I,IWORK(I),I=1,K) 2 FORMAT(1X, 2I4) ENDIF RETURN END