1 SUBROUTINE CSPLIT(MM, M, N, A, CLAB, IR, KA, TH, IORD, DMIWRK, 2 * IWORK, DMWORK, WORK) 3C 4C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 5C 6C PURPOSE 7C ------- 8C 9C FINDS OPTIMAL SPLIT OF VARIABLES 10C 11C DESCRIPTION 12C ----------- 13C 14C 1. INITIALLY, THE FIRST CLUSTER CONSISTS OF ALL VARIABLES WITHIN 15C THE BLOCK IR AND THE SECOND CLUSTER IS EMPTY. THE REDUCTION IN 16C THE WITHIN-CLUSTER SUM OF SQUARES FOR MOVING EACH VARIABLE 17C FROM THE FIRST CLUSTER TO THE SECOND IS CALCULATED. THE 18C VARIABLE THAT REDUCES THE SUM OF SQUARES THE MOST IS MOVED AND 19C THIS CONTINUES UNTIL ALL VARIABLES ARE MOVED WITH EACH 20C REDUCTION STORED. THEN THE SPLIT THAT HAD THE SMALLEST 21C REDUCTION OF ALL IS RETURNED AS THE OPTIMUM SPLIT. 22C 23C INPUT PARAMETERS 24C ---------------- 25C 26C MM, M, N, A, CLAB, TH, IORD, DMIWRK, DMWORK -- SEE SUBROUTINE SPLIT2 27C 28C IR INTEGER SCALAR (UNCHANGED ON OUTPUT). 29C NUMBER OF BLOCK TO BE SPLIT. 30C 31C KA INTEGER SCALAR (UNCHANGED ON OUTPUT). 32C NUMBER OF BLOCKS. 33C 34C IWORK INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND SECOND 35C DIMENSION MUST BE AT LEAST KA. 36C THE MATRIX DEFINING THE BOUNDARIES OF THE BLOCKS. 37C 38C IWORK(1,I) IS 1 + THE FIRST ROW IN BLOCK I 39C IWORK(2,I) IS 1 + THE LAST ROW IN BLOCK I 40C IWORK(3,I) IS 1 + THE FIRST COLUMN IN BLOCK I 41C IWORK(4,I) IS 1 + THE LAST COLUMN IN BLOCK I 42C 43C WORK REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND 44C DIMENSION MUST BE AT LEAST MAX(M,N). 45C 46C WORK(1,I) = FIRST CASE IN CASE CLUSTER I 47C WORK(2,I) = LAST CASE IN CASE CLUSTER I 48C WORK(3,I) = REDUCTION IN SSQ DUE TO SPLITTING 49C WORK(4,I) = LAST CASE IN FIRST CLUSTER OF SPLIT OF I 50C WORK(5,I) = 1 IF CASE IS INCLUDED IN PRESENT VARIABLE SPLIT 51C WORK(6,I) = NUMBER OF VARIABLES IN I-TH ROW OF PRESENT 52C VARIABLE SPLIT 53C WORK(7,I) = MEAN OF I-TH CASE, FIRST VARIABLE CLUSTER 54C WORK(8,I) = NUMBER OF VARIABLES SECOND CLUSTER 55C WORK(9,I) = MEAN OF I-TH CASE, SECOND CLUSTER 56C 57C WORK(10-18,I) ARE SIMILAR WITH VARIABLES AND CASES REVERSED. 58C 59C REFERENCE 60C --------- 61C 62C HARTIGAN, J. A. (1975). CLUSTERING ALGORITHMS, JOHN WILEY & 63C SONS, INC., NEW YORK. PAGE 276. 64C 65C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> 66C 67 INTEGER DMWORK, DMIWRK 68 DIMENSION A(MM,*), IWORK(DMIWRK,*), WORK(DMWORK,*) 69 CHARACTER*4 CLAB(*), C 70C 71 XM=99999. 72 DO 10 I=1,M 73 10 WORK(5,I)=0. 74C 75C LOOK FOR BLOCKS WITHIN THRESHOLD 76C 77 JL=INT(WORK(10,IR)) 78 JU=INT(WORK(11,IR)) 79 DO 40 K=1,KA 80 IF(IWORK(3,K).EQ.JL+1.AND.IWORK(4,K).EQ.JU+1) THEN 81 IL=IWORK(1,K) 82 IF(IL.LT.0) IL=-IL 83 IU=IWORK(2,K) 84C 85C COMPUTE VARIANCES 86C 87 NC=0 88 DO 30 I=IL-1,IU-1 89 S1=0. 90 S2=0. 91 S3=0. 92 DO 20 J=JL,JU 93 IF(A(I,J).NE.XM) THEN 94 S1=S1+1 95 S2=S2+A(I,J) 96 S3=S3+A(I,J)**2 97 ENDIF 98 20 CONTINUE 99 WORK(6,I)=S1 100 IF(S1.NE.0.) THEN 101 WORK(7,I)=S2/S1 102 S3=S3/S1-(S2/S1)**2 103 ENDIF 104 IF(S3.GT.TH) THEN 105 WORK(5,I)=1. 106 NC=1 107 ENDIF 108 30 CONTINUE 109 IF(NC.EQ.0) IWORK(3,K)=-IWORK(3,K) 110 ENDIF 111 40 CONTINUE 112C 113C FIND BEST VARIABLE SPLIT 114C 115 DO 50 I=1,M 116 WORK(8,I)=0. 117 50 WORK(9,I)=0. 118 DM=0. 119 WORK(12,IR)=0. 120 WORK(13,IR)=JL 121 DO 100 J=JL,JU-1 122 JJ=JU-J+JL 123 JD=JJ 124 DD=-R1MACH(2) 125 DO 70 L=JL,JJ 126 IF(IORD.LT.2.OR.L.EQ.JJ) THEN 127 DL=0. 128 DO 60 I=1,M 129 IF(WORK(5,I).NE.0.AND.A(I,L).NE.XM) THEN 130 DL=DL+(A(I,L)-WORK(7,I))**2*(WORK(6,I)+1.)/WORK(6,I) 131 DL=DL-(A(I,L)-WORK(9,I))**2*WORK(8,I)/(WORK(8,I)+1.) 132 ENDIF 133 60 CONTINUE 134 IF(DL.GT.DD) THEN 135 DD=DL 136 JD=L 137 ENDIF 138 ENDIF 139 70 CONTINUE 140C 141C INTERCHANGE JD AND JJ 142C 143 DO 80 I=1,M 144 CC=A(I,JJ) 145 A(I,JJ)=A(I,JD) 146 80 A(I,JD)=CC 147 C = CLAB(JJ) 148 CLAB(JJ) = CLAB(JD) 149 CLAB(JD) = C 150C 151C UPDATE MEANS 152C 153 DO 90 I=1,M 154 IF(WORK(5,I).NE.0..AND.A(I,JJ).NE.XM) THEN 155 WORK(6,I)=WORK(6,I)-1. 156 IF(WORK(6,I).NE.0.)WORK(7,I)=WORK(7,I)+(WORK(7,I)- 157 * A(I,JJ))/WORK(6,I) 158 WORK(8,I)=WORK(8,I)+1. 159 WORK(9,I)=WORK(9,I)-(WORK(9,I)-A(I,JJ))/WORK(8,I) 160 ENDIF 161 90 CONTINUE 162 DM=DM+DD 163 IF(DM.GE.WORK(12,IR)) THEN 164 WORK(12,IR)=DM 165 WORK(13,IR)=JJ-1 166 ENDIF 167 100 CONTINUE 168 RETURN 169 END 170