1      SUBROUTINE CSPLIT(MM, M, N, A, CLAB, IR, KA, TH, IORD, DMIWRK,
2     *                  IWORK, DMWORK, WORK)
3C
4C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
5C
6C   PURPOSE
7C   -------
8C
9C      FINDS OPTIMAL SPLIT OF VARIABLES
10C
11C   DESCRIPTION
12C   -----------
13C
14C   1.  INITIALLY, THE FIRST CLUSTER CONSISTS OF ALL VARIABLES WITHIN
15C       THE BLOCK IR AND THE SECOND CLUSTER IS EMPTY.  THE REDUCTION IN
16C       THE WITHIN-CLUSTER SUM OF SQUARES FOR MOVING EACH VARIABLE
17C       FROM THE FIRST CLUSTER TO THE SECOND IS CALCULATED.  THE
18C       VARIABLE THAT REDUCES THE SUM OF SQUARES THE MOST IS MOVED AND
19C       THIS CONTINUES UNTIL ALL VARIABLES ARE MOVED WITH EACH
20C       REDUCTION STORED.  THEN THE SPLIT THAT HAD THE SMALLEST
21C       REDUCTION OF ALL IS RETURNED AS THE OPTIMUM SPLIT.
22C
23C   INPUT PARAMETERS
24C   ----------------
25C
26C   MM, M, N, A, CLAB, TH, IORD, DMIWRK, DMWORK -- SEE SUBROUTINE SPLIT2
27C
28C   IR    INTEGER SCALAR (UNCHANGED ON OUTPUT).
29C         NUMBER OF BLOCK TO BE SPLIT.
30C
31C   KA    INTEGER SCALAR (UNCHANGED ON OUTPUT).
32C         NUMBER OF BLOCKS.
33C
34C   IWORK INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND SECOND
35C            DIMENSION MUST BE AT LEAST KA.
36C         THE MATRIX DEFINING THE BOUNDARIES OF THE BLOCKS.
37C
38C         IWORK(1,I) IS 1 + THE FIRST ROW IN BLOCK I
39C         IWORK(2,I) IS 1 + THE LAST ROW IN BLOCK I
40C         IWORK(3,I) IS 1 + THE FIRST COLUMN IN BLOCK I
41C         IWORK(4,I) IS 1 + THE LAST COLUMN IN BLOCK I
42C
43C   WORK  REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND
44C            DIMENSION MUST BE AT LEAST MAX(M,N).
45C
46C         WORK(1,I) = FIRST CASE IN CASE CLUSTER I
47C         WORK(2,I) = LAST CASE IN CASE CLUSTER I
48C         WORK(3,I) = REDUCTION IN SSQ DUE TO SPLITTING
49C         WORK(4,I) = LAST CASE IN FIRST CLUSTER OF SPLIT OF I
50C         WORK(5,I) = 1 IF CASE IS INCLUDED IN PRESENT VARIABLE SPLIT
51C         WORK(6,I) = NUMBER OF VARIABLES IN I-TH ROW OF PRESENT
52C                     VARIABLE SPLIT
53C         WORK(7,I) = MEAN OF I-TH CASE, FIRST VARIABLE CLUSTER
54C         WORK(8,I) = NUMBER OF VARIABLES SECOND CLUSTER
55C         WORK(9,I) = MEAN OF I-TH CASE, SECOND CLUSTER
56C
57C         WORK(10-18,I) ARE SIMILAR WITH VARIABLES AND CASES REVERSED.
58C
59C   REFERENCE
60C   ---------
61C
62C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
63C        SONS, INC., NEW YORK.  PAGE 276.
64C
65C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
66C
67      INTEGER DMWORK, DMIWRK
68      DIMENSION A(MM,*), IWORK(DMIWRK,*), WORK(DMWORK,*)
69      CHARACTER*4 CLAB(*), C
70C
71      XM=99999.
72      DO 10 I=1,M
73   10    WORK(5,I)=0.
74C
75C     LOOK FOR BLOCKS WITHIN THRESHOLD
76C
77      JL=INT(WORK(10,IR))
78      JU=INT(WORK(11,IR))
79      DO 40 K=1,KA
80         IF(IWORK(3,K).EQ.JL+1.AND.IWORK(4,K).EQ.JU+1) THEN
81            IL=IWORK(1,K)
82            IF(IL.LT.0) IL=-IL
83            IU=IWORK(2,K)
84C
85C     COMPUTE VARIANCES
86C
87            NC=0
88            DO 30 I=IL-1,IU-1
89               S1=0.
90               S2=0.
91               S3=0.
92               DO 20 J=JL,JU
93                  IF(A(I,J).NE.XM) THEN
94                     S1=S1+1
95                     S2=S2+A(I,J)
96                     S3=S3+A(I,J)**2
97                  ENDIF
98   20          CONTINUE
99               WORK(6,I)=S1
100               IF(S1.NE.0.) THEN
101                  WORK(7,I)=S2/S1
102                  S3=S3/S1-(S2/S1)**2
103               ENDIF
104               IF(S3.GT.TH) THEN
105                  WORK(5,I)=1.
106                  NC=1
107               ENDIF
108   30       CONTINUE
109            IF(NC.EQ.0) IWORK(3,K)=-IWORK(3,K)
110         ENDIF
111   40 CONTINUE
112C
113C     FIND BEST VARIABLE SPLIT
114C
115      DO 50 I=1,M
116         WORK(8,I)=0.
117   50    WORK(9,I)=0.
118      DM=0.
119      WORK(12,IR)=0.
120      WORK(13,IR)=JL
121      DO 100 J=JL,JU-1
122         JJ=JU-J+JL
123         JD=JJ
124         DD=-R1MACH(2)
125         DO 70 L=JL,JJ
126            IF(IORD.LT.2.OR.L.EQ.JJ) THEN
127               DL=0.
128               DO 60 I=1,M
129                  IF(WORK(5,I).NE.0.AND.A(I,L).NE.XM) THEN
130                    DL=DL+(A(I,L)-WORK(7,I))**2*(WORK(6,I)+1.)/WORK(6,I)
131                    DL=DL-(A(I,L)-WORK(9,I))**2*WORK(8,I)/(WORK(8,I)+1.)
132                  ENDIF
133   60          CONTINUE
134               IF(DL.GT.DD) THEN
135                  DD=DL
136                  JD=L
137               ENDIF
138            ENDIF
139   70    CONTINUE
140C
141C     INTERCHANGE JD AND JJ
142C
143         DO 80 I=1,M
144            CC=A(I,JJ)
145            A(I,JJ)=A(I,JD)
146   80       A(I,JD)=CC
147         C = CLAB(JJ)
148         CLAB(JJ) = CLAB(JD)
149         CLAB(JD) = C
150C
151C     UPDATE MEANS
152C
153         DO 90 I=1,M
154            IF(WORK(5,I).NE.0..AND.A(I,JJ).NE.XM) THEN
155               WORK(6,I)=WORK(6,I)-1.
156               IF(WORK(6,I).NE.0.)WORK(7,I)=WORK(7,I)+(WORK(7,I)-
157     *                           A(I,JJ))/WORK(6,I)
158               WORK(8,I)=WORK(8,I)+1.
159               WORK(9,I)=WORK(9,I)-(WORK(9,I)-A(I,JJ))/WORK(8,I)
160            ENDIF
161   90    CONTINUE
162         DM=DM+DD
163         IF(DM.GE.WORK(12,IR)) THEN
164            WORK(12,IR)=DM
165            WORK(13,IR)=JJ-1
166         ENDIF
167  100 CONTINUE
168      RETURN
169      END
170