1      SUBROUTINE TWO(MM, M, N, A, CLAB, RLAB, TITLE, DMCOV, COV,
2     1               COVLAB, COVTIT, AVE, AVECLB, AVERLB, AVETIT)
3C
4C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
5C
6C   PURPOSE
7C   -------
8C
9C      COMPUTES OVERALL MEAN AND COVARIANCE MATRICES OF A DATA SET WITH
10C      APPROPRIATE LABELS FOR THE CASES AND VARIABLES
11C
12C   DESCRIPTION
13C   -----------
14C
15C   1.  MISSING VALUES ON INPUT ARE DENOTED BY 99999.  THE NON-MISSING
16C       VALUES FOR EACH VARIABLE ARE SUMMED AND THE OVERALL MEAN IS
17C       COMPUTED.  THEN EACH MISSING VALUE IS REPLACED BY THE MEAN FOR
18C       THE RESPECTIVE VARIABLE.
19C
20C   2.  THE COVARIANCE MATRIX IS THEN CALCULATED.
21C
22C       SINCE BOTH THE ROW AND COLUMN LABELS FOR THE COVARIANCE MATRIX
23C       ARE THE SAME, ONLY ONE SET IS GENERATED.  TO USE THE CLUSTER
24C       ROUTINE "OUT" TO PRINT OUT THE COVARIANCE MATRIX, THE SAME
25C       VECTOR OF LABELS CAN BE USED FOR THE RLAB AND CLAB ARGUMENTS.
26C       THE ROUTINE WILL RETURN BOTH ROW AND COLUMN LABELS FOR THE
27C       AVERAGE VECTOR IN ORDER TO ALLOW THE USER TO USE THE CLUSTER
28C       ROUTINE "OUT" TO OUTPUT THIS VECTOR WITHOUT ANY OTHER SETUP.
29C       SEE PROGRAM "TWO" IN THE SAMPLE FILE UNDER THE HARTIGA ACCOUNT
30C       FOR AN EXAMPLE OF USING "OUT" AFTER "TWO".
31C
32C   INPUT PARAMETERS
33C   ----------------
34C
35C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
36C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
37C
38C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
39C         THE NUMBER OF CASES.
40C
41C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
42C         THE NUMBER OF VARIABLES.
43C
44C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
45C            DIMENSION MUST BE AT LEAST N. (CHANGED ON OUTPUT).
46C         THE MATRIX OF DATA VALUES.
47C
48C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
49C
50C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
51C            (UNCHANGED ON OUTPUT).
52C         THE LABELS OF THE VARIABLES.
53C
54C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
55C            (UNCHANGED ON OUTPUT).
56C         THE LABELS OF THE CASES.
57C
58C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
59C         TITLE OF THE DATA SET.
60C
61C   DMCOV INTEGER SCALAR (UNCHANGED ON OUTPUT).
62C         THE LEADING DIMENSION OF THE MATRIX COV.  MUST BE AT LEAST N.
63C
64C   OUTPUT PARAMETERS
65C   -----------------
66C
67C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
68C            DIMENSION MUST BE AT LEAST N.
69C         THE MATRIX OF DATA VALUES WITH MISSING VALUES REPLACED.
70C
71C   COV   REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMCOV AND WHOSE
72C            SECOND DIMENSION MUST BE AT LEAST N.
73C         THE COVARIANCE MATRIX.
74C
75C   COVLAB MATRIX OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
76C         THE LABELS OF THE VARIABLES FOR THE COVARIANCE MATRIX.
77C
78C   COVTIT 10-CHARACTER VARIABLE.
79C         TITLE OF THE COVARIANCE MATRIX.
80C
81C   AVE   REAL VECTOR WHOSE DIMENSION MUST BE AT LEAST N.
82C         THE VECTOR OF VARIABLE AVERAGES.
83C
84C   AVECLB VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
85C         THE COLUMN LABELS FOR THE AVERAGE VECTOR.
86C
87C   AVERLB 4-CHARACTER VARIABLE.
88C         THE LABEL OF THE ROW FOR THE AVERAGE VECTOR.  (BLANKED OUT,
89C         USED TO COMPLETE THE PARAMETERS TO THE "OUT" ROUTINE IN
90C         CLUSTER)
91C
92C   AVETIT 10-CHARACTER VARIABLE.
93C         TITLE OF THE AVERAGE VECTOR.
94C
95C   REFERENCE
96C   ---------
97C
98C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
99C        SONS, INC., NEW YORK.  PAGES 69, 72.
100C
101C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
102C
103      INTEGER DMCOV
104      DIMENSION COV(DMCOV,*),AVE(*),A(MM,*)
105      CHARACTER*4 CLAB(*), RLAB(*), COVLAB(*), AVECLB(*), AVERLB
106      CHARACTER*10 TITLE, COVTIT, AVETIT
107C
108C     COMPUTE COLUMN MEANS AND REPLACE MISSING VALUES
109C
110      DO 30 J=1,N
111         AVE(J)=0.
112         XP=0.
113         DO 10 I=1,M
114            IF(A(I,J).NE.99999.) THEN
115               XP=XP+1.
116               AVE(J)=AVE(J)+A(I,J)
117            ENDIF
118   10    CONTINUE
119         IF(XP.NE.0.) AVE(J)=AVE(J)/XP
120         DO 20 I=1,M
121   20       IF(A(I,J).EQ.99999.) A(I,J)=AVE(J)
122   30 CONTINUE
123C
124C     COMPUTE COVARIANCES
125C
126      DO 60 J=1,N
127         DO 60 K=1,J
128            COV(J,K)=0.
129            DO 50 I=1,M
130   50          COV(J,K)=COV(J,K)+(A(I,J)-AVE(J))*(A(I,K)-AVE(K))
131            COV(J,K)=COV(J,K)/M
132   60 COV(K,J)=COV(J,K)
133C
134C     LABEL MATRICES
135C
136      DO 70 J=1,N
137         AVECLB(J)=CLAB(J)
138   70    COVLAB(J)=CLAB(J)
139      AVERLB = '    '
140      COVTIT = 'COVARIANCE'
141      AVETIT = ' AVERAGES '
142      RETURN
143      END
144