1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 2004   The R Core Team.
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 #include "modreg.h" /* for declarations for registration */
21 
kmeans_Lloyd(double * x,int * pn,int * pp,double * cen,int * pk,int * cl,int * pmaxiter,int * nc,double * wss)22 void kmeans_Lloyd(double *x, int *pn, int *pp, double *cen, int *pk, int *cl,
23 		  int *pmaxiter, int *nc, double *wss)
24 {
25     int n = *pn, k = *pk, p = *pp, maxiter = *pmaxiter;
26     int iter, i, j, c, it, inew = 0;
27     double best, dd, tmp;
28     Rboolean updated;
29 
30     for(i = 0; i < n; i++) cl[i] = -1;
31     for(iter = 0; iter < maxiter; iter++) {
32 	updated = FALSE;
33 	for(i = 0; i < n; i++) {
34 	    /* find nearest centre for each point */
35 	    best = R_PosInf;
36 	    for(j = 0; j < k; j++) {
37 		dd = 0.0;
38 		for(c = 0; c < p; c++) {
39 		    tmp = x[i+n*c] - cen[j+k*c];
40 		    dd += tmp * tmp;
41 		}
42 		if(dd < best) {
43 		    best = dd;
44 		    inew = j+1;
45 		}
46 	    }
47 	    if(cl[i] != inew) {
48 		updated = TRUE;
49 		cl[i] = inew;
50 	    }
51 	}
52 	if(!updated) break;
53 	/* update each centre */
54 	for(j = 0; j < k*p; j++) cen[j] = 0.0;
55 	for(j = 0; j < k; j++) nc[j] = 0;
56 	for(i = 0; i < n; i++) {
57 	    it = cl[i] - 1; nc[it]++;
58 	    for(c = 0; c < p; c++) cen[it+c*k] += x[i+c*n];
59 	}
60 	for(j = 0; j < k*p; j++) cen[j] /= nc[j % k];
61     }
62 
63     *pmaxiter = iter + 1;
64     for(j = 0; j < k; j++) wss[j] = 0.0;
65     for(i = 0; i < n; i++) {
66 	it = cl[i] - 1;
67 	for(c = 0; c < p; c++) {
68 	    tmp = x[i+n*c] - cen[it+k*c];
69 	    wss[it] += tmp * tmp;
70 	}
71     }
72 }
73 
kmeans_MacQueen(double * x,int * pn,int * pp,double * cen,int * pk,int * cl,int * pmaxiter,int * nc,double * wss)74 void kmeans_MacQueen(double *x, int *pn, int *pp, double *cen, int *pk,
75 		     int *cl, int *pmaxiter, int *nc, double *wss)
76 {
77     int n = *pn, k = *pk, p = *pp, maxiter = *pmaxiter;
78     int iter, i, j, c, it, inew = 0, iold;
79     double best, dd, tmp;
80     Rboolean updated;
81 
82     /* first assign each point to the nearest cluster centre */
83     for(i = 0; i < n; i++) {
84 	best = R_PosInf;
85 	for(j = 0; j < k; j++) {
86 	    dd = 0.0;
87 	    for(c = 0; c < p; c++) {
88 		tmp = x[i+n*c] - cen[j+k*c];
89 		dd += tmp * tmp;
90 	    }
91 	    if(dd < best) {
92 		best = dd;
93 		inew = j+1;
94 	    }
95 	}
96 	if(cl[i] != inew) cl[i] = inew;
97     }
98    /* and recompute centres as centroids */
99     for(j = 0; j < k*p; j++) cen[j] = 0.0;
100     for(j = 0; j < k; j++) nc[j] = 0;
101     for(i = 0; i < n; i++) {
102 	it = cl[i] - 1; nc[it]++;
103 	for(c = 0; c < p; c++) cen[it+c*k] += x[i+c*n];
104     }
105     for(j = 0; j < k*p; j++) cen[j] /= nc[j % k];
106 
107     for(iter = 0; iter < maxiter; iter++) {
108 	updated = FALSE;
109 	for(i = 0; i < n; i++) {
110 	    best = R_PosInf;
111 	    for(j = 0; j < k; j++) {
112 		dd = 0.0;
113 		for(c = 0; c < p; c++) {
114 		    tmp = x[i+n*c] - cen[j+k*c];
115 		    dd += tmp * tmp;
116 		}
117 		if(dd < best) {
118 		    best = dd;
119 		    inew = j;
120 		}
121 	    }
122 	    if((iold = cl[i] - 1) != inew) {
123 		updated = TRUE;
124 		cl[i] = inew + 1;
125 		nc[iold]--; nc[inew]++;
126 		/* update old and new cluster centres */
127 		for(c = 0; c < p; c++) {
128 		    cen[iold+k*c] += (cen[iold+k*c] - x[i+n*c])/nc[iold];
129 		    cen[inew+k*c] += (x[i+n*c] - cen[inew+k*c])/nc[inew];
130 		}
131 	    }
132 	}
133 	if(!updated) break;
134     }
135 
136     *pmaxiter = iter + 1;
137     for(j = 0; j < k; j++) wss[j] = 0.0;
138     for(i = 0; i < n; i++) {
139 	it = cl[i] - 1;
140 	for(c = 0; c < p; c++) {
141 	    tmp = x[i+n*c] - cen[it+k*c];
142 	    wss[it] += tmp * tmp;
143 	}
144     }
145 }
146 
147 // tracing for  kmeans() in  ./kmns.f
148 
F77_SUB(kmns1)149 void F77_SUB(kmns1)(int *k, int *it, int *indx) {
150     Rprintf("KMNS(*, k=%d): iter=%3d, indx=%d\n", *k, *it, *indx);
151 }
152 
F77_SUB(kmnsqpr)153 void F77_SUB(kmnsqpr)(int *istep, int *icoun, int *NCP, int *k, int *trace)
154 {
155     Rprintf(" QTRAN(): istep=%d, icoun=%d", *istep, *icoun);
156     if(*trace >= 2) {
157 	Rprintf(", NCP[1:%d]=", k[0]);
158 	for(int i=0; i < k[0]; i++) Rprintf(" %d", NCP[i]);
159     }
160     Rprintf("\n");
161 }
162 
163