1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 2004 The R Core Team.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, a copy is available at
17 * https://www.R-project.org/Licenses/
18 */
19
20 #include "modreg.h" /* for declarations for registration */
21
kmeans_Lloyd(double * x,int * pn,int * pp,double * cen,int * pk,int * cl,int * pmaxiter,int * nc,double * wss)22 void kmeans_Lloyd(double *x, int *pn, int *pp, double *cen, int *pk, int *cl,
23 int *pmaxiter, int *nc, double *wss)
24 {
25 int n = *pn, k = *pk, p = *pp, maxiter = *pmaxiter;
26 int iter, i, j, c, it, inew = 0;
27 double best, dd, tmp;
28 Rboolean updated;
29
30 for(i = 0; i < n; i++) cl[i] = -1;
31 for(iter = 0; iter < maxiter; iter++) {
32 updated = FALSE;
33 for(i = 0; i < n; i++) {
34 /* find nearest centre for each point */
35 best = R_PosInf;
36 for(j = 0; j < k; j++) {
37 dd = 0.0;
38 for(c = 0; c < p; c++) {
39 tmp = x[i+n*c] - cen[j+k*c];
40 dd += tmp * tmp;
41 }
42 if(dd < best) {
43 best = dd;
44 inew = j+1;
45 }
46 }
47 if(cl[i] != inew) {
48 updated = TRUE;
49 cl[i] = inew;
50 }
51 }
52 if(!updated) break;
53 /* update each centre */
54 for(j = 0; j < k*p; j++) cen[j] = 0.0;
55 for(j = 0; j < k; j++) nc[j] = 0;
56 for(i = 0; i < n; i++) {
57 it = cl[i] - 1; nc[it]++;
58 for(c = 0; c < p; c++) cen[it+c*k] += x[i+c*n];
59 }
60 for(j = 0; j < k*p; j++) cen[j] /= nc[j % k];
61 }
62
63 *pmaxiter = iter + 1;
64 for(j = 0; j < k; j++) wss[j] = 0.0;
65 for(i = 0; i < n; i++) {
66 it = cl[i] - 1;
67 for(c = 0; c < p; c++) {
68 tmp = x[i+n*c] - cen[it+k*c];
69 wss[it] += tmp * tmp;
70 }
71 }
72 }
73
kmeans_MacQueen(double * x,int * pn,int * pp,double * cen,int * pk,int * cl,int * pmaxiter,int * nc,double * wss)74 void kmeans_MacQueen(double *x, int *pn, int *pp, double *cen, int *pk,
75 int *cl, int *pmaxiter, int *nc, double *wss)
76 {
77 int n = *pn, k = *pk, p = *pp, maxiter = *pmaxiter;
78 int iter, i, j, c, it, inew = 0, iold;
79 double best, dd, tmp;
80 Rboolean updated;
81
82 /* first assign each point to the nearest cluster centre */
83 for(i = 0; i < n; i++) {
84 best = R_PosInf;
85 for(j = 0; j < k; j++) {
86 dd = 0.0;
87 for(c = 0; c < p; c++) {
88 tmp = x[i+n*c] - cen[j+k*c];
89 dd += tmp * tmp;
90 }
91 if(dd < best) {
92 best = dd;
93 inew = j+1;
94 }
95 }
96 if(cl[i] != inew) cl[i] = inew;
97 }
98 /* and recompute centres as centroids */
99 for(j = 0; j < k*p; j++) cen[j] = 0.0;
100 for(j = 0; j < k; j++) nc[j] = 0;
101 for(i = 0; i < n; i++) {
102 it = cl[i] - 1; nc[it]++;
103 for(c = 0; c < p; c++) cen[it+c*k] += x[i+c*n];
104 }
105 for(j = 0; j < k*p; j++) cen[j] /= nc[j % k];
106
107 for(iter = 0; iter < maxiter; iter++) {
108 updated = FALSE;
109 for(i = 0; i < n; i++) {
110 best = R_PosInf;
111 for(j = 0; j < k; j++) {
112 dd = 0.0;
113 for(c = 0; c < p; c++) {
114 tmp = x[i+n*c] - cen[j+k*c];
115 dd += tmp * tmp;
116 }
117 if(dd < best) {
118 best = dd;
119 inew = j;
120 }
121 }
122 if((iold = cl[i] - 1) != inew) {
123 updated = TRUE;
124 cl[i] = inew + 1;
125 nc[iold]--; nc[inew]++;
126 /* update old and new cluster centres */
127 for(c = 0; c < p; c++) {
128 cen[iold+k*c] += (cen[iold+k*c] - x[i+n*c])/nc[iold];
129 cen[inew+k*c] += (x[i+n*c] - cen[inew+k*c])/nc[inew];
130 }
131 }
132 }
133 if(!updated) break;
134 }
135
136 *pmaxiter = iter + 1;
137 for(j = 0; j < k; j++) wss[j] = 0.0;
138 for(i = 0; i < n; i++) {
139 it = cl[i] - 1;
140 for(c = 0; c < p; c++) {
141 tmp = x[i+n*c] - cen[it+k*c];
142 wss[it] += tmp * tmp;
143 }
144 }
145 }
146
147 // tracing for kmeans() in ./kmns.f
148
F77_SUB(kmns1)149 void F77_SUB(kmns1)(int *k, int *it, int *indx) {
150 Rprintf("KMNS(*, k=%d): iter=%3d, indx=%d\n", *k, *it, *indx);
151 }
152
F77_SUB(kmnsqpr)153 void F77_SUB(kmnsqpr)(int *istep, int *icoun, int *NCP, int *k, int *trace)
154 {
155 Rprintf(" QTRAN(): istep=%d, icoun=%d", *istep, *icoun);
156 if(*trace >= 2) {
157 Rprintf(", NCP[1:%d]=", k[0]);
158 for(int i=0; i < k[0]; i++) Rprintf(" %d", NCP[i]);
159 }
160 Rprintf("\n");
161 }
162
163