1 #include "matrix.h"
2 #include "utils.h"
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <math.h>
8
free_matrix(matrix m)9 void free_matrix(matrix m)
10 {
11 int i;
12 for(i = 0; i < m.rows; ++i) free(m.vals[i]);
13 free(m.vals);
14 }
15
matrix_topk_accuracy(matrix truth,matrix guess,int k)16 float matrix_topk_accuracy(matrix truth, matrix guess, int k)
17 {
18 int* indexes = (int*)xcalloc(k, sizeof(int));
19 int n = truth.cols;
20 int i,j;
21 int correct = 0;
22 for(i = 0; i < truth.rows; ++i){
23 top_k(guess.vals[i], n, k, indexes);
24 for(j = 0; j < k; ++j){
25 int class_id = indexes[j];
26 if(truth.vals[i][class_id]){
27 ++correct;
28 break;
29 }
30 }
31 }
32 free(indexes);
33 return (float)correct/truth.rows;
34 }
35
scale_matrix(matrix m,float scale)36 void scale_matrix(matrix m, float scale)
37 {
38 int i,j;
39 for(i = 0; i < m.rows; ++i){
40 for(j = 0; j < m.cols; ++j){
41 m.vals[i][j] *= scale;
42 }
43 }
44 }
45
resize_matrix(matrix m,int size)46 matrix resize_matrix(matrix m, int size)
47 {
48 int i;
49 if (m.rows == size) return m;
50 if (m.rows < size) {
51 m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
52 for (i = m.rows; i < size; ++i) {
53 m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
54 }
55 } else if (m.rows > size) {
56 for (i = size; i < m.rows; ++i) {
57 free(m.vals[i]);
58 }
59 m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
60 }
61 m.rows = size;
62 return m;
63 }
64
matrix_add_matrix(matrix from,matrix to)65 void matrix_add_matrix(matrix from, matrix to)
66 {
67 assert(from.rows == to.rows && from.cols == to.cols);
68 int i,j;
69 for(i = 0; i < from.rows; ++i){
70 for(j = 0; j < from.cols; ++j){
71 to.vals[i][j] += from.vals[i][j];
72 }
73 }
74 }
75
make_matrix(int rows,int cols)76 matrix make_matrix(int rows, int cols)
77 {
78 int i;
79 matrix m;
80 m.rows = rows;
81 m.cols = cols;
82 m.vals = (float**)xcalloc(m.rows, sizeof(float*));
83 for(i = 0; i < m.rows; ++i){
84 m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
85 }
86 return m;
87 }
88
hold_out_matrix(matrix * m,int n)89 matrix hold_out_matrix(matrix *m, int n)
90 {
91 int i;
92 matrix h;
93 h.rows = n;
94 h.cols = m->cols;
95 h.vals = (float**)xcalloc(h.rows, sizeof(float*));
96 for(i = 0; i < n; ++i){
97 int index = rand()%m->rows;
98 h.vals[i] = m->vals[index];
99 m->vals[index] = m->vals[--(m->rows)];
100 }
101 return h;
102 }
103
pop_column(matrix * m,int c)104 float *pop_column(matrix *m, int c)
105 {
106 float* col = (float*)xcalloc(m->rows, sizeof(float));
107 int i, j;
108 for(i = 0; i < m->rows; ++i){
109 col[i] = m->vals[i][c];
110 for(j = c; j < m->cols-1; ++j){
111 m->vals[i][j] = m->vals[i][j+1];
112 }
113 }
114 --m->cols;
115 return col;
116 }
117
csv_to_matrix(char * filename)118 matrix csv_to_matrix(char *filename)
119 {
120 FILE *fp = fopen(filename, "r");
121 if(!fp) file_error(filename);
122
123 matrix m;
124 m.cols = -1;
125
126 char *line;
127
128 int n = 0;
129 int size = 1024;
130 m.vals = (float**)xcalloc(size, sizeof(float*));
131 while((line = fgetl(fp))){
132 if(m.cols == -1) m.cols = count_fields(line);
133 if(n == size){
134 size *= 2;
135 m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
136 }
137 m.vals[n] = parse_fields(line, m.cols);
138 free(line);
139 ++n;
140 }
141 m.vals = (float**)xrealloc(m.vals, n * sizeof(float*));
142 m.rows = n;
143 return m;
144 }
145
matrix_to_csv(matrix m)146 void matrix_to_csv(matrix m)
147 {
148 int i, j;
149
150 for(i = 0; i < m.rows; ++i){
151 for(j = 0; j < m.cols; ++j){
152 if(j > 0) printf(",");
153 printf("%.17g", m.vals[i][j]);
154 }
155 printf("\n");
156 }
157 }
158
print_matrix(matrix m)159 void print_matrix(matrix m)
160 {
161 int i, j;
162 printf("%d X %d Matrix:\n",m.rows, m.cols);
163 printf(" __");
164 for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
165 printf("__ \n");
166
167 printf("| ");
168 for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
169 printf(" |\n");
170
171 for(i = 0; i < m.rows; ++i){
172 printf("| ");
173 for(j = 0; j < m.cols; ++j){
174 printf("%15.7f ", m.vals[i][j]);
175 }
176 printf(" |\n");
177 }
178 printf("|__");
179 for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
180 printf("__|\n");
181 }
182
183
184 matrix make_matrix(int rows, int cols);
185
186 void copy(float *x, float *y, int n);
187 float dist(float *x, float *y, int n);
188 int *sample(int n);
189
closest_center(float * datum,matrix centers)190 int closest_center(float *datum, matrix centers)
191 {
192 int j;
193 int best = 0;
194 float best_dist = dist(datum, centers.vals[best], centers.cols);
195 for (j = 0; j < centers.rows; ++j) {
196 float new_dist = dist(datum, centers.vals[j], centers.cols);
197 if (new_dist < best_dist) {
198 best_dist = new_dist;
199 best = j;
200 }
201 }
202 return best;
203 }
204
dist_to_closest_center(float * datum,matrix centers)205 float dist_to_closest_center(float *datum, matrix centers)
206 {
207 int ci = closest_center(datum, centers);
208 return dist(datum, centers.vals[ci], centers.cols);
209 }
210
kmeans_expectation(matrix data,int * assignments,matrix centers)211 int kmeans_expectation(matrix data, int *assignments, matrix centers)
212 {
213 int i;
214 int converged = 1;
215 for (i = 0; i < data.rows; ++i) {
216 int closest = closest_center(data.vals[i], centers);
217 if (closest != assignments[i]) converged = 0;
218 assignments[i] = closest;
219 }
220 return converged;
221 }
222
kmeans_maximization(matrix data,int * assignments,matrix centers)223 void kmeans_maximization(matrix data, int *assignments, matrix centers)
224 {
225 matrix old_centers = make_matrix(centers.rows, centers.cols);
226
227 int i, j;
228 int *counts = (int*)xcalloc(centers.rows, sizeof(int));
229 for (i = 0; i < centers.rows; ++i) {
230 for (j = 0; j < centers.cols; ++j) {
231 old_centers.vals[i][j] = centers.vals[i][j];
232 centers.vals[i][j] = 0;
233 }
234 }
235 for (i = 0; i < data.rows; ++i) {
236 ++counts[assignments[i]];
237 for (j = 0; j < data.cols; ++j) {
238 centers.vals[assignments[i]][j] += data.vals[i][j];
239 }
240 }
241 for (i = 0; i < centers.rows; ++i) {
242 if (counts[i]) {
243 for (j = 0; j < centers.cols; ++j) {
244 centers.vals[i][j] /= counts[i];
245 }
246 }
247 }
248
249 for (i = 0; i < centers.rows; ++i) {
250 for (j = 0; j < centers.cols; ++j) {
251 if(centers.vals[i][j] == 0) centers.vals[i][j] = old_centers.vals[i][j];
252 }
253 }
254 free(counts);
255 free_matrix(old_centers);
256 }
257
258
259
random_centers(matrix data,matrix centers)260 void random_centers(matrix data, matrix centers) {
261 int i;
262 int *s = sample(data.rows);
263 for (i = 0; i < centers.rows; ++i) {
264 copy(data.vals[s[i]], centers.vals[i], data.cols);
265 }
266 free(s);
267 }
268
sample(int n)269 int *sample(int n)
270 {
271 int i;
272 int* s = (int*)xcalloc(n, sizeof(int));
273 for (i = 0; i < n; ++i) s[i] = i;
274 for (i = n - 1; i >= 0; --i) {
275 int swap = s[i];
276 int index = rand() % (i + 1);
277 s[i] = s[index];
278 s[index] = swap;
279 }
280 return s;
281 }
282
dist(float * x,float * y,int n)283 float dist(float *x, float *y, int n)
284 {
285 //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]);
286 float mw = (x[0] < y[0]) ? x[0] : y[0];
287 float mh = (x[1] < y[1]) ? x[1] : y[1];
288 float inter = mw*mh;
289 float sum = x[0] * x[1] + y[0] * y[1];
290 float un = sum - inter;
291 float iou = inter / un;
292 return 1 - iou;
293 }
294
copy(float * x,float * y,int n)295 void copy(float *x, float *y, int n)
296 {
297 int i;
298 for (i = 0; i < n; ++i) y[i] = x[i];
299 }
300
do_kmeans(matrix data,int k)301 model do_kmeans(matrix data, int k)
302 {
303 matrix centers = make_matrix(k, data.cols);
304 int* assignments = (int*)xcalloc(data.rows, sizeof(int));
305 //smart_centers(data, centers);
306 random_centers(data, centers); // IoU = 67.31% after kmeans
307
308 /*
309 // IoU = 63.29%, anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
310 centers.vals[0][0] = 10; centers.vals[0][1] = 13;
311 centers.vals[1][0] = 16; centers.vals[1][1] = 30;
312 centers.vals[2][0] = 33; centers.vals[2][1] = 23;
313 centers.vals[3][0] = 30; centers.vals[3][1] = 61;
314 centers.vals[4][0] = 62; centers.vals[4][1] = 45;
315 centers.vals[5][0] = 59; centers.vals[5][1] = 119;
316 centers.vals[6][0] = 116; centers.vals[6][1] = 90;
317 centers.vals[7][0] = 156; centers.vals[7][1] = 198;
318 centers.vals[8][0] = 373; centers.vals[8][1] = 326;
319 */
320
321 // range centers [min - max] using exp graph or Pyth example
322 if (k == 1) kmeans_maximization(data, assignments, centers);
323 int i;
324 for(i = 0; i < 1000 && !kmeans_expectation(data, assignments, centers); ++i) {
325 kmeans_maximization(data, assignments, centers);
326 }
327 printf("\n iterations = %d \n", i);
328 model m;
329 m.assignments = assignments;
330 m.centers = centers;
331 return m;
332 }
333