1 #include "matrix.h"
2 #include "utils.h"
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <assert.h>
7 #include <math.h>
8 
free_matrix(matrix m)9 void free_matrix(matrix m)
10 {
11     int i;
12     for(i = 0; i < m.rows; ++i) free(m.vals[i]);
13     free(m.vals);
14 }
15 
matrix_topk_accuracy(matrix truth,matrix guess,int k)16 float matrix_topk_accuracy(matrix truth, matrix guess, int k)
17 {
18     int* indexes = (int*)xcalloc(k, sizeof(int));
19     int n = truth.cols;
20     int i,j;
21     int correct = 0;
22     for(i = 0; i < truth.rows; ++i){
23         top_k(guess.vals[i], n, k, indexes);
24         for(j = 0; j < k; ++j){
25             int class_id = indexes[j];
26             if(truth.vals[i][class_id]){
27                 ++correct;
28                 break;
29             }
30         }
31     }
32     free(indexes);
33     return (float)correct/truth.rows;
34 }
35 
scale_matrix(matrix m,float scale)36 void scale_matrix(matrix m, float scale)
37 {
38     int i,j;
39     for(i = 0; i < m.rows; ++i){
40         for(j = 0; j < m.cols; ++j){
41             m.vals[i][j] *= scale;
42         }
43     }
44 }
45 
resize_matrix(matrix m,int size)46 matrix resize_matrix(matrix m, int size)
47 {
48     int i;
49     if (m.rows == size) return m;
50     if (m.rows < size) {
51         m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
52         for (i = m.rows; i < size; ++i) {
53             m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
54         }
55     } else if (m.rows > size) {
56         for (i = size; i < m.rows; ++i) {
57             free(m.vals[i]);
58         }
59         m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
60     }
61     m.rows = size;
62     return m;
63 }
64 
matrix_add_matrix(matrix from,matrix to)65 void matrix_add_matrix(matrix from, matrix to)
66 {
67     assert(from.rows == to.rows && from.cols == to.cols);
68     int i,j;
69     for(i = 0; i < from.rows; ++i){
70         for(j = 0; j < from.cols; ++j){
71             to.vals[i][j] += from.vals[i][j];
72         }
73     }
74 }
75 
make_matrix(int rows,int cols)76 matrix make_matrix(int rows, int cols)
77 {
78     int i;
79     matrix m;
80     m.rows = rows;
81     m.cols = cols;
82     m.vals = (float**)xcalloc(m.rows, sizeof(float*));
83     for(i = 0; i < m.rows; ++i){
84         m.vals[i] = (float*)xcalloc(m.cols, sizeof(float));
85     }
86     return m;
87 }
88 
hold_out_matrix(matrix * m,int n)89 matrix hold_out_matrix(matrix *m, int n)
90 {
91     int i;
92     matrix h;
93     h.rows = n;
94     h.cols = m->cols;
95     h.vals = (float**)xcalloc(h.rows, sizeof(float*));
96     for(i = 0; i < n; ++i){
97         int index = rand()%m->rows;
98         h.vals[i] = m->vals[index];
99         m->vals[index] = m->vals[--(m->rows)];
100     }
101     return h;
102 }
103 
pop_column(matrix * m,int c)104 float *pop_column(matrix *m, int c)
105 {
106     float* col = (float*)xcalloc(m->rows, sizeof(float));
107     int i, j;
108     for(i = 0; i < m->rows; ++i){
109         col[i] = m->vals[i][c];
110         for(j = c; j < m->cols-1; ++j){
111             m->vals[i][j] = m->vals[i][j+1];
112         }
113     }
114     --m->cols;
115     return col;
116 }
117 
csv_to_matrix(char * filename)118 matrix csv_to_matrix(char *filename)
119 {
120     FILE *fp = fopen(filename, "r");
121     if(!fp) file_error(filename);
122 
123     matrix m;
124     m.cols = -1;
125 
126     char *line;
127 
128     int n = 0;
129     int size = 1024;
130     m.vals = (float**)xcalloc(size, sizeof(float*));
131     while((line = fgetl(fp))){
132         if(m.cols == -1) m.cols = count_fields(line);
133         if(n == size){
134             size *= 2;
135             m.vals = (float**)xrealloc(m.vals, size * sizeof(float*));
136         }
137         m.vals[n] = parse_fields(line, m.cols);
138         free(line);
139         ++n;
140     }
141     m.vals = (float**)xrealloc(m.vals, n * sizeof(float*));
142     m.rows = n;
143     return m;
144 }
145 
matrix_to_csv(matrix m)146 void matrix_to_csv(matrix m)
147 {
148     int i, j;
149 
150     for(i = 0; i < m.rows; ++i){
151         for(j = 0; j < m.cols; ++j){
152             if(j > 0) printf(",");
153             printf("%.17g", m.vals[i][j]);
154         }
155         printf("\n");
156     }
157 }
158 
print_matrix(matrix m)159 void print_matrix(matrix m)
160 {
161     int i, j;
162     printf("%d X %d Matrix:\n",m.rows, m.cols);
163     printf(" __");
164     for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
165     printf("__ \n");
166 
167     printf("|  ");
168     for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
169     printf("  |\n");
170 
171     for(i = 0; i < m.rows; ++i){
172         printf("|  ");
173         for(j = 0; j < m.cols; ++j){
174             printf("%15.7f ", m.vals[i][j]);
175         }
176         printf(" |\n");
177     }
178     printf("|__");
179     for(j = 0; j < 16*m.cols-1; ++j) printf(" ");
180     printf("__|\n");
181 }
182 
183 
184 matrix make_matrix(int rows, int cols);
185 
186 void copy(float *x, float *y, int n);
187 float dist(float *x, float *y, int n);
188 int *sample(int n);
189 
closest_center(float * datum,matrix centers)190 int closest_center(float *datum, matrix centers)
191 {
192     int j;
193     int best = 0;
194     float best_dist = dist(datum, centers.vals[best], centers.cols);
195     for (j = 0; j < centers.rows; ++j) {
196         float new_dist = dist(datum, centers.vals[j], centers.cols);
197         if (new_dist < best_dist) {
198             best_dist = new_dist;
199             best = j;
200         }
201     }
202     return best;
203 }
204 
dist_to_closest_center(float * datum,matrix centers)205 float dist_to_closest_center(float *datum, matrix centers)
206 {
207     int ci = closest_center(datum, centers);
208     return dist(datum, centers.vals[ci], centers.cols);
209 }
210 
kmeans_expectation(matrix data,int * assignments,matrix centers)211 int kmeans_expectation(matrix data, int *assignments, matrix centers)
212 {
213     int i;
214     int converged = 1;
215     for (i = 0; i < data.rows; ++i) {
216         int closest = closest_center(data.vals[i], centers);
217         if (closest != assignments[i]) converged = 0;
218         assignments[i] = closest;
219     }
220     return converged;
221 }
222 
kmeans_maximization(matrix data,int * assignments,matrix centers)223 void kmeans_maximization(matrix data, int *assignments, matrix centers)
224 {
225     matrix old_centers = make_matrix(centers.rows, centers.cols);
226 
227     int i, j;
228     int *counts = (int*)xcalloc(centers.rows, sizeof(int));
229     for (i = 0; i < centers.rows; ++i) {
230         for (j = 0; j < centers.cols; ++j) {
231             old_centers.vals[i][j] = centers.vals[i][j];
232             centers.vals[i][j] = 0;
233         }
234     }
235     for (i = 0; i < data.rows; ++i) {
236         ++counts[assignments[i]];
237         for (j = 0; j < data.cols; ++j) {
238             centers.vals[assignments[i]][j] += data.vals[i][j];
239         }
240     }
241     for (i = 0; i < centers.rows; ++i) {
242         if (counts[i]) {
243             for (j = 0; j < centers.cols; ++j) {
244                 centers.vals[i][j] /= counts[i];
245             }
246         }
247     }
248 
249     for (i = 0; i < centers.rows; ++i) {
250         for (j = 0; j < centers.cols; ++j) {
251             if(centers.vals[i][j] == 0) centers.vals[i][j] = old_centers.vals[i][j];
252         }
253     }
254     free(counts);
255     free_matrix(old_centers);
256 }
257 
258 
259 
random_centers(matrix data,matrix centers)260 void random_centers(matrix data, matrix centers) {
261     int i;
262     int *s = sample(data.rows);
263     for (i = 0; i < centers.rows; ++i) {
264         copy(data.vals[s[i]], centers.vals[i], data.cols);
265     }
266     free(s);
267 }
268 
sample(int n)269 int *sample(int n)
270 {
271     int i;
272     int* s = (int*)xcalloc(n, sizeof(int));
273     for (i = 0; i < n; ++i) s[i] = i;
274     for (i = n - 1; i >= 0; --i) {
275         int swap = s[i];
276         int index = rand() % (i + 1);
277         s[i] = s[index];
278         s[index] = swap;
279     }
280     return s;
281 }
282 
dist(float * x,float * y,int n)283 float dist(float *x, float *y, int n)
284 {
285     //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]);
286     float mw = (x[0] < y[0]) ? x[0] : y[0];
287     float mh = (x[1] < y[1]) ? x[1] : y[1];
288     float inter = mw*mh;
289     float sum = x[0] * x[1] + y[0] * y[1];
290     float un = sum - inter;
291     float iou = inter / un;
292     return 1 - iou;
293 }
294 
copy(float * x,float * y,int n)295 void copy(float *x, float *y, int n)
296 {
297     int i;
298     for (i = 0; i < n; ++i) y[i] = x[i];
299 }
300 
do_kmeans(matrix data,int k)301 model do_kmeans(matrix data, int k)
302 {
303     matrix centers = make_matrix(k, data.cols);
304     int* assignments = (int*)xcalloc(data.rows, sizeof(int));
305     //smart_centers(data, centers);
306     random_centers(data, centers);  // IoU = 67.31% after kmeans
307 
308     /*
309     // IoU = 63.29%, anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
310     centers.vals[0][0] = 10; centers.vals[0][1] = 13;
311     centers.vals[1][0] = 16; centers.vals[1][1] = 30;
312     centers.vals[2][0] = 33; centers.vals[2][1] = 23;
313     centers.vals[3][0] = 30; centers.vals[3][1] = 61;
314     centers.vals[4][0] = 62; centers.vals[4][1] = 45;
315     centers.vals[5][0] = 59; centers.vals[5][1] = 119;
316     centers.vals[6][0] = 116; centers.vals[6][1] = 90;
317     centers.vals[7][0] = 156; centers.vals[7][1] = 198;
318     centers.vals[8][0] = 373; centers.vals[8][1] = 326;
319     */
320 
321     // range centers [min - max] using exp graph or Pyth example
322     if (k == 1) kmeans_maximization(data, assignments, centers);
323     int i;
324     for(i = 0; i < 1000 && !kmeans_expectation(data, assignments, centers); ++i) {
325         kmeans_maximization(data, assignments, centers);
326     }
327     printf("\n iterations = %d \n", i);
328     model m;
329     m.assignments = assignments;
330     m.centers = centers;
331     return m;
332 }
333