1 /*
2 * 疎行列を扱うためのコード
3 *
4 * (1) 行列(sparse_matrix)のインスタンスを作成し行列の要素を設定する
5 * (2) 行列から行列イメージ(matrix_image)を作成する
6 * * 行列イメージをnetwork byteorderでファイルに書き出す
7 * (3) 行列イメージを読み込み(or mmapする)要素にアクセスする
8 *
9 */
10 /*
11 * sparse matrix crammer
12 *
13 * sparse matrix storage uses following 2 sparse arrays
14 * *array of row
15 * *array of cells in a row
16 *
17 *(1/2)
18 * sparse row crammed row
19 * 0:0 1:1
20 * 1:1 ---->> 3:1
21 * 2:0 hash(h)%m 7:1
22 * 3:1 /
23 * 4:0 /
24 * 5:0 /
25 * 6:0
26 * 7:1
27 * 8:0
28 * (?:1 means non-all 0 row)
29 *(2/2)
30 * crammed row cram shift count
31 * 1:1 . . -> .. shift 0
32 * 3:1 . . -> .. shift 2
33 * 7:1 . . . -> ... shift 4
34 *
35 * contents of |
36 * matrix \|/
37 *
38 * ....... unified array of (value.column) pair
39 *
40 * matrix image
41 * image[0] : length of hashed row array
42 * image[1] : length of crammed cell array
43 * image[2 ~ 2+image[0]-1] : hashed row array
44 * image[2+image[0] ~ 2+image[0]+image[1]-1] : hashed row array
45 *
46 * Copyright (C) 2005 TABATA Yusuke
47 *
48 */
49 /*
50 This library is free software; you can redistribute it and/or
51 modify it under the terms of the GNU Lesser General Public
52 License as published by the Free Software Foundation; either
53 version 2 of the License, or (at your option) any later version.
54
55 This library is distributed in the hope that it will be useful,
56 but WITHOUT ANY WARRANTY; without even the implied warranty of
57 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
58 Lesser General Public License for more details.
59
60 You should have received a copy of the GNU Lesser General Public
61 License along with this library; if not, write to the Free Software
62 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
63 */
64 #include <stdio.h>
65 #include <stdlib.h>
66
67 #include <anthy/diclib.h>
68 /* public APIs */
69 #include <anthy/matrix.h>
70
71 /* maximum length allowed for hash chain */
72 #define MAX_FAILURE 50
73
74 struct list_elm {
75 int index;
76 int value;
77 void *ptr;
78 struct list_elm *next;
79 /* bypass to mitigate O(n) insertion cost */
80 struct list_elm *orig_next;
81 };
82
83 struct array_elm {
84 int index;
85 int value;
86 void *ptr;
87 };
88
89 /*
90 * sparse array has two representation
91 *
92 * (1) list and (2) hashed array
93 * build list first and sparse_array_make_array() to build hashed array
94 * this stores one value and one pointer
95 *
96 */
97 struct sparse_array {
98 /* list representation */
99 int elm_count;
100 /* sorted */
101 struct list_elm head;
102
103 /* array representation */
104 int array_len;
105 struct array_elm *array;
106 };
107
108 static struct sparse_array *
sparse_array_new(void)109 sparse_array_new(void)
110 {
111 struct sparse_array *a = malloc(sizeof(struct sparse_array));
112 /**/
113 a->elm_count = 0;
114 a->head.next = NULL;
115 a->head.orig_next = NULL;
116 a->head.index = -1;
117 /**/
118 a->array_len = 0;
119 a->array = NULL;
120 return a;
121 }
122
123 static void
insert_elm_after(struct list_elm * elm,int idx,int val,void * ptr)124 insert_elm_after(struct list_elm *elm, int idx, int val, void *ptr)
125 {
126 struct list_elm *new_elm = malloc(sizeof(struct list_elm));
127 new_elm->value = val;
128 new_elm->index = idx;
129 new_elm->ptr = ptr;
130 /**/
131 new_elm->next = elm->next;
132 new_elm->orig_next = elm->next;
133 elm->next = new_elm;
134 }
135
136 static void
sparse_array_set(struct sparse_array * sa,int idx,int val,void * ptr)137 sparse_array_set(struct sparse_array *sa, int idx, int val, void *ptr)
138 {
139 struct list_elm *e;
140 e = &sa->head;
141 while (e) {
142 if (e->index == idx) {
143 /* find same index and update */
144 e->value = val;
145 e->ptr = ptr;
146 return ;
147 }
148 /* search */
149 if (e->index < idx && (!e->next || idx < e->next->index)) {
150 insert_elm_after(e, idx, val, ptr);
151 /**/
152 sa->elm_count ++;
153 return ;
154 }
155 /* go next */
156 if (e->orig_next && e->orig_next->index < idx) {
157 /* leap ahead */
158 e = e->orig_next;
159 } else {
160 e = e->next;
161 }
162 }
163 }
164
165 static int
hash(int val,int max,int nth)166 hash(int val, int max, int nth)
167 {
168 val += nth * 113;
169 if (val < 0) {
170 val = -val;
171 }
172 if (max == 0) {
173 return 0;
174 }
175 return val % max;
176 }
177
178 static int
sparse_array_try_make_array(struct sparse_array * s)179 sparse_array_try_make_array(struct sparse_array *s)
180 {
181 int i;
182 struct list_elm *e;
183 /* initialize */
184 free(s->array);
185 s->array = malloc(sizeof(struct array_elm) * s->array_len);
186 for (i = 0; i < s->array_len; i++) {
187 s->array[i].index = -1;
188 }
189
190 /* push */
191 for (e = s->head.next; e; e = e->next) {
192 int ok = 0;
193 int n = 0;
194 do {
195 int h = hash(e->index, s->array_len, n);
196 if (s->array[h].index == -1) {
197 /* find unused element in this array */
198 ok = 1;
199 s->array[h].index = e->index;
200 s->array[h].value = e->value;
201 s->array[h].ptr = e->ptr;
202 } else {
203 /* collision */
204 n ++;
205 if (n > MAX_FAILURE) {
206 /* too much collision */
207 return 1;
208 }
209 }
210 } while (!ok);
211 }
212 return 0;
213 }
214
215 static void
sparse_array_make_array(struct sparse_array * s)216 sparse_array_make_array(struct sparse_array *s)
217 {
218 /* estimate length */
219 if (s->elm_count == 1) {
220 s->array_len = 1;
221 } else {
222 s->array_len = s->elm_count;
223 }
224 while (sparse_array_try_make_array(s)) {
225 /* expand a little */
226 s->array_len ++;
227 s->array_len *= 9;
228 s->array_len /= 8;
229 }
230 }
231
232 static struct array_elm *
sparse_array_get(struct sparse_array * s,int index,struct array_elm * arg)233 sparse_array_get(struct sparse_array *s, int index, struct array_elm *arg)
234 {
235 if (s->array) {
236 int n = 0;
237 while (1) {
238 int h = hash(index, s->array_len, n);
239 if (s->array[h].index == index) {
240 *arg = s->array[h];
241 return arg;
242 }
243 n ++;
244 if (n == MAX_FAILURE) {
245 return NULL;
246 }
247 }
248 } else {
249 struct list_elm *e = e = s->head.next;
250 while (e) {
251 if (e->index == index) {
252 arg->value = e->value;
253 arg->ptr = e->ptr;
254 return arg;
255 }
256 /* go next */
257 if (e->orig_next && e->orig_next->index < index) {
258 /* leap ahead */
259 e = e->orig_next;
260 } else {
261 e = e->next;
262 }
263 }
264 return NULL;
265 }
266 }
267
268 #if 0
269 static int
270 sparse_array_get_int(struct sparse_array *s, int index)
271 {
272 struct array_elm elm;
273 if (sparse_array_get(s, index, &elm)) {
274 return elm.value;
275 }
276 return 0;
277 }
278 #endif
279
280 static void *
sparse_array_get_ptr(struct sparse_array * s,int index)281 sparse_array_get_ptr(struct sparse_array *s, int index)
282 {
283 struct array_elm elm;
284 if (sparse_array_get(s, index, &elm)) {
285 return elm.ptr;
286 }
287 return NULL;
288 }
289
290 /**/
291 struct sparse_matrix {
292 /**/
293 struct sparse_array *row_array;
294 /* image information */
295 int nr_rows;
296 int array_length;
297 };
298
299 /* API */
300 struct sparse_matrix *
anthy_sparse_matrix_new()301 anthy_sparse_matrix_new()
302 {
303 struct sparse_matrix *m = malloc(sizeof(struct sparse_matrix));
304 m->row_array = sparse_array_new();
305 m->nr_rows = 0;
306 return m;
307 }
308
309 static struct sparse_array *
find_row(struct sparse_matrix * m,int row,int create)310 find_row(struct sparse_matrix *m, int row, int create)
311 {
312 struct sparse_array *a;
313 a = sparse_array_get_ptr(m->row_array, row);
314 if (a) {
315 return a;
316 }
317 if (!create) {
318 return NULL;
319 }
320 /* allocate a new row */
321 a = sparse_array_new();
322 sparse_array_set(m->row_array, row, 0, a);
323 m->nr_rows ++;
324 return a;
325 }
326
327 /* API */
328 void
anthy_sparse_matrix_set(struct sparse_matrix * m,int row,int column,int value,void * ptr)329 anthy_sparse_matrix_set(struct sparse_matrix *m, int row, int column,
330 int value, void *ptr)
331 {
332 struct sparse_array *a;
333 a = find_row(m, row, 1);
334 sparse_array_set(a, column, value, ptr);
335 }
336
337 /* API */
338 int
anthy_sparse_matrix_get_int(struct sparse_matrix * m,int row,int column)339 anthy_sparse_matrix_get_int(struct sparse_matrix *m, int row, int column)
340 {
341 struct sparse_array *a;
342 struct list_elm *e;
343 a = find_row(m, row, 1);
344 if (!a) {
345 return 0;
346 }
347 for (e = &a->head; e; e = e->next) {
348 if (e->index == column) {
349 return e->value;
350 }
351 }
352 return 0;
353 }
354
355 /* API */
356 void
anthy_sparse_matrix_make_matrix(struct sparse_matrix * m)357 anthy_sparse_matrix_make_matrix(struct sparse_matrix *m)
358 {
359 struct array_elm *ae;
360 int i;
361 int offset = 0;
362 /**/
363 sparse_array_make_array(m->row_array);
364 /**/
365 for (i = 0; i < m->row_array->array_len; i++) {
366 struct sparse_array *row;
367 ae = &m->row_array->array[i];
368 /**/
369 ae->value = offset;
370 if (ae->index == -1) {
371 continue;
372 }
373 /**/
374 row = ae->ptr;
375 sparse_array_make_array(row);
376 offset += row->array_len;
377 }
378 m->array_length = offset;
379 }
380
381 /* API */
382 struct matrix_image *
anthy_matrix_image_new(struct sparse_matrix * s)383 anthy_matrix_image_new(struct sparse_matrix *s)
384 {
385 struct matrix_image *mi;
386 int i;
387 int offset;
388 /**/
389 mi = malloc(sizeof(struct matrix_image));
390 mi->size = 2 + s->row_array->array_len * 2 + s->array_length * 2;
391 mi->image = malloc(sizeof(int) * mi->size);
392 mi->image[0] = s->row_array->array_len;
393 mi->image[1] = s->array_length;
394 /* row index */
395 offset = 2;
396 for (i = 0; i < s->row_array->array_len; i++) {
397 struct array_elm *ae;
398 ae = &s->row_array->array[i];
399 mi->image[offset + i*2] = ae->index;
400 mi->image[offset + i*2 + 1] = ae->value;
401 }
402 /* cells */
403 offset = 2 + s->row_array->array_len * 2;
404 for (i = 0; i < s->row_array->array_len; i++) {
405 struct array_elm *ae;
406 struct sparse_array *sa;
407 int j;
408 ae = &s->row_array->array[i];
409 if (ae->index == -1) {
410 continue;
411 }
412 sa = ae->ptr;
413 if (!sa) {
414 continue;
415 }
416 for (j = 0; j < sa->array_len; j++) {
417 struct array_elm *cell = &sa->array[j];
418 mi->image[offset] = cell->index;
419 if (cell->index == -1) {
420 mi->image[offset + 1] = -1;
421 } else {
422 mi->image[offset + 1] = cell->value;
423 }
424 offset += 2;
425 }
426 }
427 /**/
428 return mi;
429 }
430
431 static int
read_int(int * image,int idx,int en)432 read_int(int *image, int idx, int en)
433 {
434 if (en) {
435 return anthy_dic_ntohl(image[idx]);
436 }
437 return image[idx];
438 }
439
440 static int
do_matrix_peek(int * image,int row,int col,int en)441 do_matrix_peek(int *image, int row, int col, int en)
442 {
443 int n, h, shift, next_shift;
444 int row_array_len = read_int(image, 0, en);
445 int column_array_len;
446 int cell_offset;
447
448 /* find row */
449 if (row_array_len == 0) {
450 return 0;
451 }
452 for (n = 0; ; n++) {
453 h = hash(row, row_array_len, n);
454 if (read_int(image, 2+ h * 2, en) == row) {
455 shift = read_int(image, 2+h*2+1, en);
456 break;
457 }
458 if (read_int(image, 2+ h * 2, en) == -1) {
459 return 0;
460 }
461 if (n > MAX_FAILURE) {
462 return 0;
463 }
464 }
465
466 /* find shift count of next row */
467 if (h == row_array_len - 1) {
468 /* last one */
469 next_shift = read_int(image, 1, en);
470 } else {
471 /* not last one */
472 next_shift = read_int(image, 2+h*2+2+1, en);
473 }
474
475 /* crammed width of this row */
476 column_array_len = next_shift - shift;
477
478 /* cells in this image */
479 cell_offset = 2 + row_array_len * 2;
480 for (n = 0; ; n++) {
481 h = hash(col, column_array_len, n);
482 if (read_int(image, cell_offset + shift * 2+ h * 2, en) == col) {
483 return read_int(image, cell_offset + shift * 2 + h*2+1, en);
484 }
485 if (read_int(image, cell_offset + shift * 2+ h * 2, en) == -1) {
486 /* not exist */
487 return 0;
488 }
489 if (n > MAX_FAILURE) {
490 return 0;
491 }
492 }
493 return 0;
494 }
495
496 /* API */
497 int
anthy_matrix_image_peek(int * image,int row,int col)498 anthy_matrix_image_peek(int *image, int row, int col)
499 {
500 if (!image) {
501 return 0;
502 }
503 return do_matrix_peek(image, row, col, 1);
504 }
505
506 #ifdef DEBUG
507 /* for debug purpose */
508 static void
sparse_array_dump(struct sparse_array * s)509 sparse_array_dump(struct sparse_array *s)
510 {
511 struct list_elm *e;
512 int i;
513 printf("list(%d):", s->elm_count);
514 for (e = s->head.next; e; e = e->next) {
515 printf(" %d:%d(%x)", e->index, e->value, (unsigned long)e->ptr);
516 }
517 printf("\n");
518 if (!s->array) {
519 return ;
520 }
521 printf("array(%d):", s->array_len);
522 for (i = 0; i < s->array_len; i ++) {
523 struct array_elm *ae = &s->array[i];
524 if (ae->index != -1) {
525 printf(" %d:%d,%d(%x)", i, ae->index, ae->value, (unsigned long)ae->ptr);
526 }
527 }
528 printf("\n");
529 return ;
530 /**/
531 }
532
533 /* for debug purpose */
534 void
sparse_matrix_dump(struct sparse_matrix * m)535 sparse_matrix_dump(struct sparse_matrix *m)
536 {
537 struct list_elm *e;
538 struct array_elm *ae;
539 int i, offset;
540 if (!m->row_array) {
541 for (e = m->row_array->head.next; e; e = e->next) {
542 sparse_array_dump(e->ptr);
543 }
544 return ;
545 }
546 printf("\nnumber of row=%d, row array size=%d, cell array size=%d\n\n",
547 m->nr_rows, m->row_array->array_len, m->array_length);
548 /* row part */
549 for (i = 0; i < m->row_array->array_len; i++) {
550 struct array_elm *ae;
551 ae = &m->row_array->array[i];
552 if (ae->index != -1) {
553 printf(" [%d] row=%d, shift=%d\n", i, ae->index, ae->value);
554 }
555 }
556 printf("\n");
557 offset = 0;
558 for (i = 0; i < m->row_array->array_len; i++) {
559 struct array_elm *ae;
560 struct sparse_array *sa;
561 int j;
562 ae = &m->row_array->array[i];
563 sa = ae->ptr;
564 if (!sa) {
565 continue;
566 }
567 for (j = 0; j < sa->array_len; j++) {
568 struct array_elm *cell = &sa->array[j];
569 if (cell->index != -1) {
570 printf(" [%d] column=%d, value=%d\n", offset, cell->index, cell->value);
571 }
572 offset ++;
573 }
574 }
575 printf("\n");
576 }
577 #endif /* DEBUG */
578