1 /*
2  * 疎行列を扱うためのコード
3  *
4  * (1) 行列(sparse_matrix)のインスタンスを作成し行列の要素を設定する
5  * (2) 行列から行列イメージ(matrix_image)を作成する
6  *  *  行列イメージをnetwork byteorderでファイルに書き出す
7  * (3) 行列イメージを読み込み(or mmapする)要素にアクセスする
8  *
9  */
10 /*
11  * sparse matrix crammer
12  *
13  *  sparse matrix storage uses following 2 sparse arrays
14  *   *array of row
15  *   *array of cells in a row
16  *
17  *(1/2)
18  * sparse row    crammed row
19  *  0:0                1:1
20  *  1:1     ---->>     3:1
21  *  2:0   hash(h)%m    7:1
22  *  3:1       /
23  *  4:0      /
24  *  5:0     /
25  *  6:0
26  *  7:1
27  *  8:0
28  *     (?:1 means non-all 0 row)
29  *(2/2)
30  * crammed row      cram        shift count
31  *  1:1    .    .    -> ..      shift 0
32  *  3:1 .   .        ->   ..    shift 2
33  *  7:1   .  .    .  ->     ... shift 4
34  *
35  *     contents of        |
36  *         matrix        \|/
37  *
38  *                      ....... unified array of (value.column) pair
39  *
40  *  matrix image
41  *   image[0] : length of hashed row array
42  *   image[1] : length of crammed cell array
43  *   image[2 ~ 2+image[0]-1] : hashed row array
44  *   image[2+image[0] ~ 2+image[0]+image[1]-1] : hashed row array
45  *
46  * Copyright (C) 2005 TABATA Yusuke
47  *
48  */
49 /*
50   This library is free software; you can redistribute it and/or
51   modify it under the terms of the GNU Lesser General Public
52   License as published by the Free Software Foundation; either
53   version 2 of the License, or (at your option) any later version.
54 
55   This library is distributed in the hope that it will be useful,
56   but WITHOUT ANY WARRANTY; without even the implied warranty of
57   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
58   Lesser General Public License for more details.
59 
60   You should have received a copy of the GNU Lesser General Public
61   License along with this library; if not, write to the Free Software
62   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
63  */
64 #include <stdio.h>
65 #include <stdlib.h>
66 
67 #include <anthy/diclib.h>
68 /* public APIs */
69 #include <anthy/matrix.h>
70 
71 /* maximum length allowed for hash chain */
72 #define MAX_FAILURE 50
73 
74 struct list_elm {
75   int index;
76   int value;
77   void *ptr;
78   struct list_elm *next;
79   /* bypass to mitigate O(n) insertion cost */
80   struct list_elm *orig_next;
81 };
82 
83 struct array_elm {
84   int index;
85   int value;
86   void *ptr;
87 };
88 
89 /*
90  * sparse array has two representation
91  *
92  *  (1) list and (2) hashed array
93  * build list first and sparse_array_make_array() to build hashed array
94  * this stores one value and one pointer
95  *
96  */
97 struct sparse_array {
98   /* list representation */
99   int elm_count;
100   /* sorted */
101   struct list_elm head;
102 
103   /* array representation */
104   int array_len;
105   struct array_elm *array;
106 };
107 
108 static struct sparse_array *
sparse_array_new(void)109 sparse_array_new(void)
110 {
111   struct sparse_array *a = malloc(sizeof(struct sparse_array));
112   /**/
113   a->elm_count = 0;
114   a->head.next = NULL;
115   a->head.orig_next = NULL;
116   a->head.index = -1;
117   /**/
118   a->array_len = 0;
119   a->array = NULL;
120   return a;
121 }
122 
123 static void
insert_elm_after(struct list_elm * elm,int idx,int val,void * ptr)124 insert_elm_after(struct list_elm *elm, int idx, int val, void *ptr)
125 {
126   struct list_elm *new_elm = malloc(sizeof(struct list_elm));
127   new_elm->value = val;
128   new_elm->index = idx;
129   new_elm->ptr = ptr;
130   /**/
131   new_elm->next = elm->next;
132   new_elm->orig_next = elm->next;
133   elm->next = new_elm;
134 }
135 
136 static void
sparse_array_set(struct sparse_array * sa,int idx,int val,void * ptr)137 sparse_array_set(struct sparse_array *sa, int idx, int val, void *ptr)
138 {
139   struct list_elm *e;
140   e = &sa->head;
141   while (e) {
142     if (e->index == idx) {
143       /* find same index and update */
144       e->value = val;
145       e->ptr = ptr;
146       return ;
147     }
148     /* search */
149     if (e->index < idx && (!e->next || idx < e->next->index)) {
150       insert_elm_after(e, idx, val, ptr);
151       /**/
152       sa->elm_count ++;
153       return ;
154     }
155     /* go next */
156     if (e->orig_next && e->orig_next->index < idx) {
157       /* leap ahead */
158       e = e->orig_next;
159     } else {
160       e = e->next;
161     }
162   }
163 }
164 
165 static int
hash(int val,int max,int nth)166 hash(int val, int max, int nth)
167 {
168   val += nth * 113;
169   if (val < 0) {
170     val = -val;
171   }
172   if (max == 0) {
173     return 0;
174   }
175   return val % max;
176 }
177 
178 static int
sparse_array_try_make_array(struct sparse_array * s)179 sparse_array_try_make_array(struct sparse_array *s)
180 {
181   int i;
182   struct list_elm *e;
183   /* initialize */
184   free(s->array);
185   s->array = malloc(sizeof(struct array_elm) * s->array_len);
186   for (i = 0; i < s->array_len; i++) {
187     s->array[i].index = -1;
188   }
189 
190   /* push */
191   for (e = s->head.next; e; e = e->next) {
192     int ok = 0;
193     int n = 0;
194     do {
195       int h = hash(e->index, s->array_len, n);
196       if (s->array[h].index == -1) {
197 	/* find unused element in this array */
198 	ok = 1;
199 	s->array[h].index = e->index;
200 	s->array[h].value = e->value;
201 	s->array[h].ptr = e->ptr;
202       } else {
203 	/* collision */
204 	n ++;
205 	if (n > MAX_FAILURE) {
206 	  /* too much collision */
207 	  return 1;
208 	}
209       }
210     } while (!ok);
211   }
212   return 0;
213 }
214 
215 static void
sparse_array_make_array(struct sparse_array * s)216 sparse_array_make_array(struct sparse_array *s)
217 {
218   /* estimate length */
219   if (s->elm_count == 1) {
220     s->array_len = 1;
221   } else {
222     s->array_len = s->elm_count;
223   }
224   while (sparse_array_try_make_array(s)) {
225     /* expand a little */
226     s->array_len ++;
227     s->array_len *= 9;
228     s->array_len /= 8;
229   }
230 }
231 
232 static struct array_elm *
sparse_array_get(struct sparse_array * s,int index,struct array_elm * arg)233 sparse_array_get(struct sparse_array *s, int index, struct array_elm *arg)
234 {
235   if (s->array) {
236     int n = 0;
237     while (1) {
238       int h = hash(index, s->array_len, n);
239       if (s->array[h].index == index) {
240 	*arg = s->array[h];
241 	return arg;
242       }
243       n ++;
244       if (n == MAX_FAILURE) {
245 	return NULL;
246       }
247     }
248   } else {
249     struct list_elm *e = e = s->head.next;
250     while (e) {
251       if (e->index == index) {
252 	arg->value = e->value;
253 	arg->ptr = e->ptr;
254 	return arg;
255       }
256       /* go next */
257       if (e->orig_next && e->orig_next->index < index) {
258 	/* leap ahead */
259 	e = e->orig_next;
260       } else {
261 	e = e->next;
262       }
263     }
264     return NULL;
265   }
266 }
267 
268 #if 0
269 static int
270 sparse_array_get_int(struct sparse_array *s, int index)
271 {
272   struct array_elm elm;
273   if (sparse_array_get(s, index, &elm)) {
274     return elm.value;
275   }
276   return 0;
277 }
278 #endif
279 
280 static void *
sparse_array_get_ptr(struct sparse_array * s,int index)281 sparse_array_get_ptr(struct sparse_array *s, int index)
282 {
283   struct array_elm elm;
284   if (sparse_array_get(s, index, &elm)) {
285     return elm.ptr;
286   }
287   return NULL;
288 }
289 
290 /**/
291 struct sparse_matrix {
292   /**/
293   struct sparse_array *row_array;
294   /* image information */
295   int nr_rows;
296   int array_length;
297 };
298 
299 /* API */
300 struct sparse_matrix *
anthy_sparse_matrix_new()301 anthy_sparse_matrix_new()
302 {
303   struct sparse_matrix *m = malloc(sizeof(struct sparse_matrix));
304   m->row_array = sparse_array_new();
305   m->nr_rows = 0;
306   return m;
307 }
308 
309 static struct sparse_array *
find_row(struct sparse_matrix * m,int row,int create)310 find_row(struct sparse_matrix *m, int row, int create)
311 {
312   struct sparse_array *a;
313   a = sparse_array_get_ptr(m->row_array, row);
314   if (a) {
315     return a;
316   }
317   if (!create) {
318     return NULL;
319   }
320   /* allocate a new row */
321   a = sparse_array_new();
322   sparse_array_set(m->row_array, row, 0, a);
323   m->nr_rows ++;
324   return a;
325 }
326 
327 /* API */
328 void
anthy_sparse_matrix_set(struct sparse_matrix * m,int row,int column,int value,void * ptr)329 anthy_sparse_matrix_set(struct sparse_matrix *m, int row, int column,
330 			int value, void *ptr)
331 {
332   struct sparse_array *a;
333   a = find_row(m, row, 1);
334   sparse_array_set(a, column, value, ptr);
335 }
336 
337 /* API */
338 int
anthy_sparse_matrix_get_int(struct sparse_matrix * m,int row,int column)339 anthy_sparse_matrix_get_int(struct sparse_matrix *m, int row, int column)
340 {
341   struct sparse_array *a;
342   struct list_elm *e;
343   a = find_row(m, row, 1);
344   if (!a) {
345     return 0;
346   }
347   for (e = &a->head; e; e = e->next) {
348     if (e->index == column) {
349       return e->value;
350     }
351   }
352   return 0;
353 }
354 
355 /* API */
356 void
anthy_sparse_matrix_make_matrix(struct sparse_matrix * m)357 anthy_sparse_matrix_make_matrix(struct sparse_matrix *m)
358 {
359   struct array_elm *ae;
360   int i;
361   int offset = 0;
362   /**/
363   sparse_array_make_array(m->row_array);
364   /**/
365   for (i = 0; i < m->row_array->array_len; i++) {
366     struct sparse_array *row;
367     ae = &m->row_array->array[i];
368     /**/
369     ae->value = offset;
370     if (ae->index == -1) {
371       continue;
372     }
373     /**/
374     row = ae->ptr;
375     sparse_array_make_array(row);
376     offset += row->array_len;
377   }
378   m->array_length = offset;
379 }
380 
381 /* API */
382 struct matrix_image *
anthy_matrix_image_new(struct sparse_matrix * s)383 anthy_matrix_image_new(struct sparse_matrix *s)
384 {
385   struct matrix_image *mi;
386   int i;
387   int offset;
388   /**/
389   mi = malloc(sizeof(struct matrix_image));
390   mi->size = 2 + s->row_array->array_len * 2 + s->array_length * 2;
391   mi->image = malloc(sizeof(int) * mi->size);
392   mi->image[0] = s->row_array->array_len;
393   mi->image[1] = s->array_length;
394   /* row index */
395   offset = 2;
396   for (i = 0; i < s->row_array->array_len; i++) {
397     struct array_elm *ae;
398     ae = &s->row_array->array[i];
399     mi->image[offset + i*2] = ae->index;
400     mi->image[offset + i*2 + 1] = ae->value;
401   }
402   /* cells */
403   offset = 2 + s->row_array->array_len * 2;
404   for (i = 0; i < s->row_array->array_len; i++) {
405     struct array_elm *ae;
406     struct sparse_array *sa;
407     int j;
408     ae = &s->row_array->array[i];
409     if (ae->index == -1) {
410       continue;
411     }
412     sa = ae->ptr;
413     if (!sa) {
414       continue;
415     }
416     for (j = 0; j < sa->array_len; j++) {
417       struct array_elm *cell = &sa->array[j];
418       mi->image[offset] = cell->index;
419       if (cell->index == -1) {
420 	mi->image[offset + 1] = -1;
421       } else {
422 	mi->image[offset + 1] = cell->value;
423       }
424       offset += 2;
425     }
426   }
427   /**/
428   return mi;
429 }
430 
431 static int
read_int(int * image,int idx,int en)432 read_int(int *image, int idx, int en)
433 {
434   if (en) {
435     return anthy_dic_ntohl(image[idx]);
436   }
437   return image[idx];
438 }
439 
440 static int
do_matrix_peek(int * image,int row,int col,int en)441 do_matrix_peek(int *image, int row, int col, int en)
442 {
443   int n, h, shift, next_shift;
444   int row_array_len = read_int(image, 0, en);
445   int column_array_len;
446   int cell_offset;
447 
448   /* find row */
449   if (row_array_len == 0) {
450     return 0;
451   }
452   for (n = 0; ; n++) {
453     h = hash(row, row_array_len, n);
454     if (read_int(image, 2+ h * 2, en) == row) {
455       shift = read_int(image, 2+h*2+1, en);
456       break;
457     }
458     if (read_int(image, 2+ h * 2, en) == -1) {
459       return 0;
460     }
461     if (n > MAX_FAILURE) {
462       return 0;
463     }
464   }
465 
466   /* find shift count of next row */
467   if (h == row_array_len - 1) {
468     /* last one */
469     next_shift = read_int(image, 1, en);
470   } else {
471     /* not last one */
472     next_shift = read_int(image, 2+h*2+2+1, en);
473   }
474 
475   /* crammed width of this row */
476   column_array_len = next_shift - shift;
477 
478   /* cells in this image */
479   cell_offset = 2 + row_array_len * 2;
480   for (n = 0; ; n++) {
481     h = hash(col, column_array_len, n);
482     if (read_int(image, cell_offset + shift * 2+ h * 2, en) == col) {
483       return read_int(image, cell_offset + shift * 2 + h*2+1, en);
484     }
485     if (read_int(image, cell_offset + shift * 2+ h * 2, en) == -1) {
486       /* not exist */
487       return 0;
488     }
489     if (n > MAX_FAILURE) {
490       return 0;
491     }
492   }
493   return 0;
494 }
495 
496 /* API */
497 int
anthy_matrix_image_peek(int * image,int row,int col)498 anthy_matrix_image_peek(int *image, int row, int col)
499 {
500   if (!image) {
501     return 0;
502   }
503   return do_matrix_peek(image, row, col, 1);
504 }
505 
506 #ifdef DEBUG
507 /* for debug purpose */
508 static void
sparse_array_dump(struct sparse_array * s)509 sparse_array_dump(struct sparse_array *s)
510 {
511   struct list_elm *e;
512   int i;
513   printf("list(%d):", s->elm_count);
514   for (e = s->head.next; e; e = e->next) {
515     printf(" %d:%d(%x)", e->index, e->value, (unsigned long)e->ptr);
516   }
517   printf("\n");
518   if (!s->array) {
519     return ;
520   }
521   printf("array(%d):", s->array_len);
522   for (i = 0; i < s->array_len; i ++) {
523     struct array_elm *ae = &s->array[i];
524     if (ae->index != -1) {
525       printf(" %d:%d,%d(%x)", i, ae->index, ae->value, (unsigned long)ae->ptr);
526     }
527   }
528   printf("\n");
529   return ;
530   /**/
531 }
532 
533 /* for debug purpose */
534 void
sparse_matrix_dump(struct sparse_matrix * m)535 sparse_matrix_dump(struct sparse_matrix *m)
536 {
537   struct list_elm *e;
538   struct array_elm *ae;
539   int i, offset;
540   if (!m->row_array) {
541     for (e = m->row_array->head.next; e; e = e->next) {
542       sparse_array_dump(e->ptr);
543     }
544     return ;
545   }
546   printf("\nnumber of row=%d, row array size=%d, cell array size=%d\n\n",
547 	 m->nr_rows, m->row_array->array_len, m->array_length);
548   /* row part */
549   for (i = 0; i < m->row_array->array_len; i++) {
550     struct array_elm *ae;
551     ae = &m->row_array->array[i];
552     if (ae->index != -1) {
553       printf(" [%d] row=%d, shift=%d\n", i, ae->index, ae->value);
554     }
555   }
556   printf("\n");
557   offset = 0;
558   for (i = 0; i < m->row_array->array_len; i++) {
559     struct array_elm *ae;
560     struct sparse_array *sa;
561     int j;
562     ae = &m->row_array->array[i];
563     sa = ae->ptr;
564     if (!sa) {
565       continue;
566     }
567     for (j = 0; j < sa->array_len; j++) {
568       struct array_elm *cell = &sa->array[j];
569       if (cell->index != -1) {
570 	printf("  [%d] column=%d, value=%d\n", offset, cell->index, cell->value);
571       }
572       offset ++;
573     }
574   }
575   printf("\n");
576 }
577 #endif /* DEBUG */
578