1 /*
2 * 変換や文節の伸縮などの操作が進行中の文字列や候補などを
3 * まとめて変換コンテキストと呼ぶ。
4 * Anthyのコンテキストに対する操作は全てここから呼ばれる。
5 * 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。
6 *
7 * personalityの管理もする。
8 *
9 * Funded by IPA未踏ソフトウェア創造事業 2001 10/29
10 * Copyright (C) 2000-2007 TABATA Yusuke
11 *
12 */
13 /*
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 */
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <unistd.h>
31
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35
36 #include <anthy/anthy.h>
37 #include <anthy/alloc.h>
38 #include <anthy/record.h>
39 #include <anthy/ordering.h>
40 #include <anthy/splitter.h>
41 #include <anthy/xstr.h>
42 #include "main.h"
43
/* Allocator from which every struct anthy_context is taken. */
static allocator context_ator;

/** Current personality.
 *  not set yet:                              NULL
 *  conversion started without setting one:   "default"
 *  anonymous:                                ""
 */
static char *current_personality;

/* Size threshold compared against st_size in anthy_save_history():
 * nothing is appended once the history file is larger than this. */
#define HISTORY_FILE_LIMIT 100000
56
/*
 * Destructor handed to the context allocator (see anthy_init_contexts()).
 * p points at a struct anthy_context; everything it still holds is
 * released by resetting it.
 */
static void
context_dtor(void *p)
{
  struct anthy_context *ctx = p;

  anthy_do_reset_context(ctx);
}
62
63 /** 現在のpersonalityを返す */
64 static char *
get_personality(void)65 get_personality(void)
66 {
67 if (!current_personality) {
68 current_personality = strdup("default");
69 anthy_dic_set_personality(current_personality);
70 }
71 return current_personality;
72 }
73
74 static void
release_segment(struct seg_ent * s)75 release_segment(struct seg_ent *s)
76 {
77 if (s->cands) {
78 int i;
79 for (i = 0; i < s->nr_cands; i++) {
80 anthy_release_cand_ent(s->cands[i]);
81 }
82 free (s->cands);
83 }
84 if (s->mw_array) {
85 free(s->mw_array);
86 }
87 free(s);
88
89 }
90
91 /** 文節リストの最後の要素を削除する */
92 static void
pop_back_seg_ent(struct anthy_context * c)93 pop_back_seg_ent(struct anthy_context *c)
94 {
95 struct seg_ent *s;
96 s = c->seg_list.list_head.prev;
97 if (s == &c->seg_list.list_head) {
98 return ;
99 }
100 s->prev->next = s->next;
101 s->next->prev = s->prev;
102 release_segment(s);
103 c->seg_list.nr_segments --;
104 }
105
106
107 /** n番目の文節の文字のindexを求める */
108 static int
get_nth_segment_index(struct anthy_context * c,int n)109 get_nth_segment_index(struct anthy_context *c, int n)
110 {
111 int i,s;
112 for (i = 0, s = 0; i < c->str.len; i++) {
113 if (c->split_info.ce[i].seg_border) {
114 if (s == n) {
115 return i;
116 }
117 s++;
118 }
119 }
120 return -1;
121 }
122
123 /** n番目の文節の長さを求める.
124 * segment_listが構成されていなくても計算できるようにする.
125 */
126 static int
get_nth_segment_len(struct anthy_context * c,int sindex)127 get_nth_segment_len(struct anthy_context *c, int sindex)
128 {
129 int a,i,l;
130 a = get_nth_segment_index(c, sindex);
131 if ( a == -1){
132 return -1;
133 }
134 l = 1;
135 for (i = a+1; !c->split_info.ce[i].seg_border; i++) {
136 l++;
137 }
138 return l;
139 }
140
141 /** metawordの配列を作る */
142 static void
make_metaword_array(struct anthy_context * ac,struct seg_ent * se)143 make_metaword_array(struct anthy_context *ac,
144 struct seg_ent *se)
145 {
146 int i;
147 se->mw_array = NULL;
148 for (i = se->len; i > 0; i--) {
149 int j;
150 /* 最後に濁点とかがついてたら直前の文字ごと落す */
151 if (i < se->len &&
152 anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
153 /* FIXME 濁点とかがありえない並びをしてたら */
154 i--;
155 continue ;
156 }
157
158 se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i);
159 if (!se->nr_metaword) {
160 continue ;
161 }
162 /* metawordを配列に取り込む */
163 se->mw_array = malloc(sizeof(struct meta_word*) * se->nr_metaword);
164 for (j = 0; j < se->nr_metaword; j++) {
165 se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j);
166 }
167 return;
168 }
169 }
170
171 static struct seg_ent*
create_segment(struct anthy_context * ac,int from,int len,struct meta_word * best_mw)172 create_segment(struct anthy_context *ac, int from, int len,
173 struct meta_word* best_mw)
174 {
175 struct seg_ent* s;
176 s = (struct seg_ent *)malloc(sizeof(struct seg_ent));
177 s->str.str = &ac->str.str[from];
178 s->str.len = len;
179 s->from = from;
180 s->len = s->str.len;
181 s->nr_cands = 0;
182 s->cands = NULL;
183 s->best_seg_class = ac->split_info.ce[from].best_seg_class;
184 s->best_mw = best_mw;
185 make_metaword_array(ac, s);
186 return s;
187 }
188
189 /** 変換コンテキストに文節を追加する */
190 static void
push_back_segment(struct anthy_context * ac,struct seg_ent * se)191 push_back_segment(struct anthy_context *ac, struct seg_ent *se)
192 {
193 se->next = &ac->seg_list.list_head;
194 se->prev = ac->seg_list.list_head.prev;
195 ac->seg_list.list_head.prev->next = se;
196 ac->seg_list.list_head.prev = se;
197 ac->seg_list.nr_segments ++;
198 se->committed = -1;
199 }
200
201 /** splitterによって配列中に付けられた文節境界のマークから、
202 * 文節のリストを構成する
203 */
204 static void
create_segment_list(struct anthy_context * ac,int from,int to)205 create_segment_list(struct anthy_context *ac, int from, int to)
206 {
207 int i, n;
208 struct seg_ent *s;
209 /* from の所までにいくつの文節があるか調べる */
210 i = 0; n = 0;
211 while (i < from) {
212 i += get_nth_segment_len(ac, n);
213 n++;
214 };
215 /**/
216 for (i = from; i < to; i++) {
217 if (ac->split_info.ce[i].seg_border) {
218 int len = get_nth_segment_len(ac, n);
219 s = create_segment(ac, i, len, ac->split_info.ce[i].best_mw);
220
221 push_back_segment(ac, s);
222 n++;
223 }
224 }
225 }
226
227 /** コンテキストを作る */
228 struct anthy_context *
anthy_do_create_context(int encoding)229 anthy_do_create_context(int encoding)
230 {
231 struct anthy_context *ac;
232 char *p = get_personality();
233
234 if (!p) {
235 return NULL;
236 }
237
238 ac = (struct anthy_context *)anthy_smalloc(context_ator);
239 ac->str.str = NULL;
240 ac->str.len = 0;
241 ac->seg_list.nr_segments = 0;
242 ac->seg_list.list_head.prev = &ac->seg_list.list_head;
243 ac->seg_list.list_head.next = &ac->seg_list.list_head;
244 ac->split_info.word_split_info = NULL;
245 ac->split_info.ce = NULL;
246 ac->ordering_info.oc = NULL;
247 ac->dic_session = NULL;
248 ac->prediction.str.str = NULL;
249 ac->prediction.str.len = 0;
250 ac->prediction.nr_prediction = 0;
251 ac->prediction.predictions = NULL;
252 ac->encoding = encoding;
253 ac->reconversion_mode = ANTHY_RECONVERT_AUTO;
254
255 return ac;
256 }
257
258 /** コンテキストのアロケータを作る */
259 void
anthy_init_contexts(void)260 anthy_init_contexts(void)
261 {
262 context_ator = anthy_create_allocator(sizeof(struct anthy_context),
263 context_dtor);
264 }
265
266 void
anthy_quit_contexts(void)267 anthy_quit_contexts(void)
268 {
269 anthy_free_allocator(context_ator);
270 }
271
272 static void
release_prediction(struct prediction_cache * pc)273 release_prediction(struct prediction_cache *pc)
274 {
275 int i;
276 if (pc->str.str) {
277 free(pc->str.str);
278 pc->str.str = NULL;
279 }
280 if (pc->predictions) {
281 for (i = 0; i < pc->nr_prediction; ++i) {
282 anthy_free_xstr(pc->predictions[i].src_str);
283 anthy_free_xstr(pc->predictions[i].str);
284 }
285 free(pc->predictions);
286 pc->predictions = NULL;
287 }
288 }
289
290 void
anthy_release_segment_list(struct anthy_context * ac)291 anthy_release_segment_list(struct anthy_context *ac)
292 {
293 int i, sc;
294 sc = ac->seg_list.nr_segments;
295 for (i = 0; i < sc; i++) {
296 pop_back_seg_ent(ac);
297 }
298 ac->seg_list.nr_segments = 0;
299 }
300
301 /* resetではcontextのために確保されたリソースを全て解放する */
302 void
anthy_do_reset_context(struct anthy_context * ac)303 anthy_do_reset_context(struct anthy_context *ac)
304 {
305 /* まず辞書セッションを解放 */
306 if (ac->dic_session) {
307 anthy_dic_release_session(ac->dic_session);
308 ac->dic_session = NULL;
309 }
310 if (!ac->str.str) {
311 /* 文字列が設定されていなければ解放すべき物はもう無い */
312 return ;
313 }
314 free(ac->str.str);
315 ac->str.str = NULL;
316 anthy_release_split_context(&ac->split_info);
317 anthy_release_segment_list(ac);
318
319 /* 予測された文字列の解放 */
320 release_prediction(&ac->prediction);
321 }
322
323 void
anthy_do_release_context(struct anthy_context * ac)324 anthy_do_release_context(struct anthy_context *ac)
325 {
326 anthy_sfree(context_ator, ac);
327 }
328
329 static void
make_candidates(struct anthy_context * ac,int from,int from2,int is_reverse)330 make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse)
331 {
332 int i;
333 int len = ac->str.len;
334
335 /* 文節の境界を設定 */
336 /* from と from2の間に境界を作ることを禁止する */
337 anthy_mark_border(&ac->split_info, from, from2, len);
338 create_segment_list(ac, from, len);
339 anthy_sort_metaword(&ac->seg_list);
340
341 /* 候補を列挙 */
342 for (i = 0; i < ac->seg_list.nr_segments; i++) {
343 anthy_do_make_candidates(&ac->split_info,
344 anthy_get_nth_segment(&ac->seg_list, i),
345 is_reverse);
346 }
347 /* 候補をソート */
348 anthy_sort_candidate(&ac->seg_list, 0);
349 }
350
351 int
anthy_do_context_set_str(struct anthy_context * ac,xstr * s,int is_reverse)352 anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse)
353 {
354 int i;
355
356 /* 文字列をコピー(一文字分余計にして0をセット) */
357 ac->str.str = (xchar *)malloc(sizeof(xchar)*(s->len+1));
358 anthy_xstrcpy(&ac->str, s);
359 ac->str.str[s->len] = 0;
360
361 /* splitterの初期化*/
362 anthy_init_split_context(&ac->str, &ac->split_info, is_reverse);
363
364 /* 解の候補を作成 */
365 make_candidates(ac, 0, 0, is_reverse);
366
367 /* 最初に設定した文節境界を覚えておく */
368 for (i = 0; i < ac->seg_list.nr_segments; i++) {
369 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
370 ac->split_info.ce[s->from].initial_seg_len = s->len;
371 }
372
373 return 0;
374 }
375
376 void
anthy_do_resize_segment(struct anthy_context * ac,int nth,int resize)377 anthy_do_resize_segment(struct anthy_context *ac,
378 int nth, int resize)
379 {
380 int i;
381 int index, len, sc;
382
383 /* resizeが可能か検査する */
384 if (nth >= ac->seg_list.nr_segments) {
385 return ;
386 }
387 index = get_nth_segment_index(ac, nth);
388 len = get_nth_segment_len(ac, nth);
389 if (index + len + resize > ac->str.len) {
390 return ;
391 }
392 if (len + resize < 1) {
393 return ;
394 }
395
396 /* nth以降のseg_entを解放する */
397 sc = ac->seg_list.nr_segments;
398 for (i = nth; i < sc; i++) {
399 pop_back_seg_ent(ac);
400 }
401
402 /* resizeしたseg_borderをマークする */
403 /* 現在のマークを消して新しいマークをつける */
404 ac->split_info.ce[index+len].seg_border = 0;
405 ac->split_info.ce[ac->str.len].seg_border = 1;
406 for (i = index+len+resize+1; i < ac->str.len; i++) {
407 ac->split_info.ce[i].seg_border = 0;
408 }
409 ac->split_info.ce[index+len+resize].seg_border = 1;
410 for (i = index; i < ac->str.len; i++) {
411 ac->split_info.ce[i].best_mw = NULL;
412 }
413
414 /* 解の候補を作成 */
415 make_candidates(ac, index, index+len+resize, 0);
416 }
417
418 /*
419 * n番めの文節を取得する、無い場合にはNULLを返す
420 */
421 struct seg_ent *
anthy_get_nth_segment(struct segment_list * sl,int n)422 anthy_get_nth_segment(struct segment_list *sl, int n)
423 {
424 int i;
425 struct seg_ent *se;
426 if (n >= sl->nr_segments ||
427 n < 0) {
428 return NULL;
429 }
430 for (i = 0, se = sl->list_head.next; i < n; i++, se = se->next);
431 return se;
432 }
433
434 int
anthy_do_set_prediction_str(struct anthy_context * ac,xstr * xs)435 anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs)
436 {
437 struct prediction_cache* prediction = &ac->prediction;
438 int nr_prediction;
439
440 /* まず辞書セッションを解放 */
441 if (ac->dic_session) {
442 anthy_dic_release_session(ac->dic_session);
443 ac->dic_session = NULL;
444 }
445 /* 予測された文字列の解放 */
446 release_prediction(&ac->prediction);
447
448 /* 辞書セッションの開始 */
449 if (!ac->dic_session) {
450 ac->dic_session = anthy_dic_create_session();
451 if (!ac->dic_session) {
452 return -1;
453 }
454 }
455
456 prediction->str.str = (xchar*)malloc(sizeof(xchar*)*(xs->len+1));
457 anthy_xstrcpy(&prediction->str, xs);
458 prediction->str.str[xs->len]=0;
459
460 nr_prediction = anthy_traverse_record_for_prediction(xs, NULL);
461 prediction->nr_prediction = nr_prediction;
462
463 if (nr_prediction) {
464 prediction->predictions = (struct prediction_t*)malloc(sizeof(struct prediction_t) *
465 nr_prediction);
466 anthy_traverse_record_for_prediction(xs, prediction->predictions);
467 }
468 return 0;
469 }
470
471 static const char *
get_change_state(struct anthy_context * ac)472 get_change_state(struct anthy_context *ac)
473 {
474 int resize = 0, cand_change = 0;
475 int i;
476 for (i = 0; i < ac->seg_list.nr_segments; i++) {
477 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
478 if (ac->split_info.ce[s->from].initial_seg_len != s->len) {
479 resize = 1;
480 }
481 if (s->committed > 0) {
482 cand_change = 1;
483 }
484 }
485 /**/
486 if (resize && cand_change) {
487 return "SC";
488 }
489 if (resize) {
490 return "S";
491 }
492 if (cand_change) {
493 return "C";
494 }
495 return "-";
496 }
497
498 static void
write_history(FILE * fp,struct anthy_context * ac)499 write_history(FILE *fp, struct anthy_context *ac)
500 {
501 int i;
502 /* 読み */
503 fprintf(fp, "|");
504 for (i = 0; i < ac->seg_list.nr_segments; i++) {
505 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
506 char *c = anthy_xstr_to_cstr(&s->str, ANTHY_UTF8_ENCODING);
507 fprintf(fp, "%s|", c);
508 free(c);
509 }
510 fprintf(fp, " |");
511 /* 結果 */
512 for (i = 0; i < ac->seg_list.nr_segments; i++) {
513 struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
514 char *c;
515 /**/
516 if (s->committed < 0) {
517 fprintf(fp, "?|");
518 continue ;
519 }
520 c = anthy_xstr_to_cstr(&s->cands[s->committed]->str,
521 ANTHY_UTF8_ENCODING);
522 fprintf(fp, "%s|", c);
523 free(c);
524 }
525 }
526
527 void
anthy_save_history(const char * fn,struct anthy_context * ac)528 anthy_save_history(const char *fn, struct anthy_context *ac)
529 {
530 FILE *fp;
531 struct stat st;
532 if (!fn) {
533 return ;
534 }
535 fp = fopen(fn, "a");
536 if (!fp) {
537 return ;
538 }
539 if (stat(fn, &st) ||
540 st.st_size > HISTORY_FILE_LIMIT) {
541 fclose(fp);
542 return ;
543 }
544 /**/
545 fprintf(fp, "anthy-%s ", anthy_get_version_string());
546 fprintf(fp, "%s ", get_change_state(ac));
547 write_history(fp, ac);
548 fprintf(fp, "\n");
549 fclose(fp);
550 /**/
551 chmod(fn, S_IREAD | S_IWRITE);
552 }
553
554 /** 候補を表示する */
555 void
anthy_print_candidate(struct cand_ent * ce)556 anthy_print_candidate(struct cand_ent *ce)
557 {
558 int mod = (ce->score % 1000);
559 int seg_score = 0;
560
561 if (ce->mw) {
562 seg_score = ce->mw->score;
563 }
564 anthy_putxstr(&ce->str);
565 printf(":(");
566 /*if (ce->nr_words == 1) {printf("%d,", ce->elm[0].id); }*/
567 if (ce->flag & CEF_OCHAIRE) {
568 putchar('o');
569 }
570 if (ce->flag & CEF_SINGLEWORD) {
571 putchar('1');
572 }
573 if (ce->flag & CEF_GUESS) {
574 putchar('g');
575 }
576 if (ce->flag & (CEF_KATAKANA | CEF_HIRAGANA)) {
577 putchar('N');
578 }
579 if (ce->flag & CEF_USEDICT) {
580 putchar('U');
581 }
582 if (ce->flag & CEF_CONTEXT) {
583 putchar('C');
584 }
585 printf(",%d,", seg_score);
586
587
588 if (ce->mw) {
589 printf("%s,%d", anthy_seg_class_sym(ce->mw->seg_class),
590 ce->mw->struct_score);
591 } else {
592 putchar('-');
593 }
594 printf(")");
595
596 if (ce->score >= 1000) {
597 printf("%d,", ce->score/1000);
598 if (mod < 100) {
599 printf("0");
600 }
601 if (mod < 10) {
602 printf("0");
603 }
604 printf("%d ", mod);
605 } else {
606 printf("%d ", ce->score);
607 }
608 }
609
610 /** 文節を表示する */
611 static void
print_segment(struct seg_ent * e)612 print_segment(struct seg_ent *e)
613 {
614 int i;
615
616 anthy_putxstr(&e->str);
617 printf("(");
618 for ( i = 0 ; i < e->nr_cands ; i++) {
619 anthy_print_candidate(e->cands[i]);
620 printf(",");
621 }
622 printf(")");
623 printf(":\n");
624 }
625
626 /** コンテキストを表示する */
627 void
anthy_do_print_context(struct anthy_context * ac,int encoding)628 anthy_do_print_context(struct anthy_context *ac, int encoding)
629 {
630 int i;
631 struct char_ent *ce;
632 anthy_xstr_set_print_encoding(encoding);
633
634 ce = ac->split_info.ce;
635 if (!ce) {
636 printf("(invalid)\n");
637 return ;
638 }
639 /* 各文字を表示する */
640 for (i = 0, ce = ac->split_info.ce; i < ac->str.len; i++, ce++) {
641 if (ce->seg_border) {
642 printf("|");
643 }
644 anthy_putxchar(*(ce->c));
645 }
646 printf("\n");
647 /* 各文節を表示する */
648 for (i = 0; i < ac->seg_list.nr_segments; i++) {
649 print_segment(anthy_get_nth_segment(&ac->seg_list, i));
650 }
651 printf("\n");
652 }
653
654 void
anthy_release_cand_ent(struct cand_ent * ce)655 anthy_release_cand_ent(struct cand_ent *ce)
656 {
657 if (ce->elm) {
658 free(ce->elm);
659 }
660 if (&ce->str) {
661 anthy_free_xstr_str(&ce->str);
662 }
663 free(ce);
664 }
665
666 int
anthy_do_set_personality(const char * id)667 anthy_do_set_personality(const char *id)
668 {
669 if (current_personality) {
670 /* すでに設定されてる */
671 return -1;
672 }
673 if (!id || strchr(id, '/')) {
674 return -1;
675 }
676 current_personality = strdup(id);
677 anthy_dic_set_personality(current_personality);
678 return 0;
679 }
680
681 void
anthy_init_personality(void)682 anthy_init_personality(void)
683 {
684 current_personality = NULL;
685 }
686
687 void
anthy_quit_personality(void)688 anthy_quit_personality(void)
689 {
690 if (current_personality) {
691 free(current_personality);
692 current_personality = NULL;
693 }
694 }
695