1 /*
2 * 文節の構造metawordをソートする
3 *
4 * 文節に対する複数の構造の候補をソートする
5 *
6 * Copyright (C) 2000-2007 TABATA Yusuke
7 *
8 */
9 /*
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24 #include <stdlib.h>
25 #include <math.h>
26
27 #include <anthy/segment.h>
28 #include <anthy/ordering.h>
29 #include <anthy/feature_set.h>
30 #include <anthy/splitter.h>
31 #include <anthy/diclib.h>
32 #include "sorter.h"
33
/*
 * Handle to the "cand_info" section of the file dictionary.
 * Assigned in anthy_infosort_init() and passed to
 * anthy_find_feature_freq() as the table to search.
 */
static void *cand_info_array;
35
36 static double
calc_probability(struct feature_list * fl)37 calc_probability(struct feature_list *fl)
38 {
39 struct feature_freq *res, arg;
40 res = anthy_find_feature_freq(cand_info_array,
41 fl, &arg);
42 if (res) {
43 double pos = (double)res->f[15];
44 double neg = (double)res->f[14];
45 double prob = pos / (pos + neg);
46 prob = prob * prob;
47 /**/
48 return prob;
49 }
50 return 0;
51 }
52
53 static void
mw_eval(struct seg_ent * prev_seg,struct seg_ent * seg,struct meta_word * mw)54 mw_eval(struct seg_ent *prev_seg, struct seg_ent *seg,
55 struct meta_word *mw)
56 {
57 int pc;
58 struct feature_list fl;
59 double prob;
60 (void)seg;
61 anthy_feature_list_init(&fl);
62 /**/
63 anthy_feature_list_set_cur_class(&fl, mw->seg_class);
64 anthy_feature_list_set_dep_word(&fl, mw->dep_word_hash);
65 anthy_feature_list_set_dep_class(&fl, mw->dep_class);
66 anthy_feature_list_set_mw_features(&fl, mw->mw_features);
67 /* 前の文節の素性 */
68 if (prev_seg) {
69 pc = prev_seg->best_seg_class;
70 } else {
71 pc = SEG_HEAD;
72 }
73 anthy_feature_list_set_class_trans(&fl, pc, mw->seg_class);
74 anthy_feature_list_sort(&fl);
75 /* 計算する */
76 prob = 0.1 + calc_probability(&fl);
77 if (prob < 0) {
78 prob = (double)1 / (double)1000;
79 }
80 anthy_feature_list_free(&fl);
81 mw->struct_score = RATIO_BASE * RATIO_BASE;
82 mw->struct_score *= prob;
83 /*
84 anthy_feature_list_print(&fl);
85 printf(" prob=%f, struct_score=%d\n", prob, mw->struct_score);
86 */
87
88 /**/
89 if (mw->mw_features & MW_FEATURE_SUFFIX) {
90 mw->struct_score /= 2;
91 }
92 if (mw->mw_features & MW_FEATURE_WEAK_CONN) {
93 mw->struct_score /= 10;
94 }
95 }
96
97 static void
seg_eval(struct seg_ent * prev_seg,struct seg_ent * seg)98 seg_eval(struct seg_ent *prev_seg,
99 struct seg_ent *seg)
100 {
101 int i;
102 for (i = 0; i < seg->nr_metaword; i++) {
103 mw_eval(prev_seg, seg, seg->mw_array[i]);
104 }
105 }
106
107 static void
sl_eval(struct segment_list * seg_list)108 sl_eval(struct segment_list *seg_list)
109 {
110 int i;
111 struct seg_ent *prev_seg = NULL;
112 for (i = 0; i < seg_list->nr_segments; i++) {
113 struct seg_ent *seg;
114 seg = anthy_get_nth_segment(seg_list, i);
115 seg_eval(prev_seg, seg);
116 prev_seg = seg;
117 }
118 }
119
120 static int
metaword_compare_func(const void * p1,const void * p2)121 metaword_compare_func(const void *p1, const void *p2)
122 {
123 const struct meta_word * const *s1 = p1;
124 const struct meta_word * const *s2 = p2;
125 return (*s2)->struct_score - (*s1)->struct_score;
126 }
127
128 void
anthy_sort_metaword(struct segment_list * seg_list)129 anthy_sort_metaword(struct segment_list *seg_list)
130 {
131 int i;
132 /**/
133 sl_eval(seg_list);
134 /**/
135 for (i = 0; i < seg_list->nr_segments; i++) {
136 struct seg_ent *seg = anthy_get_nth_segment(seg_list, i);
137 if (seg->mw_array) { /* 不正なメモリアクセスを行うバグの修正 */
138 qsort(seg->mw_array, seg->nr_metaword, sizeof(struct meta_word *),
139 metaword_compare_func);
140 }
141 }
142 }
143
144 void
anthy_infosort_init(void)145 anthy_infosort_init(void)
146 {
147 cand_info_array = anthy_file_dic_get_section("cand_info");
148 }
149