1 /*
2 * libpinyin
3 * Library to deal with pinyin.
4 *
5 * Copyright (C) 2017 Peng Wu <alexepico@gmail.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "phonetic_lookup.h"
22
23 namespace pinyin{
24
dump_max_value(GPtrArray * values)25 bool dump_max_value(GPtrArray * values){
26 if (0 == values->len)
27 return false;
28
29 const trellis_value_t * max =
30 (const trellis_value_t *) g_ptr_array_index(values, 0);
31
32 for (size_t i = 1; i < values->len; ++i) {
33 const trellis_value_t * cur =
34 (const trellis_value_t *) g_ptr_array_index(values, i);
35
36 if (cur->m_poss > max->m_poss)
37 max = cur;
38 }
39
40 printf("max value: %f\n", max->m_poss);
41
42 return true;
43 }
44
dump_all_values(GPtrArray * values)45 bool dump_all_values(GPtrArray * values) {
46 if (0 == values->len)
47 return false;
48
49 printf("values:");
50 for (size_t i = 0; i < values->len; ++i) {
51 const trellis_value_t * cur =
52 (const trellis_value_t *) g_ptr_array_index(values, i);
53
54 printf("%f\t", cur->m_poss);
55 }
56 printf("\n");
57
58 return true;
59 }
60
add_constraint(size_t start,size_t end,phrase_token_t token)61 int ForwardPhoneticConstraints::add_constraint(size_t start, size_t end,
62 phrase_token_t token) {
63
64 if (end > m_constraints->len)
65 return 0;
66
67 for (size_t i = start; i < end; ++i){
68 clear_constraint(i);
69 }
70
71 /* store one step constraint */
72 trellis_constraint_t * constraint = &g_array_index
73 (m_constraints, trellis_constraint_t, start);
74 constraint->m_type = CONSTRAINT_ONESTEP;
75 constraint->m_token = token;
76 constraint->m_constraint_step = end;
77
78 /* propagate no search constraint */
79 for (size_t i = start + 1; i < end; ++i){
80 constraint = &g_array_index(m_constraints, trellis_constraint_t, i);
81 constraint->m_type = CONSTRAINT_NOSEARCH;
82 constraint->m_constraint_step = start;
83 }
84
85 return end - start;
86 }
87
clear_constraint(size_t index)88 bool ForwardPhoneticConstraints::clear_constraint(size_t index) {
89 if (index < 0 || index >= m_constraints->len)
90 return false;
91
92 trellis_constraint_t * constraint = &g_array_index
93 (m_constraints, trellis_constraint_t, index);
94
95 if (NO_CONSTRAINT == constraint->m_type)
96 return false;
97
98 if (CONSTRAINT_NOSEARCH == constraint->m_type){
99 index = constraint->m_constraint_step;
100 constraint = &g_array_index(m_constraints, trellis_constraint_t, index);
101 }
102
103 /* now var constraint points to the one step constraint. */
104 assert(constraint->m_type == CONSTRAINT_ONESTEP);
105
106 /* phrase_token_t token = constraint->m_token; */
107 size_t end = constraint->m_constraint_step;
108 for (size_t i = index; i < end; ++i){
109 if (i >= m_constraints->len)
110 continue;
111
112 constraint = &g_array_index
113 (m_constraints, trellis_constraint_t, i);
114 constraint->m_type = NO_CONSTRAINT;
115 }
116
117 return true;
118 }
119
validate_constraint(PhoneticKeyMatrix * matrix)120 bool ForwardPhoneticConstraints::validate_constraint(PhoneticKeyMatrix * matrix) {
121 /* resize m_constraints array first */
122 const size_t oldlength = m_constraints->len;
123 const size_t newlength = matrix->size();
124
125 if ( newlength > oldlength ){
126 g_array_set_size(m_constraints, newlength);
127
128 /* initialize new element */
129 for( size_t i = oldlength; i < newlength; ++i){
130 trellis_constraint_t * constraint = &g_array_index
131 (m_constraints, trellis_constraint_t, i);
132 constraint->m_type = NO_CONSTRAINT;
133 }
134
135 }else if (newlength < oldlength ){
136 /* just shrink it */
137 g_array_set_size(m_constraints, newlength);
138 }
139
140 GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
141 PhraseItem item;
142 for (size_t i = 0; i < m_constraints->len; ++i){
143 trellis_constraint_t * constraint = &g_array_index
144 (m_constraints, trellis_constraint_t, i);
145
146 /* handle one step constraint */
147 if ( constraint->m_type == CONSTRAINT_ONESTEP ){
148
149 phrase_token_t token = constraint->m_token;
150 m_phrase_index->get_phrase_item(token, item);
151 guint32 end = constraint->m_constraint_step;
152
153 /* clear too long constraint */
154 if (end >= m_constraints->len){
155 clear_constraint(i);
156 continue;
157 }
158
159 gfloat pinyin_poss = compute_pronunciation_possibility
160 (matrix, i, end, keys, item);
161 /* clear invalid pinyin */
162 if (pinyin_poss < FLT_EPSILON)
163 clear_constraint(i);
164 }
165 }
166
167 g_array_free(keys, TRUE);
168 return true;
169 }
170
171
diff_result(MatchResult best,MatchResult other)172 bool ForwardPhoneticConstraints::diff_result(MatchResult best,
173 MatchResult other){
174 bool changed = false;
175
176 assert(best->len == other->len);
177
178 for (size_t pos = 0; pos < other->len; ++pos) {
179 phrase_token_t other_token = g_array_index(other, phrase_token_t, pos);
180
181 if (null_token == other_token)
182 continue;
183
184 phrase_token_t best_token = g_array_index(best, phrase_token_t, pos);
185
186 /* the same token */
187 if (best_token == other_token)
188 continue;
189
190 changed = true;
191
192 /* skip the tail node, as not searched in nbest algorithm. */
193 size_t next_pos = other->len - 1;
194 for (size_t i = pos + 1; i < other->len; ++i) {
195 phrase_token_t token = g_array_index(other, phrase_token_t, i);
196
197 if (null_token != token) {
198 next_pos = i;
199 break;
200 }
201 }
202
203 assert(add_constraint(pos, next_pos, other_token));
204 }
205
206 return changed;
207 }
208
209 };
210