1 /*
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *
5  *  Copyright (C) 2017 Peng Wu <alexepico@gmail.com>
6  *
7  *  This program is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "phonetic_lookup.h"
22 
23 namespace pinyin{
24 
dump_max_value(GPtrArray * values)25 bool dump_max_value(GPtrArray * values){
26     if (0 == values->len)
27         return false;
28 
29     const trellis_value_t * max =
30         (const trellis_value_t *) g_ptr_array_index(values, 0);
31 
32     for (size_t i = 1; i < values->len; ++i) {
33         const trellis_value_t * cur =
34             (const trellis_value_t *) g_ptr_array_index(values, i);
35 
36         if (cur->m_poss > max->m_poss)
37             max = cur;
38     }
39 
40     printf("max value: %f\n", max->m_poss);
41 
42     return true;
43 }
44 
dump_all_values(GPtrArray * values)45 bool dump_all_values(GPtrArray * values) {
46     if (0 == values->len)
47         return false;
48 
49     printf("values:");
50     for (size_t i = 0; i < values->len; ++i) {
51         const trellis_value_t * cur =
52             (const trellis_value_t *) g_ptr_array_index(values, i);
53 
54         printf("%f\t", cur->m_poss);
55     }
56     printf("\n");
57 
58     return true;
59 }
60 
add_constraint(size_t start,size_t end,phrase_token_t token)61 int ForwardPhoneticConstraints::add_constraint(size_t start, size_t end,
62                                                phrase_token_t token) {
63 
64     if (end > m_constraints->len)
65         return 0;
66 
67     for (size_t i = start; i < end; ++i){
68         clear_constraint(i);
69     }
70 
71     /* store one step constraint */
72     trellis_constraint_t * constraint = &g_array_index
73         (m_constraints, trellis_constraint_t, start);
74     constraint->m_type = CONSTRAINT_ONESTEP;
75     constraint->m_token = token;
76     constraint->m_constraint_step = end;
77 
78     /* propagate no search constraint */
79     for (size_t i = start + 1; i < end; ++i){
80         constraint = &g_array_index(m_constraints, trellis_constraint_t, i);
81         constraint->m_type = CONSTRAINT_NOSEARCH;
82         constraint->m_constraint_step = start;
83     }
84 
85     return end - start;
86 }
87 
clear_constraint(size_t index)88 bool ForwardPhoneticConstraints::clear_constraint(size_t index) {
89     if (index < 0 || index >= m_constraints->len)
90         return false;
91 
92     trellis_constraint_t * constraint = &g_array_index
93         (m_constraints, trellis_constraint_t, index);
94 
95     if (NO_CONSTRAINT == constraint->m_type)
96         return false;
97 
98     if (CONSTRAINT_NOSEARCH == constraint->m_type){
99         index = constraint->m_constraint_step;
100         constraint = &g_array_index(m_constraints, trellis_constraint_t, index);
101     }
102 
103     /* now var constraint points to the one step constraint. */
104     assert(constraint->m_type == CONSTRAINT_ONESTEP);
105 
106     /* phrase_token_t token = constraint->m_token; */
107     size_t end = constraint->m_constraint_step;
108     for (size_t i = index; i < end; ++i){
109         if (i >= m_constraints->len)
110             continue;
111 
112         constraint = &g_array_index
113             (m_constraints, trellis_constraint_t, i);
114         constraint->m_type = NO_CONSTRAINT;
115     }
116 
117     return true;
118 }
119 
validate_constraint(PhoneticKeyMatrix * matrix)120 bool ForwardPhoneticConstraints::validate_constraint(PhoneticKeyMatrix * matrix) {
121     /* resize m_constraints array first */
122     const size_t oldlength = m_constraints->len;
123     const size_t newlength = matrix->size();
124 
125     if ( newlength > oldlength ){
126         g_array_set_size(m_constraints, newlength);
127 
128         /* initialize new element */
129         for( size_t i = oldlength; i < newlength; ++i){
130             trellis_constraint_t * constraint = &g_array_index
131                 (m_constraints, trellis_constraint_t, i);
132             constraint->m_type = NO_CONSTRAINT;
133         }
134 
135     }else if (newlength < oldlength ){
136         /* just shrink it */
137         g_array_set_size(m_constraints, newlength);
138     }
139 
140     GArray * keys = g_array_new(TRUE, TRUE, sizeof(ChewingKey));
141     PhraseItem item;
142     for (size_t i = 0; i < m_constraints->len; ++i){
143         trellis_constraint_t * constraint = &g_array_index
144             (m_constraints, trellis_constraint_t, i);
145 
146         /* handle one step constraint */
147         if ( constraint->m_type == CONSTRAINT_ONESTEP ){
148 
149             phrase_token_t token = constraint->m_token;
150             m_phrase_index->get_phrase_item(token, item);
151             guint32 end = constraint->m_constraint_step;
152 
153             /* clear too long constraint */
154             if (end >= m_constraints->len){
155                 clear_constraint(i);
156                 continue;
157             }
158 
159             gfloat pinyin_poss = compute_pronunciation_possibility
160                 (matrix, i, end, keys, item);
161             /* clear invalid pinyin */
162             if (pinyin_poss < FLT_EPSILON)
163                 clear_constraint(i);
164         }
165     }
166 
167     g_array_free(keys, TRUE);
168     return true;
169 }
170 
171 
diff_result(MatchResult best,MatchResult other)172 bool ForwardPhoneticConstraints::diff_result(MatchResult best,
173                                              MatchResult other){
174     bool changed = false;
175 
176     assert(best->len == other->len);
177 
178     for (size_t pos = 0; pos < other->len; ++pos) {
179         phrase_token_t other_token = g_array_index(other, phrase_token_t, pos);
180 
181         if (null_token == other_token)
182             continue;
183 
184         phrase_token_t best_token = g_array_index(best, phrase_token_t, pos);
185 
186         /* the same token */
187         if (best_token == other_token)
188             continue;
189 
190         changed = true;
191 
192         /* skip the tail node, as not searched in nbest algorithm. */
193         size_t next_pos = other->len - 1;
194         for (size_t i = pos + 1; i < other->len; ++i) {
195             phrase_token_t token = g_array_index(other, phrase_token_t, i);
196 
197             if (null_token != token) {
198                 next_pos = i;
199                 break;
200             }
201         }
202 
203         assert(add_constraint(pos, next_pos, other_token));
204     }
205 
206     return changed;
207 }
208 
209 };
210