1 /* Copyright (C) 1995-2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation version 2.1
6 * of the License.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18 #include "hime.h"
19
20 #include "gst.h"
21 #include "gtab.h"
22 #include "pho.h"
23 #include "tsin.h"
24
qcmp_pre_sel_usecount(const void * aa,const void * bb)25 static int qcmp_pre_sel_usecount (const void *aa, const void *bb) {
26 PRE_SEL *a = (PRE_SEL *) aa;
27 PRE_SEL *b = (PRE_SEL *) bb;
28
29 return b->usecount - a->usecount;
30 }
31
qcmp_pre_sel_str(const void * aa,const void * bb)32 static int qcmp_pre_sel_str (const void *aa, const void *bb) {
33 PRE_SEL *a = (PRE_SEL *) aa;
34 PRE_SEL *b = (PRE_SEL *) bb;
35
36 int d = strcmp (a->str, b->str);
37 if (d)
38 return d;
39
40 return b->usecount - a->usecount;
41 }
42
43 void extract_gtab_key (int start, int len, void *out);
44 gboolean check_gtab_fixed_mismatch (int idx, char *mtch, int plen);
45 void mask_tone (phokey_t *pho, int plen, char *tone_off);
46 void init_pre_sel ();
47 void mask_key_typ_pho (phokey_t *key);
48 extern u_int64_t vmaskci;
49
scanphr_e(int chpho_idx,int plen,gboolean pho_incr,int * rselN)50 u_char scanphr_e (int chpho_idx, int plen, gboolean pho_incr, int *rselN) {
51 if (plen >= MAX_PHRASE_LEN)
52 goto empty;
53 if (chpho_idx < 0)
54 goto empty;
55
56 phokey_t tailpho;
57
58 if (pho_incr) {
59 if (ph_key_sz == 2) {
60 tailpho = pho2key (poo.typ_pho);
61 if (!tailpho)
62 pho_incr = FALSE;
63 } else {
64 if (!ggg.kval)
65 pho_incr = FALSE;
66 }
67 }
68
69 u_int64_t pp64[MAX_PHRASE_LEN + 1];
70 phokey_t *pp = (phokey_t *) pp64;
71
72 if (ph_key_sz == 2) {
73 extract_pho (chpho_idx, plen, pp);
74 } else {
75 extract_gtab_key (chpho_idx, plen, pp64);
76 }
77
78 #if 0
79 dbg("scanphr %d\n", plen);
80
81 int t;
82 for(t=0; t < plen; t++)
83 prph(pp[t]);
84 puts("");
85 #endif
86
87 char pinyin_set[MAX_PH_BF_EXT];
88 char *t_pinyin_set = NULL;
89 gboolean is_pin_juyin = ph_key_sz == 2 && pin_juyin;
90
91 if (is_pin_juyin) {
92 get_chpho_pinyin_set (pinyin_set);
93 t_pinyin_set = pinyin_set + chpho_idx;
94 mask_tone (pp, plen, t_pinyin_set);
95 }
96
97 int sti, edi;
98 if (!tsin_seek (pp, plen, &sti, &edi, t_pinyin_set)) {
99 empty:
100 if (rselN)
101 *rselN = 0;
102 return 0;
103 }
104
105 tss.pre_selN = 0;
106 int maxlen = 0;
107
108 #define selNMax 300
109 PRE_SEL sel[selNMax];
110 int selN = 0;
111
112 u_int64_t mtk64[MAX_PHRASE_LEN + 1];
113 phokey_t *mtk = (phokey_t *) mtk64;
114 u_int *mtk32 = (u_int *) mtk64;
115
116 while (sti < edi && selN < selNMax) {
117 u_char mtch[MAX_PHRASE_LEN * CH_SZ + 1];
118 char match_len;
119 usecount_t usecount;
120
121 load_tsin_entry (sti, &match_len, &usecount, mtk, mtch);
122
123 sti++;
124 if (plen > match_len || (pho_incr && plen == match_len)) {
125 continue;
126 }
127
128 mask_tone (mtk, plen, t_pinyin_set);
129
130 int i;
131 for (i = 0; i < plen; i++) {
132 if (mtk[i] != pp[i])
133 break;
134 }
135
136 if (i < plen)
137 continue;
138
139 if (pho_incr) {
140 if (ph_key_sz == 2) {
141 phokey_t last_m = mtk[plen];
142 mask_key_typ_pho (&last_m);
143 if (last_m != tailpho)
144 continue;
145 } else {
146 u_int64_t v = ph_key_sz == 4 ? mtk32[plen] : mtk64[plen];
147 if (ggg.kval != (v & vmaskci))
148 continue;
149 }
150 }
151
152 #if 0
153 dbg("nnn ");
154 nputs(mtch, match_len);
155 dbg("\n");
156 #endif
157
158 if (ph_key_sz == 2) {
159 if (check_fixed_mismatch (chpho_idx, (char *) mtch, plen))
160 continue;
161 } else {
162 if (check_gtab_fixed_mismatch (chpho_idx, (char *) mtch, plen))
163 continue;
164 }
165
166 if (maxlen < match_len)
167 maxlen = match_len;
168
169 sel[selN].len = match_len;
170 // sel[selN].phidx = sti - 1;
171 sel[selN].usecount = usecount;
172 utf8cpyN (sel[selN].str, (char *) mtch, match_len);
173 memcpy (sel[selN].phkey, mtk, match_len * ph_key_sz);
174 selN++;
175 }
176
177 // dbg("SelN:%d\n", selN);
178
179 if (selN > 1) {
180 qsort (sel, selN, sizeof (PRE_SEL), qcmp_pre_sel_str);
181 int nselN = 0;
182 int i;
183 for (i = 0; i < selN; i++)
184 if (sel[i].len > 1 && (!i || strcmp (sel[i].str, sel[i - 1].str)))
185 sel[nselN++] = sel[i];
186 selN = nselN;
187 }
188
189 if (selN == 1 && sel[0].len <= 2)
190 goto empty;
191
192 qsort (sel, selN, sizeof (PRE_SEL), qcmp_pre_sel_usecount);
193
194 // dbg("selN:%d\n", selN);
195 if (ph_key_sz == 2)
196 tss.pre_selN = Min (selN, phkbm.selkeyN);
197 else
198 tss.pre_selN = Min (selN, strlen (cur_inmd->selkey));
199
200 // dbg("tss.pre_selN %d\n", tss.pre_selN);
201 memcpy (tss.pre_sel, sel, sizeof (PRE_SEL) * tss.pre_selN);
202
203 if (rselN)
204 *rselN = selN;
205
206 return maxlen;
207 }
208
/* Forward declarations for the pre-selection UI helpers used below.  The
 * zero-argument ones carry an explicit (void) prototype so that calls are
 * checked by the compiler; this file calls them without arguments. */
void hide_pre_sel (void);
void chpho_get_str (int idx, int len, char *ch);
void disp_pre_sel_page (void);
212
/*
 * Rebuild the phrase pre-selection list for the tail of the edit buffer.
 *
 * Does nothing unless tsin_phrase_pre_select is enabled.  Otherwise it
 * clears the current list, hides the pre-selection display and, when the
 * buffer is non-empty and tss.c_idx is non-zero, tries every tail length
 * from 1 up to min(tss.c_len, MAX_PHRASE_LEN), stopping early at a position
 * flagged FLAG_CHPHO_PHRASE_TAIL.  The longest tail for which scanphr_e()
 * reports a match is scanned once more so that tss.pre_sel holds its
 * candidates, and the page is displayed -- unless there were too many
 * candidates (at least twice the number of selection keys in pho_selkey) or
 * the only candidate is exactly the text already in the buffer.
 *
 *   b_incr  forwarded to scanphr_e() as its pho_incr argument
 */
void tsin_scan_pre_select (gboolean b_incr) {
    if (!tsin_phrase_pre_select)
        return;

    tss.pre_selN = 0;
    hide_pre_sel ();

    if (!tss.c_idx || !tss.c_len)
        return;

    init_pre_sel ();

    int span_limit = tss.c_len;
    if (span_limit > MAX_PHRASE_LEN)
        span_limit = MAX_PHRASE_LEN;

    int best_len = -1;
    int best_selN = -1;
    int found;
    int span;

    for (span = 1; span <= span_limit; span++) {
        int start = tss.c_len - span;

        /* Never extend the tail across the end of an earlier phrase. */
        if (tss.chpho[start].flag & FLAG_CHPHO_PHRASE_TAIL)
            break;

        int hit_len = scanphr_e (tss.c_len - span, span, b_incr, &found);
        if (!hit_len)
            continue;

        /* Later (longer) hits override earlier ones. */
        best_len = span;
        best_selN = found;
    }

    if (best_len < 0 || best_selN >= strlen (pho_selkey) * 2) {
        tss.pre_selN = 0;
        return;
    }

    /* Scan the winning tail again so tss.pre_sel holds its candidates. */
    scanphr_e (tss.c_len - best_len, best_len, b_incr, &found);

    if (found == 1 && tss.pre_sel[0].len == best_len) {
        /* The single candidate is what is already in the buffer: nothing
         * worth showing. */
        char typed[MAX_PHRASE_LEN * CH_SZ + 1];
        chpho_get_str (tss.c_len - best_len, best_len, typed);
        if (strcmp (typed, tss.pre_sel[0].str) == 0)
            return;
    }

    tss.ph_sta = tss.c_len - best_len;
    disp_pre_sel_page ();
}
268