1 /* Copyright (C) 1995-2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
2  *
3  * This library is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU Lesser General Public
5  * License as published by the Free Software Foundation version 2.1
6  * of the License.
7  *
8  * This library is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * Lesser General Public License for more details.
12  *
13  * You should have received a copy of the GNU Lesser General Public
14  * License along with this library; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
16  */
17 
18 #include "hime.h"
19 
20 #include "gst.h"
21 #include "gtab.h"
22 #include "pho.h"
23 #include "tsin.h"
24 
qcmp_pre_sel_usecount(const void * aa,const void * bb)25 static int qcmp_pre_sel_usecount (const void *aa, const void *bb) {
26     PRE_SEL *a = (PRE_SEL *) aa;
27     PRE_SEL *b = (PRE_SEL *) bb;
28 
29     return b->usecount - a->usecount;
30 }
31 
qcmp_pre_sel_str(const void * aa,const void * bb)32 static int qcmp_pre_sel_str (const void *aa, const void *bb) {
33     PRE_SEL *a = (PRE_SEL *) aa;
34     PRE_SEL *b = (PRE_SEL *) bb;
35 
36     int d = strcmp (a->str, b->str);
37     if (d)
38         return d;
39 
40     return b->usecount - a->usecount;
41 }
42 
43 void extract_gtab_key (int start, int len, void *out);
44 gboolean check_gtab_fixed_mismatch (int idx, char *mtch, int plen);
45 void mask_tone (phokey_t *pho, int plen, char *tone_off);
46 void init_pre_sel ();
47 void mask_key_typ_pho (phokey_t *key);
48 extern u_int64_t vmaskci;
49 
scanphr_e(int chpho_idx,int plen,gboolean pho_incr,int * rselN)50 u_char scanphr_e (int chpho_idx, int plen, gboolean pho_incr, int *rselN) {
51     if (plen >= MAX_PHRASE_LEN)
52         goto empty;
53     if (chpho_idx < 0)
54         goto empty;
55 
56     phokey_t tailpho;
57 
58     if (pho_incr) {
59         if (ph_key_sz == 2) {
60             tailpho = pho2key (poo.typ_pho);
61             if (!tailpho)
62                 pho_incr = FALSE;
63         } else {
64             if (!ggg.kval)
65                 pho_incr = FALSE;
66         }
67     }
68 
69     u_int64_t pp64[MAX_PHRASE_LEN + 1];
70     phokey_t *pp = (phokey_t *) pp64;
71 
72     if (ph_key_sz == 2) {
73         extract_pho (chpho_idx, plen, pp);
74     } else {
75         extract_gtab_key (chpho_idx, plen, pp64);
76     }
77 
78 #if 0
79   dbg("scanphr %d\n", plen);
80 
81   int t;
82   for(t=0; t < plen; t++)
83     prph(pp[t]);
84   puts("");
85 #endif
86 
87     char pinyin_set[MAX_PH_BF_EXT];
88     char *t_pinyin_set = NULL;
89     gboolean is_pin_juyin = ph_key_sz == 2 && pin_juyin;
90 
91     if (is_pin_juyin) {
92         get_chpho_pinyin_set (pinyin_set);
93         t_pinyin_set = pinyin_set + chpho_idx;
94         mask_tone (pp, plen, t_pinyin_set);
95     }
96 
97     int sti, edi;
98     if (!tsin_seek (pp, plen, &sti, &edi, t_pinyin_set)) {
99     empty:
100         if (rselN)
101             *rselN = 0;
102         return 0;
103     }
104 
105     tss.pre_selN = 0;
106     int maxlen = 0;
107 
108 #define selNMax 300
109     PRE_SEL sel[selNMax];
110     int selN = 0;
111 
112     u_int64_t mtk64[MAX_PHRASE_LEN + 1];
113     phokey_t *mtk = (phokey_t *) mtk64;
114     u_int *mtk32 = (u_int *) mtk64;
115 
116     while (sti < edi && selN < selNMax) {
117         u_char mtch[MAX_PHRASE_LEN * CH_SZ + 1];
118         char match_len;
119         usecount_t usecount;
120 
121         load_tsin_entry (sti, &match_len, &usecount, mtk, mtch);
122 
123         sti++;
124         if (plen > match_len || (pho_incr && plen == match_len)) {
125             continue;
126         }
127 
128         mask_tone (mtk, plen, t_pinyin_set);
129 
130         int i;
131         for (i = 0; i < plen; i++) {
132             if (mtk[i] != pp[i])
133                 break;
134         }
135 
136         if (i < plen)
137             continue;
138 
139         if (pho_incr) {
140             if (ph_key_sz == 2) {
141                 phokey_t last_m = mtk[plen];
142                 mask_key_typ_pho (&last_m);
143                 if (last_m != tailpho)
144                     continue;
145             } else {
146                 u_int64_t v = ph_key_sz == 4 ? mtk32[plen] : mtk64[plen];
147                 if (ggg.kval != (v & vmaskci))
148                     continue;
149             }
150         }
151 
152 #if 0
153     dbg("nnn ");
154     nputs(mtch, match_len);
155     dbg("\n");
156 #endif
157 
158         if (ph_key_sz == 2) {
159             if (check_fixed_mismatch (chpho_idx, (char *) mtch, plen))
160                 continue;
161         } else {
162             if (check_gtab_fixed_mismatch (chpho_idx, (char *) mtch, plen))
163                 continue;
164         }
165 
166         if (maxlen < match_len)
167             maxlen = match_len;
168 
169         sel[selN].len = match_len;
170         //    sel[selN].phidx = sti - 1;
171         sel[selN].usecount = usecount;
172         utf8cpyN (sel[selN].str, (char *) mtch, match_len);
173         memcpy (sel[selN].phkey, mtk, match_len * ph_key_sz);
174         selN++;
175     }
176 
177     //  dbg("SelN:%d\n", selN);
178 
179     if (selN > 1) {
180         qsort (sel, selN, sizeof (PRE_SEL), qcmp_pre_sel_str);
181         int nselN = 0;
182         int i;
183         for (i = 0; i < selN; i++)
184             if (sel[i].len > 1 && (!i || strcmp (sel[i].str, sel[i - 1].str)))
185                 sel[nselN++] = sel[i];
186         selN = nselN;
187     }
188 
189     if (selN == 1 && sel[0].len <= 2)
190         goto empty;
191 
192     qsort (sel, selN, sizeof (PRE_SEL), qcmp_pre_sel_usecount);
193 
194     //  dbg("selN:%d\n", selN);
195     if (ph_key_sz == 2)
196         tss.pre_selN = Min (selN, phkbm.selkeyN);
197     else
198         tss.pre_selN = Min (selN, strlen (cur_inmd->selkey));
199 
200     //  dbg("tss.pre_selN %d\n", tss.pre_selN);
201     memcpy (tss.pre_sel, sel, sizeof (PRE_SEL) * tss.pre_selN);
202 
203     if (rselN)
204         *rselN = selN;
205 
206     return maxlen;
207 }
208 
/* Helpers used by tsin_scan_pre_select() whose definitions are not in this
 * part of the file. */
extern void hide_pre_sel ();
extern void chpho_get_str (int idx, int len, char *ch);
extern void disp_pre_sel_page ();
212 
/*
 * Refresh the pre-selection candidates for the keys that end at tss.c_len.
 *
 * Does nothing when tsin_phrase_pre_select is off.  Otherwise the current
 * candidates are cleared and hidden, then scanphr_e() is tried on runs of
 * 1, 2, ... keys ending at tss.c_len (at most MAX_PHRASE_LEN, stopping at a
 * position flagged FLAG_CHPHO_PHRASE_TAIL).  The longest run that yields a
 * match is scanned again to fill tss.pre_sel, and the candidate page is
 * displayed unless:
 *   - no run matched, or the candidate count reaches twice the length of
 *     pho_selkey; or
 *   - the single candidate has the same length and text as what is already
 *     in the buffer.
 *
 * b_incr is forwarded to scanphr_e() as its pho_incr argument.
 */
void tsin_scan_pre_select (gboolean b_incr) {
    if (!tsin_phrase_pre_select)
        return;

    tss.pre_selN = 0;
    hide_pre_sel ();

    if (!tss.c_idx || !tss.c_len)
        return;

    init_pre_sel ();

    int span_limit = tss.c_len;
    if (span_limit > MAX_PHRASE_LEN)
        span_limit = MAX_PHRASE_LEN;

    int best_len = -1;
    int best_selN = -1;
    int found;
    int span;

    for (span = 1; span <= span_limit; span++) {
        int start = tss.c_len - span;

        if (tss.chpho[start].flag & FLAG_CHPHO_PHRASE_TAIL)
            break;

        /* A later (longer) successful span replaces any shorter one. */
        if (scanphr_e (start, span, b_incr, &found)) {
            best_len = span;
            best_selN = found;
        }
    }

    if (best_len < 0 || best_selN >= strlen (pho_selkey) * 2) {
        tss.pre_selN = 0;
        return;
    }

    /* The probing loop may have overwritten tss.pre_sel with the result of a
     * different span, so scan the winning span once more. */
    scanphr_e (tss.c_len - best_len, best_len, b_incr, &found);

    if (found == 1 && tss.pre_sel[0].len == best_len) {
        char current[MAX_PHRASE_LEN * CH_SZ + 1];

        chpho_get_str (tss.c_len - best_len, best_len, current);
        if (strcmp (current, tss.pre_sel[0].str) == 0)
            return;
    }

    tss.ph_sta = tss.c_len - best_len;
    disp_pre_sel_page ();
}
268