1 #include "gcin.h"
2 #include "pho.h"
3 #include "gtab.h"
4 // #include "gtab-phrase-db.h"
5 #include "tsin.h"
6 #include "lang.h"
7
/* Capacity, in entries, of the two static item tables below. */
#define MAX_K (500000)

/* Item table used when key64 is FALSE; main() fills it from the .gtab file and sorts it. */
ITEM it[MAX_K];
/* Item table used when key64 is TRUE; main() fills it from the .gtab file and sorts it. */
ITEM64 it64[MAX_K];
/* Set by main(): TRUE when th.MaxPress * th.keybits exceeds 32, i.e. it64 is the active table. */
gboolean key64;
/* Defined in another translation unit; not read in the code shown here
 * (presumably maintained by set_is_chs() -- confirm). */
extern gboolean is_chs;
/* Number of entries searched by find_ch()/find_ch64(); main() sets it to th.DefC. */
int itN;
/* Header read by main() from the start of the input .gtab file. */
struct TableHead th;
/* Mask selecting one key field: main() sets it to (1 << th.keybits) - 1. */
int kmask;

/* When non-zero: compiles the key-length comparators, makes qcmp_ch()/qcmp_ch64()
 * break ties by key length, and caps the candidate count reported by
 * find_ch()/find_ch64() at this value. */
#define TRIM_N 5
19
20 #if TRIM_N
gtab_klen(u_int64_t k)21 int gtab_klen(u_int64_t k) {
22 int klen=0;
23 for(int i=0;i<th.MaxPress;i++) {
24 if (k & kmask)
25 klen++;
26 k>>=th.keybits;
27 }
28 return klen;
29 }
30
/*
 * Order two 32-bit packed keys by their gtab_klen() value.
 *
 * a and b each point at sizeof(unsigned int) bytes of key data; the bytes
 * are copied out with memcpy so the pointers need not be aligned.
 * Returns a negative, zero or positive value as a's length is smaller than,
 * equal to, or larger than b's.
 */
int qcmp_klen(unsigned char *a, unsigned char *b)
{
  unsigned int key_a = 0;
  unsigned int key_b = 0;

  memcpy(&key_a, a, sizeof(key_a));
  memcpy(&key_b, b, sizeof(key_b));

  int len_a = gtab_klen(key_a);
  int len_b = gtab_klen(key_b);

  return len_a - len_b;
}
37
qcmp_klen64(const void * aa,const void * bb)38 int qcmp_klen64(const void *aa, const void *bb)
39 {
40 ITEM64 *a = (ITEM64 *)aa, *b=(ITEM64 *)bb;
41 u_int64_t ka, kb;
42 memcpy(&ka, a->key, sizeof(ka)); memcpy(&kb, b->key, sizeof(kb));
43 return (gtab_klen(ka) - gtab_klen(kb));
44 }
45 #endif
46
47
qcmp_ch_(const void * aa,const void * bb)48 int qcmp_ch_(const void *aa, const void *bb)
49 {
50 return memcmp(((ITEM *)aa)->ch, ((ITEM *)bb)->ch, CH_SZ);
51 }
52
qcmp_ch64_(const void * aa,const void * bb)53 int qcmp_ch64_(const void *aa, const void *bb)
54 {
55 return memcmp(((ITEM64 *)aa)->ch, ((ITEM64 *)bb)->ch, CH_SZ);
56 }
57
qcmp_ch(const void * aa,const void * bb)58 int qcmp_ch(const void *aa, const void *bb)
59 {
60 int d = memcmp(((ITEM *)aa)->ch, ((ITEM *)bb)->ch, CH_SZ);
61 #if TRIM_N
62 if (d)
63 return d;
64 return qcmp_klen(((ITEM *)aa)->key, ((ITEM *)bb)->key);
65 #else
66 return d;
67 #endif
68 }
69
qcmp_ch64(const void * aa,const void * bb)70 int qcmp_ch64(const void *aa, const void *bb)
71 {
72 int d = memcmp(((ITEM64 *)aa)->ch, ((ITEM64 *)bb)->ch, CH_SZ);
73 #if TRIM_N && 1
74 if (d)
75 return d;
76 return qcmp_klen64(aa, bb);
77 #else
78 return d;
79 #endif
80 }
81
82
find_ch(char * s,int * N)83 ITEM *find_ch(char *s, int *N)
84 {
85 ITEM t;
86
87 bzero(t.ch, CH_SZ);
88 u8cpy((char *)t.ch, s);
89
90 ITEM *p = (ITEM *)bsearch(&t, it, itN, sizeof(ITEM), qcmp_ch_);
91 if (!p)
92 return NULL;
93
94 ITEM *q = p+1;
95
96 while (p > it && !qcmp_ch_(p-1, &t))
97 p--;
98
99 ITEM *end = it + itN;
100 while (q < end && !qcmp_ch_(q, &t))
101 q++;
102
103 *N = q - p;
104 if (*N > 20)
105 p_err("err");
106
107 #if TRIM_N
108 if (*N > TRIM_N)
109 *N = TRIM_N;
110 #endif
111
112 return p;
113 }
114
find_ch64(char * s,int * N)115 ITEM64 *find_ch64(char *s, int *N)
116 {
117 ITEM64 t;
118
119 bzero(t.ch, CH_SZ);
120 u8cpy((char *)t.ch, s);
121
122 ITEM64 *p = (ITEM64 *)bsearch(&t, it64, itN, sizeof(ITEM64), qcmp_ch64_);
123 if (!p)
124 return NULL;
125
126 ITEM64 *q = p+1;
127
128 while (p > it64 && !qcmp_ch64_(p-1, &t))
129 p--;
130
131 ITEM64 *end = it64 + itN;
132 while (q < end && !qcmp_ch64_(q, &t))
133 q++;
134
135 *N = q - p;
136 if (*N > 20)
137 p_err("err");
138
139 #if TRIM_N
140 if (*N > TRIM_N)
141 *N = TRIM_N;
142 #endif
143
144 return p;
145 }
146
/* Candidate keys for one character of a phrase (32-bit key tables). */
typedef struct {
  ITEM *arr;   /* first matching entry in `it`, as returned by find_ch(); NULL if none */
  int N;       /* number of candidates to use starting at arr */
} KKARR;
151
/* Candidate keys for one character of a phrase (64-bit key tables). */
typedef struct {
  ITEM64 *arr; /* first matching entry in `it64`, as returned by find_ch64(); NULL if none */
  int N;       /* number of candidates to use starting at arr */
} KKARR64;
156
157
/* Defined elsewhere.  main() calls it to render packed key k into the string
 * tkey, passing the key map and the per-field bit width. */
void get_keymap_str(u_int64_t k, char *keymap, int keybits, char tkey[]);

#if WIN32
/* Declared for Windows builds; not called in the code shown here. */
void init_gcin_program_files();
/* Linker directive: build for the "windows" subsystem with mainCRTStartup as entry point. */
#pragma comment(linker, "/subsystem:\"windows\" /entry:\"mainCRTStartup\"")
#endif
164
main(int argc,char ** argv)165 int main(int argc, char **argv)
166 {
167 gtk_init(&argc, &argv);
168 set_is_chs();
169
170 #if 1
171 if (argc != 3)
172 p_err("%s a_file.gtab outfile", argv[0]);
173 #endif
174 #if 1
175 char *infile = argv[1];
176 char *outfile = argv[2];
177 #else
178 char *infile = "data/ar30.gtab";
179 char *outfile = "l";
180 #endif
181
182 FILE *fr;
183 if ((fr=fopen(infile, "rb"))==NULL)
184 p_err("cannot err open %s", infile);
185
186 FILE *fp_out;
187 if ((fp_out=fopen(outfile,"w"))==NULL) {
188 printf("Cannot open %s", outfile);
189 exit(-1);
190 }
191
192 fread(&th,1, sizeof(th), fr);
193 #if NEED_SWAP
194 swap_byte_4(&th.version);
195 swap_byte_4(&th.flag);
196 swap_byte_4(&th.space_style);
197 swap_byte_4(&th.KeyS);
198 swap_byte_4(&th.MaxPress);
199 swap_byte_4(&th.M_DUP_SEL);
200 swap_byte_4(&th.DefC);
201 for(i=0; i <= KeyNum; i++)
202 swap_byte_4(&idx1[i]);
203 #endif
204 int KeyNum = th.KeyS;
205 kmask = (1 << th.keybits) - 1;
206 dbg("keys %d kmask:%x\n",KeyNum, kmask);
207 dbg("th.DefC %d\n", th.DefC);
208
209 if (!th.keybits)
210 th.keybits = 6;
211 dbg("keybits:%d maxPress:%d\n", th.keybits, th.MaxPress);
212
213 int max_keyN;
214 if (th.MaxPress*th.keybits > 32) {
215 max_keyN = 64 / th.keybits;
216 key64 = TRUE;
217 dbg("it's a 64-bit .gtab\n");
218 } else {
219 max_keyN = 32 / th.keybits;
220 key64 = FALSE;
221 }
222
223 dbg("key64:%d\n", key64);
224
225 char kname[128][CH_SZ];
226 char keymap[128];
227 gtab_idx1_t idx1[256];
228 static char kno[128];
229
230 itN = th.DefC;
231
232 bzero(keymap, sizeof(keymap));
233 fread(keymap, 1, th.KeyS, fr);
234 fread(kname, CH_SZ, th.KeyS, fr);
235 fread(idx1, sizeof(gtab_idx1_t), KeyNum+1, fr);
236
237 int i;
238 for(i=0; i < th.KeyS; i++) {
239 kno[keymap[i]] = i;
240 }
241
242 fprintf(fp_out,TSIN_GTAB_KEY" %d %d %s\n", th.keybits, th.MaxPress, keymap+1);
243
244 if (key64) {
245 fread(it64, sizeof(ITEM64), th.DefC, fr);
246 qsort(it64, th.DefC, sizeof(ITEM64), qcmp_ch64);
247 }
248 else {
249 fread(it, sizeof(ITEM), th.DefC, fr);
250 qsort(it, th.DefC, sizeof(ITEM), qcmp_ch);
251 }
252
253 fclose(fr);
254
255 itN = th.DefC;
256
257 // dbg("itN:%d\n", itN);
258 #if 0
259 for(i=0; i < itN; i++) {
260 printf("\n%d ", i);
261 utf8_putchar(it64[i].ch);
262 }
263 #endif
264
265 char fname[128];
266 get_gcin_user_fname(tsin32_f, fname);
267
268 FILE *fp;
269 if ((fp=fopen(fname,"rb"))==NULL) {
270 printf("Cannot open %s", fname);
271 exit(-1);
272 }
273
274 while (!feof(fp)) {
275 int i;
276 phokey_t phbuf[MAX_PHRASE_LEN];
277 u_char clen;
278 usecount_t usecount;
279
280 fread(&clen,1,1,fp);
281 fread(&usecount, sizeof(usecount_t), 1,fp);
282 fread(phbuf,sizeof(phokey_t), clen, fp);
283
284 char str[MAX_PHRASE_LEN * CH_SZ + 1];
285 int strN = 0;
286 KKARR kk[MAX_PHRASE_LEN];
287 KKARR64 kk64[MAX_PHRASE_LEN];
288 gboolean has_err = FALSE;
289
290 if (key64)
291 bzero(kk64, sizeof(kk64));
292 else
293 bzero(kk, sizeof(kk));
294
295 // dbg("clen %d\n", clen);
296 for(i=0;i<clen;i++) {
297 char ch[CH_SZ];
298
299 int n = fread(ch, 1, 1, fp);
300 if (n<=0)
301 goto stop;
302
303 int len=utf8_sz(ch);
304
305 fread(&ch[1], 1, len-1, fp);
306 // utf8_putchar(ch);
307
308 if (key64) {
309 if (!(kk64[i].arr = find_ch64(ch, &kk64[i].N)))
310 has_err = TRUE;
311
312 #define M_CUT 2
313
314 #if TRIM_N
315 if (i > M_CUT)
316 kk64[i].N=1;
317 #endif
318 } else {
319 if (!(kk[i].arr = find_ch(ch, &kk[i].N)))
320 has_err = TRUE;
321 #if TRIM_N
322 if (i > M_CUT)
323 kk[i].N=1;
324 #endif
325 }
326
327 memcpy(str+strN, ch, len);
328 strN+=len;
329 }
330
331 if (has_err) {
332 // dbg("has_error\n");
333 continue;
334 }
335 #if 0
336 for(i=0; i < clen; i++)
337 printf("%d ", kk64[i].N);
338 printf("\n");
339 #endif
340 str[strN]=0;
341
342 int permN;
343 if (key64) {
344 permN=kk64[0].N;
345 for(i=1;i<clen;i++)
346 permN *= kk64[i].N;
347 }
348 else {
349 permN=kk[0].N;
350 for(i=1;i<clen;i++)
351 permN *= kk[i].N;
352 }
353
354 int z;
355 for(z=0; z < permN; z++) {
356 char vz[MAX_PHRASE_LEN];
357
358 int tz = z;
359
360 if (key64) {
361 for(i=0; i < clen; i++) {
362 vz[i] = tz % kk64[i].N;
363 tz /= kk64[i].N;
364 }
365 } else {
366 for(i=0; i < clen; i++) {
367 vz[i] = tz % kk[i].N;
368 tz /= kk[i].N;
369 }
370 }
371
372 char kstr[512];
373 kstr[0]=0;
374
375 for(i=0;i<clen;i++) {
376 char tkey[512];
377 u_int64_t k=0;
378
379 if (key64) {
380 memcpy(&k, kk64[i].arr[vz[i]].key, 8);
381 } else {
382 u_int t;
383 memcpy(&t, kk[i].arr[vz[i]].key, 4);
384 k = t;
385 }
386
387 get_keymap_str(k, keymap, th.keybits, tkey);
388
389 strcat(kstr, tkey);
390 strcat(kstr, " ");
391 }
392
393 fprintf(fp_out,"%s %s%d\n", str, kstr, usecount);
394 }
395 }
396 stop:
397
398 fclose(fp);
399 fclose(fp_out);
400
401 dbg("finish\n");
402 return 0;
403 }
404