1 /*
2 * Copyright (C) 2020 The HIME team, Taiwan
3 * Copyright (C) 1995-2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation version 2.1
8 * of the License.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 #include <stdarg.h>
21 #include <stdio.h>
22 #include <string.h>
23
24 #include <sys/types.h>
25 #if FREEBSD
26 #include <sys/param.h>
27 #include <sys/stat.h>
28 #endif
29
30 #include "hime.h"
31
32 #include "gtab.h"
33 #include "hime-endian.h"
34
/* Input and output streams; both are opened with fopen() in main(). */
FILE *fr, *fw;
/* Count of lines requested from fr so far; get_line() increments it and
 * main() prints it in error messages. */
int lineno;
/* Current input line. get_line() fills it and cmd_arg() splits it in place,
 * so the pointers cmd_arg() hands out refer into this buffer. */
char tt[1024];
38
/*
 * Returns a pointer to the first character of s that is neither a space
 * nor a tab. When s consists only of such characters (or is empty) the
 * returned pointer addresses the terminating NUL.
 */
static char *skip_space (char *s) {
    return s + strspn (s, " \t");
}
45
/*
 * Returns a pointer to the first space or tab in s, or to the terminating
 * NUL when s contains neither.
 */
static char *to_space (char *s) {
    return s + strcspn (s, " \t");
}
52
/*
 * Strips trailing newlines, spaces and tabs from s in place.
 *
 * The scan is bounded by the start of the string, so a string made up
 * entirely of such characters becomes empty and no byte before s is ever
 * read or written. An empty string is left unchanged.
 */
static void del_newline_space (char *s) {
    size_t len = strlen (s);

    while (len > 0) {
        const char last = s[len - 1];

        if (last != '\n' && last != ' ' && last != '\t') {
            break;
        }
        len--;
    }

    s[len] = 0;
}
67
get_line(void)68 static void get_line (void) {
69 while (!feof (fr)) {
70 memset (tt, 0, sizeof (tt));
71 myfgets (tt, sizeof (tt), fr);
72
73 lineno++;
74 size_t len = strlen (tt);
75
76 if (tt[len - 1] == '\n') {
77 tt[len - 1] = 0;
78 }
79
80 if (tt[0] == '#' || strlen (tt) < 3) {
81 continue;
82 }
83 break;
84 }
85 }
86
cmd_arg(char ** cmd,char ** arg)87 static void cmd_arg (char **cmd, char **arg) {
88
89 get_line ();
90 char *s = tt;
91
92 if (!*s) {
93 *cmd = *arg = s;
94 return;
95 }
96
97 s = skip_space (s);
98 char *t = to_space (s);
99 *cmd = s;
100 if (!(*t)) {
101 *arg = t;
102 return;
103 }
104
105 *t = 0;
106 t++;
107
108 t = skip_space (t);
109 del_newline_space (t);
110
111 char *p = NULL;
112 if ((p = strchr (t, '\t'))) {
113 *p = 0;
114 }
115
116 *arg = t;
117 }
118
/* Returns 1 when the two strings compare equal with strcmp(), 0 otherwise. */
static int str_eq (const char *s, const char *t) {
    return strcmp (s, t) == 0;
}
122
/* Working entry used while a table with 32-bit keys is being built. */
typedef struct {
    u_int32_t key;     /* packed key sequence, assembled in main() */
    uint8_t ch[CH_SZ]; /* character bytes, or a 3-byte phrase index (see main()) */
    int oseq;          /* position in input order; tie-breaker in qcmp() */
} ITEM2;

/* Same as ITEM2 but with a 64-bit key; sorted with qcmp_64(). */
typedef struct {
    u_int64_t key;
    u_int8_t ch[CH_SZ];
    int oseq;
} ITEM2_64;

/* Capacity of each of the four static tables below. */
#define MAX_K (500000)

/* Working tables filled by main() and then sorted. */
static ITEM2 itar[MAX_K];
static ITEM2_64 itar64[MAX_K];

/* Output tables: main() copies the leading sizeof(ITEM) / sizeof(ITEM64)
 * bytes of each sorted working entry here before writing them out. */
static ITEM itout[MAX_K];
static ITEM64 itout64[MAX_K];
142
qcmp(const void * aa,const void * bb)143 static int qcmp (const void *aa, const void *bb) {
144 const ITEM2 *a = (ITEM2 *) aa;
145 const ITEM2 *b = (ITEM2 *) bb;
146
147 if (a->key > b->key) {
148 return 1;
149 }
150 if (a->key < b->key) {
151 return -1;
152 }
153
154 return a->oseq - b->oseq;
155 }
156
qcmp_64(const void * aa,const void * bb)157 static int qcmp_64 (const void *aa, const void *bb) {
158 ITEM2_64 *a = (ITEM2_64 *) aa;
159 ITEM2_64 *b = (ITEM2_64 *) bb;
160
161 if (a->key > b->key) {
162 return 1;
163 }
164 if (a->key < b->key) {
165 return -1;
166 }
167
168 return a->oseq - b->oseq;
169 }
170
/* Maps 'A'..'Z' to 'a'..'z' and yields every other value unchanged.
 * The argument is evaluated more than once, so it must be free of side
 * effects. */
#define mtolower(ch) ((ch) >= 'A' && (ch) <= 'Z' ? (ch) + 0x20 : (ch))

/* Key character -> 1-based key number assigned in the %keyname section of
 * main(); 0 means the character has no key assigned. */
static char kno[128];
174
main(int argc,char ** argv)175 int main (int argc, char **argv) {
176
177 printf ("-- hime-cin2gtab encoding UTF-8 --\n");
178 printf ("--- please use iconv -f big5 -t utf-8 if your file is in big5 encoding\n");
179
180 char fname[64];
181 if (argc <= 1) {
182 printf ("Enter table file name [.cin] : ");
183 scanf ("%s", fname);
184 } else {
185 strncpy (fname, argv[1], sizeof (fname));
186 }
187
188 if (!strcmp (fname, "-v") || !strcmp (fname, "--version")) {
189 p_err ("hime-cin2gtab for hime %s \n", HIME_VERSION);
190 exit (0);
191 }
192
193 char *p = NULL;
194 if ((p = strstr (fname, ".cin"))) {
195 *p = 0;
196 }
197
198 char fname_cin[64];
199 char fname_tab[64];
200 strncpy (fname_cin, fname, sizeof (fname_cin));
201 strncpy (fname_tab, fname, sizeof (fname_tab));
202 strncat (fname_cin, ".cin", 4);
203 strncat (fname_tab, ".gtab", 5);
204
205 if ((fr = fopen (fname_cin, "rb")) == NULL) {
206 p_err ("Cannot open %s\n", fname_cin);
207 }
208
209 skip_utf8_sigature (fr);
210
211 struct TableHead th;
212 char keymap[128];
213 memset (&th, 0, sizeof (th));
214 memset (kno, 0, sizeof (kno));
215 memset (keymap, 0, sizeof (keymap));
216
217 memset (itar, 0, sizeof (itar));
218 memset (itout, 0, sizeof (itout));
219 memset (itar64, 0, sizeof (itar64));
220 memset (itout64, 0, sizeof (itout64));
221
222 char *cmd = NULL;
223 char *arg = NULL;
224 cmd_arg (&cmd, &arg);
225 if (str_eq (cmd, "%gen_inp")) {
226 dbg ("skip gen_inp\n");
227 cmd_arg (&cmd, &arg);
228 }
229
230 if (!str_eq (cmd, "%ename") || !(*arg)) {
231 p_err ("%d: %%ename english_name expected", lineno);
232 }
233 arg[15] = 0;
234
235 cmd_arg (&cmd, &arg);
236 if (!(str_eq (cmd, "%prompt") || str_eq (cmd, "%cname")) || !(*arg)) {
237 p_err ("%d: %%prompt prompt_name expected", lineno);
238 }
239 strncpy (th.cname, arg, MAX_CNAME);
240 dbg ("cname %s\n", th.cname);
241
242 cmd_arg (&cmd, &arg);
243 if (!str_eq (cmd, "%selkey") || !(*arg)) {
244 p_err ("%d: %%selkey select_key_list expected", lineno);
245 }
246
247 if (strlen (arg) >= sizeof (th.selkey)) {
248 memcpy (th.selkey, arg, sizeof (th.selkey));
249 strcpy (th.selkey2, arg + sizeof (th.selkey));
250 dbg ("th.selkey2 %s\n", th.selkey2);
251 } else {
252 strcpy (th.selkey, arg);
253 }
254
255 cmd_arg (&cmd, &arg);
256 if (!str_eq (cmd, "%dupsel") || !(*arg)) {
257 if (th.selkey[sizeof (th.selkey) - 1]) {
258 th.M_DUP_SEL = sizeof (th.selkey) + strlen (th.selkey2);
259 } else {
260 th.M_DUP_SEL = strlen (th.selkey);
261 }
262 } else {
263 th.M_DUP_SEL = atoi (arg);
264 cmd_arg (&cmd, &arg);
265 }
266
267 for (;;) {
268 if (str_eq (cmd, "%endkey")) {
269 strcpy (th.endkey, arg);
270 cmd_arg (&cmd, &arg);
271 } else if (str_eq (cmd, "%space_style")) {
272 th.space_style = (GTAB_space_pressed_E) atoi (arg);
273 cmd_arg (&cmd, &arg);
274 } else if (str_eq (cmd, "%keep_key_case")) {
275 th.flag |= FLAG_KEEP_KEY_CASE;
276 cmd_arg (&cmd, &arg);
277 } else if (str_eq (cmd, "%symbol_kbm")) {
278 th.flag |= FLAG_GTAB_SYM_KBM;
279 cmd_arg (&cmd, &arg);
280 } else if (str_eq (cmd, "%phase_auto_skip_endkey")) {
281 th.flag |= FLAG_PHRASE_AUTO_SKIP_ENDKEY;
282 cmd_arg (&cmd, &arg);
283 } else if (str_eq (cmd, "%flag_auto_select_by_phrase")) {
284 dbg ("flag_auto_select_by_phrase\n");
285 th.flag |= FLAG_AUTO_SELECT_BY_PHRASE;
286 cmd_arg (&cmd, &arg);
287 } else if (str_eq (cmd, "%flag_disp_partial_match")) {
288 dbg ("flag_disp_partial_match\n");
289 th.flag |= FLAG_GTAB_DISP_PARTIAL_MATCH;
290 cmd_arg (&cmd, &arg);
291 } else if (str_eq (cmd, "%flag_disp_full_match")) {
292 dbg ("flag_disp_full_match\n");
293 th.flag |= FLAG_GTAB_DISP_FULL_MATCH;
294 cmd_arg (&cmd, &arg);
295 } else if (str_eq (cmd, "%flag_vertical_selection")) {
296 dbg ("flag_vertical_selection\n");
297 th.flag |= FLAG_GTAB_VERTICAL_SELECTION;
298 cmd_arg (&cmd, &arg);
299 } else if (str_eq (cmd, "%flag_press_full_auto_send")) {
300 dbg ("flag_press_full_auto_send\n");
301 th.flag |= FLAG_GTAB_PRESS_FULL_AUTO_SEND;
302 cmd_arg (&cmd, &arg);
303 } else if (str_eq (cmd, "%flag_unique_auto_send")) {
304 dbg ("flag_unique_auto_send\n");
305 th.flag |= FLAG_GTAB_UNIQUE_AUTO_SEND;
306 cmd_arg (&cmd, &arg);
307 } else {
308 break;
309 }
310 }
311
312 if (!str_eq (cmd, "%keyname") || !str_eq (arg, "begin")) {
313 p_err ("%d: %%keyname begin expected, instead of %s %s", lineno, cmd, arg);
314 }
315
316 int KeyNum = 0;
317 char kname[128][CH_SZ];
318 for (KeyNum = 0;;) {
319 char k = 0;
320
321 cmd_arg (&cmd, &arg);
322 if (str_eq (cmd, "%keyname")) {
323 break;
324 }
325 if (BITON (th.flag, FLAG_KEEP_KEY_CASE)) {
326 k = cmd[0];
327 } else {
328 k = mtolower (cmd[0]);
329 }
330
331 if (kno[(int) k]) {
332 p_err ("%d: key %c is already used", lineno, k);
333 }
334
335 kno[(int) k] = ++KeyNum;
336 keymap[KeyNum] = k;
337 bchcpy (&kname[KeyNum][0], arg);
338 }
339
340 keymap[0] = kname[0][0] = kname[0][1] = ' ';
341 KeyNum++;
342 th.KeyS = KeyNum; /* include space */
343
344 cmd_arg (&cmd, &arg);
345
346 if (str_eq (cmd, "%quick") && str_eq (arg, "begin")) {
347 dbg (".. quick keys defined\n");
348 for (int quick_def = 0;; quick_def++) {
349
350 cmd_arg (&cmd, &arg);
351 if (str_eq (cmd, "%quick")) {
352 break;
353 }
354
355 const char k = kno[mtolower (cmd[0])] - 1;
356
357 int N = 0;
358 char *p = arg;
359
360 if (strlen (cmd) == 1) {
361 while (*p) {
362 int len = u8cpy (th.qkeys.quick1[(int) k][N++], p);
363 p += len;
364 }
365 } else if (strlen (cmd) == 2) {
366 const int k1 = kno[mtolower (cmd[1])] - 1;
367 while (*p) {
368 char tp[4];
369 int len = u8cpy (tp, p);
370
371 if (utf8_eq (tp, "□"))
372 tp[0] = 0;
373
374 u8cpy (th.qkeys.quick2[(int) k][(int) k1][N++], tp);
375 p += len;
376 }
377 } else {
378 p_err ("%d: %quick only 1&2 keys are allowed '%s'", lineno, cmd);
379 }
380 }
381 }
382
383 const long pos = ftell (fr);
384 const int olineno = lineno;
385 gboolean key64 = FALSE;
386 int max_key_len = 0;
387
388 while (!feof (fr)) {
389
390 cmd_arg (&cmd, &arg);
391 if (!cmd[0] || !arg[0])
392 continue;
393
394 if (!strcmp (cmd, "%chardef")) {
395 if (!strcmp (arg, "end")) {
396 break;
397 } else {
398 continue;
399 }
400 }
401
402 int len = strlen (cmd);
403
404 if (max_key_len < len) {
405 max_key_len = len;
406 }
407 }
408
409 fseek (fr, pos, SEEK_SET);
410 lineno = olineno;
411
412 INMD inmd, *cur_inmd = &inmd;
413
414 cur_inmd->key64 = key64;
415 cur_inmd->tbl64 = itout64;
416 cur_inmd->tbl = itout;
417
418 if (KeyNum < 64) {
419 cur_inmd->keybits = 6;
420 } else {
421 cur_inmd->keybits = 7;
422 }
423
424 if (cur_inmd->keybits * max_key_len > 32) {
425 cur_inmd->key64 = key64 = TRUE;
426 }
427
428 if (key64) {
429 dbg ("key64\n");
430 }
431
432 printf ("KeyNum:%d keybits:%d\n", KeyNum, cur_inmd->keybits);
433
434 th.keybits = cur_inmd->keybits;
435 cur_inmd->last_k_bitn = (((cur_inmd->key64 ? 64 : 32) / cur_inmd->keybits) - 1) * cur_inmd->keybits;
436
437 puts ("char def");
438 int chno = 0;
439 int *phridx = NULL;
440 int phr_cou = 0;
441 char *phrbuf = NULL;
442 int prbf_cou = 0;
443 while (!feof (fr)) {
444
445 cmd_arg (&cmd, &arg);
446 if (!cmd[0] || !arg[0])
447 continue;
448
449 if (!strcmp (cmd, "%chardef")) {
450 if (!strcmp (arg, "end"))
451 break;
452 else
453 continue;
454 }
455
456 int len = strlen (cmd);
457 if (len > th.MaxPress) {
458 th.MaxPress = len;
459 }
460
461 if (len > 10)
462 p_err ("%d: only <= 10 keys is allowed '%s'", lineno, cmd);
463
464 u_int64_t kk = 0;
465 for (int i = 0; i < len; i++) {
466 int key = BITON (th.flag, FLAG_KEEP_KEY_CASE) ? cmd[i] : mtolower (cmd[i]);
467
468 int k = kno[key];
469 if (!k) {
470 p_err ("%d: key undefined in keyname '%c'\n", lineno, cmd[i]);
471 }
472
473 kk |= (u_int64_t) k << (LAST_K_bitN - i * th.keybits);
474 }
475
476 // dbg("%s kk:%llx\n", cmd, kk);
477
478 if (key64) {
479 memcpy (&itar64[chno].key, &kk, 8);
480 itar64[chno].oseq = chno;
481 } else {
482 uint32_t key32 = (uint32_t) kk;
483 memcpy (&itar[chno].key, &key32, 4);
484 itar[chno].oseq = chno;
485 }
486
487 if ((len = strlen (arg)) <= CH_SZ && (arg[0] & 0x80)) {
488 char out[CH_SZ + 1];
489
490 memset (out, 0, sizeof (out));
491 memcpy (out, arg, len);
492
493 if (key64)
494 bchcpy (itar64[chno].ch, out);
495 else
496 bchcpy (itar[chno].ch, out);
497
498 } else {
499 if (key64) {
500 itar64[chno].ch[0] = phr_cou >> 16;
501 itar64[chno].ch[1] = (phr_cou >> 8) & 0xff;
502 itar64[chno].ch[2] = phr_cou & 0xff;
503 } else {
504 itar[chno].ch[0] = phr_cou >> 16;
505 itar[chno].ch[1] = (phr_cou >> 8) & 0xff;
506 itar[chno].ch[2] = phr_cou & 0xff;
507 }
508
509 if (len > MAX_CIN_PHR)
510 p_err ("phrase too long: %s max:%d bytes\n", arg, MAX_CIN_PHR);
511
512 phridx = trealloc (phridx, int, phr_cou + 1);
513 phridx[phr_cou++] = prbf_cou;
514 phrbuf = (char *) realloc (phrbuf, prbf_cou + len + 1);
515 strcpy (&phrbuf[prbf_cou], arg);
516 // printf("phrase:%d len:%d'%s'\n", phr_cou, len, arg);
517 prbf_cou += len;
518 }
519
520 chno++;
521 }
522 fclose (fr);
523
524 #define _sort qsort
525
526 printf ("MaxPress: %d\n", th.MaxPress);
527
528 th.DefC = chno;
529 cur_inmd->DefChars = chno;
530
531 if (key64)
532 _sort (itar64, chno, sizeof (ITEM2_64), qcmp_64);
533 else
534 _sort (itar, chno, sizeof (ITEM2), qcmp);
535
536 if (key64) {
537 for (int i = 0; i < chno; i++) {
538 memcpy (&itout64[i], &itar64[i], sizeof (ITEM64));
539 }
540 } else {
541 for (int i = 0; i < chno; i++) {
542 memcpy (&itout[i], &itar[i], sizeof (ITEM));
543 }
544 }
545
546 char def1[256];
547 gtab_idx1_t idx1[256];
548 memset (def1, 0, sizeof (def1));
549 memset (idx1, 0, sizeof (idx1));
550
551 u_int64_t keymask = KEY_MASK;
552 for (int i = 0; i < chno; i++) {
553 u_int64_t key = CONVT2 (cur_inmd, i);
554 int kk = (int) ((key >> LAST_K_bitN) & keymask);
555
556 if (!def1[kk]) {
557 idx1[kk] = (gtab_idx1_t) i;
558 def1[kk] = 1;
559 }
560 }
561
562 idx1[KeyNum] = chno;
563 for (int i = KeyNum - 1; i > 0; i--) {
564 if (!def1[i]) {
565 idx1[i] = idx1[i + 1];
566 }
567 }
568
569 if ((fw = fopen (fname_tab, "wb")) == NULL) {
570 p_err ("Cannot create: %s", fname_tab);
571 exit (1);
572 }
573
574 printf ("Defined Characters:%d\n", chno);
575
576 #if NEED_SWAP
577 swap_byte_4 (&th.version);
578 swap_byte_4 (&th.flag);
579 swap_byte_4 (&th.space_style);
580 swap_byte_4 (&th.KeyS);
581 swap_byte_4 (&th.MaxPress);
582 swap_byte_4 (&th.M_DUP_SEL);
583 swap_byte_4 (&th.DefC);
584 for (i = 0; i <= KeyNum; i++)
585 swap_byte_4 (&idx1[i]);
586 #endif
587 fwrite (&th, 1, sizeof (th), fw);
588 fwrite (keymap, 1, KeyNum, fw);
589 fwrite (kname, CH_SZ, KeyNum, fw);
590
591 fwrite (idx1, sizeof (gtab_idx1_t), KeyNum + 1, fw);
592
593 if (key64) {
594 #if NEED_SWAP
595 for (i = 0; i < chno; i++) {
596 swap_byte_8 (&itout64[i].key);
597 }
598 #endif
599 fwrite (itout64, sizeof (ITEM64), chno, fw);
600 #if 0
601 for(i=0; i < 100; i++)
602 dbg("%d] %c%c%c\n", i, itout64[i].ch[0], itout64[i].ch[1], itout64[i].ch[2]);
603 #endif
604 } else {
605 #if NEED_SWAP
606 for (i = 0; i < chno; i++) {
607 swap_byte_4 (&itout[i].key);
608 }
609 #endif
610 fwrite (itout, sizeof (ITEM), chno, fw);
611 }
612
613 if (phr_cou) {
614 phridx[phr_cou++] = prbf_cou;
615 printf ("phrase count:%d\n", phr_cou);
616
617 int ophr_cou = phr_cou;
618 #if NEED_SWAP
619 for (i = 0; i < phr_cou; i++)
620 swap_byte_4 (&phridx[i]);
621 swap_byte_4 (&phr_cou);
622 #endif
623 fwrite (&phr_cou, sizeof (int), 1, fw);
624 fwrite (phridx, sizeof (int), ophr_cou, fw);
625 fwrite (phrbuf, 1, prbf_cou, fw);
626 }
627
628 fclose (fw);
629
630 #if 0
631 char bzip2[128];
632 strcat(strcpy(bzip2, "bzip2 -f -k "), fname_tab);
633 system(bzip2);
634 #endif
635
636 return 0;
637 }
638