1 /*
2  * Copyright (C) 2020 The HIME team, Taiwan
3  * Copyright (C) 1995-2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation version 2.1
8  * of the License.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 
20 #include <stdarg.h>
21 #include <stdio.h>
22 #include <string.h>
23 
24 #include <sys/types.h>
25 #if FREEBSD
26 #include <sys/param.h>
27 #include <sys/stat.h>
28 #endif
29 
30 #include "hime.h"
31 
32 #include "gtab.h"
33 #include "hime-endian.h"
34 
/* Input stream for the .cin source and output stream for the .gtab table;
 * both are opened in main(). */
FILE *fr;
FILE *fw;

/* Number of lines read so far by get_line(); quoted in error messages. */
int lineno;

/* Buffer holding the line most recently read by get_line(). */
char tt[1024];
38 
/* Return a pointer to the first character of s that is neither a space nor
 * a tab.  When s holds nothing else, the result points at the terminating
 * NUL. */
static char *skip_space (char *s) {
    return s + strspn (s, " \t");
}
45 
/* Return a pointer to the first space or tab in s, or to the terminating
 * NUL when s contains neither. */
static char *to_space (char *s) {
    return s + strcspn (s, " \t");
}
52 
/*
 * Strip trailing newlines, spaces and tabs from s in place.
 *
 * The scan is bounded by the start of s, so a string made up entirely of
 * such characters becomes the empty string and nothing before s is ever
 * read or written.
 */
static void del_newline_space (char *s) {
    size_t len = strlen (s);

    while (len > 0) {
        const char last = s[len - 1];
        if (last != '\n' && last != ' ' && last != '\t') {
            break;
        }
        len--;
    }

    s[len] = 0;
}
67 
get_line(void)68 static void get_line (void) {
69     while (!feof (fr)) {
70         memset (tt, 0, sizeof (tt));
71         myfgets (tt, sizeof (tt), fr);
72 
73         lineno++;
74         size_t len = strlen (tt);
75 
76         if (tt[len - 1] == '\n') {
77             tt[len - 1] = 0;
78         }
79 
80         if (tt[0] == '#' || strlen (tt) < 3) {
81             continue;
82         }
83         break;
84     }
85 }
86 
cmd_arg(char ** cmd,char ** arg)87 static void cmd_arg (char **cmd, char **arg) {
88 
89     get_line ();
90     char *s = tt;
91 
92     if (!*s) {
93         *cmd = *arg = s;
94         return;
95     }
96 
97     s = skip_space (s);
98     char *t = to_space (s);
99     *cmd = s;
100     if (!(*t)) {
101         *arg = t;
102         return;
103     }
104 
105     *t = 0;
106     t++;
107 
108     t = skip_space (t);
109     del_newline_space (t);
110 
111     char *p = NULL;
112     if ((p = strchr (t, '\t'))) {
113         *p = 0;
114     }
115 
116     *arg = t;
117 }
118 
/* Return 1 when s and t hold the same string, 0 otherwise. */
static int str_eq (const char *s, const char *t) {
    return strcmp (s, t) == 0;
}
122 
123 typedef struct {
124     u_int32_t key;
125     uint8_t ch[CH_SZ];
126     int oseq;
127 } ITEM2;
128 
129 typedef struct {
130     u_int64_t key;
131     u_int8_t ch[CH_SZ];
132     int oseq;
133 } ITEM2_64;
134 
135 #define MAX_K (500000)
136 
137 static ITEM2 itar[MAX_K];
138 static ITEM2_64 itar64[MAX_K];
139 
140 static ITEM itout[MAX_K];
141 static ITEM64 itout64[MAX_K];
142 
qcmp(const void * aa,const void * bb)143 static int qcmp (const void *aa, const void *bb) {
144     const ITEM2 *a = (ITEM2 *) aa;
145     const ITEM2 *b = (ITEM2 *) bb;
146 
147     if (a->key > b->key) {
148         return 1;
149     }
150     if (a->key < b->key) {
151         return -1;
152     }
153 
154     return a->oseq - b->oseq;
155 }
156 
qcmp_64(const void * aa,const void * bb)157 static int qcmp_64 (const void *aa, const void *bb) {
158     ITEM2_64 *a = (ITEM2_64 *) aa;
159     ITEM2_64 *b = (ITEM2_64 *) bb;
160 
161     if (a->key > b->key) {
162         return 1;
163     }
164     if (a->key < b->key) {
165         return -1;
166     }
167 
168     return a->oseq - b->oseq;
169 }
170 
/* Lower-case an ASCII capital letter; any other value is returned as is.
 * The argument is evaluated more than once, so it must have no side
 * effects. */
#define mtolower(ch) ((ch) < 'A' || (ch) > 'Z' ? (ch) : (ch) + 0x20)

/* kno[c] is the 1-based key number main() assigned to key character c;
 * 0 means the character has no key number. */
static char kno[128];
174 
main(int argc,char ** argv)175 int main (int argc, char **argv) {
176 
177     printf ("-- hime-cin2gtab encoding UTF-8 --\n");
178     printf ("--- please use iconv -f big5 -t utf-8 if your file is in big5 encoding\n");
179 
180     char fname[64];
181     if (argc <= 1) {
182         printf ("Enter table file name [.cin] : ");
183         scanf ("%s", fname);
184     } else {
185         strncpy (fname, argv[1], sizeof (fname));
186     }
187 
188     if (!strcmp (fname, "-v") || !strcmp (fname, "--version")) {
189         p_err ("hime-cin2gtab for hime %s \n", HIME_VERSION);
190         exit (0);
191     }
192 
193     char *p = NULL;
194     if ((p = strstr (fname, ".cin"))) {
195         *p = 0;
196     }
197 
198     char fname_cin[64];
199     char fname_tab[64];
200     strncpy (fname_cin, fname, sizeof (fname_cin));
201     strncpy (fname_tab, fname, sizeof (fname_tab));
202     strncat (fname_cin, ".cin", 4);
203     strncat (fname_tab, ".gtab", 5);
204 
205     if ((fr = fopen (fname_cin, "rb")) == NULL) {
206         p_err ("Cannot open %s\n", fname_cin);
207     }
208 
209     skip_utf8_sigature (fr);
210 
211     struct TableHead th;
212     char keymap[128];
213     memset (&th, 0, sizeof (th));
214     memset (kno, 0, sizeof (kno));
215     memset (keymap, 0, sizeof (keymap));
216 
217     memset (itar, 0, sizeof (itar));
218     memset (itout, 0, sizeof (itout));
219     memset (itar64, 0, sizeof (itar64));
220     memset (itout64, 0, sizeof (itout64));
221 
222     char *cmd = NULL;
223     char *arg = NULL;
224     cmd_arg (&cmd, &arg);
225     if (str_eq (cmd, "%gen_inp")) {
226         dbg ("skip gen_inp\n");
227         cmd_arg (&cmd, &arg);
228     }
229 
230     if (!str_eq (cmd, "%ename") || !(*arg)) {
231         p_err ("%d:  %%ename english_name  expected", lineno);
232     }
233     arg[15] = 0;
234 
235     cmd_arg (&cmd, &arg);
236     if (!(str_eq (cmd, "%prompt") || str_eq (cmd, "%cname")) || !(*arg)) {
237         p_err ("%d:  %%prompt prompt_name  expected", lineno);
238     }
239     strncpy (th.cname, arg, MAX_CNAME);
240     dbg ("cname %s\n", th.cname);
241 
242     cmd_arg (&cmd, &arg);
243     if (!str_eq (cmd, "%selkey") || !(*arg)) {
244         p_err ("%d:  %%selkey select_key_list expected", lineno);
245     }
246 
247     if (strlen (arg) >= sizeof (th.selkey)) {
248         memcpy (th.selkey, arg, sizeof (th.selkey));
249         strcpy (th.selkey2, arg + sizeof (th.selkey));
250         dbg ("th.selkey2 %s\n", th.selkey2);
251     } else {
252         strcpy (th.selkey, arg);
253     }
254 
255     cmd_arg (&cmd, &arg);
256     if (!str_eq (cmd, "%dupsel") || !(*arg)) {
257         if (th.selkey[sizeof (th.selkey) - 1]) {
258             th.M_DUP_SEL = sizeof (th.selkey) + strlen (th.selkey2);
259         } else {
260             th.M_DUP_SEL = strlen (th.selkey);
261         }
262     } else {
263         th.M_DUP_SEL = atoi (arg);
264         cmd_arg (&cmd, &arg);
265     }
266 
267     for (;;) {
268         if (str_eq (cmd, "%endkey")) {
269             strcpy (th.endkey, arg);
270             cmd_arg (&cmd, &arg);
271         } else if (str_eq (cmd, "%space_style")) {
272             th.space_style = (GTAB_space_pressed_E) atoi (arg);
273             cmd_arg (&cmd, &arg);
274         } else if (str_eq (cmd, "%keep_key_case")) {
275             th.flag |= FLAG_KEEP_KEY_CASE;
276             cmd_arg (&cmd, &arg);
277         } else if (str_eq (cmd, "%symbol_kbm")) {
278             th.flag |= FLAG_GTAB_SYM_KBM;
279             cmd_arg (&cmd, &arg);
280         } else if (str_eq (cmd, "%phase_auto_skip_endkey")) {
281             th.flag |= FLAG_PHRASE_AUTO_SKIP_ENDKEY;
282             cmd_arg (&cmd, &arg);
283         } else if (str_eq (cmd, "%flag_auto_select_by_phrase")) {
284             dbg ("flag_auto_select_by_phrase\n");
285             th.flag |= FLAG_AUTO_SELECT_BY_PHRASE;
286             cmd_arg (&cmd, &arg);
287         } else if (str_eq (cmd, "%flag_disp_partial_match")) {
288             dbg ("flag_disp_partial_match\n");
289             th.flag |= FLAG_GTAB_DISP_PARTIAL_MATCH;
290             cmd_arg (&cmd, &arg);
291         } else if (str_eq (cmd, "%flag_disp_full_match")) {
292             dbg ("flag_disp_full_match\n");
293             th.flag |= FLAG_GTAB_DISP_FULL_MATCH;
294             cmd_arg (&cmd, &arg);
295         } else if (str_eq (cmd, "%flag_vertical_selection")) {
296             dbg ("flag_vertical_selection\n");
297             th.flag |= FLAG_GTAB_VERTICAL_SELECTION;
298             cmd_arg (&cmd, &arg);
299         } else if (str_eq (cmd, "%flag_press_full_auto_send")) {
300             dbg ("flag_press_full_auto_send\n");
301             th.flag |= FLAG_GTAB_PRESS_FULL_AUTO_SEND;
302             cmd_arg (&cmd, &arg);
303         } else if (str_eq (cmd, "%flag_unique_auto_send")) {
304             dbg ("flag_unique_auto_send\n");
305             th.flag |= FLAG_GTAB_UNIQUE_AUTO_SEND;
306             cmd_arg (&cmd, &arg);
307         } else {
308             break;
309         }
310     }
311 
312     if (!str_eq (cmd, "%keyname") || !str_eq (arg, "begin")) {
313         p_err ("%d:  %%keyname begin   expected, instead of %s %s", lineno, cmd, arg);
314     }
315 
316     int KeyNum = 0;
317     char kname[128][CH_SZ];
318     for (KeyNum = 0;;) {
319         char k = 0;
320 
321         cmd_arg (&cmd, &arg);
322         if (str_eq (cmd, "%keyname")) {
323             break;
324         }
325         if (BITON (th.flag, FLAG_KEEP_KEY_CASE)) {
326             k = cmd[0];
327         } else {
328             k = mtolower (cmd[0]);
329         }
330 
331         if (kno[(int) k]) {
332             p_err ("%d:  key %c is already used", lineno, k);
333         }
334 
335         kno[(int) k] = ++KeyNum;
336         keymap[KeyNum] = k;
337         bchcpy (&kname[KeyNum][0], arg);
338     }
339 
340     keymap[0] = kname[0][0] = kname[0][1] = ' ';
341     KeyNum++;
342     th.KeyS = KeyNum; /* include space */
343 
344     cmd_arg (&cmd, &arg);
345 
346     if (str_eq (cmd, "%quick") && str_eq (arg, "begin")) {
347         dbg (".. quick keys defined\n");
348         for (int quick_def = 0;; quick_def++) {
349 
350             cmd_arg (&cmd, &arg);
351             if (str_eq (cmd, "%quick")) {
352                 break;
353             }
354 
355             const char k = kno[mtolower (cmd[0])] - 1;
356 
357             int N = 0;
358             char *p = arg;
359 
360             if (strlen (cmd) == 1) {
361                 while (*p) {
362                     int len = u8cpy (th.qkeys.quick1[(int) k][N++], p);
363                     p += len;
364                 }
365             } else if (strlen (cmd) == 2) {
366                 const int k1 = kno[mtolower (cmd[1])] - 1;
367                 while (*p) {
368                     char tp[4];
369                     int len = u8cpy (tp, p);
370 
371                     if (utf8_eq (tp, "□"))
372                         tp[0] = 0;
373 
374                     u8cpy (th.qkeys.quick2[(int) k][(int) k1][N++], tp);
375                     p += len;
376                 }
377             } else {
378                 p_err ("%d:  %quick only 1&2 keys are allowed '%s'", lineno, cmd);
379             }
380         }
381     }
382 
383     const long pos = ftell (fr);
384     const int olineno = lineno;
385     gboolean key64 = FALSE;
386     int max_key_len = 0;
387 
388     while (!feof (fr)) {
389 
390         cmd_arg (&cmd, &arg);
391         if (!cmd[0] || !arg[0])
392             continue;
393 
394         if (!strcmp (cmd, "%chardef")) {
395             if (!strcmp (arg, "end")) {
396                 break;
397             } else {
398                 continue;
399             }
400         }
401 
402         int len = strlen (cmd);
403 
404         if (max_key_len < len) {
405             max_key_len = len;
406         }
407     }
408 
409     fseek (fr, pos, SEEK_SET);
410     lineno = olineno;
411 
412     INMD inmd, *cur_inmd = &inmd;
413 
414     cur_inmd->key64 = key64;
415     cur_inmd->tbl64 = itout64;
416     cur_inmd->tbl = itout;
417 
418     if (KeyNum < 64) {
419         cur_inmd->keybits = 6;
420     } else {
421         cur_inmd->keybits = 7;
422     }
423 
424     if (cur_inmd->keybits * max_key_len > 32) {
425         cur_inmd->key64 = key64 = TRUE;
426     }
427 
428     if (key64) {
429         dbg ("key64\n");
430     }
431 
432     printf ("KeyNum:%d keybits:%d\n", KeyNum, cur_inmd->keybits);
433 
434     th.keybits = cur_inmd->keybits;
435     cur_inmd->last_k_bitn = (((cur_inmd->key64 ? 64 : 32) / cur_inmd->keybits) - 1) * cur_inmd->keybits;
436 
437     puts ("char def");
438     int chno = 0;
439     int *phridx = NULL;
440     int phr_cou = 0;
441     char *phrbuf = NULL;
442     int prbf_cou = 0;
443     while (!feof (fr)) {
444 
445         cmd_arg (&cmd, &arg);
446         if (!cmd[0] || !arg[0])
447             continue;
448 
449         if (!strcmp (cmd, "%chardef")) {
450             if (!strcmp (arg, "end"))
451                 break;
452             else
453                 continue;
454         }
455 
456         int len = strlen (cmd);
457         if (len > th.MaxPress) {
458             th.MaxPress = len;
459         }
460 
461         if (len > 10)
462             p_err ("%d:  only <= 10 keys is allowed '%s'", lineno, cmd);
463 
464         u_int64_t kk = 0;
465         for (int i = 0; i < len; i++) {
466             int key = BITON (th.flag, FLAG_KEEP_KEY_CASE) ? cmd[i] : mtolower (cmd[i]);
467 
468             int k = kno[key];
469             if (!k) {
470                 p_err ("%d: key undefined in keyname '%c'\n", lineno, cmd[i]);
471             }
472 
473             kk |= (u_int64_t) k << (LAST_K_bitN - i * th.keybits);
474         }
475 
476         //    dbg("%s kk:%llx\n", cmd, kk);
477 
478         if (key64) {
479             memcpy (&itar64[chno].key, &kk, 8);
480             itar64[chno].oseq = chno;
481         } else {
482             uint32_t key32 = (uint32_t) kk;
483             memcpy (&itar[chno].key, &key32, 4);
484             itar[chno].oseq = chno;
485         }
486 
487         if ((len = strlen (arg)) <= CH_SZ && (arg[0] & 0x80)) {
488             char out[CH_SZ + 1];
489 
490             memset (out, 0, sizeof (out));
491             memcpy (out, arg, len);
492 
493             if (key64)
494                 bchcpy (itar64[chno].ch, out);
495             else
496                 bchcpy (itar[chno].ch, out);
497 
498         } else {
499             if (key64) {
500                 itar64[chno].ch[0] = phr_cou >> 16;
501                 itar64[chno].ch[1] = (phr_cou >> 8) & 0xff;
502                 itar64[chno].ch[2] = phr_cou & 0xff;
503             } else {
504                 itar[chno].ch[0] = phr_cou >> 16;
505                 itar[chno].ch[1] = (phr_cou >> 8) & 0xff;
506                 itar[chno].ch[2] = phr_cou & 0xff;
507             }
508 
509             if (len > MAX_CIN_PHR)
510                 p_err ("phrase too long: %s  max:%d bytes\n", arg, MAX_CIN_PHR);
511 
512             phridx = trealloc (phridx, int, phr_cou + 1);
513             phridx[phr_cou++] = prbf_cou;
514             phrbuf = (char *) realloc (phrbuf, prbf_cou + len + 1);
515             strcpy (&phrbuf[prbf_cou], arg);
516             //      printf("phrase:%d  len:%d'%s'\n", phr_cou, len, arg);
517             prbf_cou += len;
518         }
519 
520         chno++;
521     }
522     fclose (fr);
523 
524 #define _sort qsort
525 
526     printf ("MaxPress: %d\n", th.MaxPress);
527 
528     th.DefC = chno;
529     cur_inmd->DefChars = chno;
530 
531     if (key64)
532         _sort (itar64, chno, sizeof (ITEM2_64), qcmp_64);
533     else
534         _sort (itar, chno, sizeof (ITEM2), qcmp);
535 
536     if (key64) {
537         for (int i = 0; i < chno; i++) {
538             memcpy (&itout64[i], &itar64[i], sizeof (ITEM64));
539         }
540     } else {
541         for (int i = 0; i < chno; i++) {
542             memcpy (&itout[i], &itar[i], sizeof (ITEM));
543         }
544     }
545 
546     char def1[256];
547     gtab_idx1_t idx1[256];
548     memset (def1, 0, sizeof (def1));
549     memset (idx1, 0, sizeof (idx1));
550 
551     u_int64_t keymask = KEY_MASK;
552     for (int i = 0; i < chno; i++) {
553         u_int64_t key = CONVT2 (cur_inmd, i);
554         int kk = (int) ((key >> LAST_K_bitN) & keymask);
555 
556         if (!def1[kk]) {
557             idx1[kk] = (gtab_idx1_t) i;
558             def1[kk] = 1;
559         }
560     }
561 
562     idx1[KeyNum] = chno;
563     for (int i = KeyNum - 1; i > 0; i--) {
564         if (!def1[i]) {
565             idx1[i] = idx1[i + 1];
566         }
567     }
568 
569     if ((fw = fopen (fname_tab, "wb")) == NULL) {
570         p_err ("Cannot create: %s", fname_tab);
571         exit (1);
572     }
573 
574     printf ("Defined Characters:%d\n", chno);
575 
576 #if NEED_SWAP
577     swap_byte_4 (&th.version);
578     swap_byte_4 (&th.flag);
579     swap_byte_4 (&th.space_style);
580     swap_byte_4 (&th.KeyS);
581     swap_byte_4 (&th.MaxPress);
582     swap_byte_4 (&th.M_DUP_SEL);
583     swap_byte_4 (&th.DefC);
584     for (i = 0; i <= KeyNum; i++)
585         swap_byte_4 (&idx1[i]);
586 #endif
587     fwrite (&th, 1, sizeof (th), fw);
588     fwrite (keymap, 1, KeyNum, fw);
589     fwrite (kname, CH_SZ, KeyNum, fw);
590 
591     fwrite (idx1, sizeof (gtab_idx1_t), KeyNum + 1, fw);
592 
593     if (key64) {
594 #if NEED_SWAP
595         for (i = 0; i < chno; i++) {
596             swap_byte_8 (&itout64[i].key);
597         }
598 #endif
599         fwrite (itout64, sizeof (ITEM64), chno, fw);
600 #if 0
601     for(i=0; i < 100; i++)
602       dbg("%d] %c%c%c\n", i, itout64[i].ch[0], itout64[i].ch[1], itout64[i].ch[2]);
603 #endif
604     } else {
605 #if NEED_SWAP
606         for (i = 0; i < chno; i++) {
607             swap_byte_4 (&itout[i].key);
608         }
609 #endif
610         fwrite (itout, sizeof (ITEM), chno, fw);
611     }
612 
613     if (phr_cou) {
614         phridx[phr_cou++] = prbf_cou;
615         printf ("phrase count:%d\n", phr_cou);
616 
617         int ophr_cou = phr_cou;
618 #if NEED_SWAP
619         for (i = 0; i < phr_cou; i++)
620             swap_byte_4 (&phridx[i]);
621         swap_byte_4 (&phr_cou);
622 #endif
623         fwrite (&phr_cou, sizeof (int), 1, fw);
624         fwrite (phridx, sizeof (int), ophr_cou, fw);
625         fwrite (phrbuf, 1, prbf_cou, fw);
626     }
627 
628     fclose (fw);
629 
630 #if 0
631   char bzip2[128];
632   strcat(strcpy(bzip2, "bzip2 -f -k "), fname_tab);
633   system(bzip2);
634 #endif
635 
636     return 0;
637 }
638