1 /* Copyright (C) 2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
2  *
3  * This library is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU Lesser General Public
5  * License as published by the Free Software Foundation version 2.1
6  * of the License.
7  *
8  * This library is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * Lesser General Public License for more details.
12  *
13  * You should have received a copy of the GNU Lesser General Public
14  * License along with this library; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
16  */
17 
18 #include "hime.h"
19 
20 #include "pho.h"
21 
/* Capacity, in entries, of the items[] and pho_items[] arrays defined below. */
#define MAX_CHS (100000)
23 
/* One entry parsed from a line of the source table (filled in by main()). */
typedef struct {
    /* Phonetic key accumulated from the first field of the line. */
    u_short key;
    /* Either a single UTF-8 character, or PHO_PHRASE_ESCAPE followed by a
     * 3-byte little-endian offset into the phrase area. */
    u_char ch[CH_SZ];
    /* Value of the third field, parsed with atoi(); higher sorts first. */
    short count;
    /* Position of the entry in the input; final tie-breaker when sorting. */
    int oseq;
} PHITEM;
30 
/* Entries read from the source file, and how many of them are in use. */
PHITEM items[MAX_CHS];
int itemsN;

/* Records in the form written to the output file, and how many are in use. */
PHO_ITEM pho_items[MAX_CHS];
int pho_itemsN = 0;
36 
qcmp_key(const void * aa,const void * bb)37 int qcmp_key (const void *aa, const void *bb) {
38     PHITEM *a = (PHITEM *) aa;
39     PHITEM *b = (PHITEM *) bb;
40 
41     int d;
42     if ((d = a->key - b->key))
43         return a->key - b->key;
44 
45     if ((d = b->count - a->count))
46         return d;
47 
48     return a->oseq - b->oseq;
49 }
50 
qcmp_key_del(const void * aa,const void * bb)51 int qcmp_key_del (const void *aa, const void *bb) {
52     PHITEM *a = (PHITEM *) aa;
53     PHITEM *b = (PHITEM *) bb;
54 
55     int d;
56     if ((d = a->key - b->key))
57         return a->key - b->key;
58 
59     return memcmp (a->ch, b->ch, CH_SZ);
60 }
61 
main(int argc,char ** argv)62 int main (int argc, char **argv) {
63     char *fname = "pho.tab2.src";
64     FILE *fp;
65     char s[64];
66     int phrase_area_N = 0;
67     char *phrase_area = NULL;
68 
69     if (argc > 1)
70         fname = argv[1];
71 
72     if ((fp = fopen (fname, "rb")) == NULL)
73         p_err ("cannot open %s\n", fname);
74 
75     while (!feof (fp)) {
76         s[0] = 0;
77         myfgets (s, sizeof (s), fp);
78         int len = strlen (s);
79 
80         if (s[len - 1] == '\n')
81             s[--len] = 0;
82 
83         if (len == 0)
84             continue;
85 
86         phokey_t kk = 0;
87         char *p = s;
88 
89         while (*p && *p != ' ' && *p != 9) {
90             if (kk == (BACK_QUOTE_NO << 9))
91                 kk |= *p;
92             else
93                 kk |= lookup ((u_char *) p);
94 
95             p += utf8_sz (p);
96         }
97 
98         items[itemsN].key = kk;
99 
100         p++;
101 
102         char *str = p;
103         while (*p && *p != ' ' && *p != 9)
104             p++;
105 
106         *p = 0;
107         p++;
108 
109         int slen = strlen (str);
110         if (slen == utf8_sz (str)) {
111             u8cpy ((char *) items[itemsN].ch, str);
112         } else {
113             dbg ("str %s\n", str);
114             int newN = phrase_area_N + slen + 1;
115             phrase_area = trealloc (phrase_area, char, newN);
116             strcpy (phrase_area + phrase_area_N, str);
117             items[itemsN].ch[0] = PHO_PHRASE_ESCAPE;
118             items[itemsN].ch[1] = phrase_area_N & 0xff;
119             items[itemsN].ch[2] = (phrase_area_N >> 8) & 0xff;
120             items[itemsN].ch[3] = (phrase_area_N >> 16) & 0xff;
121             phrase_area_N = newN;
122         }
123 
124         items[itemsN].count = atoi (p);
125         items[itemsN].oseq = itemsN;
126 
127         itemsN++;
128     }
129 
130     fclose (fp);
131 
132     qsort (items, itemsN, sizeof (PHITEM), qcmp_key_del);
133     int i;
134 
135 #if 1
136     int newN = 1;
137     for (i = 1; i < itemsN; i++)
138         if (qcmp_key_del (&items[i - 1], &items[i]))
139             items[newN++] = items[i];
140         else {
141 #if 0
142       prph(items[i].key);
143       utf8_putchar((char *)items[i].ch);
144       dbg("\n");
145 #endif
146         }
147 
148     if (itemsN != newN) {
149         dbg ("deleted %d %d\n", itemsN, newN);
150         itemsN = newN;
151     }
152 #endif
153 
154     qsort (items, itemsN, sizeof (PHITEM), qcmp_key);
155 
156     PHO_IDX pho_idx[3000];
157     u_short pho_idxN = 0;
158 
159     for (i = 0; i < itemsN;) {
160         phokey_t key = items[i].key;
161         pho_idx[pho_idxN].key = key;
162         pho_idx[pho_idxN].start = i;
163         pho_idxN++;
164 
165         int j;
166 
167         for (j = i + 1; j < itemsN && items[j].key == key; j++)
168             ;
169 
170         int l;
171         for (l = i; l < j; l++) {
172             bchcpy (pho_items[pho_itemsN].ch, items[l].ch);
173             pho_items[pho_itemsN].count = items[l].count;
174             pho_itemsN++;
175         }
176 
177         i = j;
178     }
179 
180     char *tp = strstr (fname, ".tab2.src");
181     if (!tp)
182         p_err ("file name should be *.tab2.src");
183 
184     tp = strstr (fname, ".src");
185     *tp = 0;
186 
187     char *fname_out = fname;
188 
189     if ((fp = fopen (fname_out, "wb")) == NULL)
190         p_err ("cannot create %s\n", fname_out);
191 
192     fwrite ("PH", 1, 2, fp);
193     //  dbg("pho_itemsN:%d  pho_idxN:%d\n", pho_itemsN, pho_idxN);
194     fwrite (&pho_idxN, sizeof (u_short), 1, fp);
195     fwrite (&pho_itemsN, sizeof (pho_itemsN), 1, fp);
196     fwrite (&phrase_area_N, sizeof (phrase_area_N), 1, fp);
197 #if 0
198   fclose(fp); exit(0);
199 #endif
200     fwrite (pho_idx, sizeof (PHO_IDX), pho_idxN, fp);
201     fwrite (pho_items, sizeof (PHO_ITEM), pho_itemsN, fp);
202 
203     fwrite (phrase_area, 1, phrase_area_N, fp);
204 
205     fclose (fp);
206 
207     if (getenv ("HIME_NO_RELOAD") == NULL) {
208         /* caleb- did not found where "reload" is used.
209      * caleb- think the send_hime_message() here does nothing.
210      */
211         send_hime_message (GDK_DISPLAY (), "reload");
212     }
213 
214     return 0;
215 }
216