1 /* Copyright (C) 2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation version 2.1
6 * of the License.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18 #include "hime.h"
19
20 #include "pho.h"
21
/* Capacity, in entries, of the items[] and pho_items[] arrays in this file. */
#define MAX_CHS (100000)
23
/* One entry parsed from a line of the source table. */
typedef struct {
    u_short key;      /* key built from the first field of the line via lookup() */
    u_char ch[CH_SZ]; /* one UTF-8 character, or PHO_PHRASE_ESCAPE followed by
                       * the low three bytes (least significant first) of the
                       * entry's offset into the phrase area */
    short count;      /* value of the line's last field, parsed with atoi() */
    int oseq;         /* index at which the entry was read; used as the final
                       * tie-breaker by qcmp_key() */
} PHITEM;
30
/* Entries read from the source file; itemsN is the number currently in use. */
PHITEM items[MAX_CHS];
int itemsN;

/* Output records copied from items[] once it is in its final order;
 * pho_itemsN is the number currently in use. */
PHO_ITEM pho_items[MAX_CHS];
int pho_itemsN = 0;
36
qcmp_key(const void * aa,const void * bb)37 int qcmp_key (const void *aa, const void *bb) {
38 PHITEM *a = (PHITEM *) aa;
39 PHITEM *b = (PHITEM *) bb;
40
41 int d;
42 if ((d = a->key - b->key))
43 return a->key - b->key;
44
45 if ((d = b->count - a->count))
46 return d;
47
48 return a->oseq - b->oseq;
49 }
50
qcmp_key_del(const void * aa,const void * bb)51 int qcmp_key_del (const void *aa, const void *bb) {
52 PHITEM *a = (PHITEM *) aa;
53 PHITEM *b = (PHITEM *) bb;
54
55 int d;
56 if ((d = a->key - b->key))
57 return a->key - b->key;
58
59 return memcmp (a->ch, b->ch, CH_SZ);
60 }
61
main(int argc,char ** argv)62 int main (int argc, char **argv) {
63 char *fname = "pho.tab2.src";
64 FILE *fp;
65 char s[64];
66 int phrase_area_N = 0;
67 char *phrase_area = NULL;
68
69 if (argc > 1)
70 fname = argv[1];
71
72 if ((fp = fopen (fname, "rb")) == NULL)
73 p_err ("cannot open %s\n", fname);
74
75 while (!feof (fp)) {
76 s[0] = 0;
77 myfgets (s, sizeof (s), fp);
78 int len = strlen (s);
79
80 if (s[len - 1] == '\n')
81 s[--len] = 0;
82
83 if (len == 0)
84 continue;
85
86 phokey_t kk = 0;
87 char *p = s;
88
89 while (*p && *p != ' ' && *p != 9) {
90 if (kk == (BACK_QUOTE_NO << 9))
91 kk |= *p;
92 else
93 kk |= lookup ((u_char *) p);
94
95 p += utf8_sz (p);
96 }
97
98 items[itemsN].key = kk;
99
100 p++;
101
102 char *str = p;
103 while (*p && *p != ' ' && *p != 9)
104 p++;
105
106 *p = 0;
107 p++;
108
109 int slen = strlen (str);
110 if (slen == utf8_sz (str)) {
111 u8cpy ((char *) items[itemsN].ch, str);
112 } else {
113 dbg ("str %s\n", str);
114 int newN = phrase_area_N + slen + 1;
115 phrase_area = trealloc (phrase_area, char, newN);
116 strcpy (phrase_area + phrase_area_N, str);
117 items[itemsN].ch[0] = PHO_PHRASE_ESCAPE;
118 items[itemsN].ch[1] = phrase_area_N & 0xff;
119 items[itemsN].ch[2] = (phrase_area_N >> 8) & 0xff;
120 items[itemsN].ch[3] = (phrase_area_N >> 16) & 0xff;
121 phrase_area_N = newN;
122 }
123
124 items[itemsN].count = atoi (p);
125 items[itemsN].oseq = itemsN;
126
127 itemsN++;
128 }
129
130 fclose (fp);
131
132 qsort (items, itemsN, sizeof (PHITEM), qcmp_key_del);
133 int i;
134
135 #if 1
136 int newN = 1;
137 for (i = 1; i < itemsN; i++)
138 if (qcmp_key_del (&items[i - 1], &items[i]))
139 items[newN++] = items[i];
140 else {
141 #if 0
142 prph(items[i].key);
143 utf8_putchar((char *)items[i].ch);
144 dbg("\n");
145 #endif
146 }
147
148 if (itemsN != newN) {
149 dbg ("deleted %d %d\n", itemsN, newN);
150 itemsN = newN;
151 }
152 #endif
153
154 qsort (items, itemsN, sizeof (PHITEM), qcmp_key);
155
156 PHO_IDX pho_idx[3000];
157 u_short pho_idxN = 0;
158
159 for (i = 0; i < itemsN;) {
160 phokey_t key = items[i].key;
161 pho_idx[pho_idxN].key = key;
162 pho_idx[pho_idxN].start = i;
163 pho_idxN++;
164
165 int j;
166
167 for (j = i + 1; j < itemsN && items[j].key == key; j++)
168 ;
169
170 int l;
171 for (l = i; l < j; l++) {
172 bchcpy (pho_items[pho_itemsN].ch, items[l].ch);
173 pho_items[pho_itemsN].count = items[l].count;
174 pho_itemsN++;
175 }
176
177 i = j;
178 }
179
180 char *tp = strstr (fname, ".tab2.src");
181 if (!tp)
182 p_err ("file name should be *.tab2.src");
183
184 tp = strstr (fname, ".src");
185 *tp = 0;
186
187 char *fname_out = fname;
188
189 if ((fp = fopen (fname_out, "wb")) == NULL)
190 p_err ("cannot create %s\n", fname_out);
191
192 fwrite ("PH", 1, 2, fp);
193 // dbg("pho_itemsN:%d pho_idxN:%d\n", pho_itemsN, pho_idxN);
194 fwrite (&pho_idxN, sizeof (u_short), 1, fp);
195 fwrite (&pho_itemsN, sizeof (pho_itemsN), 1, fp);
196 fwrite (&phrase_area_N, sizeof (phrase_area_N), 1, fp);
197 #if 0
198 fclose(fp); exit(0);
199 #endif
200 fwrite (pho_idx, sizeof (PHO_IDX), pho_idxN, fp);
201 fwrite (pho_items, sizeof (PHO_ITEM), pho_itemsN, fp);
202
203 fwrite (phrase_area, 1, phrase_area_N, fp);
204
205 fclose (fp);
206
207 if (getenv ("HIME_NO_RELOAD") == NULL) {
208 /* caleb- did not found where "reload" is used.
209 * caleb- think the send_hime_message() here does nothing.
210 */
211 send_hime_message (GDK_DISPLAY (), "reload");
212 }
213
214 return 0;
215 }
216