/*
 * This file is part of StarDict.
 *
 * StarDict is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * StarDict is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
 */
17
#include "stdio.h"
#include "stdlib.h"
#include <errno.h>
#include <locale.h>
#include <string.h>
#include <sys/stat.h>

#include <gtk/gtk.h>
#include <glib.h>
27
28 struct _worditem
29 {
30 gchar *word;
31 gchar *pinyin;
32 gchar *definition;
33 };
34
stardict_strcmp(const gchar * s1,const gchar * s2)35 gint stardict_strcmp(const gchar *s1, const gchar *s2)
36 {
37 gint a;
38 a = g_ascii_strcasecmp(s1, s2);
39 if (a == 0)
40 return strcmp(s1, s2);
41 else
42 return a;
43 }
44
comparefunc(gconstpointer a,gconstpointer b)45 gint comparefunc(gconstpointer a,gconstpointer b)
46 {
47 gint x;
48 x = stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
49 if (x == 0)
50 return ((struct _worditem *)a)->definition - ((struct _worditem *)b)->definition;
51 else
52 return x;
53 }
54
/*
 * Rewrite a pinyin string in place, replacing every two-character
 * sequence "u:" with the single character 'v'.  All other characters are
 * copied unchanged, so the result is never longer than the input.
 *
 * str: NUL-terminated, writable string.
 */
void to_pinyin(gchar *str)
{
	gchar *dst = str;
	const gchar *src;

	/* dst never overtakes src, so compacting within one buffer is safe. */
	for (src = str; *src != '\0'; dst++) {
		if (src[0] == 'u' && src[1] == ':') {
			*dst = 'v';
			src += 2;
		} else {
			*dst = *src;
			src += 1;
		}
	}
	*dst = '\0';
}
74
/*
 * Replace every '/' in the string with a newline, in place.
 *
 * str: NUL-terminated, writable string.
 */
void to_definition(gchar *str)
{
	gchar *slash;

	for (slash = strchr(str, '/'); slash != NULL; slash = strchr(slash + 1, '/'))
		*slash = '\n';
}
83
convert(char * filename)84 void convert(char *filename)
85 {
86 struct stat stats;
87 if (stat (filename, &stats) == -1)
88 {
89 printf("file not exist!\n");
90 return;
91 }
92 gchar *basefilename = g_path_get_basename(filename);
93 FILE *tabfile;
94 tabfile = fopen(filename,"r");
95
96 gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
97 size_t fread_size;
98 fread_size = fread (buffer, 1, stats.st_size, tabfile);
99 if (fread_size != (size_t)stats.st_size) {
100 g_print("fread error!\n");
101 }
102 fclose (tabfile);
103 buffer[stats.st_size] = '\0';
104
105 GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
106
107 gchar *p, *p1, *p2, *p3;
108 p = buffer;
109 if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters.
110 p+=3;
111 struct _worditem worditem;
112 glong linenum=1;
113 while (1) {
114 if (*p == '\0') {
115 g_print("over\n");
116 break;
117 }
118 p1 = strchr(p,'\n');
119 if (!p1) {
120 g_print("error, no end line\n");
121 return;
122 }
123 *p1 = '\0';
124 p1++;
125 p2 = strchr(p,'[');
126 if (!p2) {
127 g_print("error, no [, %ld\n", linenum);
128 return;
129 }
130 *p2 = '\0';
131 p2++;
132 p3 = strchr(p2, ']');
133 if (!p3) {
134 g_print("error, no ], %ld\n", linenum);
135 return;
136 }
137 *p3 = '\0';
138 p3++;
139 worditem.word = p;
140 to_pinyin(p2);
141 worditem.pinyin = p2;
142 to_definition(p3);
143 worditem.definition = p3;
144 g_strstrip(worditem.word);
145 g_strstrip(worditem.pinyin);
146 g_strstrip(worditem.definition);
147 if (!worditem.word[0]) {
148 g_print("%s-%ld, bad word!!!\n", basefilename, linenum);
149 p= p1;
150 linenum++;
151 continue;
152 }
153 if (!worditem.pinyin[0]) {
154 g_print("%s-%ld, bad pinyin!!!\n", basefilename, linenum);
155 }
156 if (!worditem.definition[0]) {
157 g_print("%s-%ld, bad definition!!!\n", basefilename, linenum);
158 }
159 if (!worditem.pinyin[0] && !worditem.definition[0]) {
160 g_print("%s-%ld, bad pinyin and definition!!!\n", basefilename, linenum);
161 p= p1;
162 linenum++;
163 continue;
164 }
165 g_array_append_val(array, worditem);
166 p= p1;
167 linenum++;
168 }
169 g_array_sort(array,comparefunc);
170
171 gchar idxfilename[256];
172 gchar dicfilename[256];
173 sprintf(idxfilename, "%s.idx", basefilename);
174 sprintf(dicfilename, "%s.dict", basefilename);
175 FILE *idxfile = fopen(idxfilename,"w");
176 FILE *dicfile = fopen(dicfilename,"w");
177
178 guint32 offset_old;
179 guint32 tmpglong;
180 struct _worditem *pworditem;
181 gint pinyin_len;
182 gint definition_len;
183 gulong i;
184 for (i=0; i< array->len; i++) {
185 offset_old = ftell(dicfile);
186 pworditem = &g_array_index(array, struct _worditem, i);
187 pinyin_len = strlen(pworditem->pinyin);
188 fwrite(pworditem->pinyin, 1 , pinyin_len+1,dicfile);
189 definition_len = strlen(pworditem->definition);
190 fwrite(pworditem->definition, 1 ,definition_len,dicfile);
191 fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile);
192 tmpglong = g_htonl(offset_old);
193 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
194 tmpglong = g_htonl(pinyin_len+1+ definition_len);
195 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
196 }
197 fclose(idxfile);
198 fclose(dicfile);
199 g_print("%s wordcount: %d\n", basefilename, array->len);
200
201 g_free(buffer);
202 g_array_free(array,TRUE);
203
204 gchar command[256];
205 sprintf(command, "dictzip %s.dict", basefilename);
206 int result;
207 result = system(command);
208 if (result == -1) {
209 g_print("system() error!\n");
210 }
211
212 g_free(basefilename);
213 }
214
/*
 * Entry point: convert every file named on the command line.
 *
 * Returns EXIT_FAILURE when no file name is given, so that scripts can
 * detect the usage error, and EXIT_SUCCESS otherwise.  convert() reports
 * its own problems and returns nothing, so per-file failures do not
 * affect the exit status.
 */
int main(int argc, char *argv[])
{
	if (argc < 2) {
		printf("please type this:\n./cedict cedict.gb.utf8\n");
		return EXIT_FAILURE;
	}

	setlocale(LC_ALL, "");
	for (int i = 1; i < argc; i++)
		convert(argv[i]);
	return EXIT_SUCCESS;
}
227
228