1 /*
2  * This file is part of StarDict.
3  *
4  * StarDict is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * StarDict is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "stdio.h"
19 #include "stdlib.h"
20 #include <locale.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 
24 
25 #include <gtk/gtk.h>
26 #include <glib.h>
27 
28 struct _worditem
29 {
30 	gchar *word;
31 	gchar *pinyin;
32 	gchar *definition;
33 };
34 
stardict_strcmp(const gchar * s1,const gchar * s2)35 gint stardict_strcmp(const gchar *s1, const gchar *s2)
36 {
37 	gint a;
38 	a = g_ascii_strcasecmp(s1, s2);
39 	if (a == 0)
40 		return strcmp(s1, s2);
41 	else
42 		return a;
43 }
44 
comparefunc(gconstpointer a,gconstpointer b)45 gint comparefunc(gconstpointer a,gconstpointer b)
46 {
47 	gint x;
48 	x = stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
49 	if (x == 0)
50 		return ((struct _worditem *)a)->definition - ((struct _worditem *)b)->definition;
51 	else
52 		return x;
53 }
54 
/*
 * Rewrite str in place, replacing every two-character sequence "u:" with
 * the single character 'v'.  All other characters are kept as they are,
 * so the result is never longer than the input.
 */
void to_pinyin(gchar *str)
{
	const gchar *src = str;	/* next character to read */
	gchar *dst = str;	/* next position to write; never ahead of src */

	while (*src != '\0') {
		if (src[0] == 'u' && src[1] == ':') {
			*dst++ = 'v';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';
}
74 
/*
 * Rewrite str in place, turning every '/' into a newline character.
 */
void to_definition(gchar *str)
{
	for (gchar *slash = strchr(str, '/'); slash != NULL; slash = strchr(slash + 1, '/'))
		*slash = '\n';
}
83 
convert(char * filename)84 void convert(char *filename)
85 {
86 	struct stat stats;
87 	if (stat (filename, &stats) == -1)
88 	{
89 		printf("file not exist!\n");
90 		return;
91 	}
92 	gchar *basefilename = g_path_get_basename(filename);
93 	FILE *tabfile;
94 	tabfile = fopen(filename,"r");
95 
96 	gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
97 	size_t fread_size;
98 	fread_size = fread (buffer, 1, stats.st_size, tabfile);
99 	if (fread_size != (size_t)stats.st_size) {
100 		g_print("fread error!\n");
101 	}
102 	fclose (tabfile);
103 	buffer[stats.st_size] = '\0';
104 
105 	GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
106 
107 	gchar *p, *p1, *p2, *p3;
108 	p = buffer;
109 	if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters.
110 		p+=3;
111 	struct _worditem worditem;
112 	glong linenum=1;
113 	while (1) {
114 		if (*p == '\0') {
115                         g_print("over\n");
116                         break;
117                 }
118 		p1 = strchr(p,'\n');
119 		if (!p1) {
120 			g_print("error, no end line\n");
121 			return;
122 		}
123 		*p1 = '\0';
124 		p1++;
125 		p2 = strchr(p,'[');
126 		if (!p2) {
127 			g_print("error, no [, %ld\n", linenum);
128 			return;
129 		}
130 		*p2 = '\0';
131 		p2++;
132 		p3 = strchr(p2, ']');
133 		if (!p3) {
134 			g_print("error, no ], %ld\n", linenum);
135 			return;
136 		}
137 		*p3 = '\0';
138 		p3++;
139 		worditem.word = p;
140 		to_pinyin(p2);
141 		worditem.pinyin = p2;
142 		to_definition(p3);
143 		worditem.definition = p3;
144 		g_strstrip(worditem.word);
145 		g_strstrip(worditem.pinyin);
146 		g_strstrip(worditem.definition);
147 		if (!worditem.word[0]) {
148 			g_print("%s-%ld, bad word!!!\n", basefilename, linenum);
149 			p= p1;
150                 	linenum++;
151 			continue;
152 		}
153 		if (!worditem.pinyin[0]) {
154 			g_print("%s-%ld, bad pinyin!!!\n", basefilename, linenum);
155 		}
156 		if (!worditem.definition[0]) {
157 			g_print("%s-%ld, bad definition!!!\n", basefilename, linenum);
158 		}
159 		if (!worditem.pinyin[0] && !worditem.definition[0]) {
160 			g_print("%s-%ld, bad pinyin and definition!!!\n", basefilename, linenum);
161 			p= p1;
162 			linenum++;
163 			continue;
164 		}
165 		g_array_append_val(array, worditem);
166 		p= p1;
167 		linenum++;
168 	}
169 	g_array_sort(array,comparefunc);
170 
171 	gchar idxfilename[256];
172 	gchar dicfilename[256];
173 	sprintf(idxfilename, "%s.idx", basefilename);
174 	sprintf(dicfilename, "%s.dict", basefilename);
175 	FILE *idxfile = fopen(idxfilename,"w");
176 	FILE *dicfile = fopen(dicfilename,"w");
177 
178 	guint32 offset_old;
179         guint32 tmpglong;
180         struct _worditem *pworditem;
181 	gint pinyin_len;
182         gint definition_len;
183         gulong i;
184         for (i=0; i< array->len; i++) {
185                 offset_old = ftell(dicfile);
186                 pworditem = &g_array_index(array, struct _worditem, i);
187 		pinyin_len = strlen(pworditem->pinyin);
188 		fwrite(pworditem->pinyin, 1 , pinyin_len+1,dicfile);
189                 definition_len = strlen(pworditem->definition);
190                 fwrite(pworditem->definition, 1 ,definition_len,dicfile);
191                 fwrite(pworditem->word,sizeof(gchar),strlen(pworditem->word)+1,idxfile);
192                 tmpglong = g_htonl(offset_old);
193                 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
194                 tmpglong = g_htonl(pinyin_len+1+ definition_len);
195                 fwrite(&(tmpglong),sizeof(guint32),1,idxfile);
196         }
197         fclose(idxfile);
198         fclose(dicfile);
199         g_print("%s wordcount: %d\n", basefilename, array->len);
200 
201 	g_free(buffer);
202 	g_array_free(array,TRUE);
203 
204 	gchar command[256];
205         sprintf(command, "dictzip %s.dict", basefilename);
206 	int result;
207         result = system(command);
208 	if (result == -1) {
209 		g_print("system() error!\n");
210 	}
211 
212 	g_free(basefilename);
213 }
214 
/*
 * Program entry point: convert every file named on the command line.
 *
 * Returns EXIT_FAILURE when no file name is given (after printing a usage
 * hint), so that callers can detect the misuse; EXIT_SUCCESS otherwise.
 * convert() reports its own problems and returns nothing, so failures of
 * individual files do not affect the exit status.
 */
int main(int argc,char * argv [])
{
	if (argc<2) {
		printf("please type this:\n./cedict cedict.gb.utf8\n");
		return EXIT_FAILURE;
	}

	setlocale(LC_ALL, "");
	for (int i=1; i< argc; i++)
		convert (argv[i]);
	return EXIT_SUCCESS;
}
227 
228