1 /*
2  * This file is part of StarDict.
3  *
4  * StarDict is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * StarDict is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "stdio.h"
19 #include "stdlib.h"
20 #include <locale.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 
24 #include <string>
25 
26 #include <gtk/gtk.h>
27 #include <glib.h>
28 
29 struct _worditem
30 {
31 	gchar *word;
32 	gchar *definition;
33 	gboolean definition_need_free;
34 };
35 
stardict_strcmp(const gchar * s1,const gchar * s2)36 gint stardict_strcmp(const gchar *s1, const gchar *s2)
37 {
38 	gint a;
39 	a = g_ascii_strcasecmp(s1, s2);
40 	if (a == 0)
41 		return strcmp(s1, s2);
42 	else
43 		return a;
44 }
45 
comparefunc(gconstpointer a,gconstpointer b)46 gint comparefunc(gconstpointer a,gconstpointer b)
47 {
48 	return stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
49 }
50 
convert(char * filename)51 void convert(char *filename)
52 {
53 	struct stat stats;
54 	if (stat (filename, &stats) == -1)
55 	{
56 		printf("file not exist!\n");
57 		return;
58 	}
59 	gchar *basefilename = g_path_get_basename(filename);
60 	FILE *tabfile;
61 	tabfile = fopen(filename,"r");
62 
63 	gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
64 	size_t fread_size;
65 	fread_size = fread (buffer, 1, stats.st_size, tabfile);
66 	if (fread_size != (size_t)stats.st_size) {
67 		g_print("fread error!\n");
68 	}
69 	fclose (tabfile);
70 	buffer[stats.st_size] = '\0';
71 
72 	GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
73 
74 	gchar *p, *p1, *p2;
75 	p = buffer;
76 	struct _worditem worditem;
77 	std::string definition;
78 	glong linenum=1;
79 	while (1) {
80 		if (*p == '\0') {
81 			g_print("over\n");
82 			break;
83 		}
84 		p2 = strchr(p,'\n');
85 		if (p2) {
86 			*p2 = '\0';
87 		}
88 		else {
89 			g_print("error! not end up new line found %ld\n", linenum);
90 			return;
91 		}
92 		p1 = strstr(p, "  ");
93 		if (p1) {
94 			*p1 = '\0';
95 			p1+=2;
96 		} else {
97 			p1 = strstr(p, ": ");
98 			if (p1) {
99 				*p1 = '\0';
100 				p1+=2;
101 			} else {
102 				p1 = strstr(p, " = ");
103 				if (p1) {
104 					*p1 = '\0';
105 					p1+=3;
106 				} else {
107 					g_print("%ld: %s\n", linenum, p);
108 					p= p2+1;
109 					linenum++;
110 					continue;
111 				}
112 			}
113 		}
114 		worditem.word = p;
115 		worditem.definition = p1;
116 		g_strstrip(worditem.word);
117 		g_strstrip(worditem.definition);
118 		if (!worditem.word[0]) {
119 			g_print("%ld: %s, bad word! %s\n", linenum, basefilename, worditem.definition);
120 			p= p2+1;
121 			linenum++;
122 			continue;
123 		}
124 		if (!worditem.definition[0]) {
125 			g_print("%s, bad definition!!!\n", basefilename);
126 			return;
127 		}
128 		worditem.definition_need_free = false;
129 		g_array_append_val(array, worditem);
130 		p= p2+1;
131 		linenum++;
132 	}
133 	g_array_sort(array,comparefunc);
134 
135 	gchar idxfilename[256];
136 	gchar dicfilename[256];
137 	sprintf(idxfilename, "%s.idx", basefilename);
138 	sprintf(dicfilename, "%s.dict", basefilename);
139 	FILE *idxfile = fopen(idxfilename,"w");
140 	FILE *dicfile = fopen(dicfilename,"w");
141 
142 	glong tmpglong = 0;
143 
144 	glong wordcount = array->len;
145 
146 	long offset_old;
147 	const gchar *previous_word = "";
148 	struct _worditem *pworditem;
149 	gulong i=0;
150 	glong thedatasize;
151 	const gchar *insert_word = "\n";
152 	gboolean flag;
153 	pworditem = &g_array_index(array, struct _worditem, i);
154 	gint definition_len;
155 	while (i<array->len)
156 	{
157 		thedatasize = 0;
158 		offset_old = ftell(dicfile);
159 		flag = true;
160 		while (flag == true)
161 		{
162 			definition_len = strlen(pworditem->definition);
163 			fwrite(pworditem->definition, 1 ,definition_len,dicfile);
164 			thedatasize += definition_len;
165 			previous_word = pworditem->word;
166 
167 			i++;
168 			if (i<array->len)
169 			{
170 				pworditem = &g_array_index(array, struct _worditem, i);
171 				if (strcmp(previous_word,pworditem->word)==0)
172 				{
173 					//g_print("D! %s\n",previous_word);
174 					wordcount--;
175 					fwrite(insert_word,sizeof(gchar),strlen(insert_word),dicfile);
176 					thedatasize += strlen(insert_word);
177 				}
178 				else
179 				{
180 					flag = false;
181 				}
182 			}
183 			else
184 				flag = false;
185 		}
186 		fwrite(previous_word,sizeof(gchar),strlen(previous_word)+1,idxfile);
187 		tmpglong = g_htonl(offset_old);
188 		fwrite(&(tmpglong),sizeof(glong),1,idxfile);
189 		tmpglong = g_htonl(thedatasize);
190 		fwrite(&(tmpglong),sizeof(glong),1,idxfile);
191 	}
192 	for (i= 0;i < array->len; i++) {
193 		pworditem = &g_array_index(array, struct _worditem, i);
194 		if (pworditem->definition_need_free)
195 			g_free(pworditem->definition);
196 	}
197 
198 
199 	g_print("%s wordcount: %ld\n", basefilename, wordcount);
200 
201 	g_free(buffer);
202 	g_array_free(array,TRUE);
203 
204 	fclose(idxfile);
205 	fclose(dicfile);
206 
207 	g_free(basefilename);
208 }
209 
210 int
main(int argc,char * argv[])211 main(int argc,char * argv [])
212 {
213 	if (argc<2) {
214 		printf("please type this:\n./duden duden.utf\n");
215 		return FALSE;
216 	}
217 
218 	setlocale(LC_ALL, "");
219 	for (int i=1; i< argc; i++)
220 		convert (argv[i]);
221 	return FALSE;
222 
223 }
224