1 /*
2  * This file is part of StarDict.
3  *
4  * StarDict is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * StarDict is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "stdio.h"
19 #include "stdlib.h"
20 #include <locale.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 
24 
25 #include <gtk/gtk.h>
26 #include <glib.h>
27 
28 struct _worditem
29 {
30 	gchar *word;
31 	gchar *definition;
32 };
33 
stardict_strcmp(const gchar * s1,const gchar * s2)34 gint stardict_strcmp(const gchar *s1, const gchar *s2)
35 {
36 	gint a;
37 	a = g_ascii_strcasecmp(s1, s2);
38 	if (a == 0)
39 		return strcmp(s1, s2);
40 	else
41 		return a;
42 }
43 
comparefunc(gconstpointer a,gconstpointer b)44 gint comparefunc(gconstpointer a,gconstpointer b)
45 {
46 	return stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
47 }
48 
convert(char * filename)49 void convert(char *filename)
50 {
51 	struct stat stats;
52 	if (stat (filename, &stats) == -1)
53 	{
54 		printf("file not exist!\n");
55 		return;
56 	}
57 	gchar *basefilename = g_path_get_basename(filename);
58 	FILE *tabfile;
59 	tabfile = fopen(filename,"r");
60 
61 	gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
62 	size_t fread_size;
63 	fread_size = fread (buffer, 1, stats.st_size, tabfile);
64 	if (fread_size != (size_t)stats.st_size) {
65 		g_print("fread error!\n");
66 	}
67 	fclose (tabfile);
68 	buffer[stats.st_size] = '\0';
69 
70 	GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
71 
72 	gchar *p, *p1, *p2;
73 	p = buffer;
74 	if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters.
75 		p+=3;
76 	struct _worditem worditem;
77 	glong linenum=1;
78 	while (1) {
79 		if (*p == '\0') {
80                         g_print("over\n");
81                         break;
82                 }
83 		p1 = strchr(p,'\n');
84 		if (!p1) {
85 			g_print("error, no end line\n");
86 			return;
87 		}
88 		*p1 = '\0';
89 		p1++;
90 		p2 = strstr(p,"===");
91 		if (!p2) {
92 			g_print("wrong, no ===, %ld\n", linenum);
93 			p= p1;
94                 	linenum++;
95 			continue;
96 		}
97 		*p2 = '\0';
98 		p2+=3;
99 		worditem.word = p;
100 		worditem.definition = p2;
101 		g_strstrip(worditem.word);
102 		g_strstrip(worditem.definition);
103 		if (!worditem.word[0]) {
104 			g_print("%s-%ld, bad word!!!\n", basefilename, linenum);
105 			p = p1;
106                         linenum++;
107                         continue;
108 		}
109 		if (!worditem.definition[0]) {
110 			g_print("%s-%ld, bad definition!!!\n", basefilename, linenum);
111 			p = p1;
112                         linenum++;
113                         continue;
114 		}
115 		g_array_append_val(array, worditem);
116 		p= p1;
117 		linenum++;
118 	}
119 	g_array_sort(array,comparefunc);
120 
121 	gchar idxfilename[256];
122 	gchar dicfilename[256];
123 	sprintf(idxfilename, "%s.idx", basefilename);
124 	sprintf(dicfilename, "%s.dict", basefilename);
125 	FILE *idxfile = fopen(idxfilename,"w");
126 	FILE *dicfile = fopen(dicfilename,"w");
127 
128 
129 	glong wordcount = array->len;
130 
131 	long offset_old;
132 	glong tmpglong;
133 	const gchar *previous_word = "";
134 	struct _worditem *pworditem;
135 	gulong i=0;
136 	glong thedatasize;
137 	const gchar *insert_word = "\n";
138 	gboolean flag;
139 	pworditem = &g_array_index(array, struct _worditem, i);
140 	gint definition_len;
141 	while (i<array->len)
142 	{
143 		thedatasize = 0;
144 		offset_old = ftell(dicfile);
145 		flag = true;
146 		while (flag == true)
147 		{
148 			definition_len = strlen(pworditem->definition);
149 			fwrite(pworditem->definition, 1 ,definition_len,dicfile);
150 			thedatasize += definition_len;
151 			previous_word = pworditem->word;
152 
153 			i++;
154 			if (i<array->len)
155 			{
156 				pworditem = &g_array_index(array, struct _worditem, i);
157 				if (strcmp(previous_word,pworditem->word)==0)
158 				{
159 					//g_print("D! %s\n",previous_word);
160 					flag = true;
161 					wordcount--;
162 					fwrite(insert_word,sizeof(gchar),strlen(insert_word),dicfile);
163 					thedatasize += strlen(insert_word);
164 				}
165 				else
166 				{
167 					flag = false;
168 				}
169 			}
170 			else
171 			{
172 				flag = false;
173 			}
174 		}
175 		fwrite(previous_word,sizeof(gchar),strlen(previous_word)+1,idxfile);
176 		tmpglong = g_htonl(offset_old);
177 		fwrite(&(tmpglong),sizeof(glong),1,idxfile);
178 		tmpglong = g_htonl(thedatasize);
179 		fwrite(&(tmpglong),sizeof(glong),1,idxfile);
180 	}
181 
182 	g_print("%s wordcount: %ld\n", basefilename, wordcount);
183 
184 	g_free(buffer);
185 	g_array_free(array,TRUE);
186 
187 	fclose(idxfile);
188 	fclose(dicfile);
189 
190 	g_free(basefilename);
191 }
192 
main(int argc,char * argv[])193 int main(int argc,char * argv [])
194 {
195 	if (argc<2) {
196 		printf("please type this:\n./buddhist SanZunFaSu.txt\n");
197 		return FALSE;
198 	}
199 
200 	setlocale(LC_ALL, "");
201 	for (int i=1; i< argc; i++)
202 		convert (argv[i]);
203 	return FALSE;
204 }
205 
206