1 /*
2 * This file is part of StarDict.
3 *
4 * StarDict is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * StarDict is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "stdio.h"
19 #include "stdlib.h"
20 #include <locale.h>
21 #include <string.h>
22 #include <sys/stat.h>
23
24 #include <string>
25
26 #include <gtk/gtk.h>
27 #include <glib.h>
28
29 struct _worditem
30 {
31 gchar *word;
32 gchar *definition;
33 gboolean definition_need_free;
34 };
35
stardict_strcmp(const gchar * s1,const gchar * s2)36 gint stardict_strcmp(const gchar *s1, const gchar *s2)
37 {
38 gint a;
39 a = g_ascii_strcasecmp(s1, s2);
40 if (a == 0)
41 return strcmp(s1, s2);
42 else
43 return a;
44 }
45
comparefunc(gconstpointer a,gconstpointer b)46 gint comparefunc(gconstpointer a,gconstpointer b)
47 {
48 return stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
49 }
50
convert(char * filename)51 void convert(char *filename)
52 {
53 struct stat stats;
54 if (stat (filename, &stats) == -1)
55 {
56 printf("file not exist!\n");
57 return;
58 }
59 gchar *basefilename = g_path_get_basename(filename);
60 FILE *tabfile;
61 tabfile = fopen(filename,"r");
62
63 gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
64 size_t fread_size;
65 fread_size = fread (buffer, 1, stats.st_size, tabfile);
66 if (fread_size != (size_t)stats.st_size) {
67 g_print("fread error!\n");
68 }
69 fclose (tabfile);
70 buffer[stats.st_size] = '\0';
71
72 GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
73
74 gchar *p, *p1, *p2;
75 p = buffer;
76 struct _worditem worditem;
77 std::string definition;
78 glong linenum=1;
79 while (1) {
80 if (*p == '\0') {
81 g_print("over\n");
82 break;
83 }
84 p2 = strchr(p,'\n');
85 if (p2) {
86 *p2 = '\0';
87 }
88 else {
89 g_print("error! not end up new line found %ld\n", linenum);
90 return;
91 }
92 p1 = strstr(p, " ");
93 if (p1) {
94 *p1 = '\0';
95 p1+=2;
96 } else {
97 p1 = strstr(p, ": ");
98 if (p1) {
99 *p1 = '\0';
100 p1+=2;
101 } else {
102 p1 = strstr(p, " = ");
103 if (p1) {
104 *p1 = '\0';
105 p1+=3;
106 } else {
107 g_print("%ld: %s\n", linenum, p);
108 p= p2+1;
109 linenum++;
110 continue;
111 }
112 }
113 }
114 worditem.word = p;
115 worditem.definition = p1;
116 g_strstrip(worditem.word);
117 g_strstrip(worditem.definition);
118 if (!worditem.word[0]) {
119 g_print("%ld: %s, bad word! %s\n", linenum, basefilename, worditem.definition);
120 p= p2+1;
121 linenum++;
122 continue;
123 }
124 if (!worditem.definition[0]) {
125 g_print("%s, bad definition!!!\n", basefilename);
126 return;
127 }
128 worditem.definition_need_free = false;
129 g_array_append_val(array, worditem);
130 p= p2+1;
131 linenum++;
132 }
133 g_array_sort(array,comparefunc);
134
135 gchar idxfilename[256];
136 gchar dicfilename[256];
137 sprintf(idxfilename, "%s.idx", basefilename);
138 sprintf(dicfilename, "%s.dict", basefilename);
139 FILE *idxfile = fopen(idxfilename,"w");
140 FILE *dicfile = fopen(dicfilename,"w");
141
142 glong tmpglong = 0;
143
144 glong wordcount = array->len;
145
146 long offset_old;
147 const gchar *previous_word = "";
148 struct _worditem *pworditem;
149 gulong i=0;
150 glong thedatasize;
151 const gchar *insert_word = "\n";
152 gboolean flag;
153 pworditem = &g_array_index(array, struct _worditem, i);
154 gint definition_len;
155 while (i<array->len)
156 {
157 thedatasize = 0;
158 offset_old = ftell(dicfile);
159 flag = true;
160 while (flag == true)
161 {
162 definition_len = strlen(pworditem->definition);
163 fwrite(pworditem->definition, 1 ,definition_len,dicfile);
164 thedatasize += definition_len;
165 previous_word = pworditem->word;
166
167 i++;
168 if (i<array->len)
169 {
170 pworditem = &g_array_index(array, struct _worditem, i);
171 if (strcmp(previous_word,pworditem->word)==0)
172 {
173 //g_print("D! %s\n",previous_word);
174 wordcount--;
175 fwrite(insert_word,sizeof(gchar),strlen(insert_word),dicfile);
176 thedatasize += strlen(insert_word);
177 }
178 else
179 {
180 flag = false;
181 }
182 }
183 else
184 flag = false;
185 }
186 fwrite(previous_word,sizeof(gchar),strlen(previous_word)+1,idxfile);
187 tmpglong = g_htonl(offset_old);
188 fwrite(&(tmpglong),sizeof(glong),1,idxfile);
189 tmpglong = g_htonl(thedatasize);
190 fwrite(&(tmpglong),sizeof(glong),1,idxfile);
191 }
192 for (i= 0;i < array->len; i++) {
193 pworditem = &g_array_index(array, struct _worditem, i);
194 if (pworditem->definition_need_free)
195 g_free(pworditem->definition);
196 }
197
198
199 g_print("%s wordcount: %ld\n", basefilename, wordcount);
200
201 g_free(buffer);
202 g_array_free(array,TRUE);
203
204 fclose(idxfile);
205 fclose(dicfile);
206
207 g_free(basefilename);
208 }
209
210 int
main(int argc,char * argv[])211 main(int argc,char * argv [])
212 {
213 if (argc<2) {
214 printf("please type this:\n./duden duden.utf\n");
215 return FALSE;
216 }
217
218 setlocale(LC_ALL, "");
219 for (int i=1; i< argc; i++)
220 convert (argv[i]);
221 return FALSE;
222
223 }
224