1 /*
2 * This file is part of StarDict.
3 *
4 * StarDict is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * StarDict is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "stdio.h"
19 #include "stdlib.h"
20 #include <locale.h>
21 #include <string.h>
22 #include <sys/stat.h>
23
24
25 #include <gtk/gtk.h>
26 #include <glib.h>
27
/*
 * One dictionary entry parsed from a line of the input file.
 *
 * Both pointers refer into the file buffer owned by convert(); the struct
 * does not own the strings and they are never freed individually.
 */
struct _worditem
{
	gchar *word;       /* headword: the text before the "===" separator */
	gchar *definition; /* definition: the text after the "===" separator */
};
33
stardict_strcmp(const gchar * s1,const gchar * s2)34 gint stardict_strcmp(const gchar *s1, const gchar *s2)
35 {
36 gint a;
37 a = g_ascii_strcasecmp(s1, s2);
38 if (a == 0)
39 return strcmp(s1, s2);
40 else
41 return a;
42 }
43
comparefunc(gconstpointer a,gconstpointer b)44 gint comparefunc(gconstpointer a,gconstpointer b)
45 {
46 return stardict_strcmp(((struct _worditem *)a)->word,((struct _worditem *)b)->word);
47 }
48
convert(char * filename)49 void convert(char *filename)
50 {
51 struct stat stats;
52 if (stat (filename, &stats) == -1)
53 {
54 printf("file not exist!\n");
55 return;
56 }
57 gchar *basefilename = g_path_get_basename(filename);
58 FILE *tabfile;
59 tabfile = fopen(filename,"r");
60
61 gchar *buffer = (gchar *)g_malloc (stats.st_size + 1);
62 size_t fread_size;
63 fread_size = fread (buffer, 1, stats.st_size, tabfile);
64 if (fread_size != (size_t)stats.st_size) {
65 g_print("fread error!\n");
66 }
67 fclose (tabfile);
68 buffer[stats.st_size] = '\0';
69
70 GArray *array = g_array_sized_new(FALSE,FALSE, sizeof(struct _worditem),20000);
71
72 gchar *p, *p1, *p2;
73 p = buffer;
74 if ((guchar)*p==0xEF && (guchar)*(p+1)==0xBB && (guchar)*(p+2)==0xBF) // UTF-8 order characters.
75 p+=3;
76 struct _worditem worditem;
77 glong linenum=1;
78 while (1) {
79 if (*p == '\0') {
80 g_print("over\n");
81 break;
82 }
83 p1 = strchr(p,'\n');
84 if (!p1) {
85 g_print("error, no end line\n");
86 return;
87 }
88 *p1 = '\0';
89 p1++;
90 p2 = strstr(p,"===");
91 if (!p2) {
92 g_print("wrong, no ===, %ld\n", linenum);
93 p= p1;
94 linenum++;
95 continue;
96 }
97 *p2 = '\0';
98 p2+=3;
99 worditem.word = p;
100 worditem.definition = p2;
101 g_strstrip(worditem.word);
102 g_strstrip(worditem.definition);
103 if (!worditem.word[0]) {
104 g_print("%s-%ld, bad word!!!\n", basefilename, linenum);
105 p = p1;
106 linenum++;
107 continue;
108 }
109 if (!worditem.definition[0]) {
110 g_print("%s-%ld, bad definition!!!\n", basefilename, linenum);
111 p = p1;
112 linenum++;
113 continue;
114 }
115 g_array_append_val(array, worditem);
116 p= p1;
117 linenum++;
118 }
119 g_array_sort(array,comparefunc);
120
121 gchar idxfilename[256];
122 gchar dicfilename[256];
123 sprintf(idxfilename, "%s.idx", basefilename);
124 sprintf(dicfilename, "%s.dict", basefilename);
125 FILE *idxfile = fopen(idxfilename,"w");
126 FILE *dicfile = fopen(dicfilename,"w");
127
128
129 glong wordcount = array->len;
130
131 long offset_old;
132 glong tmpglong;
133 const gchar *previous_word = "";
134 struct _worditem *pworditem;
135 gulong i=0;
136 glong thedatasize;
137 const gchar *insert_word = "\n";
138 gboolean flag;
139 pworditem = &g_array_index(array, struct _worditem, i);
140 gint definition_len;
141 while (i<array->len)
142 {
143 thedatasize = 0;
144 offset_old = ftell(dicfile);
145 flag = true;
146 while (flag == true)
147 {
148 definition_len = strlen(pworditem->definition);
149 fwrite(pworditem->definition, 1 ,definition_len,dicfile);
150 thedatasize += definition_len;
151 previous_word = pworditem->word;
152
153 i++;
154 if (i<array->len)
155 {
156 pworditem = &g_array_index(array, struct _worditem, i);
157 if (strcmp(previous_word,pworditem->word)==0)
158 {
159 //g_print("D! %s\n",previous_word);
160 flag = true;
161 wordcount--;
162 fwrite(insert_word,sizeof(gchar),strlen(insert_word),dicfile);
163 thedatasize += strlen(insert_word);
164 }
165 else
166 {
167 flag = false;
168 }
169 }
170 else
171 {
172 flag = false;
173 }
174 }
175 fwrite(previous_word,sizeof(gchar),strlen(previous_word)+1,idxfile);
176 tmpglong = g_htonl(offset_old);
177 fwrite(&(tmpglong),sizeof(glong),1,idxfile);
178 tmpglong = g_htonl(thedatasize);
179 fwrite(&(tmpglong),sizeof(glong),1,idxfile);
180 }
181
182 g_print("%s wordcount: %ld\n", basefilename, wordcount);
183
184 g_free(buffer);
185 g_array_free(array,TRUE);
186
187 fclose(idxfile);
188 fclose(dicfile);
189
190 g_free(basefilename);
191 }
192
main(int argc,char * argv[])193 int main(int argc,char * argv [])
194 {
195 if (argc<2) {
196 printf("please type this:\n./buddhist SanZunFaSu.txt\n");
197 return FALSE;
198 }
199
200 setlocale(LC_ALL, "");
201 for (int i=1; i< argc; i++)
202 convert (argv[i]);
203 return FALSE;
204 }
205
206