1 /**************************************************************************
2 * E X J D X G E N
3 * Author: Jim Breen
4 *
5 * This is the Unix version of EJDXGEN, ported from MS-DOS
6 ***************************************************************************/
7 /* This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 1, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include "xjdic.h"
29
30 #define TRUE 1 /* boolean true */
31 #define FALSE 0 /* boolean false */
32 #define EXLIM 100 /* not referenced by the code visible in this file */
33
34 unsigned char *db; /* dictionary text; main() loads the file at db+1 and stores a newline (10) at db[0] and db[dbyte] */
35 unsigned char ENVname[50]; /* not referenced by the code visible in this file */
36 unsigned char *dicenv; /* not referenced by the code visible in this file */
37 struct stat *buf; /* stat() result for the dictionary file; allocated before the stat() call */
38 unsigned long dbyte; /* dictionary file size plus one; upper bound of the parsing scan over db */
39 unsigned long *jindex; /* index table: slot 0 is a header word, then (keyword offset, record offset) pairs */
40 unsigned long indptr,llone,charno,recpos,charnost; /* indptr:   next free slot in jindex while parsing
   llone:    the constant 1, passed to jqsort() as the first record number
   charno:   offset in db of the byte currently being scanned
   recpos:   offset in db of the byte following the most recent newline
   charnost: offset in db of the byte just before the current keyword */
41 int State; /* parser state (0..4) used while scanning db for <...> keywords */
42 unsigned char Dname[80] = {"edictext"}; /* dictionary file name; replaced by argv[1] when given */
43 unsigned char JDXname[80] = {"edictext.xjdx"}; /* index file name: dictionary name with ".xjdx" appended */
44 int jiver = 14; /* index format version, added to the header word jindex[0]; the index structure last changed in Version 1.4 */
45
46 /*====== prototypes=================================================*/
47 int stringcomp(unsigned char *s1, unsigned char *s2); /* case-insensitive test of s1 against the start of s2 */
48 void jqsort(long i, long j); /* sorts jindex records i..j (1-based record numbers) */
49 int Kstrcmp(unsigned long lhs, unsigned long rhs); /* compares the db text at two offsets; ordering used by jqsort */
50 /*====== end prototypes=================================================*/
51
/*
 * stringcomp - case-insensitive test of whether s1 matches the start of s2.
 *
 * s1, s2: NUL-terminated byte strings.
 *
 * Returns 0 when every byte of s1 equals the byte at the same position
 * in s2 once the ASCII letters 'A'..'Z' have been folded to lower case,
 * and 1 otherwise.  An empty s1 always yields 0.
 *
 * Only 'A'..'Z' are folded (the same rule Kstrcmp applies), so the
 * terminating NUL of s2 can never compare equal to a byte of s1.  The
 * scan therefore stops with a mismatch at the end of s2 and never reads
 * beyond it.
 */
int stringcomp(unsigned char *s1, unsigned char *s2)
{
    unsigned char c1,c2;

    for ( ; *s1 != '\0'; s1++, s2++)
    {
        c1 = *s1;
        c2 = *s2;
        if ((c1 >= 'A') && (c1 <= 'Z')) c1 |= 0x20;
        if ((c2 >= 'A') && (c2 <= 'Z')) c2 |= 0x20;
        /* also taken when s2 has ended: c2 is then 0 and c1 is not */
        if (c1 != c2) return (1);
    }
    return (0);
}
67
68 /*====function to Load Dictionary and load/create index table=======*/
main(argc,argv)69 main(argc,argv)
70 int argc;
71 char **argv;
72 {
73 FILE *fp,*fopen();
74 unsigned long schi,diclen,indlen;
75 int i,inwd,saving,nodread;
76 unsigned char c;
77 unsigned char **ap;
78
79 printf("\nEXJDXGEN V2.0 Extension Index Table Generator for XJDIC. \n Copyright J.W. Breen, 1995\n");
80 if (argc > 1)
81 {
82 ap = argv;
83 ap++;
84 if(strcmp(*ap,"-h") == 0)
85 {
86 printf("\nThere are two command-line options:\n");
87 printf(" -h this display\n");
88 printf(" filename - file to be indexed\n\n");
89 exit(0);
90 }
91 strcpy(Dname,*ap);
92 strcpy(JDXname,*ap);
93 strcat(JDXname,".xjdx");
94 printf("Commandline request to use files %s and %s \n",Dname,JDXname);
95 }
96 inwd = FALSE;
97 indptr = 1;
98 llone = 1;
99 buf = (void *)malloc(1000);
100 if(stat(Dname, buf) != 0)
101 {
102 perror(NULL);
103 printf("Cannot stat: %s \n",Dname);
104 exit(1);
105 }
106 diclen = buf->st_size;
107 printf("\nWARNING!! This program may take a long time to run .....\n");
108
109 puts ("\nLoading Dictionary file. Please wait.....\n");
110 fp=fopen(Dname,"rb");
111 if (fp==NULL )
112 {
113 printf("\nCannot open dictionary file\n");
114 exit(1);
115 }
116 db = (unsigned char *)malloc((diclen+100) * sizeof(unsigned char));
117 if(db == NULL)
118 {
119 fprintf(stderr,"malloc() for dictionary failed.\n");
120 fclose(fp);
121 exit(1);
122 }
123 nodread = diclen/1024;
124 dbyte = fread((unsigned char *)db+1, 1024, nodread, fp);
125 nodread = diclen % 1024;
126 dbyte = fread((unsigned char *)(db+(diclen/1024)*1024)+1, nodread,1, fp);
127 fclose(fp);
128 diclen++;
129 dbyte = diclen;
130 db[diclen] = 10;
131 db[0] = 10;
132 printf("Dictionary size: %ld bytes.\n",dbyte);
133 indlen = diclen / 2;
134 jindex = (unsigned long *)malloc(indlen);
135 if(jindex == NULL)
136 {
137 fprintf(stderr,"malloc() for index table failed.\n");
138 fclose(fp);
139 exit(1);
140 }
141 printf("Parsing.... \n");
142 indptr = 1;
143 saving = FALSE;
144 charno = -1;
145 State = 0;
146 for (schi =0; schi < dbyte; schi++) /* scan whole dictionary */
147 {
148 c = db[schi];
149 charno++;
150 if (c == 0x0a)
151 {
152 recpos = charno+1;
153 continue;
154 }
155 switch (State)
156 {
157 case 0 : /* Looking for < */
158 if (c == '<') State = 1;
159 break;
160 case 1 : /* Inside <..>, but nothing started yet */
161 if (c >= 127)
162 {
163 saving = TRUE;
164 charnost = charno-1;
165 State = 2;
166 break;
167 }
168 else
169 {
170 State = 3;
171 schi--;
172 charno--;
173 break;
174 }
175 case 2 : /* storing keywords */
176 if (c >= 127)
177 {
178 break;
179 }
180 else
181 {
182 jindex[indptr] = charnost;
183 jindex[indptr+1] = recpos;
184 indptr+=2;
185 if (indptr > indlen/sizeof(long))
186 {
187 printf("Index table overflow. Dictionary too large?\n");
188 exit(1);
189 }
190 saving = FALSE;
191 schi--;
192 charno--;
193 State = 3;
194 break;
195 }
196 case 3 : /* encountered non-JIS */
197 if (c == '>')
198 {
199 State = 0;
200 break;
201 }
202 if (c == '[')
203 {
204 State = 4;
205 break;
206 }
207 if (c >= 127)
208 {
209 schi--;
210 charno--;
211 State = 1;
212 }
213 break;
214 case 4 : /* skip all until ] */
215 if (c == ']') State = 1;
216 break;
217 }
218 }
219 indptr-=2;
220 printf("Index entries: %ld \nSorting (this is slow)......\n",indptr);
221 jqsort(llone,(indptr/2)+1);
222 printf("Sorted\nWriting index file ....\n");
223 fp = fopen(JDXname,"wb");
224 if (fp==NULL )
225 {
226 printf("\nCannot open %s output file\n",JDXname);
227 exit(1);
228 }
229 jindex[0] = diclen+jiver;
230 fwrite(jindex,sizeof(long),indptr+2,fp);
231 fclose(fp);
232 return (0);
233 }
234 /*======function to sort jindex table====================*/
235
jqsort(long lhsr,long rhsr)236 void jqsort(long lhsr, long rhsr)
237 {
238 long i,last,midp,lhs,rhs;
239 unsigned long temp,temp2;
240
241 lhs = ((lhsr-1)*2)+1;
242 rhs = ((rhsr-1)*2)+1;
243 if (lhs >= rhs) return;
244 /* Swap ( lhs , (lhs+rhs)/2);*/
245 midp = (lhs+rhs)/2;
246 if (!(midp & 1)) midp--;
247 temp = jindex[lhs];
248 temp2 = jindex[lhs+1];
249 jindex[lhs] = jindex[midp];
250 jindex[lhs+1] = jindex[midp+1];
251 jindex[midp] = temp;
252 jindex[midp+1] = temp2;
253 last = lhs;
254 for (i = lhs+2;i <= rhs; i+=2)
255 {
256 if (Kstrcmp(jindex[i],jindex[lhs]) < 0)
257 {
258 /* Swap(++last,i);*/
259 last+=2;
260 temp = jindex[i];
261 jindex[i] = jindex[last];
262 jindex[last] = temp;
263 temp = jindex[i+1];
264 jindex[i+1] = jindex[last+1];
265 jindex[last+1] = temp;
266 }
267 }
268 /* Swap (lhs,last);*/
269 temp = jindex[lhs];
270 jindex[lhs] = jindex[last];
271 jindex[last] = temp;
272 temp = jindex[lhs+1];
273 jindex[lhs+1] = jindex[last+1];
274 jindex[last+1] = temp;
275 jqsort((lhs/2)+1,last/2);
276 jqsort((last/2)+2,(rhs/2)+1);
277 }
278 /*=====string comparison used by jqsort==========================*/
Kstrcmp(unsigned long lhs,unsigned long rhs)279 int Kstrcmp(unsigned long lhs, unsigned long rhs)
280 {
281 int i,c1,c2;
282 /* effectively does a strnicmp on two "strings" within the dictionary,
283 except it will make katakana and hirgana match (EUC A4 & A5) */
284
285 for (i = 0; i<20 ; i++)
286 {
287 c1 = db[lhs+i+1];
288 c2 = db[rhs+i+1];
289 if ((i % 2) == 0)
290 {
291 if (c1 == 0xA5)
292 {
293 c1 = 0xA4;
294 }
295 if (c2 == 0xA5)
296 {
297 c2 = 0xA4;
298 }
299 }
300 if ((c1 >= 'A') && (c1 <= 'Z')) c1 |= 0x20;
301 if ((c2 >= 'A') && (c2 <= 'Z')) c2 |= 0x20;
302 if (c1 != c2 ) break;
303 }
304 return(c1-c2);
305 }
306
307