1 /**************************************************************************
2 *                   E X J D X G E N
3 *                                                   Author: Jim Breen
4 *
5 *   This is the Unix version of EJDXGEN, ported from MS-DOS
6 ***************************************************************************/
7 /*  This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 1, or (at your option)
10     any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.     */
20 
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include "xjdic.h"
29 
30 #define TRUE 1
31 #define FALSE 0
32 #define EXLIM 100
33 
/* Dictionary text buffer, allocated in main().  The file contents are
   loaded starting at db[1]; a newline byte (10) is stored in db[0] and
   in the byte following the data.  Kstrcmp() reads keywords from it. */
34 unsigned char *db;
/* ENVname and dicenv are not referenced in the visible part of this file. */
35 unsigned char ENVname[50];
36 unsigned char *dicenv;
/* Buffer handed to stat() in main(); its st_size gives the dictionary length. */
37 struct stat *buf;
/* Receives fread() results while loading, then holds the scan limit
   (dictionary length + 1) used by the parsing loop in main(). */
38 unsigned long dbyte;
/* Index table.  Slot 0 receives (dictionary length + jiver); the
   following slots are filled in pairs: [keyword position, record position]. */
39 unsigned long  *jindex;
/* indptr   - next free slot in jindex (starts at 1, advances by 2 per entry)
   llone    - set to 1 in main() and passed to jqsort() as the first record
   charno   - position in db of the byte currently being scanned
   recpos   - position just after the most recent newline (charno+1)
   charnost - charno-1 at the first byte of the keyword being collected */
40 unsigned long indptr,llone,charno,recpos,charnost;
/* Current state (0..4) of the keyword scanner in main(). */
41 int State;
/* Default dictionary and index file names; main() overwrites both when a
   file name is given on the command line. */
42 unsigned char Dname[80] = {"edictext"};
43 unsigned char JDXname[80] = {"edictext.xjdx"};
44 int jiver = 14;		/* Index format version (the index structure last changed in Version 1.4); main() adds it to the dictionary length stored in jindex[0]. */
45 
46 /*====== prototypes=================================================*/
47 int stringcomp(unsigned char *s1, unsigned char *s2);
48 void jqsort(long i, long j);
49 int Kstrcmp(unsigned long lhs, unsigned long rhs);
50 /*====== end prototypes=================================================*/
51 
/*
 * stringcomp - case-folding test of whether s1 is a prefix of s2.
 *
 * Each byte below 0x60 has bit 0x20 set before it is compared, which maps
 * ASCII upper-case letters onto their lower-case forms (it also aliases a
 * few punctuation and control bytes; that folding rule is kept unchanged).
 *
 * Only the first strlen(s1) bytes are examined, so an empty s1 matches
 * any s2.
 *
 * Returns 0 when s1 matches the start of s2, 1 otherwise.  If s2 ends
 * before s1 does, the result is 1: the terminating NUL of s2 is never
 * folded (it would otherwise compare equal to a space in s1) and s2 is
 * never read beyond its terminator.
 */
int stringcomp(unsigned char *s1, unsigned char *s2)
{
	size_t i;
	unsigned char c1,c2;

	/* Walk s1 up to its terminator instead of calling strlen() each pass. */
	for (i = 0; s1[i] != '\0'; i++)
	{
		/* s2 is shorter than s1: cannot match, and must not be overrun. */
		if (s2[i] == '\0') return(1);
		c1 = s1[i];
		if (c1 < 0x60) c1 = (c1|0x20);
		c2 = s2[i];
		if (c2 < 0x60) c2 = (c2|0x20);
		if (c1 != c2) return(1);
	}
	return (0);
}
67 
68 /*====function to Load Dictionary and load/create index table=======*/
main(argc,argv)69 main(argc,argv)
70 int argc;
71  char **argv;
72 {
73   FILE *fp,*fopen();
74   unsigned long schi,diclen,indlen;
75   int i,inwd,saving,nodread;
76   unsigned char c;
77   unsigned char **ap;
78 
79   printf("\nEXJDXGEN V2.0 Extension Index Table Generator for XJDIC. \n      Copyright J.W. Breen, 1995\n");
80   if (argc > 1)
81   {
82 	ap = argv;
83 	ap++;
84 	if(strcmp(*ap,"-h") == 0)
85 	{
86 		printf("\nThere are two command-line options:\n");
87 		printf("  -h  this display\n");
88 		printf("  filename - file to be indexed\n\n");
89 		exit(0);
90 	}
91 	strcpy(Dname,*ap);
92 	strcpy(JDXname,*ap);
93 	strcat(JDXname,".xjdx");
94     printf("Commandline request to use files %s and %s \n",Dname,JDXname);
95   }
96   inwd = FALSE;
97   indptr = 1;
98   llone = 1;
99   buf = (void *)malloc(1000);
100   if(stat(Dname, buf) != 0)
101   {
102 	 perror(NULL);
103 	 printf("Cannot stat: %s \n",Dname);
104 	 exit(1);
105   }
106   diclen = buf->st_size;
107   printf("\nWARNING!!  This program may take a long time to run .....\n");
108 
109   puts ("\nLoading Dictionary file.  Please wait.....\n");
110   fp=fopen(Dname,"rb");
111   if (fp==NULL )
112   {
113 	printf("\nCannot open dictionary file\n");
114 	exit(1);
115   }
116   db = (unsigned char *)malloc((diclen+100) * sizeof(unsigned char));
117   if(db == NULL)
118   {
119       fprintf(stderr,"malloc() for dictionary failed.\n");
120       fclose(fp);
121       exit(1);
122   }
123   nodread = diclen/1024;
124   dbyte = fread((unsigned char *)db+1, 1024, nodread, fp);
125   nodread = diclen % 1024;
126   dbyte = fread((unsigned char *)(db+(diclen/1024)*1024)+1, nodread,1, fp);
127   fclose(fp);
128   diclen++;
129   dbyte = diclen;
130   db[diclen] = 10;
131   db[0] = 10;
132   printf("Dictionary size: %ld bytes.\n",dbyte);
133   indlen = diclen / 2;
134   jindex = (unsigned long *)malloc(indlen);
135   if(jindex == NULL)
136   {
137 	  fprintf(stderr,"malloc() for index table failed.\n");
138 	  fclose(fp);
139 	  exit(1);
140   }
141   printf("Parsing.... \n");
142   indptr = 1;
143   saving = FALSE;
144   charno = -1;
145   State = 0;
146   for (schi =0; schi < dbyte; schi++) /* scan whole dictionary  */
147   {
148 	  c = db[schi];
149 	  charno++;
150 	  if (c == 0x0a)
151 	  {
152 		recpos = charno+1;
153 		continue;
154 	  }
155 	  switch (State)
156 	  {
157 	  case 0 :	/* Looking for < 	*/
158 		if (c == '<') State = 1;
159 		break;
160 	  case 1 :	/* Inside <..>, but nothing started yet	*/
161 		if (c >= 127)
162 		{
163 			saving = TRUE;
164 			charnost = charno-1;
165 			State = 2;
166 			break;
167 		}
168 		else
169 		{
170 			State = 3;
171 	  		schi--;
172 	  		charno--;
173 			break;
174 		}
175 	case 2 :	/* storing keywords  */
176 		if (c >= 127)
177 		{
178 			break;
179 		}
180 		else
181 		{
182 			jindex[indptr] = charnost;
183 			jindex[indptr+1] = recpos;
184 			indptr+=2;
185 			if (indptr > indlen/sizeof(long))
186 			{
187 			  	printf("Index table overflow. Dictionary too large?\n");
188 			  	exit(1);
189 			}
190 			saving = FALSE;
191 	  		schi--;
192 	  		charno--;
193 			State = 3;
194 			break;
195 		}
196 	case 3 : 	/* encountered non-JIS	*/
197 		if (c == '>')
198 		{
199 			State = 0;
200 			break;
201 		}
202 		if (c == '[')
203 		{
204 			State = 4;
205 			break;
206 		}
207 		if (c >= 127)
208 		{
209 			schi--;
210   			charno--;
211 			State = 1;
212 		}
213 		break;
214 	case 4 :	/* skip all until ]		*/
215 		if (c == ']') State = 1;
216 		break;
217 	}
218     }
219     indptr-=2;
220     printf("Index entries: %ld  \nSorting (this is slow)......\n",indptr);
221     jqsort(llone,(indptr/2)+1);
222     printf("Sorted\nWriting index file ....\n");
223     fp = fopen(JDXname,"wb");
224     if (fp==NULL )
225     {
226     	printf("\nCannot open %s output file\n",JDXname);
227     	exit(1);
228     }
229   jindex[0] = diclen+jiver;
230   fwrite(jindex,sizeof(long),indptr+2,fp);
231   fclose(fp);
232   return (0);
233 }
234 /*======function to sort jindex table====================*/
235 
jqsort(long lhsr,long rhsr)236 void jqsort(long lhsr, long rhsr)
237 {
238 	long i,last,midp,lhs,rhs;
239 	unsigned long temp,temp2;
240 
241 	lhs = ((lhsr-1)*2)+1;
242 	rhs = ((rhsr-1)*2)+1;
243 	if (lhs >= rhs) return;
244 	/* Swap ( lhs , (lhs+rhs)/2);*/
245 	midp = (lhs+rhs)/2;
246 	if (!(midp & 1)) midp--;
247 	temp = jindex[lhs];
248 	temp2 = jindex[lhs+1];
249 	jindex[lhs] = jindex[midp];
250 	jindex[lhs+1] = jindex[midp+1];
251 	jindex[midp] = temp;
252 	jindex[midp+1] = temp2;
253 	last = lhs;
254 	for (i = lhs+2;i <= rhs; i+=2)
255 		{
256 			if (Kstrcmp(jindex[i],jindex[lhs]) < 0)
257 			{
258 				/* Swap(++last,i);*/
259 				last+=2;
260 				temp = jindex[i];
261 				jindex[i] = jindex[last];
262 				jindex[last] = temp;
263 				temp = jindex[i+1];
264 				jindex[i+1] = jindex[last+1];
265 				jindex[last+1] = temp;
266 			}
267 		}
268 /*	Swap (lhs,last);*/
269 	temp = jindex[lhs];
270 	jindex[lhs] = jindex[last];
271 	jindex[last] = temp;
272 	temp = jindex[lhs+1];
273 	jindex[lhs+1] = jindex[last+1];
274 	jindex[last+1] = temp;
275 	jqsort((lhs/2)+1,last/2);
276 	jqsort((last/2)+2,(rhs/2)+1);
277 }
278 /*=====string comparison used by jqsort==========================*/
Kstrcmp(unsigned long lhs,unsigned long rhs)279 int Kstrcmp(unsigned long lhs, unsigned long rhs)
280 {
281 	int i,c1,c2;
282 /* effectively does a strnicmp on two "strings" within the dictionary,
283    except it will make katakana and hirgana match (EUC A4 & A5) */
284 
285 	for (i = 0; i<20 ; i++)
286 	{
287 		c1 = db[lhs+i+1];
288 		c2 = db[rhs+i+1];
289 		if ((i % 2) == 0)
290 		{
291 			if (c1 == 0xA5)
292 			{
293 				c1 = 0xA4;
294 			}
295 			if (c2 == 0xA5)
296 			{
297 				c2 = 0xA4;
298 			}
299 		}
300 		if ((c1 >= 'A') && (c1 <= 'Z')) c1 |= 0x20;
301 		if ((c2 >= 'A') && (c2 <= 'Z')) c2 |= 0x20;
302 		if (c1 != c2 ) break;
303 	}
304 	return(c1-c2);
305 }
306 
307