1 /*
2  * NAME:
3  *	hclib.c - Hanzi Converter Version 3.0 library implementation file
4  *	Copyright (C) 1988,1989,1990,1993  by Fung F. Lee & Ricky Yeung
5  *
6  * DESCRIPTION:
7  *	hc converts a GB file to a BIG-5 file, or a BIG-5 file to a GB file.
8  *	GB (GuoBiao) refers to the standard implementation of GB2312-80
9  *	of Mainland China, in which the two bytes representing a GB code
10  *	have their most significant bit set to 1.  BIG-5 refers to the Big
11  *	Five standard published in 1984 by Taiwan's Institute for Information
12  *	Industry. Currently, most popular Chinese systems use either
13  *	GB or BIG-5.
14  *
15  * API:
16  *	See hclib.h.
17  *
18  * FORMAT OF THE CONVERSION TABLE FILE
19  *	The table file contains newline-terminated mapping entries.
20  *	Each entry is a two-byte GB code followed by a list of two-byte
21  *	BIG5 codes.  Each entry line cannot exceed BUFSIZE characters.
22  *	See hc.tab.  Users may build their own separate table files.
23  *
24  * AUTHORS:
25  *	Ricky Yeung (Ricky.Yeung@eng.sun.com)
26  *	Fung F. Lee (lee@umunhum.stanford.edu)
27  *
28  * ACKNOWLEDGEMENT:
29  *	Thanks to Mr. Edmund Lai (lai@apple.com) for providing most of
30  *	the mapping data for the less-frequently-used hanzi of GB.
31  *
32  * DISTRIBUTION:
33  *
34  *	This program and the table file are NOT in the public domain.
35  *	All Rights Reserved.
36  *
37  *	You may copy and distribute verbatim copies of hc source code
38  *	files, table file(s), and documentation files as you receive it
39  *	for non-commercial purposes.
40  *
41  *	If you wish to incorporate parts of hc into other programs,
42  *	write to the authors.  We have not yet worked out a simple rule
43  *	that can be stated here, but we will often permit this.
44  *
45  *	This software is provided "as is" without warranty of any kind,
46  *	either expressed or implied, including, but not limited to,
47  *	the implied warranty of fitness for a particular purpose.
48  *
49  * DISCLAIMER
50  *
51  *	This software has no connection with our employers.
52  *
53  */
54 
55 #include <stdio.h>
56 #include <ctype.h>
57 #include <stdlib.h>
58 #include "hclib.h"
59 
/* Inclusive code ranges handled for each encoding, and table lengths. */
#define GBfirst   0xA1A1   /* lowest GB code */
#define GBlast    0xFEFE   /* highest GB code */
#define GBsize    0x5E5E   /* GB table length; at least GBlast - GBfirst + 1 */
#define BIGfirst  0xA140   /* lowest BIG5 code */
#define BIGlast   0xF9FE   /* highest BIG5 code */
#define BIGsize   0x58BF   /* BIG5 table length: BIGlast - BIGfirst + 1 */

/* Codes of the "empty box" glyph in each encoding. */
#define GBbox     0xA1F5
#define BIGbox    0xA1BC

/* Capacity of the per-entry line buffer and of conversion result arrays. */
#define BUFSIZE   256

/* Combine the low 8 bits of hi and lo into one 16-bit double-byte code. */
#define DB(hi,lo)      ((((hi) & 0xFF) << 8) | ((lo) & 0xFF))

/* Non-zero when x lies inside the inclusive GB / BIG5 code range. */
#define inGBrange(x)   ((GBfirst <= (x)) && ((x) <= GBlast))
#define inBIGrange(x)  ((BIGfirst <= (x)) && ((x) <= BIGlast))
75 
76 /* Code mapping tables. */
77 static u_int16 BtoG[BIGsize], GtoB[GBsize];
78 
79 /* Arrays to store multiple mapping codes.  */
80 static u_int16 *mBtoG[BIGsize], *mGtoB[GBsize];
81 
82 static u_int16 b5_default_code = BIGbox;
83 static u_int16 gb_default_code = GBbox;
84 
hc_set_default_code(mode,code)85 u_int16 hc_set_default_code(mode, code)
86     int mode;
87     u_int16 code;
88 {
89     u_int16 result;
90 
91     if (mode==HC_GBtoBIG)
92     {
93 	result = gb_default_code;
94 	gb_default_code = code;
95     }
96     else if (mode==HC_BIGtoGB)
97     {
98 	result = b5_default_code;
99 	b5_default_code = code;
100     }
101     return result;
102 }
103 
hc_clear_tabs()104 void hc_clear_tabs()
105 {
106     register int i;
107 
108     for (i=0; i<BIGsize; i++)
109     {
110 	mBtoG[i] = NULL;
111 	BtoG[i] = 0;
112     }
113     for (i=0; i<GBsize; i++)
114     {
115 	mGtoB[i] = NULL;
116 	GtoB[i] = 0;
117     }
118 }
119 
120 
hc_clear_tab_entry(mode,code)121 void hc_clear_tab_entry(mode, code)
122     int mode;
123     u_int16 code;
124 {
125     int i;
126 
127     if (mode==HC_GBtoBIG)
128     {
129 	i = code - GBfirst;
130 	if (mGtoB[i])
131 	    free(mGtoB[i]);
132 	mGtoB[i] = NULL;
133 	GtoB[i] = 0;
134     }
135     else if (mode==HC_BIGtoGB)
136     {
137 	i = code - BIGfirst;
138 	if (mBtoG[i])
139 	    free(mBtoG[i]);
140 	mBtoG[i] = NULL;
141 	BtoG[i] = 0;
142     }
143 }
144 
145 
146 /* Add a code to the table or the multiple mapping table. */
add(a1,am,i,code)147 static void add(a1, am, i, code)
148     u_int16 *a1, **am, i, code;
149 {
150     int n = 0;
151     u_int16 x;
152 
153     if (!a1[i]) a1[i] = code;		/* no code, just add it */
154     else if (a1[i]==code) return;	/* already there, return */
155     else if (am[i])			/* already has multiple mappings */
156     {
157 	/* Check multiple mapping list, if there, return. */
158 	while ((x = am[i][n]))
159 	{
160 	    if (x==code) return;
161 	    else n++;
162 	}
163 
164 	/* Append to multiple mapping list, expand the array.
165 	   After the above check, n now contains the number of mappings
166 	   in the array, not counting the terminating zero.
167 	   Needs two extra spaces, one for the terminating 0. */
168 	am[i] = (u_int16 *) realloc(am[i], sizeof(u_int16) * (n+2));
169 	am[i][n] = code;
170 	am[i][n + 1] = 0;
171     }
172     else
173     {
174 	/* First multiple mapping, allocate new list.
175 	   Needs two spaces, one for the terminating 0. */
176 	am[i] = (u_int16 *) malloc(2 * sizeof(u_int16));
177 	am[i][0] = code;
178 	am[i][1] = 0;
179     }
180 }
181 
182 
183 /* Process the mapping entry line.  */
do_line(lcnt,buffer)184 static int do_line (lcnt, buffer)
185     long lcnt;
186     char *buffer;
187 {
188     int c1 = buffer[0], c2 = buffer[1];
189     int i = 2, total = 0;
190     u_int16 gb_code = DB(c1,c2), big_code;
191 
192     if (!inGBrange(gb_code))
193     {
194 	fprintf(stderr, "Invalid GB code in line %ld\n", lcnt);
195 	return(0);
196     }
197     while ((c1 = buffer[i++]))
198     {
199 	c2 = buffer[i++];
200 	if (!(c1&&c2)) break;
201 	big_code = DB(c1,c2);
202 	if (!inBIGrange(big_code))
203 	{
204 	    fprintf(stderr, "Invalid BIG5 code in line %ld\n", lcnt);
205 	    return(0);
206 	}
207 	add(GtoB, mGtoB, gb_code - GBfirst, big_code);
208 	add(BtoG, mBtoG, big_code - BIGfirst, gb_code);
209 	total++;
210     }
211     return(total);
212 }
213 
214 
hc_readtab(fn)215 long  hc_readtab(fn)
216 char *fn;
217 {
218     static char buffer[BUFSIZE];
219 
220     long total = 0;
221     long lcnt = 0;
222     FILE *fp = fopen(fn,"r");
223 
224     if (!fp)
225     {
226 	fprintf(stderr, "can't open table file: %s\n", fn);
227 	return(-1);
228     }
229 
230     for (;;)
231     {
232 	if (!fgets(buffer, BUFSIZE, fp)) break;
233 	if (HC_ISFIRSTBYTE(buffer[0]))
234 	    total += do_line(lcnt, buffer);
235 	lcnt++;
236     }
237     fclose(fp);
238     return(total);
239 }
240 
hc_add_tab_entry(mode,code,mapping)241 void hc_add_tab_entry(mode, code, mapping)
242     int mode;
243     u_int16 code, mapping;
244 {
245     if (mode==HC_GBtoBIG)
246 	add(GtoB, mGtoB, code - GBfirst, mapping);
247     else if (mode==HC_BIGtoGB)
248 	add(BtoG, mBtoG, code - BIGfirst, mapping);
249 }
250 
251 /*
252   Look up the code in the single/multiple mapping table for index i,
253   and put the result in the result array of size n.
254 */
cvrt(a1,am,i,result,n)255 static int cvrt(a1, am, i, result, n)
256     u_int16 *a1, **am, i, *result;
257 {
258     int k = 0;
259     u_int16 x, codeDes = a1[i];
260 
261     if (codeDes == 0) return(0);
262     result[0] = codeDes;
263     if (am[i])
264 	while ((x = am[i][k]))
265 	{
266 	    if (k>=n) break;
267 	    result[++k] = x;
268 	}
269     return k + 1;
270 }
271 
272 
hc_convert(mode,codeSrc,result,n)273 int hc_convert(mode, codeSrc, result, n)
274     int mode;
275     u_int16 codeSrc, *result;
276     int n;
277 {
278     if (n<=0) return -2;
279      if (mode == HC_GBtoBIG)
280     {
281 	result[0] = b5_default_code;
282 	if (inGBrange(codeSrc))
283 	    return cvrt(GtoB, mGtoB, codeSrc - GBfirst, result, n);
284     }
285     else if (mode == HC_BIGtoGB)
286     {
287 	result[0] = gb_default_code;
288 	if (inBIGrange(codeSrc))
289 	    return cvrt(BtoG, mBtoG, codeSrc - BIGfirst, result, n);
290     }
291     return(-1);		/* unconverted due to error */
292 }
293 
294 
hc_convert_fp(ifp,ofp,mode,do_mult)295 int hc_convert_fp(ifp, ofp, mode, do_mult)
296     FILE *ifp, *ofp;
297     int mode, do_mult;
298 {
299     int c1, c2, n;
300     long unconverted = 0;
301     u_int16 result[BUFSIZE];
302 
303     while ((c1=fgetc(ifp))!=EOF)
304     {
305 	if (!HC_ISFIRSTBYTE(c1)) fputc(c1, ofp);
306 	else
307 	{
308 	    c2 = fgetc(ifp);
309 	    if ((n=hc_convert(mode, DB(c1, c2), result, BUFSIZE))<=0)
310 		++unconverted;
311 	    if ((n<=1) || (do_mult==HC_DO_SINGLE) ||
312 		((do_mult==HC_DO_ALL_BUT_SYMBOLS) &&
313 		 (((mode == HC_GBtoBIG) && (HC_IS_GB_SYMBOL(DB(c1,c2)))) ||
314 		  ((mode == HC_BIGtoGB) && (HC_IS_BIG_SYMBOL(DB(c1,c2)))))))
315 	    {
316 		fputc(HC_HB(result[0]), ofp);
317 		fputc(HC_LB(result[0]), ofp);
318 	    }
319 	    else
320 	    {
321 		fprintf(ofp, "<<");
322 		for (c1=0; c1<n; c1++)
323 		{
324 		    fputc(HC_HB(result[c1]), ofp);
325 		    fputc(HC_LB(result[c1]), ofp);
326 		}
327 		fprintf(ofp, ">>");
328 	    }
329 	}
330     }
331     return(unconverted);
332 }
333 
334 
hc_convert1(mode,code)335 u_int16 hc_convert1(mode, code)
336     int mode;
337     u_int16 code;
338 {
339     u_int16 result[BUFSIZE];
340 
341     hc_convert(mode, code, result, BUFSIZE);
342     return(result[0]);
343 }
344 
345 
346