1 /*
2 * NAME:
3 * hclib.c - Hanzi Converter Version 3.0 library implementation file
4 * Copyright (C) 1988,1989,1990,1993 by Fung F. Lee & Ricky Yeung
5 *
6 * DESCRIPTION:
7 * hc converts a GB file to a BIG-5 file, or a BIG-5 file to a GB file.
8 * GB (GuoBiao) refers to the standard implementation of GB2312-80
9 * of Mainland China, in which the two bytes representing a GB code
10 * have their most significant bit set to 1. BIG-5 refers to the Big
11 * Five standard published in 1984 by Taiwan's Institute for Information
12 * Industry. Currently, most popular Chinese systems use either
13 * GB or BIG-5.
14 *
15 * API:
16 * See hclib.h.
17 *
18 * FORMAT OF THE CONVERSION TABLE FILE
19 * The table file contains newline-terminated mapping entries.
20 * Each entry is a two-byte GB code followed by a list of two-byte
21 * BIG5 codes. Each entry line cannot exceed BUFSIZE characters.
22 * See hc.tab. Users may build their own separate table files.
23 *
24 * AUTHORS:
25 * Ricky Yeung (Ricky.Yeung@eng.sun.com)
26 * Fung F. Lee (lee@umunhum.stanford.edu)
27 *
28 * ACKNOWLEDGEMENT:
29 * Thanks to Mr. Edmund Lai (lai@apple.com) for providing most of
30 * the mapping data for the less-frequently-used hanzi of GB.
31 *
32 * DISTRIBUTION:
33 *
34 * This program and the table file are NOT in the public domain.
35 * All Rights Reserved.
36 *
37 * You may copy and distribute verbatim copies of hc source code
38 * files, table file(s), and documentation files as you receive it
39 * for non-commercial purposes.
40 *
41 * If you wish to incorporate parts of hc into other programs,
42 * write to the authors. We have not yet worked out a simple rule
43 * that can be stated here, but we will often permit this.
44 *
45 * This software is provided "as is" without warranty of any kind,
46 * either expressed or implied, including, but not limited to,
47 * the implied warranty of fitness for a particular purpose.
48 *
49 * DISCLAIMER
50 *
51 * This software has no connection with our employers.
52 *
53 */
54
55 #include <stdio.h>
56 #include <ctype.h>
57 #include <stdlib.h>
58 #include "hclib.h"
59
/*
 * Code-space bounds.  A double-byte code is handled as one 16-bit value
 * with the first byte in the high half (see DB below).
 */
#define GBfirst   0xA1A1  /* first code of GB */
#define GBlast    0xFEFE  /* last code of GB */
/*
 * Number of slots in the GB-indexed tables.  GBlast - GBfirst + 1 is
 * actually 0x5D5E; the historical value 0x5E5E is a little larger than
 * needed and is kept as is so that the tables never shrink.
 */
#define GBsize    0x5E5E
#define BIGfirst  0xA140  /* first code of BIG */
#define BIGlast   0xF9FE  /* last code of BIG */
#define BIGsize   0x58BF  /* BIGlast - BIGfirst + 1 */

#define GBbox     0xA1F5  /* GB code for the empty box symbol */
#define BIGbox    0xA1BC  /* BIG code for the empty box symbol */

#define BUFSIZE   256     /* Buffer size for each table entry. */

/*
 * DB(hi,lo) packs two bytes into one 16-bit code.  Both arguments are
 * masked to 8 bits, so sign-extended chars and EOF (-1) cannot leak
 * bits into the other half.  Every sub-expression is parenthesized
 * explicitly instead of relying on the precedence of <<, & and |.
 */
#define DB(hi,lo)      ((((hi) & 0xFF) << 8) | ((lo) & 0xFF))

/*
 * Plain numeric range tests on a 16-bit code.  The argument is evaluated
 * twice, so it must not have side effects.
 */
#define inGBrange(x)   (((x) >= GBfirst) && ((x) <= GBlast))
#define inBIGrange(x)  (((x) >= BIGfirst) && ((x) <= BIGlast))
75
76 /* Code mapping tables. */
77 static u_int16 BtoG[BIGsize], GtoB[GBsize];
78
79 /* Arrays to store multiple mapping codes. */
80 static u_int16 *mBtoG[BIGsize], *mGtoB[GBsize];
81
82 static u_int16 b5_default_code = BIGbox;
83 static u_int16 gb_default_code = GBbox;
84
hc_set_default_code(mode,code)85 u_int16 hc_set_default_code(mode, code)
86 int mode;
87 u_int16 code;
88 {
89 u_int16 result;
90
91 if (mode==HC_GBtoBIG)
92 {
93 result = gb_default_code;
94 gb_default_code = code;
95 }
96 else if (mode==HC_BIGtoGB)
97 {
98 result = b5_default_code;
99 b5_default_code = code;
100 }
101 return result;
102 }
103
hc_clear_tabs()104 void hc_clear_tabs()
105 {
106 register int i;
107
108 for (i=0; i<BIGsize; i++)
109 {
110 mBtoG[i] = NULL;
111 BtoG[i] = 0;
112 }
113 for (i=0; i<GBsize; i++)
114 {
115 mGtoB[i] = NULL;
116 GtoB[i] = 0;
117 }
118 }
119
120
hc_clear_tab_entry(mode,code)121 void hc_clear_tab_entry(mode, code)
122 int mode;
123 u_int16 code;
124 {
125 int i;
126
127 if (mode==HC_GBtoBIG)
128 {
129 i = code - GBfirst;
130 if (mGtoB[i])
131 free(mGtoB[i]);
132 mGtoB[i] = NULL;
133 GtoB[i] = 0;
134 }
135 else if (mode==HC_BIGtoGB)
136 {
137 i = code - BIGfirst;
138 if (mBtoG[i])
139 free(mBtoG[i]);
140 mBtoG[i] = NULL;
141 BtoG[i] = 0;
142 }
143 }
144
145
146 /* Add a code to the table or the multiple mapping table. */
add(a1,am,i,code)147 static void add(a1, am, i, code)
148 u_int16 *a1, **am, i, code;
149 {
150 int n = 0;
151 u_int16 x;
152
153 if (!a1[i]) a1[i] = code; /* no code, just add it */
154 else if (a1[i]==code) return; /* already there, return */
155 else if (am[i]) /* already has multiple mappings */
156 {
157 /* Check multiple mapping list, if there, return. */
158 while ((x = am[i][n]))
159 {
160 if (x==code) return;
161 else n++;
162 }
163
164 /* Append to multiple mapping list, expand the array.
165 After the above check, n now contains the number of mappings
166 in the array, not counting the terminating zero.
167 Needs two extra spaces, one for the terminating 0. */
168 am[i] = (u_int16 *) realloc(am[i], sizeof(u_int16) * (n+2));
169 am[i][n] = code;
170 am[i][n + 1] = 0;
171 }
172 else
173 {
174 /* First multiple mapping, allocate new list.
175 Needs two spaces, one for the terminating 0. */
176 am[i] = (u_int16 *) malloc(2 * sizeof(u_int16));
177 am[i][0] = code;
178 am[i][1] = 0;
179 }
180 }
181
182
183 /* Process the mapping entry line. */
do_line(lcnt,buffer)184 static int do_line (lcnt, buffer)
185 long lcnt;
186 char *buffer;
187 {
188 int c1 = buffer[0], c2 = buffer[1];
189 int i = 2, total = 0;
190 u_int16 gb_code = DB(c1,c2), big_code;
191
192 if (!inGBrange(gb_code))
193 {
194 fprintf(stderr, "Invalid GB code in line %ld\n", lcnt);
195 return(0);
196 }
197 while ((c1 = buffer[i++]))
198 {
199 c2 = buffer[i++];
200 if (!(c1&&c2)) break;
201 big_code = DB(c1,c2);
202 if (!inBIGrange(big_code))
203 {
204 fprintf(stderr, "Invalid BIG5 code in line %ld\n", lcnt);
205 return(0);
206 }
207 add(GtoB, mGtoB, gb_code - GBfirst, big_code);
208 add(BtoG, mBtoG, big_code - BIGfirst, gb_code);
209 total++;
210 }
211 return(total);
212 }
213
214
hc_readtab(fn)215 long hc_readtab(fn)
216 char *fn;
217 {
218 static char buffer[BUFSIZE];
219
220 long total = 0;
221 long lcnt = 0;
222 FILE *fp = fopen(fn,"r");
223
224 if (!fp)
225 {
226 fprintf(stderr, "can't open table file: %s\n", fn);
227 return(-1);
228 }
229
230 for (;;)
231 {
232 if (!fgets(buffer, BUFSIZE, fp)) break;
233 if (HC_ISFIRSTBYTE(buffer[0]))
234 total += do_line(lcnt, buffer);
235 lcnt++;
236 }
237 fclose(fp);
238 return(total);
239 }
240
hc_add_tab_entry(mode,code,mapping)241 void hc_add_tab_entry(mode, code, mapping)
242 int mode;
243 u_int16 code, mapping;
244 {
245 if (mode==HC_GBtoBIG)
246 add(GtoB, mGtoB, code - GBfirst, mapping);
247 else if (mode==HC_BIGtoGB)
248 add(BtoG, mBtoG, code - BIGfirst, mapping);
249 }
250
251 /*
252 Look up the code in the single/multiple mapping table for index i,
253 and put the result in the result array of size n.
254 */
cvrt(a1,am,i,result,n)255 static int cvrt(a1, am, i, result, n)
256 u_int16 *a1, **am, i, *result;
257 {
258 int k = 0;
259 u_int16 x, codeDes = a1[i];
260
261 if (codeDes == 0) return(0);
262 result[0] = codeDes;
263 if (am[i])
264 while ((x = am[i][k]))
265 {
266 if (k>=n) break;
267 result[++k] = x;
268 }
269 return k + 1;
270 }
271
272
hc_convert(mode,codeSrc,result,n)273 int hc_convert(mode, codeSrc, result, n)
274 int mode;
275 u_int16 codeSrc, *result;
276 int n;
277 {
278 if (n<=0) return -2;
279 if (mode == HC_GBtoBIG)
280 {
281 result[0] = b5_default_code;
282 if (inGBrange(codeSrc))
283 return cvrt(GtoB, mGtoB, codeSrc - GBfirst, result, n);
284 }
285 else if (mode == HC_BIGtoGB)
286 {
287 result[0] = gb_default_code;
288 if (inBIGrange(codeSrc))
289 return cvrt(BtoG, mBtoG, codeSrc - BIGfirst, result, n);
290 }
291 return(-1); /* unconverted due to error */
292 }
293
294
hc_convert_fp(ifp,ofp,mode,do_mult)295 int hc_convert_fp(ifp, ofp, mode, do_mult)
296 FILE *ifp, *ofp;
297 int mode, do_mult;
298 {
299 int c1, c2, n;
300 long unconverted = 0;
301 u_int16 result[BUFSIZE];
302
303 while ((c1=fgetc(ifp))!=EOF)
304 {
305 if (!HC_ISFIRSTBYTE(c1)) fputc(c1, ofp);
306 else
307 {
308 c2 = fgetc(ifp);
309 if ((n=hc_convert(mode, DB(c1, c2), result, BUFSIZE))<=0)
310 ++unconverted;
311 if ((n<=1) || (do_mult==HC_DO_SINGLE) ||
312 ((do_mult==HC_DO_ALL_BUT_SYMBOLS) &&
313 (((mode == HC_GBtoBIG) && (HC_IS_GB_SYMBOL(DB(c1,c2)))) ||
314 ((mode == HC_BIGtoGB) && (HC_IS_BIG_SYMBOL(DB(c1,c2)))))))
315 {
316 fputc(HC_HB(result[0]), ofp);
317 fputc(HC_LB(result[0]), ofp);
318 }
319 else
320 {
321 fprintf(ofp, "<<");
322 for (c1=0; c1<n; c1++)
323 {
324 fputc(HC_HB(result[c1]), ofp);
325 fputc(HC_LB(result[c1]), ofp);
326 }
327 fprintf(ofp, ">>");
328 }
329 }
330 }
331 return(unconverted);
332 }
333
334
hc_convert1(mode,code)335 u_int16 hc_convert1(mode, code)
336 int mode;
337 u_int16 code;
338 {
339 u_int16 result[BUFSIZE];
340
341 hc_convert(mode, code, result, BUFSIZE);
342 return(result[0]);
343 }
344
345
346