1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include "cns11643_big5.h"	/* CNS 11643 to Big-5 mapping table */
31 
#define	MSB	0x80	/* most significant bit; clear on ASCII bytes */
#define	MBYTE	0x8e	/* lead byte introducing a 4-byte character */
#define	PMASK	0xa0	/* subtracted from the plane byte to get the plane number */
#define ONEBYTE	0xff	/* mask keeping the rightmost (low-order) byte */
#define MSB_OFF	0x7f	/* mask that turns the MSB off */

#define NON_ID_CHAR '_'	/* non-identified character: output when no mapping exists */
39 
/* Conversion state carried between calls; allocated by _icv_open(). */
typedef struct _icv_state {
	char	keepc[4];	/* bytes of the CNS 11643 character being read (at most 4) */
	short	cstate;		/* state machine id (C0 .. C3) */
	int	_errno;		/* internal errno: error hit by the current conversion call, 0 if none */
} _iconv_st;
45 
/* State ids: in state Cn, n bytes of the current character are held in keepc. */
enum _CSTATE	{ C0, C1, C2, C3 };
47 
48 
/* Forward declarations of the file-local helpers. */
static int get_plane_no_by_char(const char);
static int cns_to_big5(int, char[], char*, size_t);
static int binsearch(unsigned long, table_t[], int);
52 
53 
54 /*
55  * Open; called from iconv_open()
56  */
57 void *
_icv_open()58 _icv_open()
59 {
60 	_iconv_st *st;
61 
62 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
63 		errno = ENOMEM;
64 		return ((void *) -1);
65 	}
66 
67 	st->cstate = C0;
68 	st->_errno = 0;
69 
70 #ifdef DEBUG
71     fprintf(stderr, "==========     iconv(): CNS11643 --> Big-5     ==========\n");
72 #endif
73 
74 	return ((void *) st);
75 }
76 
77 
78 /*
79  * Close; called from iconv_close()
80  */
81 void
_icv_close(_iconv_st * st)82 _icv_close(_iconv_st *st)
83 {
84 	if (!st)
85 		errno = EBADF;
86 	else
87 		free(st);
88 }
89 
90 
91 /*
92  * Actual conversion; called from iconv()
93  */
94 /*=======================================================
95  *
96  *   State Machine for interpreting CNS 11643 code
97  *
98  *=======================================================
99  *
100  *                          plane 2 - 16
101  *                1st C         2nd C       3rd C
102  *    +------> C0 -----> C1 -----------> C2 -----> C3
103  *    |  ascii |  plane 1 |                   4th C |
104  *    ^        v  2nd C   v                         v
105  *    +----<---+-----<----+-------<---------<-------+
106  *
107  *=======================================================*/
108 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)109 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
110 				char **outbuf, size_t *outbytesleft)
111 {
112 	int		plane_no = -1, n;
113 
114 	if (st == NULL) {
115 		errno = EBADF;
116 		return ((size_t) -1);
117 	}
118 
119 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
120 		st->cstate = C0;
121 		st->_errno = 0;
122 		return ((size_t) 0);
123 	}
124 
125 #ifdef DEBUG
126     fprintf(stderr, "=== (Re-entry)   iconv(): CNS 11643 --> Big-5 ===\n");
127 #endif
128 	st->_errno = 0;         /* reset internal errno */
129 	errno = 0;		/* reset external errno */
130 
131 	/* a state machine for interpreting CNS 11643 code */
132 	while (*inbytesleft > 0 && *outbytesleft > 0) {
133 		switch (st->cstate) {
134 		case C0:		/* assuming ASCII in the beginning */
135 			if (**inbuf & MSB) {
136 				st->keepc[0] = (**inbuf);
137 				st->cstate = C1;
138 			} else {	/* real ASCII */
139 				**outbuf = **inbuf;
140 				(*outbuf)++;
141 				(*outbytesleft)--;
142 			}
143 			break;
144 		case C1:		/* Chinese characters: 2nd byte */
145 			if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
146 				plane_no = get_plane_no_by_char(**inbuf);
147 				if (plane_no == -1) {	/* illegal plane */
148 					st->_errno = errno = EILSEQ;
149 				} else {	/* 4-byte Chinese character */
150 					st->keepc[1] = (**inbuf);
151 					st->cstate = C2;
152 				}
153 			} else {	/* 2-byte Chinese character - plane #1 */
154 				if (**inbuf & MSB) {	/* plane #1 */
155 					st->keepc[1] = (**inbuf);
156 					st->keepc[2] = st->keepc[3] = '\0';
157 					n = cns_to_big5(1, st->keepc, *outbuf,
158 							*outbytesleft);
159 					if (n > 0) {
160 						(*outbuf) += n;
161 						(*outbytesleft) -= n;
162 
163 						st->cstate = C0;
164 					} else {	/* don't reset state */
165 						st->_errno = errno = E2BIG;
166 					}
167 				} else {	/* input char doesn't belong
168 						 * to the input code set
169 						 */
170 					st->_errno = errno = EILSEQ;
171 				}
172 			}
173 			break;
174 		case C2:	/* plane #2 - #16 (4 bytes): get 3nd byte */
175 			if (**inbuf & MSB) {	/* 3rd byte */
176 				st->keepc[2] = (**inbuf);
177 				st->cstate = C3;
178 			} else {
179 				st->_errno = errno = EILSEQ;
180 			}
181 			break;
182 		case C3:	/* plane #2 - #16 (4 bytes): get 4th byte */
183 			if (**inbuf & MSB) {	/* 4th byte */
184 				st->keepc[3] = (**inbuf);
185 				n = cns_to_big5(plane_no, st->keepc, *outbuf,
186 						*outbytesleft );
187 				if (n > 0) {
188 					(*outbuf) += n;
189 					(*outbytesleft) -= n;
190 
191 					st->cstate = C0;	/* reset state */
192 				} else {	/* don't reset state */
193 					st->_errno = errno = E2BIG;
194 				}
195 			} else {
196 				st->_errno = errno = EILSEQ;
197 			}
198 			break;
199 		default:			/* should never come here */
200 			st->_errno = errno = EILSEQ;
201 			st->cstate = C0;	/* reset state */
202 			break;
203 		}
204 
205 		if (st->_errno) {
206 #ifdef DEBUG
207     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
208 		st->_errno, st->cstate);
209 #endif
210 			break;
211 		}
212 
213 		(*inbuf)++;
214 		(*inbytesleft)--;
215 	}
216 
217         if (errno) return ((size_t) -1);
218 
219         if (*inbytesleft == 0 && st->cstate != C0) {
220                 errno = EINVAL;
221                 return ((size_t) -1);
222         }
223 
224 	if (*inbytesleft > 0 && *outbytesleft == 0) {
225 		errno = E2BIG;
226 		return((size_t)-1);
227 	}
228 	return (*inbytesleft);
229 }
230 
231 
232 /*
233  * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
234  * Returns -1 on error conditions
235  */
get_plane_no_by_char(const char inbuf)236 static int get_plane_no_by_char(const char inbuf)
237 {
238 	int ret;
239 	unsigned char uc = (unsigned char) inbuf;
240 
241 	ret = uc - PMASK;
242 	switch (ret) {
243 	case 1:		/* 0x8EA1 */
244 	case 2:		/* 0x8EA2 */
245 	case 3:		/* 0x8EA3 */
246 	case 4:		/* 0x8EA4 */
247 	case 5:		/* 0x8EA5 */
248 	case 6:		/* 0x8EA6 */
249 	case 7:		/* 0x8EA7 */
250 	case 12:	/* 0x8EAC */
251 	case 14:	/* 0x8EAE */
252 	case 15:	/* 0x8EAF */
253 	case 16:	/* 0x8EB0 */
254 		return (ret);
255 	default:
256 		return (-1);
257 	}
258 }
259 
260 
261 /*
262  * CNS 11643 code --> Big-5
263  * Return: > 0 - converted with enough space in output buffer
264  *         = 0 - no space in outbuf
265  */
cns_to_big5(int plane_no,char keepc[],char * buf,size_t buflen)266 static int cns_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
267 {
268 	char		cns_str[3];
269 	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
270 	int		unidx;		/* binary search index */
271 	unsigned long	big5_val, val;	/* Big-5 code */
272 
273 #ifdef DEBUG
274     fprintf(stderr, "%s %d ", keepc, plane_no);
275 #endif
276 	if (buflen < 2) {
277 		errno = E2BIG;
278 		return(0);
279 	}
280 
281 	if (plane_no == 1) {
282 		cns_str[0] = keepc[0] & MSB_OFF;
283 		cns_str[1] = keepc[1] & MSB_OFF;
284 	} else {
285 		cns_str[0] = keepc[2] & MSB_OFF;
286 		cns_str[1] = keepc[3] & MSB_OFF;
287 	}
288 	cns_val = (cns_str[0] << 8) + cns_str[1];
289 #ifdef DEBUG
290     fprintf(stderr, "%x\t", cns_val);
291 #endif
292 
293 	switch (plane_no) {
294 	case 1:
295 		unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
296 		if (unidx >= 0)
297 			big5_val = cns_big5_tab1[unidx].value;
298 		break;
299 	case 2:
300 		unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
301 		if (unidx >= 0)
302 			big5_val = cns_big5_tab2[unidx].value;
303 		break;
304 	case 3:
305 		unidx = binsearch(cns_val, cns_big5_tab3, MAX_CNS3_NUM);
306 		if (unidx >= 0)
307 			big5_val = cns_big5_tab3[unidx].value;
308 		break;
309 	default:
310 		unidx = -1;	/* no mapping from CNS to Big-5 */
311 		break;
312 	}
313 
314 #ifdef DEBUG
315     fprintf(stderr, "unidx = %d, value = %x\t", unidx, big5_val);
316 #endif
317 
318 	if (unidx < 0) {	/* no match from CNS to Big-5 */
319 		*buf = *(buf+1) = NON_ID_CHAR;
320 	} else {
321 		val = big5_val & 0xffff;
322 		*buf = (char) ((val & 0xff00) >> 8);
323 		*(buf+1) = (char) (val & 0xff);
324 	}
325 
326 #ifdef DEBUG
327     fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
328 #endif
329 
330 	return(2);
331 }
332 
333 
334 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)335 static int binsearch(unsigned long x, table_t v[], int n)
336 {
337 	int low, high, mid;
338 
339 	low = 0;
340 	high = n - 1;
341 	while (low <= high) {
342 		mid = (low + high) / 2;
343 		if (x < v[mid].key)
344 			high = mid - 1;
345 		else if (x > v[mid].key)
346 			low = mid + 1;
347 		else	/* found match */
348 			return mid;
349 	}
350 	return (-1);	/* no match */
351 }
352