/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*16d86563SAlexander Pyhalov 
/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26*16d86563SAlexander Pyhalov 
27*16d86563SAlexander Pyhalov #include <stdio.h>
28*16d86563SAlexander Pyhalov #include <stdlib.h>
29*16d86563SAlexander Pyhalov #include <sys/types.h>
30*16d86563SAlexander Pyhalov #include <sys/isa_defs.h>
31*16d86563SAlexander Pyhalov #include <errno.h>
32*16d86563SAlexander Pyhalov #include "common_defs.h"
33*16d86563SAlexander Pyhalov #include "big5_unicode.h"	/* Big-5 to Unicode mapping table */
34*16d86563SAlexander Pyhalov 
/* Byte-level masks and markers. */
#define	MSB	0x80	/* high bit of a byte; set on every non-ASCII input byte */
#define	MBYTE	0x8e	/* multi-byte (4 byte character) marker */
#define	PMASK	0xa0	/* plane number mask */
#define	ONEBYTE	0xff	/* keeps only the low-order byte of a value */

/*
 * UTF-8 byte sequence (EF BF BD) written in place of a Big-5 code
 * that cannot be converted.
 */
#define	UTF8_NON_ID_CHAR1	0xEF
#define	UTF8_NON_ID_CHAR2	0xBF
#define	UTF8_NON_ID_CHAR3	0xBD
44*16d86563SAlexander Pyhalov 
45*16d86563SAlexander Pyhalov 
46*16d86563SAlexander Pyhalov typedef struct  _icv_state {
47*16d86563SAlexander Pyhalov 	char	keepc[2];	/* maximum # byte of Big-5 code */
48*16d86563SAlexander Pyhalov 	short	cstate;		/* state machine id */
49*16d86563SAlexander Pyhalov 	int	_errno;		/* internal errno */
50*16d86563SAlexander Pyhalov         boolean little_endian;
51*16d86563SAlexander Pyhalov         boolean bom_written;
52*16d86563SAlexander Pyhalov }_iconv_st;
53*16d86563SAlexander Pyhalov 
/*
 * Input state machine ids: C0 = at a character boundary,
 * C1 = lead byte stored, trail byte expected next.
 */
enum _CSTATE {
	C0,
	C1
};
55*16d86563SAlexander Pyhalov 
56*16d86563SAlexander Pyhalov static int big5_2nd_byte(char);
57*16d86563SAlexander Pyhalov static int big5_to_utf8(_iconv_st *, char*, size_t, int *);
58*16d86563SAlexander Pyhalov static int binsearch(unsigned long, big5_utf[], int);
59*16d86563SAlexander Pyhalov 
60*16d86563SAlexander Pyhalov 
61*16d86563SAlexander Pyhalov /*
62*16d86563SAlexander Pyhalov  * Open; called from iconv_open()
63*16d86563SAlexander Pyhalov  */
64*16d86563SAlexander Pyhalov void *
_icv_open()65*16d86563SAlexander Pyhalov _icv_open()
66*16d86563SAlexander Pyhalov {
67*16d86563SAlexander Pyhalov 	_iconv_st *st;
68*16d86563SAlexander Pyhalov 
69*16d86563SAlexander Pyhalov 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
70*16d86563SAlexander Pyhalov 		errno = ENOMEM;
71*16d86563SAlexander Pyhalov 		return ((void *) -1);
72*16d86563SAlexander Pyhalov 	}
73*16d86563SAlexander Pyhalov 
74*16d86563SAlexander Pyhalov 	st->cstate = C0;
75*16d86563SAlexander Pyhalov 	st->_errno = 0;
76*16d86563SAlexander Pyhalov 	st->little_endian = false;
77*16d86563SAlexander Pyhalov 	st->bom_written = false;
78*16d86563SAlexander Pyhalov #if defined(UCS_2LE)
79*16d86563SAlexander Pyhalov 	st->little_endian = true;
80*16d86563SAlexander Pyhalov 	st->bom_written = true;
81*16d86563SAlexander Pyhalov #endif
82*16d86563SAlexander Pyhalov 	return ((void *) st);
83*16d86563SAlexander Pyhalov }
84*16d86563SAlexander Pyhalov 
85*16d86563SAlexander Pyhalov 
86*16d86563SAlexander Pyhalov /*
87*16d86563SAlexander Pyhalov  * Close; called from iconv_close()
88*16d86563SAlexander Pyhalov  */
89*16d86563SAlexander Pyhalov void
_icv_close(_iconv_st * st)90*16d86563SAlexander Pyhalov _icv_close(_iconv_st *st)
91*16d86563SAlexander Pyhalov {
92*16d86563SAlexander Pyhalov 	if (!st)
93*16d86563SAlexander Pyhalov 		errno = EBADF;
94*16d86563SAlexander Pyhalov 	else
95*16d86563SAlexander Pyhalov 		free(st);
96*16d86563SAlexander Pyhalov }
97*16d86563SAlexander Pyhalov 
98*16d86563SAlexander Pyhalov 
99*16d86563SAlexander Pyhalov /*
100*16d86563SAlexander Pyhalov  * Actual conversion; called from iconv()
101*16d86563SAlexander Pyhalov  */
102*16d86563SAlexander Pyhalov /*=======================================================
103*16d86563SAlexander Pyhalov  *
104*16d86563SAlexander Pyhalov  *   State Machine for interpreting Big-5 code
105*16d86563SAlexander Pyhalov  *
106*16d86563SAlexander Pyhalov  *=======================================================
107*16d86563SAlexander Pyhalov  *
108*16d86563SAlexander Pyhalov  *                     1st C
109*16d86563SAlexander Pyhalov  *    +--------> C0 ----------> C1
110*16d86563SAlexander Pyhalov  *    |    ascii |        2nd C |
111*16d86563SAlexander Pyhalov  *    ^          v              v
112*16d86563SAlexander Pyhalov  *    +----<-----+-----<--------+
113*16d86563SAlexander Pyhalov  *
114*16d86563SAlexander Pyhalov  *=======================================================*/
115*16d86563SAlexander Pyhalov /*
116*16d86563SAlexander Pyhalov  * Big-5 encoding range:
117*16d86563SAlexander Pyhalov  *	High byte: 0xA1 - 0xFE			(   94 encoding space)
118*16d86563SAlexander Pyhalov  *	Low byte:  0x40 - 0x7E, 0xA1 - 0xFE	(  157 encoding space)
119*16d86563SAlexander Pyhalov  *	Plane #1:  0xA140 - 0xC8FE		( 6280 encoding space)
120*16d86563SAlexander Pyhalov  *	Plane #2:  0xC940 - 0xFEFE		( 8478 encoding space)
121*16d86563SAlexander Pyhalov  *	Total:	   94 * 157 = 14,758		(14758 encoding space)
122*16d86563SAlexander Pyhalov  */
123*16d86563SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)124*16d86563SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
125*16d86563SAlexander Pyhalov 				char **outbuf, size_t *outbytesleft)
126*16d86563SAlexander Pyhalov {
127*16d86563SAlexander Pyhalov 	int		n;
128*16d86563SAlexander Pyhalov 	int		uconv_num = 0;
129*16d86563SAlexander Pyhalov 
130*16d86563SAlexander Pyhalov #ifdef DEBUG
131*16d86563SAlexander Pyhalov     fprintf(stderr, "==========     iconv(): Big-5 --> UTF2     ==========\n");
132*16d86563SAlexander Pyhalov #endif
133*16d86563SAlexander Pyhalov 	if (st == NULL) {
134*16d86563SAlexander Pyhalov 		errno = EBADF;
135*16d86563SAlexander Pyhalov 		return ((size_t) -1);
136*16d86563SAlexander Pyhalov 	}
137*16d86563SAlexander Pyhalov 
138*16d86563SAlexander Pyhalov 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
139*16d86563SAlexander Pyhalov 		st->cstate = C0;
140*16d86563SAlexander Pyhalov 		st->_errno = 0;
141*16d86563SAlexander Pyhalov 		return ((size_t) 0);
142*16d86563SAlexander Pyhalov 	}
143*16d86563SAlexander Pyhalov 
144*16d86563SAlexander Pyhalov 	st->_errno = 0;         /* reset internal errno */
145*16d86563SAlexander Pyhalov 	errno = 0;		/* reset external errno */
146*16d86563SAlexander Pyhalov 
147*16d86563SAlexander Pyhalov 	/* a state machine for interpreting CNS 11643 code */
148*16d86563SAlexander Pyhalov 	while (*inbytesleft > 0 && *outbytesleft > 0) {
149*16d86563SAlexander Pyhalov 		switch (st->cstate) {
150*16d86563SAlexander Pyhalov 		case C0:		/* assuming ASCII in the beginning */
151*16d86563SAlexander Pyhalov 			if (**inbuf & MSB) {
152*16d86563SAlexander Pyhalov 				st->keepc[0] = (**inbuf);
153*16d86563SAlexander Pyhalov 				st->cstate = C1;
154*16d86563SAlexander Pyhalov 			} else {	/* real ASCII */
155*16d86563SAlexander Pyhalov 			  if (st->little_endian) {
156*16d86563SAlexander Pyhalov 			    if (!st->bom_written) {
157*16d86563SAlexander Pyhalov 			      if (*outbytesleft < 4)
158*16d86563SAlexander Pyhalov 				errno = E2BIG;
159*16d86563SAlexander Pyhalov 			      else {
160*16d86563SAlexander Pyhalov 				*(*outbuf)++ = (uchar_t)0xff;
161*16d86563SAlexander Pyhalov 				*(*outbuf)++ = (uchar_t)0xfe;
162*16d86563SAlexander Pyhalov 				*outbytesleft -= 2;
163*16d86563SAlexander Pyhalov 
164*16d86563SAlexander Pyhalov 				st->bom_written = true;
165*16d86563SAlexander Pyhalov 			      }
166*16d86563SAlexander Pyhalov 			    }
167*16d86563SAlexander Pyhalov 
168*16d86563SAlexander Pyhalov 			    if (*outbytesleft < 2)
169*16d86563SAlexander Pyhalov 			      return E2BIG;
170*16d86563SAlexander Pyhalov 			    else {
171*16d86563SAlexander Pyhalov 			      *(*outbuf)++ = **inbuf;
172*16d86563SAlexander Pyhalov 			      *(*outbuf)++ = (uchar_t)0x0;
173*16d86563SAlexander Pyhalov 			      *outbytesleft -= 2;
174*16d86563SAlexander Pyhalov 			    }
175*16d86563SAlexander Pyhalov 			  } else {
176*16d86563SAlexander Pyhalov 				**outbuf = **inbuf;
177*16d86563SAlexander Pyhalov 				(*outbuf)++;
178*16d86563SAlexander Pyhalov 				(*outbytesleft)--;
179*16d86563SAlexander Pyhalov 			  }
180*16d86563SAlexander Pyhalov 			}
181*16d86563SAlexander Pyhalov 			break;
182*16d86563SAlexander Pyhalov 		case C1:		/* Chinese characters: 2nd byte */
183*16d86563SAlexander Pyhalov 			if (big5_2nd_byte(**inbuf) == 0) {
184*16d86563SAlexander Pyhalov 				int uconv_num_internal = 0;
185*16d86563SAlexander Pyhalov 
186*16d86563SAlexander Pyhalov 				st->keepc[1] = (**inbuf);
187*16d86563SAlexander Pyhalov 				n = big5_to_utf8(st, *outbuf,
188*16d86563SAlexander Pyhalov 						*outbytesleft, &uconv_num_internal);
189*16d86563SAlexander Pyhalov 				if (n > 0) {
190*16d86563SAlexander Pyhalov 					(*outbuf) += n;
191*16d86563SAlexander Pyhalov 					(*outbytesleft) -= n;
192*16d86563SAlexander Pyhalov 
193*16d86563SAlexander Pyhalov 					uconv_num += uconv_num_internal;
194*16d86563SAlexander Pyhalov 
195*16d86563SAlexander Pyhalov 					st->cstate = C0;
196*16d86563SAlexander Pyhalov 				} else {	/* don't reset state */
197*16d86563SAlexander Pyhalov 					st->_errno = errno = E2BIG;
198*16d86563SAlexander Pyhalov 				}
199*16d86563SAlexander Pyhalov 			} else {	/* input char doesn't belong
200*16d86563SAlexander Pyhalov 					 * to the input code set
201*16d86563SAlexander Pyhalov 					 */
202*16d86563SAlexander Pyhalov 				st->_errno = errno = EILSEQ;
203*16d86563SAlexander Pyhalov 			}
204*16d86563SAlexander Pyhalov 			break;
205*16d86563SAlexander Pyhalov 		default:			/* should never come here */
206*16d86563SAlexander Pyhalov 			st->_errno = errno = EILSEQ;
207*16d86563SAlexander Pyhalov 			st->cstate = C0;	/* reset state */
208*16d86563SAlexander Pyhalov 			break;
209*16d86563SAlexander Pyhalov 		}
210*16d86563SAlexander Pyhalov 
211*16d86563SAlexander Pyhalov 		if (st->_errno) {
212*16d86563SAlexander Pyhalov #ifdef DEBUG
213*16d86563SAlexander Pyhalov     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
214*16d86563SAlexander Pyhalov 		st->_errno, st->cstate);
215*16d86563SAlexander Pyhalov #endif
216*16d86563SAlexander Pyhalov 			break;
217*16d86563SAlexander Pyhalov 		}
218*16d86563SAlexander Pyhalov 
219*16d86563SAlexander Pyhalov 		(*inbuf)++;
220*16d86563SAlexander Pyhalov 		(*inbytesleft)--;
221*16d86563SAlexander Pyhalov 	}
222*16d86563SAlexander Pyhalov 
223*16d86563SAlexander Pyhalov         if (*inbytesleft == 0 && st->cstate != C0)
224*16d86563SAlexander Pyhalov                 errno = EINVAL;
225*16d86563SAlexander Pyhalov 
226*16d86563SAlexander Pyhalov 	if (*inbytesleft > 0 && *outbytesleft == 0)
227*16d86563SAlexander Pyhalov 		errno = E2BIG;
228*16d86563SAlexander Pyhalov 
229*16d86563SAlexander Pyhalov 	if (errno) {
230*16d86563SAlexander Pyhalov 		/*
231*16d86563SAlexander Pyhalov 		 * if error, *inbuf points to the byte following the last byte
232*16d86563SAlexander Pyhalov 		 * successfully used in the conversion.
233*16d86563SAlexander Pyhalov 		 */
234*16d86563SAlexander Pyhalov 		*inbuf -= (st->cstate - C0);
235*16d86563SAlexander Pyhalov 		*inbytesleft += (st->cstate - C0);
236*16d86563SAlexander Pyhalov 		st->cstate = C0;
237*16d86563SAlexander Pyhalov 		return ((size_t) -1);
238*16d86563SAlexander Pyhalov 	}
239*16d86563SAlexander Pyhalov 
240*16d86563SAlexander Pyhalov 	return uconv_num;
241*16d86563SAlexander Pyhalov }
242*16d86563SAlexander Pyhalov 
243*16d86563SAlexander Pyhalov 
/*
 * Test whether inbuf is a valid character for 2nd byte Big-5 code,
 * i.e. lies in 0x40 - 0x7E or 0xA1 - 0xFE.
 * Return: = 0 - valid Big-5 2nd byte
 *         = 1 - invalid Big-5 2nd byte
 */
static int
big5_2nd_byte(char inbuf)
{
	/* compare as an unsigned byte so that 0x80 - 0xFF are not negative */
	unsigned char	trail = (unsigned char)inbuf;
	int		in_low_range = (trail >= 0x40 && trail <= 0x7E);
	int		in_high_range = (trail >= 0xA1 && trail <= 0xFE);

	return ((in_low_range || in_high_range) ? 0 : 1);
}
259*16d86563SAlexander Pyhalov 
260*16d86563SAlexander Pyhalov #ifdef UDC_SUPPORT
/* One contiguous Big-5 range treated as user-defined characters (UDC). */
typedef struct _udc_sect {
	unsigned int start;	/* first Big-5 code of the range */
	unsigned int end;	/* last Big-5 code of the range */
	unsigned int count;	/* presumably the number of codes in the range */
} UDC;

/* 0x311 = 785 = 5 lead bytes (0xFA - 0xFE) * 157 trail bytes */
UDC udc[] = {
	{ 0xFA40, 0xFEFE, 0x311 }
};

/* Unicode value given to the first code of the first UDC range */
#define	UDC_START_UNICODE	0xF0000

/*
 * Map a Big-5 code lying in a UDC range onto consecutive Unicode values
 * starting at UDC_START_UNICODE: 157 values per lead byte, with the two
 * trail byte ranges (0x40 - 0x7E, 0xA1 - 0xFE) packed back to back.
 * Only the first entry of the udc table is examined.
 * Return: the Unicode value, or 0 when code is outside the UDC range.
 */
static int
ifUDC(UDC *udc, unsigned int code)
{
	int	sect;

	for (sect = 0; sect < 1; ++sect) {
		unsigned char	lead, trail, first_lead;
		unsigned int	preceding;
		int		col;

		if (code < udc[sect].start || code > udc[sect].end)
			continue;

		lead = (unsigned char)(code >> 8);
		trail = (unsigned char)code;
		first_lead = (unsigned char)(udc[sect].start >> 8);

		/* size of the previous section, none for the first one */
		preceding = sect ? udc[sect - 1].count : 0;

		/* position inside the 157-wide row, skipping 0x7F - 0xA0 */
		if (trail <= 0x7E)
			col = trail - 0x40;
		else
			col = (trail - 0x40) - (0xA1 - 0x7F);

		return UDC_START_UNICODE + preceding +
		    (lead - first_lead) * 157 + col;
	}

	return 0;
}
291*16d86563SAlexander Pyhalov #endif
292*16d86563SAlexander Pyhalov 
293*16d86563SAlexander Pyhalov /*
294*16d86563SAlexander Pyhalov  * Big-5 code --> ISO/IEC 10646 (Unicode)
295*16d86563SAlexander Pyhalov  * Unicode --> UTF8 (FSS-UTF)
296*16d86563SAlexander Pyhalov  *             (File System Safe Universal Character Set Transformation Format)
297*16d86563SAlexander Pyhalov  * Return: > 0 - converted with enough space in output buffer
298*16d86563SAlexander Pyhalov  *         = 0 - no space in outbuf
299*16d86563SAlexander Pyhalov  */
big5_to_utf8(_iconv_st * st,char * buf,size_t buflen,int * uconv_num)300*16d86563SAlexander Pyhalov static int big5_to_utf8(_iconv_st *st, char *buf, size_t buflen, int *uconv_num)
301*16d86563SAlexander Pyhalov {
302*16d86563SAlexander Pyhalov 	unsigned long	big5_val;	/* Big-5 value */
303*16d86563SAlexander Pyhalov 	int		unidx = 0;		/* Unicode index */
304*16d86563SAlexander Pyhalov 	unsigned long	uni_val = 0;	/* Unicode */
305*16d86563SAlexander Pyhalov 	char            *keepc = st->keepc;
306*16d86563SAlexander Pyhalov 
307*16d86563SAlexander Pyhalov 	big5_val = ((keepc[0]&ONEBYTE) << 8) + (keepc[1]&ONEBYTE);
308*16d86563SAlexander Pyhalov #ifdef DEBUG
309*16d86563SAlexander Pyhalov     fprintf(stderr, "%x\t", big5_val);
310*16d86563SAlexander Pyhalov #endif
311*16d86563SAlexander Pyhalov 
312*16d86563SAlexander Pyhalov #ifdef UDC_SUPPORT
313*16d86563SAlexander Pyhalov       if ((uni_val = ifUDC(udc, big5_val)) == 0) {
314*16d86563SAlexander Pyhalov #endif
315*16d86563SAlexander Pyhalov 	unidx = binsearch(big5_val, big5_utf_tab, MAX_BIG5_NUM);
316*16d86563SAlexander Pyhalov 	if (unidx >= 0)
317*16d86563SAlexander Pyhalov 
318*16d86563SAlexander Pyhalov 	   uni_val = big5_utf_tab[unidx].unicode;
319*16d86563SAlexander Pyhalov #ifdef UDC_SUPPORT
320*16d86563SAlexander Pyhalov       }
321*16d86563SAlexander Pyhalov #endif
322*16d86563SAlexander Pyhalov #ifdef DEBUG
323*16d86563SAlexander Pyhalov     fprintf(stderr, "unidx = %d, unicode = %x\t", unidx, uni_val);
324*16d86563SAlexander Pyhalov #endif
325*16d86563SAlexander Pyhalov 
326*16d86563SAlexander Pyhalov         /*
327*16d86563SAlexander Pyhalov 	 * Code conversion for UCS-2LE to support Samba
328*16d86563SAlexander Pyhalov 	 */
329*16d86563SAlexander Pyhalov         if (st->little_endian) {
330*16d86563SAlexander Pyhalov 	  int size = 0;
331*16d86563SAlexander Pyhalov 
332*16d86563SAlexander Pyhalov 	  if (unidx < 0 || uni_val > 0x00ffff ) {
333*16d86563SAlexander Pyhalov 	    uni_val = ICV_CHAR_UCS2_REPLACEMENT;
334*16d86563SAlexander Pyhalov 	    *uconv_num = 1;
335*16d86563SAlexander Pyhalov 	  }
336*16d86563SAlexander Pyhalov 
337*16d86563SAlexander Pyhalov 	  if (!st->bom_written) {
338*16d86563SAlexander Pyhalov 	    if (buflen < 4)
339*16d86563SAlexander Pyhalov 	      return 0;
340*16d86563SAlexander Pyhalov 
341*16d86563SAlexander Pyhalov 	    *(buf + size++) = (uchar_t)0xff;
342*16d86563SAlexander Pyhalov 	    *(buf + size++) = (uchar_t)0xfe;
343*16d86563SAlexander Pyhalov 	    st->bom_written = true;
344*16d86563SAlexander Pyhalov 	  }
345*16d86563SAlexander Pyhalov 
346*16d86563SAlexander Pyhalov 	  if (buflen < 2)
347*16d86563SAlexander Pyhalov 	    return 0;
348*16d86563SAlexander Pyhalov 
349*16d86563SAlexander Pyhalov 	  *(buf + size++) = (uchar_t)(uni_val & 0xff);
350*16d86563SAlexander Pyhalov 	  *(buf + size++) = (uchar_t)((uni_val >> 8) & 0xff);
351*16d86563SAlexander Pyhalov 
352*16d86563SAlexander Pyhalov 	  return size;
353*16d86563SAlexander Pyhalov 	}
354*16d86563SAlexander Pyhalov 
355*16d86563SAlexander Pyhalov 	if (unidx >= 0) {	/* do Unicode to UTF8 conversion */
356*16d86563SAlexander Pyhalov 		if (uni_val >= 0x0080 && uni_val <= 0x07ff) {
357*16d86563SAlexander Pyhalov 			if (buflen < 2) {
358*16d86563SAlexander Pyhalov #ifdef DEBUG
359*16d86563SAlexander Pyhalov     fprintf(stderr, "outbuf overflow in big5_to_utf8()!!\n");
360*16d86563SAlexander Pyhalov #endif
361*16d86563SAlexander Pyhalov 				errno = E2BIG;
362*16d86563SAlexander Pyhalov 				return(0);
363*16d86563SAlexander Pyhalov 			}
364*16d86563SAlexander Pyhalov 			*buf = (char)((uni_val >> 6) & 0x1f) | 0xc0;
365*16d86563SAlexander Pyhalov 			*(buf+1) = (char)(uni_val & 0x3f) | 0x80;
366*16d86563SAlexander Pyhalov #ifdef DEBUG
367*16d86563SAlexander Pyhalov     fprintf(stderr, "%x %x\n", *buf&ONEBYTE, *(buf+1)&ONEBYTE);
368*16d86563SAlexander Pyhalov #endif
369*16d86563SAlexander Pyhalov 			return(2);
370*16d86563SAlexander Pyhalov 		}
371*16d86563SAlexander Pyhalov 		if (uni_val >= 0x0800 && uni_val <= 0xffff) {
372*16d86563SAlexander Pyhalov 			if (buflen < 3) {
373*16d86563SAlexander Pyhalov #ifdef DEBUG
374*16d86563SAlexander Pyhalov     fprintf(stderr, "outbuf overflow in big5_to_utf8()!!\n");
375*16d86563SAlexander Pyhalov #endif
376*16d86563SAlexander Pyhalov 				errno = E2BIG;
377*16d86563SAlexander Pyhalov 				return(0);
378*16d86563SAlexander Pyhalov 			}
379*16d86563SAlexander Pyhalov 			*buf = (char)((uni_val >> 12) & 0xf) | 0xe0;
380*16d86563SAlexander Pyhalov 			*(buf+1) = (char)((uni_val >>6) & 0x3f) | 0x80;
381*16d86563SAlexander Pyhalov 			*(buf+2) = (char)(uni_val & 0x3f) | 0x80;
382*16d86563SAlexander Pyhalov #ifdef DEBUG
383*16d86563SAlexander Pyhalov     fprintf(stderr, "%x %x %x\n", *buf&ONEBYTE, *(buf+1)&ONEBYTE, *(buf+2)&ONEBYTE);
384*16d86563SAlexander Pyhalov #endif
385*16d86563SAlexander Pyhalov 			return(3);
386*16d86563SAlexander Pyhalov 		}
387*16d86563SAlexander Pyhalov 		if (uni_val >= 0x10000 && uni_val <= 0x10ffff) {
388*16d86563SAlexander Pyhalov 		        if (buflen < 4) {
389*16d86563SAlexander Pyhalov 			   errno = E2BIG;
390*16d86563SAlexander Pyhalov 			   return 0;
391*16d86563SAlexander Pyhalov 			}
392*16d86563SAlexander Pyhalov 
393*16d86563SAlexander Pyhalov 			*buf = (char) ((uni_val >> 18 ) & 0x7) | 0xf0;
394*16d86563SAlexander Pyhalov 			*(buf+1) = (char) ((uni_val >> 12) & 0x3f) | 0x80;
395*16d86563SAlexander Pyhalov 			*(buf+2) = (char) ((uni_val >> 6) & 0x3f) | 0x80;
396*16d86563SAlexander Pyhalov 			*(buf+3) = (char) (uni_val & 0x3f) | 0x80;
397*16d86563SAlexander Pyhalov 
398*16d86563SAlexander Pyhalov 			return 4;
399*16d86563SAlexander Pyhalov 		}
400*16d86563SAlexander Pyhalov 	}
401*16d86563SAlexander Pyhalov 
402*16d86563SAlexander Pyhalov 	/* can't find a match in Big-5 --> UTF8 table or illegal UTF8 code */
403*16d86563SAlexander Pyhalov 	if (buflen < 3) {
404*16d86563SAlexander Pyhalov #ifdef DEBUG
405*16d86563SAlexander Pyhalov     fprintf(stderr, "outbuf overflow in big5_to_utf8()!!\n");
406*16d86563SAlexander Pyhalov #endif
407*16d86563SAlexander Pyhalov 		errno = E2BIG;
408*16d86563SAlexander Pyhalov 		return(0);
409*16d86563SAlexander Pyhalov 	}
410*16d86563SAlexander Pyhalov 
411*16d86563SAlexander Pyhalov         *(unsigned char*) buf     = UTF8_NON_ID_CHAR1;
412*16d86563SAlexander Pyhalov         *(unsigned char*)(buf+1) = UTF8_NON_ID_CHAR2;
413*16d86563SAlexander Pyhalov         *(unsigned char*)(buf+2) = UTF8_NON_ID_CHAR3;
414*16d86563SAlexander Pyhalov 
415*16d86563SAlexander Pyhalov 	/* non-identical conversion */
416*16d86563SAlexander Pyhalov 	*uconv_num = 1;
417*16d86563SAlexander Pyhalov 
418*16d86563SAlexander Pyhalov #ifdef DEBUG
419*16d86563SAlexander Pyhalov     fprintf(stderr, "%c %c %c\n", *buf, *(buf+1), *(buf+2));
420*16d86563SAlexander Pyhalov #endif
421*16d86563SAlexander Pyhalov 	return(3);
422*16d86563SAlexander Pyhalov }
423*16d86563SAlexander Pyhalov 
424*16d86563SAlexander Pyhalov 
425*16d86563SAlexander Pyhalov /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,big5_utf v[],int n)426*16d86563SAlexander Pyhalov static int binsearch(unsigned long x, big5_utf v[], int n)
427*16d86563SAlexander Pyhalov {
428*16d86563SAlexander Pyhalov 	int low, high, mid;
429*16d86563SAlexander Pyhalov 
430*16d86563SAlexander Pyhalov 	low = 0;
431*16d86563SAlexander Pyhalov 	high = n - 1;
432*16d86563SAlexander Pyhalov 	while (low <= high) {
433*16d86563SAlexander Pyhalov 		mid = (low + high) / 2;
434*16d86563SAlexander Pyhalov 		if (x < v[mid].big5code)
435*16d86563SAlexander Pyhalov 			high = mid - 1;
436*16d86563SAlexander Pyhalov 		else if (x > v[mid].big5code)
437*16d86563SAlexander Pyhalov 			low = mid + 1;
438*16d86563SAlexander Pyhalov 		else	/* found match */
439*16d86563SAlexander Pyhalov 			return mid;
440*16d86563SAlexander Pyhalov 	}
441*16d86563SAlexander Pyhalov 	return (-1);	/* no match */
442*16d86563SAlexander Pyhalov }
443