1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  *	Copyright(c) 1997, Sun Microsystems, Inc.
23  *	All rights reserved.
24  */
25 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <big5_gb2312.h>
30 
#define NON_ID_CHAR '_'	/* substitute byte written for a Big5 character with no table entry */
#define MSB 0x80	/* high bit; when set on an input byte it starts a Big5 character */
#define ONEBYTE 0xff	/* mask keeping the low 8 bits of a value */

/* Conversion state allocated by _icv_open() and released by _icv_close(). */
typedef struct _icv_state {
	char keepc[2];	/* bytes of the Big5 character being assembled (at most 2) */
	short cstate;	/* parser state: C0 or C1 (see enum _CSTATE) */
	int _errno;		/* internal errno */
} _iconv_st;

/*
 * Parser states used by _icv_iconv():
 *	C0 --- at a character boundary (next byte is ASCII or a Big5 1st byte)
 *	C1 --- Big5 1st byte stored in keepc[0], 2nd byte expected
 */
enum _CSTATE { C0, C1 };

/* Returns 0 when inbuf is acceptable as a Big5 2nd byte, 1 otherwise. */
int big5_2nd_byte(char inbuf);
/* Writes the 2-byte conversion of keepc[] to buf; returns 2, or 0 if buflen < 2. */
int big5_to_gb2312(char keepc[], char *buf, size_t buflen);
/* Binary search for key x in table[0..n-1]; returns the index or -1. */
int binsearch(unsigned long x, table_t table[], int n);
46 
47 /*
48  *	Open; called from iconv_open()
49  */
_icv_open()50 void * _icv_open() {
51 	_iconv_st * st;
52 
53 	if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
54 		errno = ENOMEM;
55 		return ((void *) -1);
56 	}
57 
58 	st->cstate = C0;
59 	st->_errno = 0;
60 
61 	return ((void *) st);
62 }
63 
64 /*
65  *	Close; called from iconv_close()
66  */
_icv_close(_iconv_st * st)67 void _icv_close(_iconv_st * st) {
68 	if (!st)
69 		errno = EBADF;
70 	else
71 		free(st);
72 }
73 
74 /*
75  *	Actual conversion; called from iconv()
76  */
77 
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)78 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
79 					char ** outbuf, size_t *outbytesleft) {
80 	int n;
81 	if (st == NULL) {
82 		errno = EBADF;
83 		return ((size_t) -1);
84 	}
85 
86 	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
87 		st->cstate = C0;
88 		st->_errno = 0;
89 		return ((size_t) 0);
90 	}
91 
92 	errno = st->_errno = 0;
93 
94 	while (*inbytesleft > 0 && *outbytesleft > 0) {
95 		switch (st->cstate) {
96 			case C0:
97 				if (**inbuf & MSB) {	/* big5 charactor */
98 					st->keepc[0] = (**inbuf);
99 					st->cstate = C1;
100 				} else {	/* ASCII */
101 					**outbuf = **inbuf;
102 					(*outbuf)++;
103 					(*outbytesleft)--;
104 				}
105 				break;
106 			case C1:	/* Big5 charactor 2nd byte */
107 				if (big5_2nd_byte(**inbuf) == 0) {
108 					st->keepc[1] = (**inbuf);
109 					n = big5_to_gb2312(st->keepc, *outbuf, *outbytesleft);
110 					if (n > 0) {
111 						(*outbuf) += n;
112 						(*outbytesleft) -= n;
113 
114 						st->cstate = C0;
115 					} else {
116 						st->_errno = errno = E2BIG;
117 					}
118 				} else {	/* illegal input */
119 					st->_errno = errno =EILSEQ;
120 				}
121 				break;
122 			default:
123 				st->_errno = errno = EILSEQ;
124 				st->cstate = C0;
125 				break;
126 		}
127 
128 		if (st->_errno)
129 			break;
130 
131 		(*inbuf) ++;
132 		(*inbytesleft)--;
133 	}
134 
135         if (errno) return ((size_t) -1);
136 
137         if (*inbytesleft == 0 && st->cstate != C0) {
138                 errno = EINVAL;
139                 return ((size_t) -1);
140         }
141 
142 	if (*inbytesleft > 0 && *outbytesleft == 0) {
143 		errno = E2BIG;
144 		return (size_t)-1;
145 	}
146 
147 	return (size_t)(*inbytesleft);
148 }
149 
/*
 *	Test whether inbuf is a valid character for
 *	2nd byte of BIG5 character:
 *	Return:	0 --- valid BIG5 2nd byte   (0x40 .. 0xfe)
 *			1 --- invalid BIG5 2nd byte
 *
 *	Defined in prototype form so that it agrees with the declaration
 *	"int big5_2nd_byte(char inbuf);"; an old-style definition promotes
 *	the char parameter to int and is not compatible with that prototype.
 *
 *	NOTE(review): Big5 trail bytes are usually given as 0x40-0x7e and
 *	0xa1-0xfe; this test also accepts 0x7f-0xa0 --- confirm whether
 *	that is intended before tightening it.
 */
int big5_2nd_byte(char inbuf)
{
	/* Go through unsigned char so bytes >= 0x80 are not seen as negative. */
	unsigned int buf = (unsigned int)(unsigned char)inbuf;

	if ((buf >= 0x40) && (buf <= 0xfe))
		return 0;
	return 1;
}
165 
166 /*
167  *	big5_to_gb2312: Convert Big5 to gb2312.
168  *	Return:	>0 --- converted with enough space in output buffer
169  *			=0 --- no space in outbuf
170  */
171 
big5_to_gb2312(char keepc[],char * buf,size_t buflen)172 int big5_to_gb2312(char keepc[], char *buf, size_t buflen) {
173 
174 	unsigned long gb_val;
175 	int index;
176 	unsigned long big5_val;
177 
178 	if (buflen < 2) {
179 		errno = E2BIG;
180 		return 0;
181 	}
182 
183 	big5_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
184 	index = binsearch(big5_val, big5_gb_tab, BIG5MAX);
185 	if (index >= 0) {
186 		gb_val = big5_gb_tab[index].value;
187 		*buf = (gb_val >> 8) & ONEBYTE;
188 		*(buf + 1) = gb_val & ONEBYTE;
189 	} else
190 		*buf = *(buf + 1) = (char)NON_ID_CHAR;
191 	return 2;
192 }
193 
194 /*
195  *	binsearch()
196  */
binsearch(unsigned long x,table_t table[],int n)197 int binsearch(unsigned long x, table_t table[], int n) {
198 	int low, high, mid;
199 
200 	low = 0;
201 	high = n - 1;
202 	while (low <= high) {
203 		mid = (low + high) >> 1;
204 		if (x < table[mid].key)
205 			high = mid - 1;
206 		else if (x > table[mid].key)
207 			low = mid + 1;
208 		else
209 			return mid;
210 	}
211 	return -1;
212 }
213 
214 #ifdef DEBUG
main(int argc,char * argv[])215 main(int argc, char * argv[]) {
216 	_iconv_st * ist;
217 	char * inbuf = "�H�U�ҦC���C�@���D�N��@�Ӥw�w�˨ê`�U�F�p�����ܪ����~�t�C�C�C�@���D�]�M�ϼС^�O�@�ӦC�X�Өt�C���ܪ��W�챵�C";
218 	char * outbuf;
219 	char * ib, * oub;
220 	int inbyteleft;
221 	int outbyteleft;
222 
223 	ist = (_iconv_st *) _icv_open();
224 	inbyteleft = outbyteleft = 2 * strlen(inbuf);
225 	outbuf = (char *)malloc(outbyteleft);
226 	ib = inbuf;
227 	oub = outbuf;
228 	_icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft);
229 	printf("IN -- %s\n", ib);
230 	printf("OUT -- %s\n", oub);
231 }
232 #endif
233