1 /*-
2  * Copyright (c) 1999,2000
3  *	Konstantin Chuguev.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by Konstantin Chuguev
16  *	and its contributors.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *	iconv (Charset Conversion Library) v2.0
31  */
32 
33 #include <errno.h>	/* errno */
34 #include <stdlib.h>	/* free, malloc */
35 #include <string.h>	/* bzero, memcmp, memcpy */
36 
37 #define ICONV_INTERNAL
38 #include <iconv.h>
39 
/*
 * One shift function: the byte sequence that announces it, how long that
 * sequence is, and how far its effect reaches (one of ICONV_PREFIX_*).
 */
typedef struct iconv_ces_iso2022_shift {
	const char	*sequence;	/* bytes of the shift sequence */
	size_t		length;		/* number of bytes in sequence */
	int		prefix_type;	/* ICONV_PREFIX_STATE/_LINE/_CHAR */
} iconv_ces_iso2022_shift;
45 
/*
 * Scope of a shift sequence.  update_shift_state() forgets every slot whose
 * type is not ICONV_PREFIX_STATE at a CR LF pair; an ICONV_PREFIX_CHAR
 * shift never becomes the current shift_index.
 */
enum {
	ICONV_PREFIX_STATE = 0,
	ICONV_PREFIX_LINE = 1,
	ICONV_PREFIX_CHAR = 2
};
47 
48 static const iconv_ces_iso2022_shift iso_shift[] = {
49 	{ "\x0f",  1, ICONV_PREFIX_STATE },
50 	{ "\x0e",  1, ICONV_PREFIX_LINE },
51 	{ "\x1bN", 2, ICONV_PREFIX_CHAR },
52 	{ "\x1bO", 2, ICONV_PREFIX_CHAR }
53 };
54 
/* Number of entries in iso_shift[] (also the size of every shift_tab[]). */
#define shift_num (sizeof(iso_shift) / sizeof(iso_shift[0]))
56 
57 typedef struct {
58 	int		nccs;
59 	ucs_t		previous_char;
60 	int		shift_index;
61 	int		shift_tab[shift_num];
62 	char		prefix_cache[128];
63 	struct		iconv_ccs ccs[1];
64 } iconv_ces_iso2022_state;
65 
66 int
iconv_iso2022_init(void ** data,const void * desc_data,size_t num)67 iconv_iso2022_init(void **data, const void *desc_data, size_t num)
68 {
69 	size_t stsz = sizeof(iconv_ces_iso2022_state) +
70 	              sizeof(struct iconv_ccs) * (num - 1);
71 	int i;
72 	iconv_ces_iso2022_state *state
73 		= (iconv_ces_iso2022_state *)malloc(stsz);
74 
75 	if (state == NULL)
76 		return errno;
77 	bzero(state->prefix_cache, sizeof(state->prefix_cache));
78 	for (i = 0; i < num; i++) {
79 		const iconv_ces_iso2022_ccs *ccsattr
80 			= &(((const iconv_ces_iso2022_ccs *)desc_data)[i]);
81 		int res = iconv_ccs_init(&(state->ccs[i]), ccsattr->name);
82 		if (res) {
83 			while (--i >= 0)
84 				state->ccs[i].close(&(state->ccs[i]));
85 			free(state);
86 			return res;
87 		}
88 		if (ccsattr->designatorlen)
89 			state->prefix_cache[(int)ccsattr->designator[0]] = 1;
90 		if (ccsattr->shift >= 0)
91 			state->prefix_cache[(int)iso_shift[ccsattr->shift].sequence[0]] = 1;
92 	}
93 	state->nccs = num;
94 	iconv_iso2022_reset(state);
95 	*(iconv_ces_iso2022_state **)data = state;
96 	return 0;
97 }
98 
/*
 * Shorthand for the functions that follow: their opaque `data' argument
 * viewed as the ISO 2022 conversion state.
 */
#define state ((iconv_ces_iso2022_state *)(data))
100 
101 int
iconv_iso2022_close(void * data)102 iconv_iso2022_close(void *data)
103 {
104 	int i, res = 0;
105 
106 	for (i = 0; i < state->nccs; i++)
107 		res = state->ccs[i].close(&(state->ccs[i])) || res;
108 	free(data);
109 	return res;
110 }
111 
112 void
iconv_iso2022_reset(void * data)113 iconv_iso2022_reset(void *data)
114 {
115 	size_t i;
116 
117 	state->shift_index = 0;
118 	state->shift_tab[0] = 0;
119 	for (i = 1; i < shift_num; i++)
120 		state->shift_tab[i] = -1;
121 	state->previous_char = UCS_CHAR_NONE;
122 }
123 
124 #undef state
125 
/* The CES private data viewed as the ISO 2022 conversion state. */
#define	CES_STATE(ces) \
	((iconv_ces_iso2022_state *)(ces)->data)

/* The charset attribute array stored in the data member of the CES descriptor. */
#define	CES_CCSATTR(ces) \
	((const iconv_ces_iso2022_ccs *) \
	    ((struct iconv_ces_desc *)(ces)->desc)->data)
129 
130 static void
update_shift_state(const struct iconv_ces * ces,ucs_t ch)131 update_shift_state(const struct iconv_ces *ces, ucs_t ch)
132 {
133 	iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
134 	size_t i;
135 
136 	if (ch == '\n' && iso_state->previous_char == '\r') {
137 		for (i = 0; i < shift_num; i ++) {
138 			if (iso_shift[i].prefix_type != ICONV_PREFIX_STATE)
139 				iso_state->shift_tab[i] = -1;
140 		}
141         }
142 	iso_state->previous_char = ch;
143 }
144 
/*
 * Non-zero when nbits has any of its three low bits set: true for 7 and 14,
 * false for 8 and 16.
 */
#define is_7_14bit(ccs) ((ccs)->nbits & 0x07)
146 
147 static ssize_t
cvt_ucs2iso(const struct iconv_ces * ces,ucs_t in,unsigned char ** outbuf,size_t * outbytesleft,int cs)148 cvt_ucs2iso(const struct iconv_ces *ces, ucs_t in,
149 	unsigned char **outbuf, size_t *outbytesleft, int cs)
150 {
151 	iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
152 	const iconv_ces_iso2022_ccs *ccsattr;
153 	const struct iconv_ccs *ccs;
154 	ucs_t res;
155 	size_t len = 0;
156 	int need_designator, need_shift;
157 
158 	ccs = &(iso_state->ccs[cs]);
159 	res = (in == UCS_CHAR_NONE) ?
160 	    in : ICONV_CCS_CONVERT_FROM_UCS(ccs, in);
161 	if (in != UCS_CHAR_NONE) {
162 		if (iso_shift[cs].prefix_type == ICONV_PREFIX_CHAR &&
163 		    !is_7_14bit(ccs)) {
164 			if ((res & 0x8080) == 0)
165 				return -1;
166 		    res &= 0x7F7F;
167 		} else if (res & 0x8080)
168 			return -1; /* Invalid/missing character in the output charset */
169 	}
170 	ccsattr = &(CES_CCSATTR(ces)[cs]);
171 	if ((need_shift = (ccsattr->shift != iso_state->shift_index)))
172 		len += iso_shift[ccsattr->shift].length;
173 	if ((need_designator = (cs != iso_state->shift_tab[ccsattr->shift])))
174 		len += ccsattr->designatorlen;
175 	if (in != UCS_CHAR_NONE)
176 		len += res & 0xFF00 ? 2 : 1;
177 	if (len > *outbytesleft)
178 		return 0;	/* No space in output buffer */
179 	if (need_designator && (len = ccsattr->designatorlen)) {
180 		memcpy(*outbuf, ccsattr->designator, len);
181 		(*outbuf) += len;
182 		(*outbytesleft) -= len;
183 		iso_state->shift_tab[ccsattr->shift] = cs;
184 	}
185 	if (need_shift && (len = iso_shift[ccsattr->shift].length)) {
186 		memcpy(*outbuf, iso_shift[ccsattr->shift].sequence, len);
187 		(*outbuf) += len;
188 		(*outbytesleft) -= len;
189 		if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR)
190 			iso_state->shift_index = ccsattr->shift;
191 	}
192 	if (in == UCS_CHAR_NONE)
193 		return 1;
194 	if (res & 0xFF00) {
195 		*(unsigned char *)(*outbuf) ++ = res >> 8;
196 		(*outbytesleft)--;
197 	}
198 	*(unsigned char *)(*outbuf) ++ = res;
199 	(*outbytesleft) --;
200 	update_shift_state(ces, res);
201 	return 1;
202 }
203 
204 ssize_t
iconv_iso2022_convert_from_ucs(struct iconv_ces * ces,ucs_t in,unsigned char ** outbuf,size_t * outbytesleft)205 iconv_iso2022_convert_from_ucs(struct iconv_ces *ces,
206 	ucs_t in, unsigned char **outbuf, size_t *outbytesleft)
207 {
208 	iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
209 	ssize_t res;
210 	int cs, i;
211 
212 	if (in == UCS_CHAR_NONE)
213 		return cvt_ucs2iso(ces, in, outbuf, outbytesleft, 0);
214 	if (iconv_char32bit(in))
215 		return -1;
216 	cs = iso_state->shift_tab[iso_state->shift_index];
217 	if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, cs)) >= 0)
218 		return res;
219 	for (i = 0; i < iso_state->nccs; i++) {
220 		if (i == cs)
221 			continue;
222 		if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, i)) >= 0)
223 			return res;
224 	}
225 	(*outbuf) ++;
226 	(*outbytesleft) --;
227 	return -1;	/* No character in output charset */
228 }
229 
230 static ucs_t
cvt_iso2ucs(const struct iconv_ccs * ccs,const unsigned char ** inbuf,size_t * inbytesleft,int prefix_type)231 cvt_iso2ucs(const struct iconv_ccs *ccs, const unsigned char **inbuf,
232 	size_t *inbytesleft, int prefix_type)
233 {
234 	size_t bytes = ccs->nbits > 8 ? 2 : 1;
235 	ucs_t ch = **inbuf;
236 
237 	if (*inbytesleft < bytes)
238 		return UCS_CHAR_NONE;	/* Not enough bytes in the input buffer */
239 	if (bytes == 2)
240 		ch = (ch << 8) | *(++(*inbuf));
241 	(*inbuf)++;
242 	(*inbytesleft) -= bytes;
243 	if (ch & 0x8080)
244 		return UCS_CHAR_INVALID;
245 	if (prefix_type == ICONV_PREFIX_CHAR && !is_7_14bit(ccs))
246 		ch |= (bytes == 2) ? 0x8080 : 0x80;
247 	return ICONV_CCS_CONVERT_TO_UCS(ccs, ch);
248 }
249 
250 ucs_t
iconv_iso2022_convert_to_ucs(struct iconv_ces * ces,const unsigned char ** inbuf,size_t * inbytesleft)251 iconv_iso2022_convert_to_ucs(struct iconv_ces *ces,
252 	const unsigned char **inbuf, size_t *inbytesleft)
253 {
254 	iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
255 	const iconv_ces_iso2022_ccs *ccsattr;
256 	ucs_t res;
257 	const unsigned char *ptr = *inbuf;
258 	unsigned char byte;
259 	size_t len, left = *inbytesleft;
260 	int i;
261 
262 	while (left) {
263 		byte = *ptr;
264 		if (byte & 0x80) {
265 			(*inbuf)++;
266 			(*inbytesleft) --;
267 			return UCS_CHAR_INVALID;
268 		}
269 		if (!iso_state->prefix_cache[byte])
270 			break;
271 		for (i = 0; i < iso_state->nccs; i++) {
272 			ccsattr = &(CES_CCSATTR(ces)[i]);
273 			len = ccsattr->designatorlen;
274 			if (len) {
275 				if (len + 1 > left)
276 					return UCS_CHAR_NONE;
277 				if (memcmp(ptr, ccsattr->designator, len) == 0) {
278 					iso_state->shift_tab[ccsattr->shift] = i;
279 					ptr += len;
280 					left -= len;
281 					break;
282 				}
283 			}
284 			len = iso_shift[ccsattr->shift].length;
285 			if (len) {
286 				if (len + 1 > left)
287 					return UCS_CHAR_NONE;
288 				if (memcmp(ptr,
289 				    iso_shift[ccsattr->shift].sequence, len) == 0) {
290 					if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR)
291 						iso_state->shift_index = ccsattr->shift;
292 					ptr += len;
293 					left -= len;
294 					break;
295 				}
296 			}
297 		}
298 	}
299 	i = iso_state->shift_tab[iso_state->shift_index];
300 	if (i < 0) {
301 		(*inbuf) ++;
302 		(*inbytesleft) --;
303 		return UCS_CHAR_INVALID;
304 	}
305 	res = cvt_iso2ucs(&(iso_state->ccs[i]), &ptr, &left,
306 	                  iso_shift[i].prefix_type);
307 	if (res != UCS_CHAR_NONE) {
308 		*inbuf = (const char*)ptr;
309 		*inbytesleft = left;
310 		update_shift_state(ces, res);
311 	}
312 	return res;
313 }
314