1 /*-
2 * Copyright (c) 1999,2000
3 * Konstantin Chuguev. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Konstantin Chuguev
16 * and its contributors.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * iconv (Charset Conversion Library) v2.0
31 */
32
33 #include <errno.h> /* errno */
34 #include <stdlib.h> /* free, malloc */
35 #include <string.h> /* bzero, memcmp, memcpy */
36
37 #define ICONV_INTERNAL
38 #include <iconv.h>
39
/*
 * Description of one shift function: the bytes that invoke it and an
 * ICONV_PREFIX_* value telling how its effect is tracked in the state.
 */
typedef struct {
    const char *sequence;       /* bytes of the shift sequence */
    size_t      length;         /* number of bytes in sequence */
    int         prefix_type;    /* one of the ICONV_PREFIX_* values */
} iconv_ces_iso2022_shift;
45
/*
 * How a shift function is tracked by the converter:
 *   ICONV_PREFIX_STATE  its designation is kept across a CR LF pair;
 *   ICONV_PREFIX_LINE   its designation is dropped after a CR LF pair;
 *   ICONV_PREFIX_CHAR   as LINE, and invoking it never becomes the
 *                       current shift recorded in the state.
 */
enum {
    ICONV_PREFIX_STATE = 0,
    ICONV_PREFIX_LINE  = 1,
    ICONV_PREFIX_CHAR  = 2
};
47
48 static const iconv_ces_iso2022_shift iso_shift[] = {
49 { "\x0f", 1, ICONV_PREFIX_STATE },
50 { "\x0e", 1, ICONV_PREFIX_LINE },
51 { "\x1bN", 2, ICONV_PREFIX_CHAR },
52 { "\x1bO", 2, ICONV_PREFIX_CHAR }
53 };
54
55 #define shift_num (sizeof(iso_shift) / sizeof(iconv_ces_iso2022_shift))
56
57 typedef struct {
58 int nccs;
59 ucs_t previous_char;
60 int shift_index;
61 int shift_tab[shift_num];
62 char prefix_cache[128];
63 struct iconv_ccs ccs[1];
64 } iconv_ces_iso2022_state;
65
66 int
iconv_iso2022_init(void ** data,const void * desc_data,size_t num)67 iconv_iso2022_init(void **data, const void *desc_data, size_t num)
68 {
69 size_t stsz = sizeof(iconv_ces_iso2022_state) +
70 sizeof(struct iconv_ccs) * (num - 1);
71 int i;
72 iconv_ces_iso2022_state *state
73 = (iconv_ces_iso2022_state *)malloc(stsz);
74
75 if (state == NULL)
76 return errno;
77 bzero(state->prefix_cache, sizeof(state->prefix_cache));
78 for (i = 0; i < num; i++) {
79 const iconv_ces_iso2022_ccs *ccsattr
80 = &(((const iconv_ces_iso2022_ccs *)desc_data)[i]);
81 int res = iconv_ccs_init(&(state->ccs[i]), ccsattr->name);
82 if (res) {
83 while (--i >= 0)
84 state->ccs[i].close(&(state->ccs[i]));
85 free(state);
86 return res;
87 }
88 if (ccsattr->designatorlen)
89 state->prefix_cache[(int)ccsattr->designator[0]] = 1;
90 if (ccsattr->shift >= 0)
91 state->prefix_cache[(int)iso_shift[ccsattr->shift].sequence[0]] = 1;
92 }
93 state->nccs = num;
94 iconv_iso2022_reset(state);
95 *(iconv_ces_iso2022_state **)data = state;
96 return 0;
97 }
98
/*
 * Shorthand that views the enclosing function's `data' argument as the
 * ISO 2022 converter state.  It is only meaningful inside a function that
 * has a parameter named data, and is removed by the matching #undef.
 */
#define state ((iconv_ces_iso2022_state *)data)
100
101 int
iconv_iso2022_close(void * data)102 iconv_iso2022_close(void *data)
103 {
104 int i, res = 0;
105
106 for (i = 0; i < state->nccs; i++)
107 res = state->ccs[i].close(&(state->ccs[i])) || res;
108 free(data);
109 return res;
110 }
111
112 void
iconv_iso2022_reset(void * data)113 iconv_iso2022_reset(void *data)
114 {
115 size_t i;
116
117 state->shift_index = 0;
118 state->shift_tab[0] = 0;
119 for (i = 1; i < shift_num; i++)
120 state->shift_tab[i] = -1;
121 state->previous_char = UCS_CHAR_NONE;
122 }
123
#undef state

/* The ISO 2022 converter state stored in a CES instance. */
#define CES_STATE(ces) ((iconv_ces_iso2022_state *)((ces)->data))

/* The array of per-ccs attribute records taken from the CES descriptor. */
#define CES_CCSATTR(ces) \
    ((const iconv_ces_iso2022_ccs *) \
     (((struct iconv_ces_desc *)((ces)->desc))->data))
129
130 static void
update_shift_state(const struct iconv_ces * ces,ucs_t ch)131 update_shift_state(const struct iconv_ces *ces, ucs_t ch)
132 {
133 iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
134 size_t i;
135
136 if (ch == '\n' && iso_state->previous_char == '\r') {
137 for (i = 0; i < shift_num; i ++) {
138 if (iso_shift[i].prefix_type != ICONV_PREFIX_STATE)
139 iso_state->shift_tab[i] = -1;
140 }
141 }
142 iso_state->previous_char = ch;
143 }
144
/*
 * Non-zero when the bit width of a coded character set is not a multiple
 * of 8, i.e. it is a 7 or 14 bit set rather than an 8 or 16 bit one.
 */
#define is_7_14bit(ccs) ((ccs)->nbits & 7)
146
147 static ssize_t
cvt_ucs2iso(const struct iconv_ces * ces,ucs_t in,unsigned char ** outbuf,size_t * outbytesleft,int cs)148 cvt_ucs2iso(const struct iconv_ces *ces, ucs_t in,
149 unsigned char **outbuf, size_t *outbytesleft, int cs)
150 {
151 iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
152 const iconv_ces_iso2022_ccs *ccsattr;
153 const struct iconv_ccs *ccs;
154 ucs_t res;
155 size_t len = 0;
156 int need_designator, need_shift;
157
158 ccs = &(iso_state->ccs[cs]);
159 res = (in == UCS_CHAR_NONE) ?
160 in : ICONV_CCS_CONVERT_FROM_UCS(ccs, in);
161 if (in != UCS_CHAR_NONE) {
162 if (iso_shift[cs].prefix_type == ICONV_PREFIX_CHAR &&
163 !is_7_14bit(ccs)) {
164 if ((res & 0x8080) == 0)
165 return -1;
166 res &= 0x7F7F;
167 } else if (res & 0x8080)
168 return -1; /* Invalid/missing character in the output charset */
169 }
170 ccsattr = &(CES_CCSATTR(ces)[cs]);
171 if ((need_shift = (ccsattr->shift != iso_state->shift_index)))
172 len += iso_shift[ccsattr->shift].length;
173 if ((need_designator = (cs != iso_state->shift_tab[ccsattr->shift])))
174 len += ccsattr->designatorlen;
175 if (in != UCS_CHAR_NONE)
176 len += res & 0xFF00 ? 2 : 1;
177 if (len > *outbytesleft)
178 return 0; /* No space in output buffer */
179 if (need_designator && (len = ccsattr->designatorlen)) {
180 memcpy(*outbuf, ccsattr->designator, len);
181 (*outbuf) += len;
182 (*outbytesleft) -= len;
183 iso_state->shift_tab[ccsattr->shift] = cs;
184 }
185 if (need_shift && (len = iso_shift[ccsattr->shift].length)) {
186 memcpy(*outbuf, iso_shift[ccsattr->shift].sequence, len);
187 (*outbuf) += len;
188 (*outbytesleft) -= len;
189 if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR)
190 iso_state->shift_index = ccsattr->shift;
191 }
192 if (in == UCS_CHAR_NONE)
193 return 1;
194 if (res & 0xFF00) {
195 *(unsigned char *)(*outbuf) ++ = res >> 8;
196 (*outbytesleft)--;
197 }
198 *(unsigned char *)(*outbuf) ++ = res;
199 (*outbytesleft) --;
200 update_shift_state(ces, res);
201 return 1;
202 }
203
204 ssize_t
iconv_iso2022_convert_from_ucs(struct iconv_ces * ces,ucs_t in,unsigned char ** outbuf,size_t * outbytesleft)205 iconv_iso2022_convert_from_ucs(struct iconv_ces *ces,
206 ucs_t in, unsigned char **outbuf, size_t *outbytesleft)
207 {
208 iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
209 ssize_t res;
210 int cs, i;
211
212 if (in == UCS_CHAR_NONE)
213 return cvt_ucs2iso(ces, in, outbuf, outbytesleft, 0);
214 if (iconv_char32bit(in))
215 return -1;
216 cs = iso_state->shift_tab[iso_state->shift_index];
217 if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, cs)) >= 0)
218 return res;
219 for (i = 0; i < iso_state->nccs; i++) {
220 if (i == cs)
221 continue;
222 if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, i)) >= 0)
223 return res;
224 }
225 (*outbuf) ++;
226 (*outbytesleft) --;
227 return -1; /* No character in output charset */
228 }
229
230 static ucs_t
cvt_iso2ucs(const struct iconv_ccs * ccs,const unsigned char ** inbuf,size_t * inbytesleft,int prefix_type)231 cvt_iso2ucs(const struct iconv_ccs *ccs, const unsigned char **inbuf,
232 size_t *inbytesleft, int prefix_type)
233 {
234 size_t bytes = ccs->nbits > 8 ? 2 : 1;
235 ucs_t ch = **inbuf;
236
237 if (*inbytesleft < bytes)
238 return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */
239 if (bytes == 2)
240 ch = (ch << 8) | *(++(*inbuf));
241 (*inbuf)++;
242 (*inbytesleft) -= bytes;
243 if (ch & 0x8080)
244 return UCS_CHAR_INVALID;
245 if (prefix_type == ICONV_PREFIX_CHAR && !is_7_14bit(ccs))
246 ch |= (bytes == 2) ? 0x8080 : 0x80;
247 return ICONV_CCS_CONVERT_TO_UCS(ccs, ch);
248 }
249
250 ucs_t
iconv_iso2022_convert_to_ucs(struct iconv_ces * ces,const unsigned char ** inbuf,size_t * inbytesleft)251 iconv_iso2022_convert_to_ucs(struct iconv_ces *ces,
252 const unsigned char **inbuf, size_t *inbytesleft)
253 {
254 iconv_ces_iso2022_state *iso_state = CES_STATE(ces);
255 const iconv_ces_iso2022_ccs *ccsattr;
256 ucs_t res;
257 const unsigned char *ptr = *inbuf;
258 unsigned char byte;
259 size_t len, left = *inbytesleft;
260 int i;
261
262 while (left) {
263 byte = *ptr;
264 if (byte & 0x80) {
265 (*inbuf)++;
266 (*inbytesleft) --;
267 return UCS_CHAR_INVALID;
268 }
269 if (!iso_state->prefix_cache[byte])
270 break;
271 for (i = 0; i < iso_state->nccs; i++) {
272 ccsattr = &(CES_CCSATTR(ces)[i]);
273 len = ccsattr->designatorlen;
274 if (len) {
275 if (len + 1 > left)
276 return UCS_CHAR_NONE;
277 if (memcmp(ptr, ccsattr->designator, len) == 0) {
278 iso_state->shift_tab[ccsattr->shift] = i;
279 ptr += len;
280 left -= len;
281 break;
282 }
283 }
284 len = iso_shift[ccsattr->shift].length;
285 if (len) {
286 if (len + 1 > left)
287 return UCS_CHAR_NONE;
288 if (memcmp(ptr,
289 iso_shift[ccsattr->shift].sequence, len) == 0) {
290 if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR)
291 iso_state->shift_index = ccsattr->shift;
292 ptr += len;
293 left -= len;
294 break;
295 }
296 }
297 }
298 }
299 i = iso_state->shift_tab[iso_state->shift_index];
300 if (i < 0) {
301 (*inbuf) ++;
302 (*inbytesleft) --;
303 return UCS_CHAR_INVALID;
304 }
305 res = cvt_iso2ucs(&(iso_state->ccs[i]), &ptr, &left,
306 iso_shift[i].prefix_type);
307 if (res != UCS_CHAR_NONE) {
308 *inbuf = (const char*)ptr;
309 *inbytesleft = left;
310 update_shift_state(ces, res);
311 }
312 return res;
313 }
314