1 /*- 2 * Copyright (c) 1999,2000 3 * Konstantin Chuguev. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * iconv (Charset Conversion Library) v2.0 27 */ 28 #include <errno.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include "local.h" 32 33 typedef struct { 34 _CONST char *sequence; 35 size_t length; 36 int prefix_type; 37 } iconv_ces_iso2022_shift_t; 38 39 enum { ICONV_PREFIX_STATE = 0, ICONV_PREFIX_LINE, ICONV_PREFIX_CHAR }; 40 41 static _CONST iconv_ces_iso2022_shift_t iso_shift[] = { 42 { "\x0f", 1, ICONV_PREFIX_STATE }, 43 { "\x0e", 1, ICONV_PREFIX_LINE }, 44 { "\x1bN", 2, ICONV_PREFIX_CHAR }, 45 { "\x1bO", 2, ICONV_PREFIX_CHAR } 46 }; 47 48 #define shift_num (sizeof(iso_shift) / sizeof(iconv_ces_iso2022_shift_t)) 49 50 typedef struct { 51 int nccs; 52 ucs_t previous_char; 53 int shift_index; 54 int shift_tab[shift_num]; 55 char prefix_cache[128]; 56 struct iconv_ccs ccs[1]; 57 } iconv_ces_iso2022_state_t; 58 59 int 60 _DEFUN(_iconv_iso2022_init, (rptr, data, desc_data, num), 61 struct _reent *rptr _AND 62 _VOID_PTR *data _AND 63 _CONST _VOID_PTR desc_data _AND 64 size_t num) 65 { 66 size_t stsz = sizeof(iconv_ces_iso2022_state_t) + 67 sizeof(struct iconv_ccs) * (num - 1); 68 int i; 69 iconv_ces_iso2022_state_t *state 70 = (iconv_ces_iso2022_state_t *)_malloc_r(rptr, stsz); 71 72 if (state == NULL) 73 return __errno_r(rptr); 74 bzero(state->prefix_cache, sizeof(state->prefix_cache)); 75 for (i = 0; i < num; i++) { 76 _CONST iconv_ces_iso2022_ccs_t *ccsattr = 77 &(((_CONST iconv_ces_iso2022_ccs_t *)desc_data)[i]); 78 int res = _iconv_ccs_init(rptr, &(state->ccs[i]), ccsattr->name); 79 if (res) { 80 while (--i >= 0) 81 state->ccs[i].close(rptr, &(state->ccs[i])); 82 _free_r(rptr, state); 83 return res; 84 } 85 if (ccsattr->designatorlen) 86 state->prefix_cache[(int)ccsattr->designator[0]] = 1; 87 if (ccsattr->shift >= 0) 88 state->prefix_cache[(int)iso_shift[ccsattr->shift].sequence[0]] = 1; 89 } 90 state->nccs = num; 91 iconv_iso2022_reset(state); 92 (iconv_ces_iso2022_state_t *)*data = state; 93 return 0; 94 } 95 96 #define state ((iconv_ces_iso2022_state_t *)data) 97 98 int 99 _DEFUN(_iconv_iso2022_close, (rptr, data), 100 struct _reent *rptr _AND 101 _VOID_PTR data) 102 { 103 int i, res = 0; 104 105 for (i = 0; i < state->nccs; i++) 106 res = state->ccs[i].close(rptr, &(state->ccs[i])) || res; 107 _free_r(rptr, data); 108 return res; 109 } 110 111 _VOID 112 _DEFUN(_iconv_iso2022_reset, (data), _VOID_PTR data) 113 { 114 size_t i; 115 116 state->shift_index = 0; 117 state->shift_tab[0] = 0; 118 for (i = 1; i < shift_num; i++) 119 state->shift_tab[i] = -1; 120 state->previous_char = UCS_CHAR_NONE; 121 } 122 123 #undef state 124 125 #define CES_STATE(ces) ((iconv_ces_iso2022_state_t *)((ces)->data)) 126 #define CES_CCSATTR(ces) ((_CONST iconv_ces_iso2022_ccs_t *) \ 127 (((struct iconv_ces_desc *)((ces)->desc))->data)) 128 129 static _VOID 130 _DEFUN(update_shift_state, (ces, ch), 131 _CONST struct iconv_ces *ces _AND 132 ucs_t ch) 133 { 134 iconv_ces_iso2022_state_t *iso_state = CES_STATE(ces); 135 size_t i; 136 137 if (ch == '\n' && iso_state->previous_char == '\r') { 138 for (i = 0; i < shift_num; i ++) { 139 if (iso_shift[i].prefix_type != ICONV_PREFIX_STATE) 140 iso_state->shift_tab[i] = -1; 141 } 142 } 143 iso_state->previous_char = ch; 144 } 145 146 #define is_7_14bit(ccs) ((ccs)->nbits & 7) 147 148 static ssize_t 149 _DEFUN(cvt_ucs2iso, (ces, in, outbuf, outbytesleft, cs), 150 _CONST struct iconv_ces *ces _AND 151 ucs_t in _AND 152 unsigned char **outbuf _AND 153 size_t *outbytesleft _AND 154 int cs) 155 { 156 iconv_ces_iso2022_state_t *iso_state = CES_STATE(ces); 157 _CONST iconv_ces_iso2022_ccs_t *ccsattr; 158 _CONST struct iconv_ccs *ccs; 159 ucs_t res; 160 size_t len = 0; 161 int need_designator, need_shift; 162 163 ccs = &(iso_state->ccs[cs]); 164 res = (in == UCS_CHAR_NONE) ? 165 in : ICONV_CCS_CONVERT_FROM_UCS(ccs, in); 166 if (in != UCS_CHAR_NONE) { 167 if (iso_shift[cs].prefix_type == ICONV_PREFIX_CHAR && 168 !is_7_14bit(ccs)) { 169 if ((res & 0x8080) == 0) 170 return -1; 171 res &= 0x7F7F; 172 } else if (res & 0x8080) 173 return -1; /* Invalid/missing character in the output charset */ 174 } 175 ccsattr = &(CES_CCSATTR(ces)[cs]); 176 if ((need_shift = (ccsattr->shift != iso_state->shift_index))) 177 len += iso_shift[ccsattr->shift].length; 178 if ((need_designator = (cs != iso_state->shift_tab[ccsattr->shift]))) 179 len += ccsattr->designatorlen; 180 if (in != UCS_CHAR_NONE) 181 len += res & 0xFF00 ? 2 : 1; 182 if (len > *outbytesleft) 183 return 0; /* No space in output buffer */ 184 if (need_designator && (len = ccsattr->designatorlen)) { 185 memcpy(*outbuf, ccsattr->designator, len); 186 (*outbuf) += len; 187 (*outbytesleft) -= len; 188 iso_state->shift_tab[ccsattr->shift] = cs; 189 } 190 if (need_shift && (len = iso_shift[ccsattr->shift].length)) { 191 memcpy(*outbuf, iso_shift[ccsattr->shift].sequence, len); 192 (*outbuf) += len; 193 (*outbytesleft) -= len; 194 if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR) 195 iso_state->shift_index = ccsattr->shift; 196 } 197 if (in == UCS_CHAR_NONE) 198 return 1; 199 if (res & 0xFF00) { 200 *(unsigned char *)(*outbuf) ++ = res >> 8; 201 (*outbytesleft)--; 202 } 203 *(unsigned char *)(*outbuf) ++ = res; 204 (*outbytesleft) --; 205 update_shift_state(ces, res); 206 return 1; 207 } 208 209 ssize_t 210 _DEFUN(_iconv_iso2022_convert_from_ucs, (ces, in, outbuf, outbytesleft), 211 struct iconv_ces *ces _AND 212 ucs_t in _AND 213 unsigned char **outbuf _AND 214 size_t *outbytesleft) 215 { 216 iconv_ces_iso2022_state_t *iso_state = CES_STATE(ces); 217 ssize_t res; 218 int cs, i; 219 220 if (in == UCS_CHAR_NONE) 221 return cvt_ucs2iso(ces, in, outbuf, outbytesleft, 0); 222 if (iconv_char32bit(in)) 223 return -1; 224 cs = iso_state->shift_tab[iso_state->shift_index]; 225 if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, cs)) >= 0) 226 return res; 227 for (i = 0; i < iso_state->nccs; i++) { 228 if (i == cs) 229 continue; 230 if ((res = cvt_ucs2iso(ces, in, outbuf, outbytesleft, i)) >= 0) 231 return res; 232 } 233 (*outbuf) ++; 234 (*outbytesleft) --; 235 return -1; /* No character in output charset */ 236 } 237 238 static ucs_t 239 _DEFUN(cvt_iso2ucs, (ccs, inbuf, inbytesleft, prefix_type), 240 _CONST struct iconv_ccs *ccs _AND 241 _CONST unsigned char **inbuf _AND 242 size_t *inbytesleft _AND 243 int prefix_type) 244 { 245 size_t bytes = ccs->nbits > 8 ? 2 : 1; 246 ucs_t ch = **inbuf; 247 248 if (*inbytesleft < bytes) 249 return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */ 250 if (bytes == 2) 251 ch = (ch << 8) | *(++(*inbuf)); 252 (*inbuf)++; 253 (*inbytesleft) -= bytes; 254 if (ch & 0x8080) 255 return UCS_CHAR_INVALID; 256 if (prefix_type == ICONV_PREFIX_CHAR && !is_7_14bit(ccs)) 257 ch |= (bytes == 2) ? 0x8080 : 0x80; 258 return ICONV_CCS_CONVERT_TO_UCS(ccs, ch); 259 } 260 261 ucs_t 262 _DEFUN(_iconv_iso2022_convert_to_ucs, (ces, inbuf, inbytesleft), 263 struct iconv_ces *ces _AND 264 _CONST unsigned char **inbuf _AND 265 size_t *inbytesleft) 266 { 267 iconv_ces_iso2022_state_t *iso_state = CES_STATE(ces); 268 _CONST iconv_ces_iso2022_ccs_t *ccsattr; 269 ucs_t res; 270 _CONST unsigned char *ptr = *inbuf; 271 unsigned char byte; 272 size_t len, left = *inbytesleft; 273 int i; 274 275 while (left) { 276 byte = *ptr; 277 if (byte & 0x80) { 278 (*inbuf)++; 279 (*inbytesleft) --; 280 return UCS_CHAR_INVALID; 281 } 282 if (!iso_state->prefix_cache[byte]) 283 break; 284 for (i = 0; i < iso_state->nccs; i++) { 285 ccsattr = &(CES_CCSATTR(ces)[i]); 286 len = ccsattr->designatorlen; 287 if (len) { 288 if (len + 1 > left) 289 return UCS_CHAR_NONE; 290 if (memcmp(ptr, ccsattr->designator, len) == 0) { 291 iso_state->shift_tab[ccsattr->shift] = i; 292 ptr += len; 293 left -= len; 294 break; 295 } 296 } 297 len = iso_shift[ccsattr->shift].length; 298 if (len) { 299 if (len + 1 > left) 300 return UCS_CHAR_NONE; 301 if (memcmp(ptr, 302 iso_shift[ccsattr->shift].sequence, len) == 0) { 303 if (iso_shift[ccsattr->shift].prefix_type != ICONV_PREFIX_CHAR) 304 iso_state->shift_index = ccsattr->shift; 305 ptr += len; 306 left -= len; 307 break; 308 } 309 } 310 } 311 } 312 i = iso_state->shift_tab[iso_state->shift_index]; 313 if (i < 0) { 314 (*inbuf) ++; 315 (*inbytesleft) --; 316 return UCS_CHAR_INVALID; 317 } 318 res = cvt_iso2ucs(&(iso_state->ccs[i]), &ptr, &left, 319 iso_shift[i].prefix_type); 320 if (res != UCS_CHAR_NONE) { 321 *inbuf = (_CONST char*)ptr; 322 *inbytesleft = left; 323 update_shift_state(ces, res); 324 } 325 return res; 326 } 327 328