1 /* $NetBSD: citrus_euc.c,v 1.15 2013/05/28 16:57:56 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 __RCSID("$NetBSD: citrus_euc.c,v 1.15 2013/05/28 16:57:56 joerg Exp $"); 64 #endif /* LIBC_SCCS and not lint */ 65 66 #include <assert.h> 67 #include <errno.h> 68 #include <string.h> 69 #include <stdio.h> 70 #include <stdlib.h> 71 #include <stddef.h> 72 #include <wchar.h> 73 #include <sys/types.h> 74 #include <limits.h> 75 76 #include "citrus_namespace.h" 77 #include "citrus_bcs.h" 78 #include "citrus_types.h" 79 #include "citrus_module.h" 80 #include "citrus_ctype.h" 81 #include "citrus_stdenc.h" 82 #include "citrus_euc.h" 83 84 85 /* ---------------------------------------------------------------------- 86 * private stuffs used by templates 87 */ 88 89 typedef struct { 90 char ch[3]; 91 int chlen; 92 } _EUCState; 93 94 typedef struct { 95 unsigned count[4]; 96 wchar_t bits[4]; 97 wchar_t mask; 98 unsigned mb_cur_max; 99 } _EUCEncodingInfo; 100 101 typedef struct { 102 _EUCEncodingInfo ei; 103 struct { 104 /* for future multi-locale facility */ 105 _EUCState s_mblen; 106 _EUCState s_mbrlen; 107 _EUCState s_mbrtowc; 108 _EUCState s_mbtowc; 109 _EUCState s_mbsrtowcs; 110 _EUCState s_mbsnrtowcs; 111 _EUCState s_wcrtomb; 112 _EUCState s_wcsrtombs; 113 _EUCState s_wcsnrtombs; 114 _EUCState s_wctomb; 115 } states; 116 } _EUCCTypeInfo; 117 118 #define _SS2 0x008e 119 #define _SS3 0x008f 120 121 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 122 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 123 124 #define _FUNCNAME(m) _citrus_EUC_##m 125 #define _ENCODING_INFO _EUCEncodingInfo 126 #define _CTYPE_INFO _EUCCTypeInfo 127 #define _ENCODING_STATE _EUCState 128 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 129 #define _ENCODING_IS_STATE_DEPENDENT 0 130 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 131 132 133 static __inline int 134 _citrus_EUC_cs(unsigned int c) 135 { 136 c &= 0xff; 137 138 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 139 } 140 141 static __inline int 142 _citrus_EUC_parse_variable(_EUCEncodingInfo *ei, 143 const void *var, size_t lenvar) 144 { 145 const char *v, *e; 146 int x; 147 148 /* parse variable string */ 149 if (!var) 150 return (EFTYPE); 151 152 v = (const char *) var; 153 154 while (*v == ' ' || *v == '\t') 155 ++v; 156 157 ei->mb_cur_max = 1; 158 for (x = 0; x < 4; ++x) { 159 ei->count[x] = (int)_bcs_strtol(v, (char **)&e, 0); 160 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) { 161 return (EFTYPE); 162 } 163 if (ei->mb_cur_max < ei->count[x]) 164 ei->mb_cur_max = ei->count[x]; 165 while (*v == ' ' || *v == '\t') 166 ++v; 167 ei->bits[x] = (int)_bcs_strtol(v, (char **)&e, 0); 168 if (v == e || !(v = e)) { 169 return (EFTYPE); 170 } 171 while (*v == ' ' || *v == '\t') 172 ++v; 173 } 174 ei->mask = (int)_bcs_strtol(v, (char **)&e, 0); 175 if (v == e || !(v = e)) { 176 return (EFTYPE); 177 } 178 179 return 0; 180 } 181 182 183 static __inline void 184 /*ARGSUSED*/ 185 _citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s) 186 { 187 memset(s, 0, sizeof(*s)); 188 } 189 190 static __inline void 191 /*ARGSUSED*/ 192 _citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s) 193 { 194 memcpy(pspriv, (const void *)s, sizeof(*s)); 195 } 196 197 static __inline void 198 /*ARGSUSED*/ 199 _citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s, 200 const void *pspriv) 201 { 202 memcpy((void *)s, pspriv, sizeof(*s)); 203 } 204 205 static int 206 _citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s, 207 size_t n, _EUCState *psenc, size_t *nresult) 208 { 209 wchar_t wchar; 210 int c, cs, len; 211 int chlenbak; 212 const char *s0, *s1 = NULL; 213 214 _DIAGASSERT(nresult != 0); 215 _DIAGASSERT(ei != NULL); 216 _DIAGASSERT(psenc != NULL); 217 _DIAGASSERT(s != NULL); 218 219 s0 = *s; 220 221 if (s0 == NULL) { 222 _citrus_EUC_init_state(ei, psenc); 223 *nresult = 0; /* state independent */ 224 return (0); 225 } 226 227 chlenbak = psenc->chlen; 228 229 /* make sure we have the first byte in the buffer */ 230 switch (psenc->chlen) { 231 case 0: 232 if (n < 1) 233 goto restart; 234 psenc->ch[0] = *s0++; 235 psenc->chlen = 1; 236 n--; 237 break; 238 case 1: 239 case 2: 240 break; 241 default: 242 /* illgeal state */ 243 goto encoding_error; 244 } 245 246 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 247 if (c == 0) 248 goto encoding_error; 249 while (psenc->chlen < c) { 250 if (n < 1) 251 goto restart; 252 psenc->ch[psenc->chlen] = *s0++; 253 psenc->chlen++; 254 n--; 255 } 256 *s = s0; 257 258 switch (cs) { 259 case 3: 260 case 2: 261 /* skip SS2/SS3 */ 262 len = c - 1; 263 s1 = &psenc->ch[1]; 264 break; 265 case 1: 266 case 0: 267 len = c; 268 s1 = &psenc->ch[0]; 269 break; 270 default: 271 goto encoding_error; 272 } 273 wchar = 0; 274 while (len-- > 0) 275 wchar = (wchar << 8) | (*s1++ & 0xff); 276 wchar = (wchar & ~ei->mask) | ei->bits[cs]; 277 278 psenc->chlen = 0; 279 if (pwc) 280 *pwc = wchar; 281 282 if (!wchar) { 283 *nresult = 0; 284 } else { 285 *nresult = (size_t)(c - chlenbak); 286 } 287 288 return 0; 289 290 encoding_error: 291 psenc->chlen = 0; 292 *nresult = (size_t)-1; 293 return (EILSEQ); 294 295 restart: 296 *nresult = (size_t)-2; 297 *s = s0; 298 return (0); 299 } 300 301 static int 302 _citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 303 _EUCState *psenc, size_t *nresult) 304 { 305 wchar_t m, nm; 306 int cs, i, ret; 307 308 _DIAGASSERT(ei != NULL); 309 _DIAGASSERT(nresult != 0); 310 _DIAGASSERT(s != NULL); 311 312 m = wc & ei->mask; 313 nm = wc & ~m; 314 315 for (cs = 0; 316 cs < sizeof(ei->count)/sizeof(ei->count[0]); 317 cs++) { 318 if (m == ei->bits[cs]) 319 break; 320 } 321 /* fallback case - not sure if it is necessary */ 322 if (cs == sizeof(ei->count)/sizeof(ei->count[0])) 323 cs = 1; 324 325 i = ei->count[cs]; 326 if (n < i) { 327 ret = E2BIG; 328 goto err; 329 } 330 m = (cs) ? 0x80 : 0x00; 331 switch (cs) { 332 case 2: 333 *s++ = _SS2; 334 i--; 335 break; 336 case 3: 337 *s++ = _SS3; 338 i--; 339 break; 340 } 341 342 while (i-- > 0) 343 *s++ = ((nm >> (i << 3)) & 0xff) | m; 344 345 *nresult = (size_t)ei->count[cs]; 346 return 0; 347 348 err: 349 *nresult = (size_t)-1; 350 return ret; 351 } 352 353 static __inline int 354 /*ARGSUSED*/ 355 _citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei, 356 _csid_t * __restrict csid, 357 _index_t * __restrict idx, wchar_t wc) 358 { 359 wchar_t m, nm; 360 361 _DIAGASSERT(ei != NULL && csid != NULL && idx != NULL); 362 363 m = wc & ei->mask; 364 nm = wc & ~m; 365 366 *csid = (_citrus_csid_t)m; 367 *idx = (_citrus_index_t)nm; 368 369 return (0); 370 } 371 372 static __inline int 373 /*ARGSUSED*/ 374 _citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei, 375 wchar_t * __restrict wc, 376 _csid_t csid, _index_t idx) 377 { 378 379 _DIAGASSERT(ei != NULL && wc != NULL); 380 381 if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0) 382 return (EINVAL); 383 384 *wc = (wchar_t)csid | (wchar_t)idx; 385 386 return (0); 387 } 388 389 static __inline int 390 /*ARGSUSED*/ 391 _citrus_EUC_stdenc_get_state_desc_generic(_EUCEncodingInfo * __restrict ei, 392 _EUCState * __restrict psenc, 393 int * __restrict rstate) 394 { 395 396 if (psenc->chlen == 0) 397 *rstate = _STDENC_SDGEN_INITIAL; 398 else 399 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 400 401 return 0; 402 } 403 404 static int 405 /*ARGSUSED*/ 406 _citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei, 407 const void * __restrict var, size_t lenvar) 408 { 409 410 _DIAGASSERT(ei != NULL); 411 412 return (_citrus_EUC_parse_variable(ei, var, lenvar)); 413 } 414 415 static void 416 /*ARGSUSED*/ 417 _citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei) 418 { 419 } 420 421 /* ---------------------------------------------------------------------- 422 * public interface for ctype 423 */ 424 425 _CITRUS_CTYPE_DECLS(EUC); 426 _CITRUS_CTYPE_DEF_OPS(EUC); 427 428 #include "citrus_ctype_template.h" 429 430 /* ---------------------------------------------------------------------- 431 * public interface for stdenc 432 */ 433 434 _CITRUS_STDENC_DECLS(EUC); 435 _CITRUS_STDENC_DEF_OPS(EUC); 436 437 #include "citrus_stdenc_template.h" 438