1 /* $NetBSD: citrus_euc.c,v 1.5 2002/03/28 10:53:48 yamt Exp $ */ 2 3 /*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 #include <sys/cdefs.h> 66 #if defined(LIBC_SCCS) && !defined(lint) 67 __RCSID("$NetBSD: citrus_euc.c,v 1.5 2002/03/28 10:53:48 yamt Exp $"); 68 #endif /* LIBC_SCCS and not lint */ 69 70 #include <assert.h> 71 #include <errno.h> 72 #include <string.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <stddef.h> 76 #include <locale.h> 77 #include <wchar.h> 78 #include <sys/types.h> 79 #include <limits.h> 80 #include "citrus_module.h" 81 #include "citrus_ctype.h" 82 #include "citrus_euc.h" 83 84 85 /* ---------------------------------------------------------------------- 86 * private stuffs used by templates 87 */ 88 89 typedef struct { 90 char ch[3]; 91 int chlen; 92 } _EUCState; 93 94 typedef struct { 95 unsigned count[4]; 96 wchar_t bits[4]; 97 wchar_t mask; 98 unsigned mb_cur_max; 99 } _EUCEncodingInfo; 100 101 typedef struct { 102 _EUCEncodingInfo ei; 103 struct { 104 /* for future multi-locale facility */ 105 _EUCState s_mblen; 106 _EUCState s_mbrlen; 107 _EUCState s_mbrtowc; 108 _EUCState s_mbtowc; 109 _EUCState s_mbsrtowcs; 110 _EUCState s_wcrtomb; 111 _EUCState s_wcsrtombs; 112 _EUCState s_wctomb; 113 } states; 114 } _EUCCTypeInfo; 115 116 #define _SS2 0x008e 117 #define _SS3 0x008f 118 119 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 120 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 121 122 #define _FUNCNAME(m) _citrus_EUC_##m 123 #define _ENCODING_INFO _EUCEncodingInfo 124 #define _CTYPE_INFO _EUCCTypeInfo 125 #define _ENCODING_STATE _EUCState 126 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 127 #define _ENCODING_IS_STATE_DEPENDENT 0 128 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 129 130 131 static __inline int 132 _citrus_EUC_cs(unsigned int c) 133 { 134 c &= 0xff; 135 136 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 137 } 138 139 static __inline int 140 _citrus_EUC_parse_variable(_EUCEncodingInfo *ei, 141 const void *var, size_t lenvar) 142 { 143 const char *v, *e; 144 int x; 145 146 /* parse variable string */ 147 if (!var) 148 return (EFTYPE); 149 150 v = (const char *) var; 151 152 while (*v == ' ' || *v == '\t') 153 ++v; 154 155 ei->mb_cur_max = 1; 156 for (x = 0; x < 4; ++x) { 157 ei->count[x] = (int) strtol(v, (char **)&e, 0); 158 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) { 159 return (EFTYPE); 160 } 161 if (ei->mb_cur_max < ei->count[x]) 162 ei->mb_cur_max = ei->count[x]; 163 while (*v == ' ' || *v == '\t') 164 ++v; 165 ei->bits[x] = (int) strtol(v, (char **)&e, 0); 166 if (v == e || !(v = e)) { 167 return (EFTYPE); 168 } 169 while (*v == ' ' || *v == '\t') 170 ++v; 171 } 172 ei->mask = (int)strtol(v, (char **)&e, 0); 173 if (v == e || !(v = e)) { 174 return (EFTYPE); 175 } 176 177 return 0; 178 } 179 180 181 static __inline void 182 /*ARGSUSED*/ 183 _citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s) 184 { 185 memset(s, 0, sizeof(*s)); 186 } 187 188 static __inline void 189 /*ARGSUSED*/ 190 _citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s) 191 { 192 memcpy(pspriv, (const void *)s, sizeof(*s)); 193 } 194 195 static __inline void 196 /*ARGSUSED*/ 197 _citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s, 198 const void *pspriv) 199 { 200 memcpy((void *)s, pspriv, sizeof(*s)); 201 } 202 203 static int 204 _citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s, 205 size_t n, _EUCState *psenc, size_t *nresult) 206 { 207 wchar_t wchar; 208 int c, cs, len; 209 int chlenbak; 210 const char *s0, *s1 = NULL; 211 212 _DIAGASSERT(nresult != 0); 213 _DIAGASSERT(ei != NULL); 214 _DIAGASSERT(psenc != NULL); 215 _DIAGASSERT(s != NULL); 216 217 s0 = *s; 218 219 if (s0 == NULL) { 220 _citrus_EUC_init_state(ei, psenc); 221 *nresult = 0; /* state independent */ 222 return (0); 223 } 224 225 chlenbak = psenc->chlen; 226 227 /* make sure we have the first byte in the buffer */ 228 switch (psenc->chlen) { 229 case 0: 230 if (n < 1) 231 goto restart; 232 psenc->ch[0] = *s0++; 233 psenc->chlen = 1; 234 n--; 235 break; 236 case 1: 237 case 2: 238 break; 239 default: 240 /* illgeal state */ 241 goto encoding_error; 242 } 243 244 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 245 if (c == 0) 246 goto encoding_error; 247 while (psenc->chlen < c) { 248 if (n < 1) 249 goto restart; 250 psenc->ch[psenc->chlen] = *s0++; 251 psenc->chlen++; 252 n--; 253 } 254 *s = s0; 255 256 switch (cs) { 257 case 3: 258 case 2: 259 /* skip SS2/SS3 */ 260 len = c - 1; 261 s1 = &psenc->ch[1]; 262 break; 263 case 1: 264 case 0: 265 len = c; 266 s1 = &psenc->ch[0]; 267 break; 268 } 269 wchar = 0; 270 while (len-- > 0) 271 wchar = (wchar << 8) | (*s1++ & 0xff); 272 wchar = (wchar & ~ei->mask) | ei->bits[cs]; 273 274 psenc->chlen = 0; 275 if (pwc) 276 *pwc = wchar; 277 278 if (!wchar) { 279 *nresult = 0; 280 } else { 281 *nresult = (size_t)(c - chlenbak); 282 } 283 284 return 0; 285 286 encoding_error: 287 psenc->chlen = 0; 288 *nresult = (size_t)-1; 289 return (EILSEQ); 290 291 restart: 292 *nresult = (size_t)-2; 293 *s = s0; 294 return (0); 295 } 296 297 static int 298 _citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 299 _EUCState *psenc, size_t *nresult) 300 { 301 wchar_t m, nm; 302 int cs, i; 303 304 _DIAGASSERT(ei != NULL); 305 _DIAGASSERT(nresult != 0); 306 _DIAGASSERT(s != NULL); 307 308 m = wc & ei->mask; 309 nm = wc & ~m; 310 311 for (cs = 0; 312 cs < sizeof(ei->count)/sizeof(ei->count[0]); 313 cs++) { 314 if (m == ei->bits[cs]) 315 break; 316 } 317 /* fallback case - not sure if it is necessary */ 318 if (cs == sizeof(ei->count)/sizeof(ei->count[0])) 319 cs = 1; 320 321 i = ei->count[cs]; 322 if (n < i) 323 goto ilseq; 324 m = (cs % 2) ? 0x80 : 0x00; 325 switch (cs) { 326 case 2: 327 *s++ = _SS2; 328 i--; 329 break; 330 case 3: 331 *s++ = _SS3; 332 i--; 333 break; 334 } 335 336 while (i-- > 0) 337 *s++ = ((nm >> (i << 3)) & 0xff) | m; 338 339 *nresult = (size_t)ei->count[cs]; 340 return 0; 341 342 ilseq: 343 *nresult = (size_t)-1; 344 return EILSEQ; /*XXX*/ 345 } 346 347 static int 348 /*ARGSUSED*/ 349 _citrus_EUC_stdencoding_init(_EUCEncodingInfo * __restrict ei, 350 const void * __restrict var, size_t lenvar) 351 { 352 353 _DIAGASSERT(ei != NULL); 354 355 return (_citrus_EUC_parse_variable(ei, var, lenvar)); 356 } 357 358 static void 359 /*ARGSUSED*/ 360 _citrus_EUC_stdencoding_uninit(_EUCEncodingInfo * __restrict ei) 361 { 362 } 363 364 /* ---------------------------------------------------------------------- 365 * public interface for ctype 366 */ 367 368 _CITRUS_CTYPE_DECLS(EUC); 369 _CITRUS_CTYPE_DEF_OPS(EUC); 370 371 #include "citrus_ctype_template.h" 372