1 /*- 2 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Copyright (c) 2011 The FreeBSD Foundation 10 * All rights reserved. 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)euc.c 8.1 (Berkeley) 6/4/93 43 * $FreeBSD: head/lib/libc/locale/euc.c 227753 2011-11-20 14:45:42Z theraven $ 44 */ 45 46 #include <sys/param.h> 47 48 #include <errno.h> 49 #include <limits.h> 50 #include <runetype.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <wchar.h> 54 #include "mblocal.h" 55 56 extern int __mb_sb_limit; 57 58 static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, 59 size_t, mbstate_t * __restrict); 60 static int _EUC_mbsinit(const mbstate_t *); 61 static size_t _EUC_wcrtomb(char * __restrict, wchar_t, 62 mbstate_t * __restrict); 63 64 typedef struct { 65 int count[4]; 66 wchar_t bits[4]; 67 wchar_t mask; 68 } _EucInfo; 69 70 typedef struct { 71 wchar_t ch; 72 int set; 73 int want; 74 } _EucState; 75 76 int 77 _EUC_init(struct xlocale_ctype *l, _RuneLocale *rl) 78 { 79 _EucInfo *ei; 80 int x, new__mb_cur_max; 81 char *v, *e; 82 83 if (rl->__variable == NULL) 84 return (EFTYPE); 85 86 v = (char *)rl->__variable; 87 88 while (*v == ' ' || *v == '\t') 89 ++v; 90 91 if ((ei = malloc(sizeof(_EucInfo))) == NULL) 92 return (errno == 0 ? ENOMEM : errno); 93 94 new__mb_cur_max = 0; 95 for (x = 0; x < 4; ++x) { 96 ei->count[x] = (int)strtol(v, &e, 0); 97 if (v == e || !(v = e)) { 98 free(ei); 99 return (EFTYPE); 100 } 101 if (new__mb_cur_max < ei->count[x]) 102 new__mb_cur_max = ei->count[x]; 103 while (*v == ' ' || *v == '\t') 104 ++v; 105 ei->bits[x] = (int)strtol(v, &e, 0); 106 if (v == e || !(v = e)) { 107 free(ei); 108 return (EFTYPE); 109 } 110 while (*v == ' ' || *v == '\t') 111 ++v; 112 } 113 ei->mask = (int)strtol(v, &e, 0); 114 if (v == e || !(v = e)) { 115 free(ei); 116 return (EFTYPE); 117 } 118 rl->__variable = ei; 119 rl->__variable_len = sizeof(_EucInfo); 120 l->runes = rl; 121 l->__mb_cur_max = new__mb_cur_max; 122 l->__mbrtowc = _EUC_mbrtowc; 123 l->__wcrtomb = _EUC_wcrtomb; 124 l->__mbsinit = _EUC_mbsinit; 125 l->__mb_sb_limit = 256; 126 return (0); 127 } 128 129 static int 130 _EUC_mbsinit(const mbstate_t *ps) 131 { 132 133 return (ps == NULL || ((const _EucState *)ps)->want == 0); 134 } 135 136 #define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable)) 137 138 #define _SS2 0x008e 139 #define _SS3 0x008f 140 141 #define GR_BITS 0x80808080 /* XXX: to be fixed */ 142 143 static __inline int 144 _euc_set(u_int c) 145 { 146 147 c &= 0xff; 148 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 149 } 150 151 static size_t 152 _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, 153 mbstate_t * __restrict ps) 154 { 155 _EucState *es; 156 int i, set, want; 157 wchar_t wc; 158 const char *os; 159 160 es = (_EucState *)ps; 161 162 if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 || 163 es->set > 3) { 164 errno = EINVAL; 165 return ((size_t)-1); 166 } 167 168 if (s == NULL) { 169 s = ""; 170 n = 1; 171 pwc = NULL; 172 } 173 174 if (n == 0) 175 /* Incomplete multibyte sequence */ 176 return ((size_t)-2); 177 178 os = s; 179 180 if (es->want == 0) { 181 want = CEI->count[set = _euc_set(*s)]; 182 if (set == 2 || set == 3) { 183 --want; 184 if (--n == 0) { 185 /* Incomplete multibyte sequence */ 186 es->set = set; 187 es->want = want; 188 es->ch = 0; 189 return ((size_t)-2); 190 } 191 ++s; 192 if (*s == '\0') { 193 errno = EILSEQ; 194 return ((size_t)-1); 195 } 196 } 197 wc = (unsigned char)*s++; 198 } else { 199 set = es->set; 200 want = es->want; 201 wc = es->ch; 202 } 203 for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) { 204 if (*s == '\0') { 205 errno = EILSEQ; 206 return ((size_t)-1); 207 } 208 wc = (wc << 8) | (unsigned char)*s++; 209 } 210 if (i < want) { 211 /* Incomplete multibyte sequence */ 212 es->set = set; 213 es->want = want - i; 214 es->ch = wc; 215 return ((size_t)-2); 216 } 217 wc = (wc & ~CEI->mask) | CEI->bits[set]; 218 if (pwc != NULL) 219 *pwc = wc; 220 es->want = 0; 221 return (wc == L'\0' ? 0 : s - os); 222 } 223 224 static size_t 225 _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) 226 { 227 _EucState *es; 228 wchar_t m, nm; 229 int i, len; 230 231 es = (_EucState *)ps; 232 233 if (es->want != 0) { 234 errno = EINVAL; 235 return ((size_t)-1); 236 } 237 238 if (s == NULL) 239 /* Reset to initial shift state (no-op) */ 240 return (1); 241 242 m = wc & CEI->mask; 243 nm = wc & ~m; 244 245 if (m == CEI->bits[1]) { 246 CodeSet1: 247 /* Codeset 1: The first byte must have 0x80 in it. */ 248 i = len = CEI->count[1]; 249 while (i-- > 0) 250 *s++ = (nm >> (i << 3)) | 0x80; 251 } else { 252 if (m == CEI->bits[0]) 253 i = len = CEI->count[0]; 254 else if (m == CEI->bits[2]) { 255 i = len = CEI->count[2]; 256 *s++ = _SS2; 257 --i; 258 /* SS2 designates G2 into GR */ 259 nm |= GR_BITS; 260 } else if (m == CEI->bits[3]) { 261 i = len = CEI->count[3]; 262 *s++ = _SS3; 263 --i; 264 /* SS3 designates G3 into GR */ 265 nm |= GR_BITS; 266 } else 267 goto CodeSet1; /* Bletch */ 268 while (i-- > 0) 269 *s++ = (nm >> (i << 3)) & 0xff; 270 } 271 return (len); 272 } 273