1ad30f8e7SGabor Kovesdan /* $FreeBSD$ */ 2ad30f8e7SGabor Kovesdan /* $NetBSD: citrus_utf1632.c,v 1.9 2008/06/14 16:01:08 tnozaki Exp $ */ 3ad30f8e7SGabor Kovesdan 4ad30f8e7SGabor Kovesdan /*- 5ad30f8e7SGabor Kovesdan * Copyright (c)2003 Citrus Project, 6ad30f8e7SGabor Kovesdan * All rights reserved. 7ad30f8e7SGabor Kovesdan * 8ad30f8e7SGabor Kovesdan * Redistribution and use in source and binary forms, with or without 9ad30f8e7SGabor Kovesdan * modification, are permitted provided that the following conditions 10ad30f8e7SGabor Kovesdan * are met: 11ad30f8e7SGabor Kovesdan * 1. Redistributions of source code must retain the above copyright 12ad30f8e7SGabor Kovesdan * notice, this list of conditions and the following disclaimer. 13ad30f8e7SGabor Kovesdan * 2. Redistributions in binary form must reproduce the above copyright 14ad30f8e7SGabor Kovesdan * notice, this list of conditions and the following disclaimer in the 15ad30f8e7SGabor Kovesdan * documentation and/or other materials provided with the distribution. 16ad30f8e7SGabor Kovesdan * 17ad30f8e7SGabor Kovesdan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18ad30f8e7SGabor Kovesdan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19ad30f8e7SGabor Kovesdan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20ad30f8e7SGabor Kovesdan * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21ad30f8e7SGabor Kovesdan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22ad30f8e7SGabor Kovesdan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23ad30f8e7SGabor Kovesdan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24ad30f8e7SGabor Kovesdan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25ad30f8e7SGabor Kovesdan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26ad30f8e7SGabor Kovesdan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27ad30f8e7SGabor Kovesdan * SUCH DAMAGE. 28ad30f8e7SGabor Kovesdan */ 29ad30f8e7SGabor Kovesdan 30ad30f8e7SGabor Kovesdan #include <sys/cdefs.h> 31ad30f8e7SGabor Kovesdan #include <sys/endian.h> 32ad30f8e7SGabor Kovesdan #include <sys/types.h> 33ad30f8e7SGabor Kovesdan 34ad30f8e7SGabor Kovesdan #include <assert.h> 35ad30f8e7SGabor Kovesdan #include <errno.h> 36ad30f8e7SGabor Kovesdan #include <limits.h> 37ad30f8e7SGabor Kovesdan #include <stddef.h> 38ad30f8e7SGabor Kovesdan #include <stdio.h> 39ad30f8e7SGabor Kovesdan #include <stdlib.h> 40ad30f8e7SGabor Kovesdan #include <string.h> 41ad30f8e7SGabor Kovesdan #include <wchar.h> 42ad30f8e7SGabor Kovesdan 43ad30f8e7SGabor Kovesdan #include "citrus_namespace.h" 44ad30f8e7SGabor Kovesdan #include "citrus_types.h" 45ad30f8e7SGabor Kovesdan #include "citrus_module.h" 46ad30f8e7SGabor Kovesdan #include "citrus_stdenc.h" 47ad30f8e7SGabor Kovesdan #include "citrus_bcs.h" 48ad30f8e7SGabor Kovesdan 49ad30f8e7SGabor Kovesdan #include "citrus_utf1632.h" 50ad30f8e7SGabor Kovesdan 51ad30f8e7SGabor Kovesdan 52ad30f8e7SGabor Kovesdan /* ---------------------------------------------------------------------- 53ad30f8e7SGabor Kovesdan * private stuffs used by templates 54ad30f8e7SGabor Kovesdan */ 55ad30f8e7SGabor Kovesdan 56ad30f8e7SGabor Kovesdan typedef struct { 57ad30f8e7SGabor Kovesdan int chlen; 58ad30f8e7SGabor Kovesdan int current_endian; 59ad30f8e7SGabor Kovesdan uint8_t ch[4]; 60ad30f8e7SGabor Kovesdan } _UTF1632State; 61ad30f8e7SGabor Kovesdan 62ad30f8e7SGabor Kovesdan #define _ENDIAN_UNKNOWN 0 63ad30f8e7SGabor Kovesdan #define _ENDIAN_BIG 1 64ad30f8e7SGabor Kovesdan #define _ENDIAN_LITTLE 2 65ad30f8e7SGabor Kovesdan #if BYTE_ORDER == BIG_ENDIAN 66ad30f8e7SGabor Kovesdan #define _ENDIAN_INTERNAL _ENDIAN_BIG 67ad30f8e7SGabor Kovesdan #define _ENDIAN_SWAPPED _ENDIAN_LITTLE 68ad30f8e7SGabor Kovesdan #else 69ad30f8e7SGabor Kovesdan #define _ENDIAN_INTERNAL _ENDIAN_LITTLE 70ad30f8e7SGabor Kovesdan #define _ENDIAN_SWAPPED _ENDIAN_BIG 71ad30f8e7SGabor Kovesdan #endif 72ad30f8e7SGabor Kovesdan #define _MODE_UTF32 0x00000001U 73ad30f8e7SGabor Kovesdan #define _MODE_FORCE_ENDIAN 0x00000002U 74ad30f8e7SGabor Kovesdan 75ad30f8e7SGabor Kovesdan typedef struct { 76ad30f8e7SGabor Kovesdan int preffered_endian; 77ad30f8e7SGabor Kovesdan unsigned int cur_max; 78ad30f8e7SGabor Kovesdan uint32_t mode; 79ad30f8e7SGabor Kovesdan } _UTF1632EncodingInfo; 80ad30f8e7SGabor Kovesdan 81ad30f8e7SGabor Kovesdan #define _FUNCNAME(m) _citrus_UTF1632_##m 82ad30f8e7SGabor Kovesdan #define _ENCODING_INFO _UTF1632EncodingInfo 83ad30f8e7SGabor Kovesdan #define _ENCODING_STATE _UTF1632State 84ad30f8e7SGabor Kovesdan #define _ENCODING_MB_CUR_MAX(_ei_) ((_ei_)->cur_max) 85ad30f8e7SGabor Kovesdan #define _ENCODING_IS_STATE_DEPENDENT 0 86ad30f8e7SGabor Kovesdan #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 87ad30f8e7SGabor Kovesdan 88ad30f8e7SGabor Kovesdan 89ad30f8e7SGabor Kovesdan static __inline void 90ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 91ad30f8e7SGabor Kovesdan _citrus_UTF1632_init_state(_UTF1632EncodingInfo *ei __unused, 92ad30f8e7SGabor Kovesdan _UTF1632State *s) 93ad30f8e7SGabor Kovesdan { 94ad30f8e7SGabor Kovesdan 95ad30f8e7SGabor Kovesdan memset(s, 0, sizeof(*s)); 96ad30f8e7SGabor Kovesdan } 97ad30f8e7SGabor Kovesdan 98ad30f8e7SGabor Kovesdan static int 99ad30f8e7SGabor Kovesdan _citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo *ei, wchar_t *pwc, 1001243a98eSTijl Coosemans char **s, size_t n, _UTF1632State *psenc, size_t *nresult) 101ad30f8e7SGabor Kovesdan { 1021243a98eSTijl Coosemans char *s0; 103ad30f8e7SGabor Kovesdan size_t result; 104ad30f8e7SGabor Kovesdan wchar_t wc = L'\0'; 105ad30f8e7SGabor Kovesdan int chlenbak, endian, needlen; 106ad30f8e7SGabor Kovesdan 107ad30f8e7SGabor Kovesdan s0 = *s; 108ad30f8e7SGabor Kovesdan 109ad30f8e7SGabor Kovesdan if (s0 == NULL) { 110ad30f8e7SGabor Kovesdan _citrus_UTF1632_init_state(ei, psenc); 111ad30f8e7SGabor Kovesdan *nresult = 0; /* state independent */ 112ad30f8e7SGabor Kovesdan return (0); 113ad30f8e7SGabor Kovesdan } 114ad30f8e7SGabor Kovesdan 115ad30f8e7SGabor Kovesdan result = 0; 116ad30f8e7SGabor Kovesdan chlenbak = psenc->chlen; 117ad30f8e7SGabor Kovesdan 118ad30f8e7SGabor Kovesdan refetch: 119ad30f8e7SGabor Kovesdan needlen = ((ei->mode & _MODE_UTF32) != 0 || chlenbak >= 2) ? 4 : 2; 120ad30f8e7SGabor Kovesdan 121ad30f8e7SGabor Kovesdan while (chlenbak < needlen) { 122ad30f8e7SGabor Kovesdan if (n == 0) 123ad30f8e7SGabor Kovesdan goto restart; 124ad30f8e7SGabor Kovesdan psenc->ch[chlenbak++] = *s0++; 125ad30f8e7SGabor Kovesdan n--; 126ad30f8e7SGabor Kovesdan result++; 127ad30f8e7SGabor Kovesdan } 128ad30f8e7SGabor Kovesdan 129ad30f8e7SGabor Kovesdan /* judge endian marker */ 130ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 131ad30f8e7SGabor Kovesdan /* UTF16 */ 132ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0xFE && psenc->ch[1] == 0xFF) { 133ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 134ad30f8e7SGabor Kovesdan chlenbak = 0; 135ad30f8e7SGabor Kovesdan goto refetch; 136ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE) { 137ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 138ad30f8e7SGabor Kovesdan chlenbak = 0; 139ad30f8e7SGabor Kovesdan goto refetch; 140ad30f8e7SGabor Kovesdan } 141ad30f8e7SGabor Kovesdan } else { 142ad30f8e7SGabor Kovesdan /* UTF32 */ 143ad30f8e7SGabor Kovesdan if (psenc->ch[0] == 0x00 && psenc->ch[1] == 0x00 && 144ad30f8e7SGabor Kovesdan psenc->ch[2] == 0xFE && psenc->ch[3] == 0xFF) { 145ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_BIG; 146ad30f8e7SGabor Kovesdan chlenbak = 0; 147ad30f8e7SGabor Kovesdan goto refetch; 148ad30f8e7SGabor Kovesdan } else if (psenc->ch[0] == 0xFF && psenc->ch[1] == 0xFE && 149ad30f8e7SGabor Kovesdan psenc->ch[2] == 0x00 && psenc->ch[3] == 0x00) { 150ad30f8e7SGabor Kovesdan psenc->current_endian = _ENDIAN_LITTLE; 151ad30f8e7SGabor Kovesdan chlenbak = 0; 152ad30f8e7SGabor Kovesdan goto refetch; 153ad30f8e7SGabor Kovesdan } 154ad30f8e7SGabor Kovesdan } 155ad30f8e7SGabor Kovesdan endian = ((ei->mode & _MODE_FORCE_ENDIAN) != 0 || 156ad30f8e7SGabor Kovesdan psenc->current_endian == _ENDIAN_UNKNOWN) ? ei->preffered_endian : 157ad30f8e7SGabor Kovesdan psenc->current_endian; 158ad30f8e7SGabor Kovesdan 159ad30f8e7SGabor Kovesdan /* get wc */ 160ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32) == 0) { 161ad30f8e7SGabor Kovesdan /* UTF16 */ 162ad30f8e7SGabor Kovesdan if (needlen == 2) { 163ad30f8e7SGabor Kovesdan switch (endian) { 164ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 165ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 166ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8)); 167ad30f8e7SGabor Kovesdan break; 168ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 169ad30f8e7SGabor Kovesdan wc = (psenc->ch[1] | 170ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 8)); 171ad30f8e7SGabor Kovesdan break; 172ad30f8e7SGabor Kovesdan default: 173ad30f8e7SGabor Kovesdan goto ilseq; 174ad30f8e7SGabor Kovesdan } 175ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDBFF) { 176ad30f8e7SGabor Kovesdan /* surrogate high */ 177ad30f8e7SGabor Kovesdan needlen = 4; 178ad30f8e7SGabor Kovesdan goto refetch; 179ad30f8e7SGabor Kovesdan } 180ad30f8e7SGabor Kovesdan } else { 181ad30f8e7SGabor Kovesdan /* surrogate low */ 182ad30f8e7SGabor Kovesdan wc -= 0xD800; /* wc : surrogate high (see above) */ 183ad30f8e7SGabor Kovesdan wc <<= 10; 184ad30f8e7SGabor Kovesdan switch (endian) { 185ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 186ad30f8e7SGabor Kovesdan if (psenc->ch[3] < 0xDC || psenc->ch[3] > 0xDF) 187ad30f8e7SGabor Kovesdan goto ilseq; 188ad30f8e7SGabor Kovesdan wc |= psenc->ch[2]; 189ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[3] & 3) << 8; 190ad30f8e7SGabor Kovesdan break; 191ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 192ad30f8e7SGabor Kovesdan if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF) 193ad30f8e7SGabor Kovesdan goto ilseq; 194ad30f8e7SGabor Kovesdan wc |= psenc->ch[3]; 195ad30f8e7SGabor Kovesdan wc |= (wchar_t)(psenc->ch[2] & 3) << 8; 196ad30f8e7SGabor Kovesdan break; 197ad30f8e7SGabor Kovesdan default: 198ad30f8e7SGabor Kovesdan goto ilseq; 199ad30f8e7SGabor Kovesdan } 200ad30f8e7SGabor Kovesdan wc += 0x10000; 201ad30f8e7SGabor Kovesdan } 202ad30f8e7SGabor Kovesdan } else { 203ad30f8e7SGabor Kovesdan /* UTF32 */ 204ad30f8e7SGabor Kovesdan switch (endian) { 205ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 206ad30f8e7SGabor Kovesdan wc = (psenc->ch[0] | 207ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 8) | 208ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 16) | 209ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[3] << 24)); 210ad30f8e7SGabor Kovesdan break; 211ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 212ad30f8e7SGabor Kovesdan wc = (psenc->ch[3] | 213ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[2] << 8) | 214ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[1] << 16) | 215ad30f8e7SGabor Kovesdan ((wchar_t)psenc->ch[0] << 24)); 216ad30f8e7SGabor Kovesdan break; 217ad30f8e7SGabor Kovesdan default: 218ad30f8e7SGabor Kovesdan goto ilseq; 219ad30f8e7SGabor Kovesdan } 220ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 221ad30f8e7SGabor Kovesdan goto ilseq; 222ad30f8e7SGabor Kovesdan } 223ad30f8e7SGabor Kovesdan 224ad30f8e7SGabor Kovesdan 225ad30f8e7SGabor Kovesdan *pwc = wc; 226ad30f8e7SGabor Kovesdan psenc->chlen = 0; 227ad30f8e7SGabor Kovesdan *nresult = result; 228ad30f8e7SGabor Kovesdan *s = s0; 229ad30f8e7SGabor Kovesdan 230ad30f8e7SGabor Kovesdan return (0); 231ad30f8e7SGabor Kovesdan 232ad30f8e7SGabor Kovesdan ilseq: 233ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 234ad30f8e7SGabor Kovesdan psenc->chlen = 0; 235ad30f8e7SGabor Kovesdan return (EILSEQ); 236ad30f8e7SGabor Kovesdan 237ad30f8e7SGabor Kovesdan restart: 238ad30f8e7SGabor Kovesdan *nresult = (size_t)-2; 239ad30f8e7SGabor Kovesdan psenc->chlen = chlenbak; 240ad30f8e7SGabor Kovesdan *s = s0; 241ad30f8e7SGabor Kovesdan return (0); 242ad30f8e7SGabor Kovesdan } 243ad30f8e7SGabor Kovesdan 244ad30f8e7SGabor Kovesdan static int 245ad30f8e7SGabor Kovesdan _citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo *ei, char *s, size_t n, 246ad30f8e7SGabor Kovesdan wchar_t wc, _UTF1632State *psenc, size_t *nresult) 247ad30f8e7SGabor Kovesdan { 248ad30f8e7SGabor Kovesdan wchar_t wc2; 249ad30f8e7SGabor Kovesdan static const char _bom[4] = { 250ad30f8e7SGabor Kovesdan 0x00, 0x00, 0xFE, 0xFF, 251ad30f8e7SGabor Kovesdan }; 252ad30f8e7SGabor Kovesdan const char *bom = &_bom[0]; 253ad30f8e7SGabor Kovesdan size_t cnt; 254ad30f8e7SGabor Kovesdan 255ad30f8e7SGabor Kovesdan cnt = (size_t)0; 256ad30f8e7SGabor Kovesdan if (psenc->current_endian == _ENDIAN_UNKNOWN) { 257ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) { 258ad30f8e7SGabor Kovesdan if (ei->mode & _MODE_UTF32) 259ad30f8e7SGabor Kovesdan cnt = 4; 260ad30f8e7SGabor Kovesdan else { 261ad30f8e7SGabor Kovesdan cnt = 2; 262ad30f8e7SGabor Kovesdan bom += 2; 263ad30f8e7SGabor Kovesdan } 264ad30f8e7SGabor Kovesdan if (n < cnt) 265ad30f8e7SGabor Kovesdan goto e2big; 266ad30f8e7SGabor Kovesdan memcpy(s, bom, cnt); 267ad30f8e7SGabor Kovesdan s += cnt, n -= cnt; 268ad30f8e7SGabor Kovesdan } 269ad30f8e7SGabor Kovesdan psenc->current_endian = ei->preffered_endian; 270ad30f8e7SGabor Kovesdan } 271ad30f8e7SGabor Kovesdan 272ad30f8e7SGabor Kovesdan wc2 = 0; 273ad30f8e7SGabor Kovesdan if ((ei->mode & _MODE_UTF32)==0) { 274ad30f8e7SGabor Kovesdan /* UTF16 */ 275ad30f8e7SGabor Kovesdan if (wc > 0xFFFF) { 276ad30f8e7SGabor Kovesdan /* surrogate */ 277ad30f8e7SGabor Kovesdan if (wc > 0x10FFFF) 278ad30f8e7SGabor Kovesdan goto ilseq; 279ad30f8e7SGabor Kovesdan if (n < 4) 280ad30f8e7SGabor Kovesdan goto e2big; 281ad30f8e7SGabor Kovesdan cnt += 4; 282ad30f8e7SGabor Kovesdan wc -= 0x10000; 283ad30f8e7SGabor Kovesdan wc2 = (wc & 0x3FF) | 0xDC00; 284ad30f8e7SGabor Kovesdan wc = (wc>>10) | 0xD800; 285ad30f8e7SGabor Kovesdan } else { 286ad30f8e7SGabor Kovesdan if (n < 2) 287ad30f8e7SGabor Kovesdan goto e2big; 288ad30f8e7SGabor Kovesdan cnt += 2; 289ad30f8e7SGabor Kovesdan } 290ad30f8e7SGabor Kovesdan 291ad30f8e7SGabor Kovesdan surrogate: 292ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 293ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 294ad30f8e7SGabor Kovesdan s[1] = wc; 295ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 296ad30f8e7SGabor Kovesdan break; 297ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 298ad30f8e7SGabor Kovesdan s[0] = wc; 299ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 300ad30f8e7SGabor Kovesdan break; 301ad30f8e7SGabor Kovesdan } 302ad30f8e7SGabor Kovesdan if (wc2 != 0) { 303ad30f8e7SGabor Kovesdan wc = wc2; 304ad30f8e7SGabor Kovesdan wc2 = 0; 305ad30f8e7SGabor Kovesdan s += 2; 306ad30f8e7SGabor Kovesdan goto surrogate; 307ad30f8e7SGabor Kovesdan } 308ad30f8e7SGabor Kovesdan } else { 309ad30f8e7SGabor Kovesdan /* UTF32 */ 310ad30f8e7SGabor Kovesdan if (wc >= 0xD800 && wc <= 0xDFFF) 311ad30f8e7SGabor Kovesdan goto ilseq; 312ad30f8e7SGabor Kovesdan if (n < 4) 313ad30f8e7SGabor Kovesdan goto e2big; 314ad30f8e7SGabor Kovesdan cnt += 4; 315ad30f8e7SGabor Kovesdan switch (psenc->current_endian) { 316ad30f8e7SGabor Kovesdan case _ENDIAN_BIG: 317ad30f8e7SGabor Kovesdan s[3] = wc; 318ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 319ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 320ad30f8e7SGabor Kovesdan s[0] = (wc >>= 8); 321ad30f8e7SGabor Kovesdan break; 322ad30f8e7SGabor Kovesdan case _ENDIAN_LITTLE: 323ad30f8e7SGabor Kovesdan s[0] = wc; 324ad30f8e7SGabor Kovesdan s[1] = (wc >>= 8); 325ad30f8e7SGabor Kovesdan s[2] = (wc >>= 8); 326ad30f8e7SGabor Kovesdan s[3] = (wc >>= 8); 327ad30f8e7SGabor Kovesdan break; 328ad30f8e7SGabor Kovesdan } 329ad30f8e7SGabor Kovesdan } 330ad30f8e7SGabor Kovesdan *nresult = cnt; 331ad30f8e7SGabor Kovesdan 332ad30f8e7SGabor Kovesdan return (0); 333ad30f8e7SGabor Kovesdan 334ad30f8e7SGabor Kovesdan ilseq: 335ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 336ad30f8e7SGabor Kovesdan return (EILSEQ); 337ad30f8e7SGabor Kovesdan e2big: 338ad30f8e7SGabor Kovesdan *nresult = (size_t)-1; 339ad30f8e7SGabor Kovesdan return (E2BIG); 340ad30f8e7SGabor Kovesdan } 341ad30f8e7SGabor Kovesdan 342ad30f8e7SGabor Kovesdan static void 343ad30f8e7SGabor Kovesdan parse_variable(_UTF1632EncodingInfo * __restrict ei, 344ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 345ad30f8e7SGabor Kovesdan { 346ad30f8e7SGabor Kovesdan const char *p; 347ad30f8e7SGabor Kovesdan 348ad30f8e7SGabor Kovesdan p = var; 349ad30f8e7SGabor Kovesdan while (lenvar > 0) { 350ad30f8e7SGabor Kovesdan switch (*p) { 351ad30f8e7SGabor Kovesdan case 'B': 352ad30f8e7SGabor Kovesdan case 'b': 353ad30f8e7SGabor Kovesdan MATCH(big, ei->preffered_endian = _ENDIAN_BIG); 354ad30f8e7SGabor Kovesdan break; 355ad30f8e7SGabor Kovesdan case 'L': 356ad30f8e7SGabor Kovesdan case 'l': 357ad30f8e7SGabor Kovesdan MATCH(little, ei->preffered_endian = _ENDIAN_LITTLE); 358ad30f8e7SGabor Kovesdan break; 359ad30f8e7SGabor Kovesdan case 'i': 360ad30f8e7SGabor Kovesdan case 'I': 361ad30f8e7SGabor Kovesdan MATCH(internal, ei->preffered_endian = _ENDIAN_INTERNAL); 362ad30f8e7SGabor Kovesdan break; 363ad30f8e7SGabor Kovesdan case 's': 364ad30f8e7SGabor Kovesdan case 'S': 365ad30f8e7SGabor Kovesdan MATCH(swapped, ei->preffered_endian = _ENDIAN_SWAPPED); 366ad30f8e7SGabor Kovesdan break; 367ad30f8e7SGabor Kovesdan case 'F': 368ad30f8e7SGabor Kovesdan case 'f': 369ad30f8e7SGabor Kovesdan MATCH(force, ei->mode |= _MODE_FORCE_ENDIAN); 370ad30f8e7SGabor Kovesdan break; 371ad30f8e7SGabor Kovesdan case 'U': 372ad30f8e7SGabor Kovesdan case 'u': 373ad30f8e7SGabor Kovesdan MATCH(utf32, ei->mode |= _MODE_UTF32); 374ad30f8e7SGabor Kovesdan break; 375ad30f8e7SGabor Kovesdan } 376ad30f8e7SGabor Kovesdan p++; 377ad30f8e7SGabor Kovesdan lenvar--; 378ad30f8e7SGabor Kovesdan } 379ad30f8e7SGabor Kovesdan } 380ad30f8e7SGabor Kovesdan 381ad30f8e7SGabor Kovesdan static int 382ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 383ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei, 384ad30f8e7SGabor Kovesdan const void * __restrict var, size_t lenvar) 385ad30f8e7SGabor Kovesdan { 386ad30f8e7SGabor Kovesdan 387ad30f8e7SGabor Kovesdan memset((void *)ei, 0, sizeof(*ei)); 388ad30f8e7SGabor Kovesdan 389ad30f8e7SGabor Kovesdan parse_variable(ei, var, lenvar); 390ad30f8e7SGabor Kovesdan 391ad30f8e7SGabor Kovesdan ei->cur_max = ((ei->mode&_MODE_UTF32) == 0) ? 6 : 8; 392ad30f8e7SGabor Kovesdan /* 6: endian + surrogate */ 393ad30f8e7SGabor Kovesdan /* 8: endian + normal */ 394ad30f8e7SGabor Kovesdan 395ad30f8e7SGabor Kovesdan if (ei->preffered_endian == _ENDIAN_UNKNOWN) { 396ad30f8e7SGabor Kovesdan ei->preffered_endian = _ENDIAN_BIG; 397ad30f8e7SGabor Kovesdan } 398ad30f8e7SGabor Kovesdan 399ad30f8e7SGabor Kovesdan return (0); 400ad30f8e7SGabor Kovesdan } 401ad30f8e7SGabor Kovesdan 402ad30f8e7SGabor Kovesdan static void 403ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 404ad30f8e7SGabor Kovesdan _citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo *ei __unused) 405ad30f8e7SGabor Kovesdan { 406ad30f8e7SGabor Kovesdan 407ad30f8e7SGabor Kovesdan } 408ad30f8e7SGabor Kovesdan 409ad30f8e7SGabor Kovesdan static __inline int 410ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 411ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei __unused, 412ad30f8e7SGabor Kovesdan _csid_t * __restrict csid, _index_t * __restrict idx, _wc_t wc) 413ad30f8e7SGabor Kovesdan { 414ad30f8e7SGabor Kovesdan 415ad30f8e7SGabor Kovesdan *csid = 0; 416ad30f8e7SGabor Kovesdan *idx = (_index_t)wc; 417ad30f8e7SGabor Kovesdan 418ad30f8e7SGabor Kovesdan return (0); 419ad30f8e7SGabor Kovesdan } 420ad30f8e7SGabor Kovesdan 421ad30f8e7SGabor Kovesdan static __inline int 422ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 423ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei __unused, 424ad30f8e7SGabor Kovesdan _wc_t * __restrict wc, _csid_t csid, _index_t idx) 425ad30f8e7SGabor Kovesdan { 426ad30f8e7SGabor Kovesdan 427ad30f8e7SGabor Kovesdan if (csid != 0) 428ad30f8e7SGabor Kovesdan return (EILSEQ); 429ad30f8e7SGabor Kovesdan 430ad30f8e7SGabor Kovesdan *wc = (_wc_t)idx; 431ad30f8e7SGabor Kovesdan 432ad30f8e7SGabor Kovesdan return (0); 433ad30f8e7SGabor Kovesdan } 434ad30f8e7SGabor Kovesdan 435ad30f8e7SGabor Kovesdan static __inline int 436ad30f8e7SGabor Kovesdan /*ARGSUSED*/ 437ad30f8e7SGabor Kovesdan _citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei __unused, 438ad30f8e7SGabor Kovesdan _UTF1632State * __restrict psenc, int * __restrict rstate) 439ad30f8e7SGabor Kovesdan { 440ad30f8e7SGabor Kovesdan 441ad30f8e7SGabor Kovesdan *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 442ad30f8e7SGabor Kovesdan _STDENC_SDGEN_INCOMPLETE_CHAR; 443ad30f8e7SGabor Kovesdan return (0); 444ad30f8e7SGabor Kovesdan } 445ad30f8e7SGabor Kovesdan 446ad30f8e7SGabor Kovesdan /* ---------------------------------------------------------------------- 447ad30f8e7SGabor Kovesdan * public interface for stdenc 448ad30f8e7SGabor Kovesdan */ 449ad30f8e7SGabor Kovesdan 450ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DECLS(UTF1632); 451ad30f8e7SGabor Kovesdan _CITRUS_STDENC_DEF_OPS(UTF1632); 452ad30f8e7SGabor Kovesdan 453ad30f8e7SGabor Kovesdan #include "citrus_stdenc_template.h" 454