/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

/*
 * Common macros, table types and prototypes shared by the UTF-8 <-> CCK
 * (GB18030/BIG5/EUC-TW/UHC...) kernel iconv conversions.
 *
 * No #include appears in this header: the fixed-width integer types,
 * uchar_t, size_t and kiconv_state_t used below must already be declared
 * by whoever includes it.
 */

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _KERNEL

/* The start value of leading byte of EUC encoding. */
#define	KICONV_EUC_START	(0xA1)

/*
 * Valid EUC range or not: 0xA1 - 0xFE.
 * Note: (v) is evaluated twice; do not pass an expression with side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	((v) >= 0xA1 && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)	(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character (U+FFFD, encoded as EF BF BD) for
 * non-identicals, given as individual bytes, as one packed value,
 * and its length in bytes.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of 3 or 4 bytes UTF-8 character is invalid or not.
 * The bounds are looked up in u8_valid_min_2nd_byte[] and
 * u8_valid_max_2nd_byte[], indexed by the first byte. Both arguments are
 * evaluated more than once.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first) \
	((second) < u8_valid_min_2nd_byte[(first)] || \
	(second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * The macro relies on a variable named `kcd' being in scope at the call
 * site; it is cast to kiconv_state_t and its bom_processed member is set
 * to 1 unconditionally once the check has run. (ib) must be a modifiable
 * byte pointer and is advanced by 3 when a BOM (EF BB BF) is found.
 *
 * Wrapped in do { } while (0) so the whole expansion is one statement:
 * the BOM test and the bom_processed update stay together even under an
 * unbraced if/else or loop body. Invoke it with a trailing semicolon.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail) \
	do { \
		if (((kiconv_state_t)kcd)->bom_processed == 0 && \
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef && \
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) { \
			(ib) += 3; \
		} \
		((kiconv_state_t)kcd)->bom_processed = 1; \
	} while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * Advances (ib) by 3 when at least three bytes remain before (ibtail) and
 * they are EF BB BF. Wrapped in do { } while (0) so that it behaves as a
 * single statement (no dangling-else hazard). Invoke it with a trailing
 * semicolon.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail) \
	do { \
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef && \
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) { \
			(ib) += 3; \
		} \
	} while (0)

/*
 * Set errno and break.
 *
 * Requires `errno' (a pointer that is written through) and `ret_val' to
 * be in scope at the call site, and must appear inside the loop or switch
 * the `break' is meant to leave.
 *
 * This expands to three separate statements and cannot be wrapped in
 * do { } while (0), because the `break' would then only leave the wrapper.
 * Always use it inside a braced block, never as the body of an unbraced
 * if/else.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err) \
	*errno = (err); \
	ret_val = (size_t)-1; \
	break

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * Requires `flag', `ib' and a label named REPLACE_INVALID at the call
 * site, in addition to what KICONV_SET_ERRNO_AND_BREAK needs. When
 * KICONV_REPLACE_INVALID is set in flag, ib is advanced by (advance) and
 * control jumps to REPLACE_INVALID; otherwise errno/ret_val are set and
 * the enclosing loop or switch is left.
 *
 * Like KICONV_SET_ERRNO_AND_BREAK, this is a multi-statement expansion:
 * always use it inside a braced block.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err) \
	if (flag & KICONV_REPLACE_INVALID) { \
		ib += (advance); \
		goto REPLACE_INVALID; \
	} \
	KICONV_SET_ERRNO_AND_BREAK((err))

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> utf8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void *kiconv_open_to_cck(void);
int kiconv_close_to_cck(void *);

/* Binary search function. */
size_t kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _SYS_KICONV_CCK_COMMON_H */