/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Definitions shared by the kernel iconv converters between UTF-8 and the
 * "CCK" encodings named below (GB18030, BIG5, EUC-TW, UHC, ...).
 * Everything in this header is visible to kernel builds (_KERNEL) only.
 */

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef	_KERNEL

/* The start value of leading byte of EUC encoding. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Valid EUC range or not: true when v lies in 0xA1 - 0xFE.
 * The argument is evaluated twice, so it must be free of side effects,
 * and it is compared as-is (no cast), so pass an unsigned byte value.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	((v) >= 0xA1 && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character for non-identicals and its length:
 * the three individual bytes, the same bytes packed into one value,
 * and the byte count.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of 3 or 4 bytes UTF-8 character is invalid or not.
 * "first" indexes the u8_valid_min_2nd_byte[] / u8_valid_max_2nd_byte[]
 * tables declared at the end of this header; "second" is evaluated twice.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	((second) < u8_valid_min_2nd_byte[(first)] ||			\
	(second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * The macro reads and updates the conversion state through a variable
 * named "kcd" that must be in scope at the point of use; "ib" must be a
 * modifiable pointer to the input bytes and "ibtail" the end of the input.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement: both the BOM skip and the bom_processed update stay
 * together even when the macro is the body of an unbraced if/else/loop.
 * Invoke it with a trailing semicolon.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * NOTE: this expands to a bare if-block (no trailing statement), so do
 * not use it directly in front of an "else"; put it inside braces.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&			\
		*((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {		\
		(ib) += 3;						\
	}

/*
 * Set errno and break.
 *
 * Stores "err" through the pointer named "errno", sets "ret_val" to
 * (size_t)-1 and breaks out of the enclosing loop or switch; both names
 * must be in scope at the point of use.
 *
 * NOTE: this expands to several statements and cannot be wrapped in
 * do { } while (0), because the "break" has to apply to the caller's
 * loop/switch. Always use it inside a braced block.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	*errno = (err);							\
	ret_val = (size_t)-1;						\
	break

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * When KICONV_REPLACE_INVALID is set in "flag", the input pointer "ib"
 * is advanced by "advance" bytes and control jumps to the caller's
 * REPLACE_INVALID label; otherwise this behaves exactly like
 * KICONV_SET_ERRNO_AND_BREAK(err). "flag", "ib" and the label must exist
 * at the point of use, and the same braced-block rule applies.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	}								\
	KICONV_SET_ERRNO_AND_BREAK((err))

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> utf8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void *kiconv_open_to_cck(void);
int kiconv_close_to_cck(void *);

/* Binary search function. */
size_t kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_KICONV_CCK_COMMON_H */