1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * In this program, we assume that each table entry provided will contain 26 * a valid UCS character, an illegal character, or, a replacement character. 27 * In other words, it is table provider's responsibility to provide 28 * an appropriate mapping for each single byte character in the table since 29 * the program in this file will not do any special checking on the table 30 * component values. 31 * 32 * This particular file is to cover conversions from various single byte 33 * codesets to UCS-2, UCS-2BE, UCS-2LE, UCS-4, UCS-4BE, UCS-4LE, UTF-16, 34 * UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, and UTF-32LE. 35 */ 36 37 38 #include <stdlib.h> 39 #include <errno.h> 40 #include <sys/types.h> 41 #include <sys/isa_defs.h> 42 #include "sb_to_ucs.h" 43 44 45 void * 46 _icv_open() 47 { 48 ucs_state_t *cd = (ucs_state_t *)calloc(1, sizeof(ucs_state_t)); 49 50 if (cd == (ucs_state_t *)NULL) { 51 errno = ENOMEM; 52 return((void *)-1); 53 } 54 55 #if defined(UTF_16BE) || defined(UCS_2BE) || defined(UCS_4BE) || \ 56 defined(UTF_32BE) 57 cd->little_endian = false; 58 cd->bom_written = true; 59 #elif defined(UTF_16LE) || defined(UCS_2LE) || defined(UCS_4LE) || \ 60 defined(UTF_32LE) 61 cd->little_endian = true; 62 cd->bom_written = true; 63 #elif defined(_LITTLE_ENDIAN) 64 cd->little_endian = true; 65 #endif 66 67 return((void *)cd); 68 } 69 70 71 void 72 _icv_close(ucs_state_t *cd) 73 { 74 if (! cd) 75 errno = EBADF; 76 else 77 free((void *)cd); 78 } 79 80 81 size_t 82 _icv_iconv(ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf, 83 size_t *outbufleft) 84 { 85 size_t ret_val = 0; 86 unsigned char *ib; 87 unsigned char *ob; 88 unsigned char *ibtail; 89 unsigned char *obtail; 90 unsigned int u4; 91 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 92 unsigned int u4_2; 93 #endif 94 signed char obsz; 95 96 97 if (! cd) { 98 errno = EBADF; 99 return((size_t)-1); 100 } 101 102 if (!inbuf || !(*inbuf)) { 103 #if defined(UCS_2) || defined(UCS_4) || defined(UTF_16) || defined(UTF_32) 104 cd->bom_written = false; 105 #endif 106 return((size_t)0); 107 } 108 109 ib = (unsigned char *)*inbuf; 110 ob = (unsigned char *)*outbuf; 111 ibtail = ib + *inbufleft; 112 obtail = ob + *outbufleft; 113 114 while (ib < ibtail) { 115 u4 = sb_u4_tbl[*ib].u8; 116 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 117 u4_2 = 0; 118 #endif 119 120 if (sb_u4_tbl[*ib].size == ICV_TYPE_ILLEGAL_CHAR) { 121 errno = EILSEQ; 122 ret_val = (size_t)-1; 123 break; 124 } 125 126 obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : 127 ICV_FETCH_UCS_SIZE_TWO; 128 #if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) 129 if (u4 > 0x00ffff) { 130 u4 = ICV_CHAR_UCS2_REPLACEMENT; 131 ret_val++; 132 } 133 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 134 if (u4 > 0x00ffff && u4 < 0x110000) { 135 u4_2 = ((u4 - 0x010000) % 0x400) + 0x00dc00; 136 u4 = ((u4 - 0x010000) / 0x400) + 0x00d800; 137 obsz += 2; 138 } else if (u4 > 0x10ffff) { 139 u4 = ICV_CHAR_UCS2_REPLACEMENT; 140 ret_val++; 141 } 142 #elif defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 143 if (u4 > 0x10ffff) { 144 u4 = ICV_CHAR_UCS2_REPLACEMENT; 145 ret_val++; 146 } 147 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) 148 /* do nothing */ 149 #else 150 #error "Fatal: one of the UCS macros need to be defined." 151 #endif 152 153 /* 154 * The target values in the conversion tables are in UCS-4 155 * without BOM and so the max target value possible would be 156 * U+7FFFFFFF. 157 */ 158 if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x7fffffff || 159 (u4 >= 0x00d800 && u4 <= 0x00dfff)) { 160 /* 161 * if conversion table is right, this should not 162 * happen. 163 */ 164 errno = EILSEQ; 165 ret_val = (size_t)-1; 166 break; 167 } 168 169 if ((obtail - ob) < obsz) { 170 errno = E2BIG; 171 ret_val = (size_t)-1; 172 break; 173 } 174 175 if (cd->little_endian) { 176 if (! cd->bom_written) { 177 *ob++ = (uchar_t)0xff; 178 *ob++ = (uchar_t)0xfe; 179 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \ 180 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 181 *(ushort_t *)ob = (ushort_t)0; 182 ob += 2; 183 #endif 184 cd->bom_written = true; 185 } 186 *ob++ = (uchar_t)(u4 & 0xff); 187 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 188 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \ 189 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 190 *ob++ = (uchar_t)((u4 >> 16) & 0xff); 191 *ob++ = (uchar_t)((u4 >> 24) & 0xff); 192 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 193 if (u4_2) { 194 *ob++ = (uchar_t)(u4_2 & 0xff); 195 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff); 196 } 197 #endif 198 } else { 199 if (! cd->bom_written) { 200 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \ 201 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 202 *(ushort_t *)ob = (ushort_t)0; 203 ob += 2; 204 #endif 205 *ob++ = (uchar_t)0xfe; 206 *ob++ = (uchar_t)0xff; 207 cd->bom_written = true; 208 } 209 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \ 210 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 211 *ob++ = (uchar_t)((u4 >> 24) & 0xff); 212 *ob++ = (uchar_t)((u4 >> 16) & 0xff); 213 #endif 214 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 215 *ob++ = (uchar_t)(u4 & 0xff); 216 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 217 if (u4_2) { 218 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff); 219 *ob++ = (uchar_t)(u4_2 & 0xff); 220 } 221 #endif 222 } 223 ib++; 224 } 225 226 *inbuf = (char *)ib; 227 *inbufleft = ibtail - ib; 228 *outbuf = (char *)ob; 229 *outbufleft = obtail - ob; 230 231 return(ret_val); 232 } 233