1 /* 2 * MultiByteToWideChar implementation 3 * 4 * Copyright 2000 Alexandre Julliard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 */ 20 21 #include <string.h> 22 23 #include "wine/unicode.h" 24 25 extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; 26 27 /* check the code whether it is in Unicode Private Use Area (PUA). */ 28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */ 29 static inline int is_private_use_area_char(WCHAR code) 30 { 31 return (code >= 0xe000 && code <= 0xf8ff); 32 } 33 34 /* check src string for invalid chars; return non-zero if invalid char found */ 35 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags, 36 const unsigned char *src, unsigned int srclen ) 37 { 38 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 39 const WCHAR def_unicode_char = table->info.def_unicode_char; 40 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 41 + (def_unicode_char & 0xff)]; 42 while (srclen) 43 { 44 if ((cp2uni[*src] == def_unicode_char && *src != def_char) || 45 is_private_use_area_char(cp2uni[*src])) break; 46 src++; 47 srclen--; 48 } 49 return srclen; 50 } 51 52 /* mbstowcs for single-byte code page */ 53 /* all lengths are in characters, not bytes */ 54 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags, 55 const unsigned char *src, unsigned int srclen, 56 WCHAR *dst, unsigned int dstlen ) 57 { 58 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 59 int ret = srclen; 60 61 if (dstlen < srclen) 62 { 63 /* buffer too small: fill it up to dstlen and return error */ 64 srclen = dstlen; 65 ret = -1; 66 } 67 68 while (srclen >= 16) 69 { 70 dst[0] = cp2uni[src[0]]; 71 dst[1] = cp2uni[src[1]]; 72 dst[2] = cp2uni[src[2]]; 73 dst[3] = cp2uni[src[3]]; 74 dst[4] = cp2uni[src[4]]; 75 dst[5] = cp2uni[src[5]]; 76 dst[6] = cp2uni[src[6]]; 77 dst[7] = cp2uni[src[7]]; 78 dst[8] = cp2uni[src[8]]; 79 dst[9] = cp2uni[src[9]]; 80 dst[10] = cp2uni[src[10]]; 81 dst[11] = cp2uni[src[11]]; 82 dst[12] = cp2uni[src[12]]; 83 dst[13] = cp2uni[src[13]]; 84 dst[14] = cp2uni[src[14]]; 85 dst[15] = cp2uni[src[15]]; 86 src += 16; 87 dst += 16; 88 srclen -= 16; 89 } 90 91 /* now handle the remaining characters */ 92 src += srclen; 93 dst += srclen; 94 switch (srclen) 95 { 96 case 15: dst[-15] = cp2uni[src[-15]]; 97 case 14: dst[-14] = cp2uni[src[-14]]; 98 case 13: dst[-13] = cp2uni[src[-13]]; 99 case 12: dst[-12] = cp2uni[src[-12]]; 100 case 11: dst[-11] = cp2uni[src[-11]]; 101 case 10: dst[-10] = cp2uni[src[-10]]; 102 case 9: dst[-9] = cp2uni[src[-9]]; 103 case 8: dst[-8] = cp2uni[src[-8]]; 104 case 7: dst[-7] = cp2uni[src[-7]]; 105 case 6: dst[-6] = cp2uni[src[-6]]; 106 case 5: dst[-5] = cp2uni[src[-5]]; 107 case 4: dst[-4] = cp2uni[src[-4]]; 108 case 3: dst[-3] = cp2uni[src[-3]]; 109 case 2: dst[-2] = cp2uni[src[-2]]; 110 case 1: dst[-1] = cp2uni[src[-1]]; 111 case 0: break; 112 } 113 return ret; 114 } 115 116 /* mbstowcs for single-byte code page with char decomposition */ 117 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags, 118 const unsigned char *src, unsigned int srclen, 119 WCHAR *dst, unsigned int dstlen ) 120 { 121 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 122 unsigned int len; 123 124 if (!dstlen) /* compute length */ 125 { 126 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 127 for (len = 0; srclen; srclen--, src++) 128 len += wine_decompose( 0, cp2uni[*src], dummy, 4 ); 129 return len; 130 } 131 132 for (len = dstlen; srclen && len; srclen--, src++) 133 { 134 unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len ); 135 if (!res) break; 136 len -= res; 137 dst += res; 138 } 139 if (srclen) return -1; /* overflow */ 140 return dstlen - len; 141 } 142 143 /* query necessary dst length for src string */ 144 static inline int get_length_dbcs( const struct dbcs_table *table, 145 const unsigned char *src, unsigned int srclen ) 146 { 147 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 148 int len; 149 150 for (len = 0; srclen; srclen--, src++, len++) 151 { 152 if (cp2uni_lb[*src] && srclen > 1 && src[1]) 153 { 154 src++; 155 srclen--; 156 } 157 } 158 return len; 159 } 160 161 /* check src string for invalid chars; return non-zero if invalid char found */ 162 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table, 163 const unsigned char *src, unsigned int srclen ) 164 { 165 const WCHAR * const cp2uni = table->cp2uni; 166 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 167 const WCHAR def_unicode_char = table->info.def_unicode_char; 168 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 169 + (def_unicode_char & 0xff)]; 170 while (srclen) 171 { 172 unsigned char off = cp2uni_lb[*src]; 173 if (off) /* multi-byte char */ 174 { 175 if (srclen == 1) break; /* partial char, error */ 176 if (cp2uni[(off << 8) + src[1]] == def_unicode_char && 177 ((src[0] << 8) | src[1]) != def_char) break; 178 src++; 179 srclen--; 180 } 181 else if ((cp2uni[*src] == def_unicode_char && *src != def_char) || 182 is_private_use_area_char(cp2uni[*src])) break; 183 src++; 184 srclen--; 185 } 186 return srclen; 187 } 188 189 /* mbstowcs for double-byte code page */ 190 /* all lengths are in characters, not bytes */ 191 static inline int mbstowcs_dbcs( const struct dbcs_table *table, 192 const unsigned char *src, unsigned int srclen, 193 WCHAR *dst, unsigned int dstlen ) 194 { 195 const WCHAR * const cp2uni = table->cp2uni; 196 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 197 unsigned int len; 198 199 if (!dstlen) return get_length_dbcs( table, src, srclen ); 200 201 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++) 202 { 203 unsigned char off = cp2uni_lb[*src]; 204 if (off && srclen > 1 && src[1]) 205 { 206 src++; 207 srclen--; 208 *dst = cp2uni[(off << 8) + *src]; 209 } 210 else *dst = cp2uni[*src]; 211 } 212 if (srclen) return -1; /* overflow */ 213 return dstlen - len; 214 } 215 216 217 /* mbstowcs for double-byte code page with character decomposition */ 218 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, 219 const unsigned char *src, unsigned int srclen, 220 WCHAR *dst, unsigned int dstlen ) 221 { 222 const WCHAR * const cp2uni = table->cp2uni; 223 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 224 unsigned int len, res; 225 WCHAR ch; 226 227 if (!dstlen) /* compute length */ 228 { 229 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 230 for (len = 0; srclen; srclen--, src++) 231 { 232 unsigned char off = cp2uni_lb[*src]; 233 if (off && srclen > 1 && src[1]) 234 { 235 src++; 236 srclen--; 237 ch = cp2uni[(off << 8) + *src]; 238 } 239 else ch = cp2uni[*src]; 240 len += wine_decompose( 0, ch, dummy, 4 ); 241 } 242 return len; 243 } 244 245 for (len = dstlen; srclen && len; srclen--, src++) 246 { 247 unsigned char off = cp2uni_lb[*src]; 248 if (off && srclen > 1 && src[1]) 249 { 250 src++; 251 srclen--; 252 ch = cp2uni[(off << 8) + *src]; 253 } 254 else ch = cp2uni[*src]; 255 if (!(res = wine_decompose( 0, ch, dst, len ))) break; 256 dst += res; 257 len -= res; 258 } 259 if (srclen) return -1; /* overflow */ 260 return dstlen - len; 261 } 262 263 264 /* return -1 on dst buffer overflow, -2 on invalid input char */ 265 int wine_cp_mbstowcs( const union cptable *table, int flags, 266 const char *s, int srclen, 267 WCHAR *dst, int dstlen ) 268 { 269 const unsigned char *src = (const unsigned char*) s; 270 271 if (table->info.char_size == 1) 272 { 273 if (flags & MB_ERR_INVALID_CHARS) 274 { 275 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2; 276 } 277 if (!(flags & MB_COMPOSITE)) 278 { 279 if (!dstlen) return srclen; 280 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen ); 281 } 282 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen ); 283 } 284 else /* mbcs */ 285 { 286 if (flags & MB_ERR_INVALID_CHARS) 287 { 288 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2; 289 } 290 if (!(flags & MB_COMPOSITE)) 291 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); 292 else 293 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen ); 294 } 295 } 296