1 /* 2 * MultiByteToWideChar implementation 3 * 4 * Copyright 2000 Alexandre Julliard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 */ 20 21 #include <string.h> 22 23 #include "wine/unicode.h" 24 25 extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; 26 27 /* check the code whether it is in Unicode Private Use Area (PUA). */ 28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */ 29 static inline int is_private_use_area_char(WCHAR code) 30 { 31 return (code >= 0xe000 && code <= 0xf8ff); 32 } 33 34 /* check src string for invalid chars; return non-zero if invalid char found */ 35 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags, 36 const unsigned char *src, unsigned int srclen ) 37 { 38 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 39 const WCHAR def_unicode_char = table->info.def_unicode_char; 40 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 41 + (def_unicode_char & 0xff)]; 42 while (srclen) 43 { 44 if ((cp2uni[*src] == def_unicode_char && *src != def_char) || 45 is_private_use_area_char(cp2uni[*src])) break; 46 src++; 47 srclen--; 48 } 49 return srclen; 50 } 51 52 /* mbstowcs for single-byte code page */ 53 /* all lengths are in characters, not bytes */ 54 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags, 55 const unsigned char *src, unsigned int srclen, 56 WCHAR *dst, unsigned int dstlen ) 57 { 58 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 59 int ret = srclen; 60 61 if (dstlen < srclen) 62 { 63 /* buffer too small: fill it up to dstlen and return error */ 64 srclen = dstlen; 65 ret = -1; 66 } 67 68 for (;;) 69 { 70 switch(srclen) 71 { 72 default: 73 case 16: dst[15] = cp2uni[src[15]]; 74 case 15: dst[14] = cp2uni[src[14]]; 75 case 14: dst[13] = cp2uni[src[13]]; 76 case 13: dst[12] = cp2uni[src[12]]; 77 case 12: dst[11] = cp2uni[src[11]]; 78 case 11: dst[10] = cp2uni[src[10]]; 79 case 10: dst[9] = cp2uni[src[9]]; 80 case 9: dst[8] = cp2uni[src[8]]; 81 case 8: dst[7] = cp2uni[src[7]]; 82 case 7: dst[6] = cp2uni[src[6]]; 83 case 6: dst[5] = cp2uni[src[5]]; 84 case 5: dst[4] = cp2uni[src[4]]; 85 case 4: dst[3] = cp2uni[src[3]]; 86 case 3: dst[2] = cp2uni[src[2]]; 87 case 2: dst[1] = cp2uni[src[1]]; 88 case 1: dst[0] = cp2uni[src[0]]; 89 case 0: break; 90 } 91 if (srclen < 16) return ret; 92 dst += 16; 93 src += 16; 94 srclen -= 16; 95 } 96 } 97 98 /* mbstowcs for single-byte code page with char decomposition */ 99 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags, 100 const unsigned char *src, unsigned int srclen, 101 WCHAR *dst, unsigned int dstlen ) 102 { 103 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; 104 unsigned int len; 105 106 if (!dstlen) /* compute length */ 107 { 108 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 109 for (len = 0; srclen; srclen--, src++) 110 len += wine_decompose( cp2uni[*src], dummy, 4 ); 111 return len; 112 } 113 114 for (len = dstlen; srclen && len; srclen--, src++) 115 { 116 unsigned int res = wine_decompose( cp2uni[*src], dst, len ); 117 if (!res) break; 118 len -= res; 119 dst += res; 120 } 121 if (srclen) return -1; /* overflow */ 122 return dstlen - len; 123 } 124 125 /* query necessary dst length for src string */ 126 static inline int get_length_dbcs( const struct dbcs_table *table, 127 const unsigned char *src, unsigned int srclen ) 128 { 129 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 130 int len; 131 132 for (len = 0; srclen; srclen--, src++, len++) 133 { 134 if (cp2uni_lb[*src] && srclen > 1 && src[1]) 135 { 136 src++; 137 srclen--; 138 } 139 } 140 return len; 141 } 142 143 /* check src string for invalid chars; return non-zero if invalid char found */ 144 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table, 145 const unsigned char *src, unsigned int srclen ) 146 { 147 const WCHAR * const cp2uni = table->cp2uni; 148 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 149 const WCHAR def_unicode_char = table->info.def_unicode_char; 150 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] 151 + (def_unicode_char & 0xff)]; 152 while (srclen) 153 { 154 unsigned char off = cp2uni_lb[*src]; 155 if (off) /* multi-byte char */ 156 { 157 if (srclen == 1) break; /* partial char, error */ 158 if (cp2uni[(off << 8) + src[1]] == def_unicode_char && 159 ((src[0] << 8) | src[1]) != def_char) break; 160 src++; 161 srclen--; 162 } 163 else if ((cp2uni[*src] == def_unicode_char && *src != def_char) || 164 is_private_use_area_char(cp2uni[*src])) break; 165 src++; 166 srclen--; 167 } 168 return srclen; 169 } 170 171 /* mbstowcs for double-byte code page */ 172 /* all lengths are in characters, not bytes */ 173 static inline int mbstowcs_dbcs( const struct dbcs_table *table, 174 const unsigned char *src, unsigned int srclen, 175 WCHAR *dst, unsigned int dstlen ) 176 { 177 const WCHAR * const cp2uni = table->cp2uni; 178 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 179 unsigned int len; 180 181 if (!dstlen) return get_length_dbcs( table, src, srclen ); 182 183 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++) 184 { 185 unsigned char off = cp2uni_lb[*src]; 186 if (off && srclen > 1 && src[1]) 187 { 188 src++; 189 srclen--; 190 *dst = cp2uni[(off << 8) + *src]; 191 } 192 else *dst = cp2uni[*src]; 193 } 194 if (srclen) return -1; /* overflow */ 195 return dstlen - len; 196 } 197 198 199 /* mbstowcs for double-byte code page with character decomposition */ 200 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, 201 const unsigned char *src, unsigned int srclen, 202 WCHAR *dst, unsigned int dstlen ) 203 { 204 const WCHAR * const cp2uni = table->cp2uni; 205 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; 206 unsigned int len, res; 207 WCHAR ch; 208 209 if (!dstlen) /* compute length */ 210 { 211 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ 212 for (len = 0; srclen; srclen--, src++) 213 { 214 unsigned char off = cp2uni_lb[*src]; 215 if (off && srclen > 1 && src[1]) 216 { 217 src++; 218 srclen--; 219 ch = cp2uni[(off << 8) + *src]; 220 } 221 else ch = cp2uni[*src]; 222 len += wine_decompose( ch, dummy, 4 ); 223 } 224 return len; 225 } 226 227 for (len = dstlen; srclen && len; srclen--, src++) 228 { 229 unsigned char off = cp2uni_lb[*src]; 230 if (off && srclen > 1 && src[1]) 231 { 232 src++; 233 srclen--; 234 ch = cp2uni[(off << 8) + *src]; 235 } 236 else ch = cp2uni[*src]; 237 if (!(res = wine_decompose( ch, dst, len ))) break; 238 dst += res; 239 len -= res; 240 } 241 if (srclen) return -1; /* overflow */ 242 return dstlen - len; 243 } 244 245 246 /* return -1 on dst buffer overflow, -2 on invalid input char */ 247 int wine_cp_mbstowcs( const union cptable *table, int flags, 248 const char *s, int srclen, 249 WCHAR *dst, int dstlen ) 250 { 251 const unsigned char *src = (const unsigned char*) s; 252 253 if (table->info.char_size == 1) 254 { 255 if (flags & MB_ERR_INVALID_CHARS) 256 { 257 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2; 258 } 259 if (!(flags & MB_COMPOSITE)) 260 { 261 if (!dstlen) return srclen; 262 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen ); 263 } 264 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen ); 265 } 266 else /* mbcs */ 267 { 268 if (flags & MB_ERR_INVALID_CHARS) 269 { 270 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2; 271 } 272 if (!(flags & MB_COMPOSITE)) 273 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); 274 else 275 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen ); 276 } 277 } 278