1 /* 2 * WideCharToMultiByte implementation 3 * 4 * Copyright 2000 Alexandre Julliard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 */ 20 21 #include <string.h> 22 23 #include "wine/unicode.h" 24 25 extern WCHAR wine_compose( const WCHAR *str ) DECLSPEC_HIDDEN; 26 27 /****************************************************************/ 28 /* sbcs support */ 29 30 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */ 31 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags, 32 WCHAR wch, unsigned char ch ) 33 { 34 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char) 35 return (table->cp2uni[ch] == wch); 36 return 1; 37 } 38 39 /* query necessary dst length for src string */ 40 static int get_length_sbcs( const struct sbcs_table *table, int flags, 41 const WCHAR *src, unsigned int srclen, int *used ) 42 { 43 const unsigned char * const uni2cp_low = table->uni2cp_low; 44 const unsigned short * const uni2cp_high = table->uni2cp_high; 45 int ret, tmp; 46 WCHAR composed; 47 48 if (!used) used = &tmp; /* avoid checking on every char */ 49 *used = 0; 50 51 for (ret = 0; srclen; ret++, src++, srclen--) 52 { 53 WCHAR wch = *src; 54 unsigned char ch; 55 56 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src))) 57 { 58 /* now check if we can use the composed char */ 59 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 60 if (is_valid_sbcs_mapping( table, flags, composed, ch )) 61 { 62 /* we have a good mapping, use it */ 63 src++; 64 srclen--; 65 continue; 66 } 67 /* no mapping for the composed char, check the other flags */ 68 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 69 { 70 *used = 1; 71 src++; /* skip the non-spacing char */ 72 srclen--; 73 continue; 74 } 75 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 76 { 77 src++; 78 srclen--; 79 } 80 /* WC_SEPCHARS is the default */ 81 } 82 if (!*used) 83 { 84 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 85 *used = !is_valid_sbcs_mapping( table, flags, wch, ch ); 86 } 87 } 88 return ret; 89 } 90 91 /* wcstombs for single-byte code page */ 92 static inline int wcstombs_sbcs( const struct sbcs_table *table, 93 const WCHAR *src, unsigned int srclen, 94 char *dst, unsigned int dstlen ) 95 { 96 const unsigned char * const uni2cp_low = table->uni2cp_low; 97 const unsigned short * const uni2cp_high = table->uni2cp_high; 98 int ret = srclen; 99 100 if (dstlen < srclen) 101 { 102 /* buffer too small: fill it up to dstlen and return error */ 103 srclen = dstlen; 104 ret = -1; 105 } 106 107 while (srclen >= 16) 108 { 109 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)]; 110 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)]; 111 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)]; 112 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)]; 113 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)]; 114 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)]; 115 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)]; 116 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)]; 117 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)]; 118 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)]; 119 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)]; 120 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)]; 121 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)]; 122 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)]; 123 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)]; 124 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)]; 125 src += 16; 126 dst += 16; 127 srclen -= 16; 128 } 129 130 /* now handle remaining characters */ 131 src += srclen; 132 dst += srclen; 133 switch(srclen) 134 { 135 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)]; 136 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)]; 137 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)]; 138 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)]; 139 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)]; 140 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)]; 141 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)]; 142 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)]; 143 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)]; 144 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)]; 145 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)]; 146 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)]; 147 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)]; 148 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)]; 149 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)]; 150 case 0: break; 151 } 152 return ret; 153 } 154 155 /* slow version of wcstombs_sbcs that handles the various flags */ 156 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags, 157 const WCHAR *src, unsigned int srclen, 158 char *dst, unsigned int dstlen, 159 const char *defchar, int *used ) 160 { 161 const unsigned char * const uni2cp_low = table->uni2cp_low; 162 const unsigned short * const uni2cp_high = table->uni2cp_high; 163 unsigned char def; 164 unsigned int len; 165 int tmp; 166 WCHAR composed; 167 168 if (!defchar) 169 def = table->info.def_char & 0xff; 170 else 171 def = *defchar; 172 173 if (!used) used = &tmp; /* avoid checking on every char */ 174 *used = 0; 175 176 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--) 177 { 178 WCHAR wch = *src; 179 180 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src))) 181 { 182 /* now check if we can use the composed char */ 183 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 184 if (is_valid_sbcs_mapping( table, flags, composed, *dst )) 185 { 186 /* we have a good mapping, use it */ 187 src++; 188 srclen--; 189 continue; 190 } 191 /* no mapping for the composed char, check the other flags */ 192 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 193 { 194 *dst = def; 195 *used = 1; 196 src++; /* skip the non-spacing char */ 197 srclen--; 198 continue; 199 } 200 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 201 { 202 src++; 203 srclen--; 204 } 205 /* WC_SEPCHARS is the default */ 206 } 207 208 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 209 if (!is_valid_sbcs_mapping( table, flags, wch, *dst )) 210 { 211 *dst = def; 212 *used = 1; 213 } 214 } 215 if (srclen) return -1; /* overflow */ 216 return dstlen - len; 217 } 218 219 220 /****************************************************************/ 221 /* dbcs support */ 222 223 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */ 224 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags, 225 WCHAR wch, unsigned short ch ) 226 { 227 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char) 228 { 229 /* check if char maps back to the same Unicode value */ 230 if (ch & 0xff00) 231 { 232 unsigned char off = table->cp2uni_leadbytes[ch >> 8]; 233 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch); 234 } 235 return (table->cp2uni[ch & 0xff] == wch); 236 } 237 return 1; 238 } 239 240 /* compute the default char for the dbcs case */ 241 static inline WCHAR get_defchar_dbcs( const struct dbcs_table *table, const char *defchar ) 242 { 243 if (!defchar) return table->info.def_char; 244 if (!defchar[1]) return (unsigned char)defchar[0]; 245 return ((unsigned char)defchar[0] << 8) | (unsigned char)defchar[1]; 246 } 247 248 /* query necessary dst length for src string */ 249 static int get_length_dbcs( const struct dbcs_table *table, int flags, 250 const WCHAR *src, unsigned int srclen, 251 const char *defchar, int *used ) 252 { 253 const unsigned short * const uni2cp_low = table->uni2cp_low; 254 const unsigned short * const uni2cp_high = table->uni2cp_high; 255 WCHAR defchar_value, composed; 256 int len, tmp; 257 258 if (!defchar && !used && !(flags & WC_COMPOSITECHECK)) 259 { 260 for (len = 0; srclen; srclen--, src++, len++) 261 { 262 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++; 263 } 264 return len; 265 } 266 267 defchar_value = get_defchar_dbcs( table, defchar ); 268 if (!used) used = &tmp; /* avoid checking on every char */ 269 *used = 0; 270 for (len = 0; srclen; len++, srclen--, src++) 271 { 272 unsigned short res; 273 WCHAR wch = *src; 274 275 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src))) 276 { 277 /* now check if we can use the composed char */ 278 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 279 280 if (is_valid_dbcs_mapping( table, flags, composed, res )) 281 { 282 /* we have a good mapping for the composed char, use it */ 283 if (res & 0xff00) len++; 284 src++; 285 srclen--; 286 continue; 287 } 288 /* no mapping for the composed char, check the other flags */ 289 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 290 { 291 if (defchar_value & 0xff00) len++; 292 *used = 1; 293 src++; /* skip the non-spacing char */ 294 srclen--; 295 continue; 296 } 297 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 298 { 299 src++; 300 srclen--; 301 } 302 /* WC_SEPCHARS is the default */ 303 } 304 305 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 306 if (!is_valid_dbcs_mapping( table, flags, wch, res )) 307 { 308 res = defchar_value; 309 *used = 1; 310 } 311 if (res & 0xff00) len++; 312 } 313 return len; 314 } 315 316 /* wcstombs for double-byte code page */ 317 static inline int wcstombs_dbcs( const struct dbcs_table *table, 318 const WCHAR *src, unsigned int srclen, 319 char *dst, unsigned int dstlen ) 320 { 321 const unsigned short * const uni2cp_low = table->uni2cp_low; 322 const unsigned short * const uni2cp_high = table->uni2cp_high; 323 int len; 324 325 for (len = dstlen; srclen && len; len--, srclen--, src++) 326 { 327 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)]; 328 if (res & 0xff00) 329 { 330 if (len == 1) break; /* do not output a partial char */ 331 len--; 332 *dst++ = res >> 8; 333 } 334 *dst++ = (char)res; 335 } 336 if (srclen) return -1; /* overflow */ 337 return dstlen - len; 338 } 339 340 /* slow version of wcstombs_dbcs that handles the various flags */ 341 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags, 342 const WCHAR *src, unsigned int srclen, 343 char *dst, unsigned int dstlen, 344 const char *defchar, int *used ) 345 { 346 const unsigned short * const uni2cp_low = table->uni2cp_low; 347 const unsigned short * const uni2cp_high = table->uni2cp_high; 348 WCHAR defchar_value = get_defchar_dbcs( table, defchar ); 349 WCHAR composed; 350 int len, tmp; 351 352 if (!used) used = &tmp; /* avoid checking on every char */ 353 *used = 0; 354 355 for (len = dstlen; srclen && len; len--, srclen--, src++) 356 { 357 unsigned short res; 358 WCHAR wch = *src; 359 360 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src))) 361 { 362 /* now check if we can use the composed char */ 363 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)]; 364 365 if (is_valid_dbcs_mapping( table, flags, composed, res )) 366 { 367 /* we have a good mapping for the composed char, use it */ 368 src++; 369 srclen--; 370 goto output_char; 371 } 372 /* no mapping for the composed char, check the other flags */ 373 if (flags & WC_DEFAULTCHAR) /* use the default char instead */ 374 { 375 res = defchar_value; 376 *used = 1; 377 src++; /* skip the non-spacing char */ 378 srclen--; 379 goto output_char; 380 } 381 if (flags & WC_DISCARDNS) /* skip the second char of the composition */ 382 { 383 src++; 384 srclen--; 385 } 386 /* WC_SEPCHARS is the default */ 387 } 388 389 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)]; 390 if (!is_valid_dbcs_mapping( table, flags, wch, res )) 391 { 392 res = defchar_value; 393 *used = 1; 394 } 395 396 output_char: 397 if (res & 0xff00) 398 { 399 if (len == 1) break; /* do not output a partial char */ 400 len--; 401 *dst++ = res >> 8; 402 } 403 *dst++ = (char)res; 404 } 405 if (srclen) return -1; /* overflow */ 406 return dstlen - len; 407 } 408 409 /* wide char to multi byte string conversion */ 410 /* return -1 on dst buffer overflow */ 411 int wine_cp_wcstombs( const union cptable *table, int flags, 412 const WCHAR *src, int srclen, 413 char *dst, int dstlen, const char *defchar, int *used ) 414 { 415 if (table->info.char_size == 1) 416 { 417 if (flags || defchar || used) 418 { 419 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used ); 420 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen, 421 dst, dstlen, defchar, used ); 422 } 423 if (!dstlen) return srclen; 424 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen ); 425 } 426 else /* mbcs */ 427 { 428 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used ); 429 if (flags || defchar || used) 430 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen, 431 dst, dstlen, defchar, used ); 432 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); 433 } 434 } 435