1 /*
2 * WideCharToMultiByte implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include <string.h>
22
23 #include "wine/unicode.h"
24
25 extern WCHAR wine_compose( const WCHAR *str ) DECLSPEC_HIDDEN;
26
27 /****************************************************************/
28 /* sbcs support */
29
30 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
is_valid_sbcs_mapping(const struct sbcs_table * table,int flags,WCHAR wch,unsigned char ch)31 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
32 WCHAR wch, unsigned char ch )
33 {
34 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char)
35 return (table->cp2uni[ch] == wch);
36 return 1;
37 }
38
39 /* query necessary dst length for src string */
get_length_sbcs(const struct sbcs_table * table,int flags,const WCHAR * src,unsigned int srclen,int * used)40 static int get_length_sbcs( const struct sbcs_table *table, int flags,
41 const WCHAR *src, unsigned int srclen, int *used )
42 {
43 const unsigned char * const uni2cp_low = table->uni2cp_low;
44 const unsigned short * const uni2cp_high = table->uni2cp_high;
45 int ret, tmp;
46 WCHAR composed;
47
48 if (!used) used = &tmp; /* avoid checking on every char */
49 *used = 0;
50
51 for (ret = 0; srclen; ret++, src++, srclen--)
52 {
53 WCHAR wch = *src;
54 unsigned char ch;
55
56 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
57 {
58 /* now check if we can use the composed char */
59 ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
60 if (is_valid_sbcs_mapping( table, flags, composed, ch ))
61 {
62 /* we have a good mapping, use it */
63 src++;
64 srclen--;
65 continue;
66 }
67 /* no mapping for the composed char, check the other flags */
68 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
69 {
70 *used = 1;
71 src++; /* skip the non-spacing char */
72 srclen--;
73 continue;
74 }
75 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
76 {
77 src++;
78 srclen--;
79 }
80 /* WC_SEPCHARS is the default */
81 }
82 if (!*used)
83 {
84 ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
85 *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
86 }
87 }
88 return ret;
89 }
90
91 /* wcstombs for single-byte code page */
wcstombs_sbcs(const struct sbcs_table * table,const WCHAR * src,unsigned int srclen,char * dst,unsigned int dstlen)92 static inline int wcstombs_sbcs( const struct sbcs_table *table,
93 const WCHAR *src, unsigned int srclen,
94 char *dst, unsigned int dstlen )
95 {
96 const unsigned char * const uni2cp_low = table->uni2cp_low;
97 const unsigned short * const uni2cp_high = table->uni2cp_high;
98 int ret = srclen;
99
100 if (dstlen < srclen)
101 {
102 /* buffer too small: fill it up to dstlen and return error */
103 srclen = dstlen;
104 ret = -1;
105 }
106
107 while (srclen >= 16)
108 {
109 dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
110 dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
111 dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
112 dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
113 dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
114 dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
115 dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
116 dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
117 dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
118 dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
119 dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
120 dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
121 dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
122 dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
123 dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
124 dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
125 src += 16;
126 dst += 16;
127 srclen -= 16;
128 }
129
130 /* now handle remaining characters */
131 src += srclen;
132 dst += srclen;
133 switch(srclen)
134 {
135 case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
136 case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
137 case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
138 case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
139 case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
140 case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
141 case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
142 case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
143 case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
144 case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
145 case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
146 case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
147 case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
148 case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
149 case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
150 case 0: break;
151 }
152 return ret;
153 }
154
155 /* slow version of wcstombs_sbcs that handles the various flags */
wcstombs_sbcs_slow(const struct sbcs_table * table,int flags,const WCHAR * src,unsigned int srclen,char * dst,unsigned int dstlen,const char * defchar,int * used)156 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
157 const WCHAR *src, unsigned int srclen,
158 char *dst, unsigned int dstlen,
159 const char *defchar, int *used )
160 {
161 const unsigned char * const uni2cp_low = table->uni2cp_low;
162 const unsigned short * const uni2cp_high = table->uni2cp_high;
163 unsigned char def;
164 unsigned int len;
165 int tmp;
166 WCHAR composed;
167
168 if (!defchar)
169 def = table->info.def_char & 0xff;
170 else
171 def = *defchar;
172
173 if (!used) used = &tmp; /* avoid checking on every char */
174 *used = 0;
175
176 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
177 {
178 WCHAR wch = *src;
179
180 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
181 {
182 /* now check if we can use the composed char */
183 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
184 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
185 {
186 /* we have a good mapping, use it */
187 src++;
188 srclen--;
189 continue;
190 }
191 /* no mapping for the composed char, check the other flags */
192 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
193 {
194 *dst = def;
195 *used = 1;
196 src++; /* skip the non-spacing char */
197 srclen--;
198 continue;
199 }
200 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
201 {
202 src++;
203 srclen--;
204 }
205 /* WC_SEPCHARS is the default */
206 }
207
208 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
209 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
210 {
211 *dst = def;
212 *used = 1;
213 }
214 }
215 if (srclen) return -1; /* overflow */
216 return dstlen - len;
217 }
218
219
220 /****************************************************************/
221 /* dbcs support */
222
223 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
is_valid_dbcs_mapping(const struct dbcs_table * table,int flags,WCHAR wch,unsigned short ch)224 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
225 WCHAR wch, unsigned short ch )
226 {
227 if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char)
228 {
229 /* check if char maps back to the same Unicode value */
230 if (ch & 0xff00)
231 {
232 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
233 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
234 }
235 return (table->cp2uni[ch & 0xff] == wch);
236 }
237 return 1;
238 }
239
240 /* compute the default char for the dbcs case */
get_defchar_dbcs(const struct dbcs_table * table,const char * defchar)241 static inline WCHAR get_defchar_dbcs( const struct dbcs_table *table, const char *defchar )
242 {
243 if (!defchar) return table->info.def_char;
244 if (!defchar[1]) return (unsigned char)defchar[0];
245 return ((unsigned char)defchar[0] << 8) | (unsigned char)defchar[1];
246 }
247
248 /* query necessary dst length for src string */
get_length_dbcs(const struct dbcs_table * table,int flags,const WCHAR * src,unsigned int srclen,const char * defchar,int * used)249 static int get_length_dbcs( const struct dbcs_table *table, int flags,
250 const WCHAR *src, unsigned int srclen,
251 const char *defchar, int *used )
252 {
253 const unsigned short * const uni2cp_low = table->uni2cp_low;
254 const unsigned short * const uni2cp_high = table->uni2cp_high;
255 WCHAR defchar_value, composed;
256 int len, tmp;
257
258 if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
259 {
260 for (len = 0; srclen; srclen--, src++, len++)
261 {
262 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
263 }
264 return len;
265 }
266
267 defchar_value = get_defchar_dbcs( table, defchar );
268 if (!used) used = &tmp; /* avoid checking on every char */
269 *used = 0;
270 for (len = 0; srclen; len++, srclen--, src++)
271 {
272 unsigned short res;
273 WCHAR wch = *src;
274
275 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
276 {
277 /* now check if we can use the composed char */
278 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
279
280 if (is_valid_dbcs_mapping( table, flags, composed, res ))
281 {
282 /* we have a good mapping for the composed char, use it */
283 if (res & 0xff00) len++;
284 src++;
285 srclen--;
286 continue;
287 }
288 /* no mapping for the composed char, check the other flags */
289 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
290 {
291 if (defchar_value & 0xff00) len++;
292 *used = 1;
293 src++; /* skip the non-spacing char */
294 srclen--;
295 continue;
296 }
297 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
298 {
299 src++;
300 srclen--;
301 }
302 /* WC_SEPCHARS is the default */
303 }
304
305 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
306 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
307 {
308 res = defchar_value;
309 *used = 1;
310 }
311 if (res & 0xff00) len++;
312 }
313 return len;
314 }
315
316 /* wcstombs for double-byte code page */
wcstombs_dbcs(const struct dbcs_table * table,const WCHAR * src,unsigned int srclen,char * dst,unsigned int dstlen)317 static inline int wcstombs_dbcs( const struct dbcs_table *table,
318 const WCHAR *src, unsigned int srclen,
319 char *dst, unsigned int dstlen )
320 {
321 const unsigned short * const uni2cp_low = table->uni2cp_low;
322 const unsigned short * const uni2cp_high = table->uni2cp_high;
323 int len;
324
325 for (len = dstlen; srclen && len; len--, srclen--, src++)
326 {
327 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
328 if (res & 0xff00)
329 {
330 if (len == 1) break; /* do not output a partial char */
331 len--;
332 *dst++ = res >> 8;
333 }
334 *dst++ = (char)res;
335 }
336 if (srclen) return -1; /* overflow */
337 return dstlen - len;
338 }
339
340 /* slow version of wcstombs_dbcs that handles the various flags */
wcstombs_dbcs_slow(const struct dbcs_table * table,int flags,const WCHAR * src,unsigned int srclen,char * dst,unsigned int dstlen,const char * defchar,int * used)341 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
342 const WCHAR *src, unsigned int srclen,
343 char *dst, unsigned int dstlen,
344 const char *defchar, int *used )
345 {
346 const unsigned short * const uni2cp_low = table->uni2cp_low;
347 const unsigned short * const uni2cp_high = table->uni2cp_high;
348 WCHAR defchar_value = get_defchar_dbcs( table, defchar );
349 WCHAR composed;
350 int len, tmp;
351
352 if (!used) used = &tmp; /* avoid checking on every char */
353 *used = 0;
354
355 for (len = dstlen; srclen && len; len--, srclen--, src++)
356 {
357 unsigned short res;
358 WCHAR wch = *src;
359
360 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
361 {
362 /* now check if we can use the composed char */
363 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
364
365 if (is_valid_dbcs_mapping( table, flags, composed, res ))
366 {
367 /* we have a good mapping for the composed char, use it */
368 src++;
369 srclen--;
370 goto output_char;
371 }
372 /* no mapping for the composed char, check the other flags */
373 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
374 {
375 res = defchar_value;
376 *used = 1;
377 src++; /* skip the non-spacing char */
378 srclen--;
379 goto output_char;
380 }
381 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
382 {
383 src++;
384 srclen--;
385 }
386 /* WC_SEPCHARS is the default */
387 }
388
389 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
390 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
391 {
392 res = defchar_value;
393 *used = 1;
394 }
395
396 output_char:
397 if (res & 0xff00)
398 {
399 if (len == 1) break; /* do not output a partial char */
400 len--;
401 *dst++ = res >> 8;
402 }
403 *dst++ = (char)res;
404 }
405 if (srclen) return -1; /* overflow */
406 return dstlen - len;
407 }
408
409 /* wide char to multi byte string conversion */
410 /* return -1 on dst buffer overflow */
wine_cp_wcstombs(const union cptable * table,int flags,const WCHAR * src,int srclen,char * dst,int dstlen,const char * defchar,int * used)411 int wine_cp_wcstombs( const union cptable *table, int flags,
412 const WCHAR *src, int srclen,
413 char *dst, int dstlen, const char *defchar, int *used )
414 {
415 if (table->info.char_size == 1)
416 {
417 if (flags || defchar || used)
418 {
419 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
420 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
421 dst, dstlen, defchar, used );
422 }
423 if (!dstlen) return srclen;
424 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
425 }
426 else /* mbcs */
427 {
428 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
429 if (flags || defchar || used)
430 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
431 dst, dstlen, defchar, used );
432 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
433 }
434 }
435