1 /*
2 * MultiByteToWideChar implementation
3 *
4 * Copyright 2000 Alexandre Julliard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include <string.h>
22
23 #include "wine/unicode.h"
24
25 extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
26
27 /* check the code whether it is in Unicode Private Use Area (PUA). */
28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
is_private_use_area_char(WCHAR code)29 static inline int is_private_use_area_char(WCHAR code)
30 {
31 return (code >= 0xe000 && code <= 0xf8ff);
32 }
33
34 /* check src string for invalid chars; return non-zero if invalid char found */
check_invalid_chars_sbcs(const struct sbcs_table * table,int flags,const unsigned char * src,unsigned int srclen)35 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
36 const unsigned char *src, unsigned int srclen )
37 {
38 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
39 const WCHAR def_unicode_char = table->info.def_unicode_char;
40 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
41 + (def_unicode_char & 0xff)];
42 while (srclen)
43 {
44 if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
45 is_private_use_area_char(cp2uni[*src])) break;
46 src++;
47 srclen--;
48 }
49 return srclen;
50 }
51
52 /* mbstowcs for single-byte code page */
53 /* all lengths are in characters, not bytes */
mbstowcs_sbcs(const struct sbcs_table * table,int flags,const unsigned char * src,unsigned int srclen,WCHAR * dst,unsigned int dstlen)54 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
55 const unsigned char *src, unsigned int srclen,
56 WCHAR *dst, unsigned int dstlen )
57 {
58 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
59 int ret = srclen;
60
61 if (dstlen < srclen)
62 {
63 /* buffer too small: fill it up to dstlen and return error */
64 srclen = dstlen;
65 ret = -1;
66 }
67
68 while (srclen >= 16)
69 {
70 dst[0] = cp2uni[src[0]];
71 dst[1] = cp2uni[src[1]];
72 dst[2] = cp2uni[src[2]];
73 dst[3] = cp2uni[src[3]];
74 dst[4] = cp2uni[src[4]];
75 dst[5] = cp2uni[src[5]];
76 dst[6] = cp2uni[src[6]];
77 dst[7] = cp2uni[src[7]];
78 dst[8] = cp2uni[src[8]];
79 dst[9] = cp2uni[src[9]];
80 dst[10] = cp2uni[src[10]];
81 dst[11] = cp2uni[src[11]];
82 dst[12] = cp2uni[src[12]];
83 dst[13] = cp2uni[src[13]];
84 dst[14] = cp2uni[src[14]];
85 dst[15] = cp2uni[src[15]];
86 src += 16;
87 dst += 16;
88 srclen -= 16;
89 }
90
91 /* now handle the remaining characters */
92 src += srclen;
93 dst += srclen;
94 switch (srclen)
95 {
96 case 15: dst[-15] = cp2uni[src[-15]];
97 case 14: dst[-14] = cp2uni[src[-14]];
98 case 13: dst[-13] = cp2uni[src[-13]];
99 case 12: dst[-12] = cp2uni[src[-12]];
100 case 11: dst[-11] = cp2uni[src[-11]];
101 case 10: dst[-10] = cp2uni[src[-10]];
102 case 9: dst[-9] = cp2uni[src[-9]];
103 case 8: dst[-8] = cp2uni[src[-8]];
104 case 7: dst[-7] = cp2uni[src[-7]];
105 case 6: dst[-6] = cp2uni[src[-6]];
106 case 5: dst[-5] = cp2uni[src[-5]];
107 case 4: dst[-4] = cp2uni[src[-4]];
108 case 3: dst[-3] = cp2uni[src[-3]];
109 case 2: dst[-2] = cp2uni[src[-2]];
110 case 1: dst[-1] = cp2uni[src[-1]];
111 case 0: break;
112 }
113 return ret;
114 }
115
116 /* mbstowcs for single-byte code page with char decomposition */
mbstowcs_sbcs_decompose(const struct sbcs_table * table,int flags,const unsigned char * src,unsigned int srclen,WCHAR * dst,unsigned int dstlen)117 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
118 const unsigned char *src, unsigned int srclen,
119 WCHAR *dst, unsigned int dstlen )
120 {
121 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
122 unsigned int len;
123
124 if (!dstlen) /* compute length */
125 {
126 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
127 for (len = 0; srclen; srclen--, src++)
128 len += wine_decompose( 0, cp2uni[*src], dummy, 4 );
129 return len;
130 }
131
132 for (len = dstlen; srclen && len; srclen--, src++)
133 {
134 unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len );
135 if (!res) break;
136 len -= res;
137 dst += res;
138 }
139 if (srclen) return -1; /* overflow */
140 return dstlen - len;
141 }
142
143 /* query necessary dst length for src string */
get_length_dbcs(const struct dbcs_table * table,const unsigned char * src,unsigned int srclen)144 static inline int get_length_dbcs( const struct dbcs_table *table,
145 const unsigned char *src, unsigned int srclen )
146 {
147 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
148 int len;
149
150 for (len = 0; srclen; srclen--, src++, len++)
151 {
152 if (cp2uni_lb[*src] && srclen > 1 && src[1])
153 {
154 src++;
155 srclen--;
156 }
157 }
158 return len;
159 }
160
161 /* check src string for invalid chars; return non-zero if invalid char found */
check_invalid_chars_dbcs(const struct dbcs_table * table,const unsigned char * src,unsigned int srclen)162 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
163 const unsigned char *src, unsigned int srclen )
164 {
165 const WCHAR * const cp2uni = table->cp2uni;
166 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
167 const WCHAR def_unicode_char = table->info.def_unicode_char;
168 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
169 + (def_unicode_char & 0xff)];
170 while (srclen)
171 {
172 unsigned char off = cp2uni_lb[*src];
173 if (off) /* multi-byte char */
174 {
175 if (srclen == 1) break; /* partial char, error */
176 if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
177 ((src[0] << 8) | src[1]) != def_char) break;
178 src++;
179 srclen--;
180 }
181 else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
182 is_private_use_area_char(cp2uni[*src])) break;
183 src++;
184 srclen--;
185 }
186 return srclen;
187 }
188
189 /* mbstowcs for double-byte code page */
190 /* all lengths are in characters, not bytes */
mbstowcs_dbcs(const struct dbcs_table * table,const unsigned char * src,unsigned int srclen,WCHAR * dst,unsigned int dstlen)191 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
192 const unsigned char *src, unsigned int srclen,
193 WCHAR *dst, unsigned int dstlen )
194 {
195 const WCHAR * const cp2uni = table->cp2uni;
196 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
197 unsigned int len;
198
199 if (!dstlen) return get_length_dbcs( table, src, srclen );
200
201 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
202 {
203 unsigned char off = cp2uni_lb[*src];
204 if (off && srclen > 1 && src[1])
205 {
206 src++;
207 srclen--;
208 *dst = cp2uni[(off << 8) + *src];
209 }
210 else *dst = cp2uni[*src];
211 }
212 if (srclen) return -1; /* overflow */
213 return dstlen - len;
214 }
215
216
217 /* mbstowcs for double-byte code page with character decomposition */
mbstowcs_dbcs_decompose(const struct dbcs_table * table,const unsigned char * src,unsigned int srclen,WCHAR * dst,unsigned int dstlen)218 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
219 const unsigned char *src, unsigned int srclen,
220 WCHAR *dst, unsigned int dstlen )
221 {
222 const WCHAR * const cp2uni = table->cp2uni;
223 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
224 unsigned int len, res;
225 WCHAR ch;
226
227 if (!dstlen) /* compute length */
228 {
229 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
230 for (len = 0; srclen; srclen--, src++)
231 {
232 unsigned char off = cp2uni_lb[*src];
233 if (off && srclen > 1 && src[1])
234 {
235 src++;
236 srclen--;
237 ch = cp2uni[(off << 8) + *src];
238 }
239 else ch = cp2uni[*src];
240 len += wine_decompose( 0, ch, dummy, 4 );
241 }
242 return len;
243 }
244
245 for (len = dstlen; srclen && len; srclen--, src++)
246 {
247 unsigned char off = cp2uni_lb[*src];
248 if (off && srclen > 1 && src[1])
249 {
250 src++;
251 srclen--;
252 ch = cp2uni[(off << 8) + *src];
253 }
254 else ch = cp2uni[*src];
255 if (!(res = wine_decompose( 0, ch, dst, len ))) break;
256 dst += res;
257 len -= res;
258 }
259 if (srclen) return -1; /* overflow */
260 return dstlen - len;
261 }
262
263
264 /* return -1 on dst buffer overflow, -2 on invalid input char */
wine_cp_mbstowcs(const union cptable * table,int flags,const char * s,int srclen,WCHAR * dst,int dstlen)265 int wine_cp_mbstowcs( const union cptable *table, int flags,
266 const char *s, int srclen,
267 WCHAR *dst, int dstlen )
268 {
269 const unsigned char *src = (const unsigned char*) s;
270
271 if (table->info.char_size == 1)
272 {
273 if (flags & MB_ERR_INVALID_CHARS)
274 {
275 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
276 }
277 if (!(flags & MB_COMPOSITE))
278 {
279 if (!dstlen) return srclen;
280 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
281 }
282 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
283 }
284 else /* mbcs */
285 {
286 if (flags & MB_ERR_INVALID_CHARS)
287 {
288 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
289 }
290 if (!(flags & MB_COMPOSITE))
291 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
292 else
293 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
294 }
295 }
296