xref: /reactos/sdk/lib/ucrt/convert/mbstowcs.cpp (revision e3e520d1)
1 /***
2 *mbstowcs.c - Convert multibyte char string to wide char string.
3 *
4 *       Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 *       Convert a multibyte char string into the equivalent wide char string.
8 *
9 *******************************************************************************/
10 #include <corecrt_internal_mbstring.h>
11 #include <corecrt_internal_ptd_propagation.h>
12 #include <corecrt_internal_securecrt.h>
13 #include <ctype.h>
14 #include <errno.h>
15 #include <locale.h>
16 #include <stdlib.h>
17 
18 using namespace __crt_mbstring;
19 
20 /***
21 *size_t mbstowcs() - Convert multibyte char string to wide char string.
22 *
23 *Purpose:
24 *       Convert a multi-byte char string into the equivalent wide char string,
25 *       according to the LC_CTYPE category of the current locale.
26 *       [ANSI].
27 *
28 *Entry:
29 *       wchar_t *pwcs = pointer to destination wide character string buffer
30 *       const char *s = pointer to source multibyte character string
31 *       size_t      n = maximum number of wide characters to store
32 *
33 *Exit:
34 *       If pwcs != nullptr returns the number of words modified (<=n): note that
35 *       if the return value == n, then the destination string is not 0 terminated.
36 *       If pwcs == nullptr returns the length (not size) needed for the destination buffer.
37 *
38 *Exceptions:
39 *       Returns (size_t)-1 if s is nullptr or invalid mbcs character encountered
40 *       and errno is set to EILSEQ.
41 *
42 *******************************************************************************/
43 
44 /* Helper shared by secure and non-secure functions */
45 
46 static size_t __cdecl _mbstowcs_l_helper(
47     _Out_writes_opt_z_(n)               wchar_t *              pwcs,
48     _In_reads_or_z_(n) _Pre_z_          const char *           s,
49     _In_                                size_t                 n,
50     _In_opt_                            __crt_cached_ptd_host& ptd
51     ) throw()
52 {
53     size_t count = 0;
54 
55     if (pwcs && n == 0)
56     {
57         /* dest string exists, but 0 bytes converted */
58         return (size_t) 0;
59     }
60 
61     if (pwcs && n > 0)
62     {
63         *pwcs = '\0';
64     }
65 
66     /* validation section */
67     _UCRT_VALIDATE_RETURN(ptd, s != nullptr, EINVAL, (size_t) - 1);
68 
69     _locale_t const locale = ptd.get_locale();
70 
71     if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
72     {
73         mbstate_t state{};
74         return __mbsrtowcs_utf8(pwcs, &s, n, &state, ptd);
75     }
76 
77     /* if destination string exists, fill it in */
78     if (pwcs)
79     {
80         if (locale->locinfo->locale_name[LC_CTYPE] == nullptr)
81         {
82             /* C locale: easy and fast */
83             while (count < n)
84             {
85                 *pwcs = (wchar_t) ((unsigned char) s[count]);
86                 if (!s[count])
87                 {
88                     return count;
89                 }
90                 count++;
91                 pwcs++;
92             }
93             return count;
94 
95         }
96         else {
97             int bytecnt, charcnt;
98             unsigned char *p;
99 
100             /* Assume that the buffer is large enough */
101             if ((count = __acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
102                 MB_PRECOMPOSED |
103                 MB_ERR_INVALID_CHARS,
104                 s,
105                 -1,
106                 pwcs,
107                 (int) n)) != 0)
108             {
109                 return count - 1; /* don't count NUL */
110             }
111 
112             if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
113             {
114                 ptd.get_errno().set(EILSEQ);
115                 *pwcs = '\0';
116                 return (size_t) - 1;
117             }
118 
119             /* User-supplied buffer not large enough. */
120 
121             /* How many bytes are in n characters of the string? */
122             charcnt = (int) n;
123             for (p = (unsigned char *) s; (charcnt-- && *p); p++)
124             {
125                 if (_isleadbyte_fast_internal(*p, locale))
126                 {
127                     if (p[1] == '\0')
128                     {
129                         /*  this is a leadbyte followed by EOS -- a dud MBCS string
130                         We choose not to assert here because this
131                         function is defined to deal with dud strings on
132                         input and return a known value
133                         */
134                         ptd.get_errno().set(EILSEQ);
135                         *pwcs = '\0';
136                         return (size_t) - 1;
137                     }
138                     else
139                     {
140                         p++;
141                     }
142                 }
143             }
144             bytecnt = ((int) ((char *) p - (char *) s));
145 
146             if ((count = __acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
147                 MB_PRECOMPOSED,
148                 s,
149                 bytecnt,
150                 pwcs,
151                 (int) n)) == 0)
152             {
153                 ptd.get_errno().set(EILSEQ);
154                 *pwcs = '\0';
155                 return (size_t) - 1;
156             }
157 
158             return count; /* no NUL in string */
159         }
160     }
161     else {
162         /* pwcs == nullptr, get size only, s must be NUL-terminated */
163         if (locale->locinfo->locale_name[LC_CTYPE] == nullptr)
164         {
165             return strlen(s);
166         }
167         else if ((count = __acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
168             MB_PRECOMPOSED | MB_ERR_INVALID_CHARS,
169             s,
170             -1,
171             nullptr,
172             0)) == 0)
173         {
174                 ptd.get_errno().set(EILSEQ);
175                 return (size_t) - 1;
176         }
177         else
178         {
179             return count - 1;
180         }
181     }
182 
183 }
184 
185 extern "C" size_t __cdecl _mbstowcs_l(
186     wchar_t  *pwcs,
187     const char *s,
188     size_t n,
189     _locale_t plocinfo
190     )
191 {
192     /* Call a non-deprecated helper to do the work. */
193     __crt_cached_ptd_host ptd(plocinfo);
194     return _mbstowcs_l_helper(pwcs, s, n, ptd);
195 }
196 
197 extern "C" size_t __cdecl mbstowcs(
198     wchar_t  *pwcs,
199     const char *s,
200     size_t n
201     )
202 {
203     __crt_cached_ptd_host ptd;
204     return _mbstowcs_l_helper(pwcs, s, n, ptd);
205 }
206 
207 /***
208 *errno_t mbstowcs_s() - Convert multibyte char string to wide char string.
209 *
210 *Purpose:
211 *       Convert a multi-byte char string into the equivalent wide char string,
212 *       according to the LC_CTYPE category of the current locale.
213 *       Same as mbstowcs(), but the destination is ensured to be null terminated.
214 *       If there's not enough space, we return ERANGE (or STRUNCATE when n == _TRUNCATE).
215 *
216 *Entry:
217 *       size_t *pConvertedChars = Number of wide characters converted, including the terminating null character
218 *                                 This pointer can be nullptr.
219 *       wchar_t *pwcs = pointer to destination wide character string buffer
220 *       size_t sizeInWords = size of the destination buffer
221 *       const char *s = pointer to source multibyte character string
222 *       size_t n = maximum number of wide characters to store (not including the terminating null character)
223 *
224 *Exit:
225 *       The error code.
226 *
227 *Exceptions:
228 *       Input parameters are validated. Refer to the validation section of the function.
229 *
230 *******************************************************************************/
231 
232 static errno_t __cdecl _mbstowcs_internal(
233     size_t *               pConvertedChars,
234     wchar_t *              pwcs,
235     size_t                 sizeInWords,
236     const char *           s,
237     size_t                 n,
238     __crt_cached_ptd_host& ptd
239     )
240 {
241     size_t retsize;
242     errno_t retvalue = 0;
243 
244     /* validation section */
245     _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (pwcs == nullptr && sizeInWords == 0) || (pwcs != nullptr && sizeInWords > 0), EINVAL);
246 
247     if (pwcs != nullptr)
248     {
249         _RESET_STRING(pwcs, sizeInWords);
250     }
251 
252     if (pConvertedChars != nullptr)
253     {
254         *pConvertedChars = 0;
255     }
256 
257     size_t bufferSize = n > sizeInWords ? sizeInWords : n;
258     /* n must fit into an int for MultiByteToWideChar */
259     _UCRT_VALIDATE_RETURN_ERRCODE(ptd, bufferSize <= INT_MAX, EINVAL);
260 
261     /* Call a non-deprecated helper to do the work. */
262 
263     retsize = _mbstowcs_l_helper(pwcs, s, bufferSize, ptd);
264 
265     if (retsize == (size_t) - 1)
266     {
267         if (pwcs != nullptr)
268         {
269             _RESET_STRING(pwcs, sizeInWords);
270         }
271         return ptd.get_errno().value_or(0);
272     }
273 
274     /* count the null terminator */
275     retsize++;
276 
277     if (pwcs != nullptr)
278     {
279         /* return error if the string does not fit, unless n == _TRUNCATE */
280         if (retsize > sizeInWords)
281         {
282             if (n != _TRUNCATE)
283             {
284                 _RESET_STRING(pwcs, sizeInWords);
285                 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= sizeInWords, ERANGE);
286             }
287             retsize = sizeInWords;
288             retvalue = STRUNCATE;
289         }
290 
291         /* ensure the string is null terminated */
292         pwcs[retsize - 1] = '\0';
293     }
294 
295     if (pConvertedChars != nullptr)
296     {
297         *pConvertedChars = retsize;
298     }
299 
300     return retvalue;
301 }
302 
303 extern "C" errno_t __cdecl _mbstowcs_s_l(
304     size_t *     pConvertedChars,
305     wchar_t *    pwcs,
306     size_t       sizeInWords,
307     const char * s,
308     size_t       n,
309     _locale_t    plocinfo
310     )
311 {
312     __crt_cached_ptd_host ptd(plocinfo);
313     return _mbstowcs_internal(pConvertedChars, pwcs, sizeInWords, s, n, ptd);
314 }
315 
316 extern "C" errno_t __cdecl mbstowcs_s(
317     size_t *pConvertedChars,
318     wchar_t  *pwcs,
319     size_t sizeInWords,
320     const char *s,
321     size_t n
322     )
323 {
324     __crt_cached_ptd_host ptd;
325     return _mbstowcs_internal(pConvertedChars, pwcs, sizeInWords, s, n, ptd);
326 }
327