xref: /reactos/sdk/lib/ucrt/convert/wcrtomb.cpp (revision e3e520d1)
1 /***
2 *wcrtomb.cpp - Convert wide character to multibyte character, with locale.
3 *
4 *       Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 *       Convert a wide character into the equivalent multibyte character.
8 *
9 *******************************************************************************/
10 #include <corecrt_internal_mbstring.h>
11 #include <corecrt_internal_ptd_propagation.h>
12 #include <corecrt_internal_securecrt.h>
13 #include <limits.h>
14 #include <locale.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <wchar.h>
19 
20 using namespace __crt_mbstring;
21 
22 /***
23 *errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character.
24 *
25 *Purpose:
26 *       Convert a wide character into the equivalent multi-byte character,
27 *       according to the specified LC_CTYPE category, or the current locale.
28 *       [ANSI].
29 *
30 *       NOTE:  Currently, the C libraries support the "C" locale only.
31 *              Non-C locale support now available under _INTL switch.
32 *Entry:
33 *       int *return_value      = the number of chars written (-1 in error case)
34 *       char *destination           = pointer to multibyte character
35 *       size_t destination_count  = size of the destinarion buffer
36 *       wchar_t wchar       = source wide character
37 *       mbstate_t *state      = pointer to state (not used)
38 *       _locale_t locale  = locale info
39 *
40 *Exit:
41 *       Returns:
42 *       Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case.
43 *
44 *Exceptions:
45 *
46 *******************************************************************************/
47 
48 _Success_(return == 0)
49 static errno_t __cdecl _wcrtomb_internal(
50                                             int*               const return_value,
51     __out_bcount_z_opt(destination_count)   char*              const destination,
52                                             size_t             const destination_count,
53                                             wchar_t            const wchar,
54                                             mbstate_t*         const state,
55     _Inout_                                 __crt_cached_ptd_host&   ptd
56     )
57 {
58     _ASSERTE(destination != nullptr && destination_count > 0);
59 
60     _locale_t const locale = ptd.get_locale();
61 
62     _ASSERTE(
63         locale->locinfo->_public._locale_mb_cur_max == 1 ||
64         locale->locinfo->_public._locale_mb_cur_max == 2 ||
65         locale->locinfo->_public._locale_lc_codepage == CP_UTF8);
66 
67     if (state)
68     {
69         state->_Wchar = 0;
70     }
71 
72     if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
73     {
74         // Unlike c16rtomb. wctomb/wcrtomb have no ability to process a partial code point.
75         // So, we could call c16rtomb and check for a lone surrogate or other error, or for simplicity
76         // We can instead just call c32rtomb and check for any error. I choose the latter.
77         static mbstate_t local_state{};
78         int result = static_cast<int>(__crt_mbstring::__c32rtomb_utf8(destination, static_cast<char32_t>(wchar), (state != nullptr ? state : &local_state), ptd));
79         if (return_value != nullptr)
80         {
81             *return_value = result;
82         }
83         if (result <= 4)
84         {
85             return 0;
86         }
87         else
88         {
89             return ptd.get_errno().value_or(0);
90         }
91     }
92 
93     if (!locale->locinfo->locale_name[LC_CTYPE])
94     {
95         if (wchar > 255) // Validate high byte
96         {
97             if (return_value)
98                 *return_value = -1;
99 
100             return ptd.get_errno().set(EILSEQ);
101         }
102 
103         *destination = static_cast<char>(wchar);
104         if (return_value)
105         {
106             *return_value = 1;
107         }
108 
109         return 0;
110     }
111 
112     BOOL default_used{};
113     int const size = __acrt_WideCharToMultiByte(
114         locale->locinfo->_public._locale_lc_codepage,
115         0,
116         &wchar,
117         1,
118         destination,
119         static_cast<int>(destination_count),
120         nullptr,
121         &default_used);
122 
123     if (size == 0 || default_used)
124     {
125         if (return_value)
126         {
127             *return_value = -1;
128         }
129 
130         return ptd.get_errno().set(EILSEQ);
131     }
132 
133     if (return_value)
134     {
135         *return_value = size;
136     }
137 
138     return 0;
139 }
140 
141 /***
142 *errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably
143 *
144 *Purpose:
145 *
146 *Entry:
147 *
148 *Exit:
149 *
150 *Exceptions:
151 *
152 *******************************************************************************/
153 
154 static errno_t __cdecl wcrtomb_s_internal(
155     size_t*            const return_value,
156     char*              const destination,
157     size_t             const destination_count,
158     wchar_t            const wchar,
159     mbstate_t*         const state,
160     __crt_cached_ptd_host&   ptd
161     )
162 {
163     // Note that we do not force destination_count > 0 in the destination !=
164     // nullptr case because we do not need to add a null terminator, due to
165     // the fact that the destination will receive a character and not a string.
166     _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL);
167 
168     errno_t e = 0;
169     int     int_return_value = -1;
170     if (destination == nullptr)
171     {
172         char buf[MB_LEN_MAX];
173         e = _wcrtomb_internal(&int_return_value, buf, MB_LEN_MAX, wchar, state, ptd);
174     }
175     else
176     {
177         e = _wcrtomb_internal(&int_return_value, destination, destination_count, wchar, state, ptd);
178     }
179 
180     if (return_value != nullptr)
181     {
182         *return_value = static_cast<size_t>(int_return_value);
183     }
184 
185     return e;
186 }
187 
188 extern "C" errno_t __cdecl wcrtomb_s(
189     size_t*    const return_value,
190     char*      const destination,
191     size_t     const destination_count,
192     wchar_t    const wchar,
193     mbstate_t* const state
194     )
195 {
196     __crt_cached_ptd_host ptd;
197     return wcrtomb_s_internal(return_value, destination, destination_count, wchar, state, ptd);
198 }
199 
200 extern "C" size_t __cdecl wcrtomb(
201     char*      const destination,
202     wchar_t    const wchar,
203     mbstate_t* const state
204     )
205 {
206     size_t return_value = static_cast<size_t>(-1);
207     wcrtomb_s(&return_value, destination, (destination == nullptr ? 0 : MB_LEN_MAX), wchar, state);
208     return return_value;
209 }
210 
211 /***
212 *errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte
213 *       string
214 *
215 *Purpose:
216 *
217 *Entry:
218 *
219 *Exit:
220 *
221 *Exceptions:
222 *
223 *******************************************************************************/
224 
225 /* Helper shared by secure and non-secure functions. */
226 
227 static size_t __cdecl _wcsrtombs_internal(
228     _Pre_maybenull_ _Post_z_    char*                   destination,
229     _Inout_ _Deref_prepost_z_   wchar_t const** const   source,
230     _In_                        size_t                  n,
231     _Out_opt_                   mbstate_t*      const   state,
232     _Inout_                     __crt_cached_ptd_host&  ptd
233     ) throw()
234 {
235     /* validation section */
236     _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1);
237 
238     _locale_t const locale = ptd.get_locale();
239 
240     if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
241     {
242         return __wcsrtombs_utf8(destination, source, n, state, ptd);
243     }
244 
245     char buf[MB_LEN_MAX];
246     int i = 0;
247     size_t nc = 0;
248     wchar_t const* wcs = *source;
249 
250     if (!destination)
251     {
252         for (; ; nc += i, ++wcs)
253         {
254             /* translate but don't store */
255             _wcrtomb_internal(&i, buf, MB_LEN_MAX, *wcs, state, ptd);
256             if (i <= 0)
257             {
258                 return static_cast<size_t>(-1);
259             }
260             else if (buf[i - 1] == '\0')
261             {
262                 return nc + i - 1;
263             }
264         }
265     }
266 
267     for (; 0 < n; nc += i, ++wcs, destination += i, n -= i)
268     {
269         /* translate and store */
270         char *t = nullptr;
271 
272         if (n < (size_t)locale->locinfo->_public._locale_mb_cur_max)
273         {
274             t = buf;
275         }
276         else
277         {
278             t = destination;
279         }
280 
281         _wcrtomb_internal(&i, t, MB_LEN_MAX, *wcs, state, ptd);
282         if (i <= 0)
283         {
284             /* encountered invalid sequence */
285             nc = (size_t)-1;
286             break;
287         }
288 
289         if (destination == t)
290         {
291             /* do nothing */
292         }
293         else if (n < static_cast<size_t>(i))
294         {
295             break;  // Won't all fit
296         }
297         else
298         {
299             memcpy_s(destination, n, buf, i);
300         }
301 
302         if (destination[i - 1] == '\0')
303         {
304             // Encountered terminating null
305             *source = 0;
306             return nc + i - 1;
307         }
308     }
309 
310     *source = wcs;
311     return nc;
312 }
313 
314 extern "C" size_t __cdecl wcsrtombs(
315     char*           const destination,
316     wchar_t const** const source,
317     size_t          const n,
318     mbstate_t*      const state
319     )
320 {
321     __crt_cached_ptd_host ptd;
322     return _wcsrtombs_internal(destination, source, n, state, ptd);
323 }
324 
325 /***
326 *errno_t wcstombs_s() - Convert wide char string to multibyte char string.
327 *
328 *Purpose:
329 *       Convert a wide char string into the equivalent multibyte char string,
330 *       according to the LC_CTYPE category of the current locale.
331 *
332 *       The destination string is always null terminated.
333 *
334 *Entry:
335 *       size_t *return_value = Number of bytes modified including the terminating nullptr
336 *                           This pointer can be nullptr.
337 *       char *destination = pointer to destination multibyte char string
338 *       size_t destination_count = size of the destination buffer
339 *       const wchar_t *source = pointer to source wide character string
340 *       size_t n = maximum number of bytes to store in s (not including the terminating nullptr)
341 *       mbstate_t *state = pointer to state
342 *
343 *Exit:
344 *       The error code.
345 *
346 *Exceptions:
347 *       Input parameters are validated. Refer to the validation section of the function.
348 *
349 *******************************************************************************/
350 
351 extern "C" errno_t __cdecl wcsrtombs_s(
352     size_t*         const return_value,
353     char*           const destination,
354     size_t          const destination_count,
355     wchar_t const** const source,
356     size_t          const n,
357     mbstate_t*      const state
358     )
359 {
360     __crt_cached_ptd_host ptd;
361 
362     if (return_value != nullptr)
363     {
364         *return_value = static_cast<size_t>(-1);
365     }
366 
367     _UCRT_VALIDATE_RETURN_ERRCODE(
368         ptd,
369         (destination == nullptr && destination_count == 0) ||
370         (destination != nullptr && destination_count >  0),
371     EINVAL);
372 
373     if (destination != nullptr)
374     {
375         _RESET_STRING(destination, destination_count);
376     }
377 
378     _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL);
379 
380     size_t retsize = _wcsrtombs_internal(destination, source, (n > destination_count ? destination_count : n), state, ptd);
381     if (retsize == static_cast<size_t>(-1))
382     {
383         if (destination != nullptr)
384         {
385             _RESET_STRING(destination, destination_count);
386         }
387 
388         return ptd.get_errno().value_or(0);
389     }
390 
391     ++retsize; // Account for the null terminator
392 
393     if (destination != nullptr)
394     {
395         // Return error if the string does not fit:
396         if (retsize > destination_count)
397         {
398             _RESET_STRING(destination, destination_count);
399             _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE);
400         }
401 
402         // Ensure the string is null terminated:
403         destination[retsize - 1] = '\0';
404     }
405 
406     if (return_value != nullptr)
407     {
408         *return_value = retsize;
409     }
410 
411     return 0;
412 }
413 
414 
415 
416 // Converts a wide character into a one-byte character
417 extern "C" int __cdecl wctob(wint_t const wchar)
418 {
419     __crt_cached_ptd_host ptd;
420 
421     if (wchar == WEOF)
422     {
423         return EOF;
424     }
425 
426     int  return_value = -1;
427     char local_buffer[MB_LEN_MAX];
428 
429     mbstate_t state{};
430     errno_t const e = _wcrtomb_internal(&return_value, local_buffer, MB_LEN_MAX, wchar, &state, ptd);
431     if (e == 0 && return_value == 1)
432     {
433         return local_buffer[0];
434     }
435 
436     return EOF;
437 }
438 
439 size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd)
440 {
441     const wchar_t* current_src = *src;
442     char buf[MB_LEN_MAX];
443 
444     if (dst != nullptr)
445     {
446         char* current_dest = dst;
447 
448         // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair)
449         // In case of a failure, keep track of where the current code point began, which might be the previous
450         // wchar for a surrogate pair
451         const wchar_t* start_of_code_point = current_src;
452         for (;;)
453         {
454             // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer
455             // the next char isn't guaranteed to fit, so put it into a temp buffer
456             char* temp;
457             if (len < 4)
458             {
459                 temp = buf;
460             }
461             else
462             {
463                 temp = current_dest;
464             }
465             const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd);
466 
467             if (retval == __crt_mbstring::INVALID)
468             {
469                 // Set src to the beginning of the invalid char
470                 // If this was the second half of a surrogate pair, return the beginning of the surrogate pair
471                 *src = start_of_code_point;
472                 return retval;
473             }
474 
475             if (temp == current_dest)
476             {
477                 // We wrote in-place. Nothing to do.
478             }
479             else if (len < retval)
480             {
481                 // Won't fit, so bail out
482                 // If this was the second half of a surrogate pair, make sure we return that location
483                 current_src = start_of_code_point;
484                 break;
485             }
486             else
487             {
488                 // Will fit in remaining buffer, so let's copy it over
489                 memcpy(current_dest, temp, retval);
490             }
491 
492             if (retval > 0 && current_dest[retval - 1] == '\0')
493             {
494                 // Reached null terminator, so break out, but don't count that last terminating byte
495                 current_src = nullptr;
496                 current_dest += retval - 1;
497                 break;
498             }
499 
500             ++current_src;
501             if (retval > 0)
502             {
503                 start_of_code_point = current_src;
504             }
505 
506             len -= retval;
507             current_dest += retval;
508         }
509         *src = current_src;
510         return current_dest - dst;
511     }
512     else
513     {
514         size_t total_count = 0;
515         for (;;)
516         {
517             const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd);
518             if (retval == __crt_mbstring::INVALID)
519             {
520                 return retval;
521             }
522             else if (retval > 0 && buf[retval - 1] == '\0')
523             {
524                 // Hit null terminator. Don't count it in the return value.
525                 total_count += retval - 1;
526                 break;
527             }
528             total_count += retval;
529             ++current_src;
530         }
531         return total_count;
532     }
533 }
534