1 /***
2 *wcrtomb.cpp - Convert wide character to multibyte character, with locale.
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 * Convert a wide character into the equivalent multibyte character.
8 *
9 *******************************************************************************/
10 #include <corecrt_internal_mbstring.h>
11 #include <corecrt_internal_ptd_propagation.h>
12 #include <corecrt_internal_securecrt.h>
13 #include <limits.h>
14 #include <locale.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <wchar.h>
19
20 using namespace __crt_mbstring;
21
22 /***
23 *errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character.
24 *
25 *Purpose:
26 * Convert a wide character into the equivalent multi-byte character,
27 * according to the specified LC_CTYPE category, or the current locale.
28 * [ANSI].
29 *
30 * NOTE: Currently, the C libraries support the "C" locale only.
31 * Non-C locale support now available under _INTL switch.
32 *Entry:
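33 * int *return_value = the number of bytes written (-1 in error case); may be nullptr
34 * char *destination = pointer to the buffer that receives the multibyte character
35 * size_t destination_count = size of the destination buffer
36 * wchar_t wchar = source wide character
37 * mbstate_t *state = pointer to conversion state; may be nullptr (its _Wchar member is reset to 0 when supplied)
38 * __crt_cached_ptd_host& ptd = per-thread data host that supplies the locale and errno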
39 *
40 *Exit:
41 * Returns:
42 * Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case.
43 *
44 *Exceptions:
45 *
46 *******************************************************************************/
47
48 _Success_(return == 0)
_wcrtomb_internal(int * const return_value,__out_bcount_z_opt (destination_count)char * const destination,size_t const destination_count,wchar_t const wchar,mbstate_t * const state,_Inout_ __crt_cached_ptd_host & ptd)49 static errno_t __cdecl _wcrtomb_internal(
50 int* const return_value,
51 __out_bcount_z_opt(destination_count) char* const destination,
52 size_t const destination_count,
53 wchar_t const wchar,
54 mbstate_t* const state,
55 _Inout_ __crt_cached_ptd_host& ptd
56 )
57 {
58 _ASSERTE(destination != nullptr && destination_count > 0);
59
60 _locale_t const locale = ptd.get_locale();
61
62 _ASSERTE(
63 locale->locinfo->_public._locale_mb_cur_max == 1 ||
64 locale->locinfo->_public._locale_mb_cur_max == 2 ||
65 locale->locinfo->_public._locale_lc_codepage == CP_UTF8);
66
67 if (state)
68 {
69 state->_Wchar = 0;
70 }
71
72 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
73 {
74 // Unlike c16rtomb. wctomb/wcrtomb have no ability to process a partial code point.
75 // So, we could call c16rtomb and check for a lone surrogate or other error, or for simplicity
76 // We can instead just call c32rtomb and check for any error. I choose the latter.
77 static mbstate_t local_state{};
78 int result = static_cast<int>(__crt_mbstring::__c32rtomb_utf8(destination, static_cast<char32_t>(wchar), (state != nullptr ? state : &local_state), ptd));
79 if (return_value != nullptr)
80 {
81 *return_value = result;
82 }
83 if (result <= 4)
84 {
85 return 0;
86 }
87 else
88 {
89 return ptd.get_errno().value_or(0);
90 }
91 }
92
93 if (!locale->locinfo->locale_name[LC_CTYPE])
94 {
95 if (wchar > 255) // Validate high byte
96 {
97 if (return_value)
98 *return_value = -1;
99
100 return ptd.get_errno().set(EILSEQ);
101 }
102
103 *destination = static_cast<char>(wchar);
104 if (return_value)
105 {
106 *return_value = 1;
107 }
108
109 return 0;
110 }
111
112 BOOL default_used{};
113 int const size = __acrt_WideCharToMultiByte(
114 locale->locinfo->_public._locale_lc_codepage,
115 0,
116 &wchar,
117 1,
118 destination,
119 static_cast<int>(destination_count),
120 nullptr,
121 &default_used);
122
123 if (size == 0 || default_used)
124 {
125 if (return_value)
126 {
127 *return_value = -1;
128 }
129
130 return ptd.get_errno().set(EILSEQ);
131 }
132
133 if (return_value)
134 {
135 *return_value = size;
136 }
137
138 return 0;
139 }
140
141 /***
142 *errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably
143 *
144 *Purpose:
145 *
146 *Entry:
147 *
148 *Exit:
149 *
150 *Exceptions:
151 *
152 *******************************************************************************/
153
wcrtomb_s_internal(size_t * const return_value,char * const destination,size_t const destination_count,wchar_t const wchar,mbstate_t * const state,__crt_cached_ptd_host & ptd)154 static errno_t __cdecl wcrtomb_s_internal(
155 size_t* const return_value,
156 char* const destination,
157 size_t const destination_count,
158 wchar_t const wchar,
159 mbstate_t* const state,
160 __crt_cached_ptd_host& ptd
161 )
162 {
163 // Note that we do not force destination_count > 0 in the destination !=
164 // nullptr case because we do not need to add a null terminator, due to
165 // the fact that the destination will receive a character and not a string.
166 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL);
167
168 errno_t e = 0;
169 int int_return_value = -1;
170 if (destination == nullptr)
171 {
172 char buf[MB_LEN_MAX];
173 e = _wcrtomb_internal(&int_return_value, buf, MB_LEN_MAX, wchar, state, ptd);
174 }
175 else
176 {
177 e = _wcrtomb_internal(&int_return_value, destination, destination_count, wchar, state, ptd);
178 }
179
180 if (return_value != nullptr)
181 {
182 *return_value = static_cast<size_t>(int_return_value);
183 }
184
185 return e;
186 }
187
wcrtomb_s(size_t * const return_value,char * const destination,size_t const destination_count,wchar_t const wchar,mbstate_t * const state)188 extern "C" errno_t __cdecl wcrtomb_s(
189 size_t* const return_value,
190 char* const destination,
191 size_t const destination_count,
192 wchar_t const wchar,
193 mbstate_t* const state
194 )
195 {
196 __crt_cached_ptd_host ptd;
197 return wcrtomb_s_internal(return_value, destination, destination_count, wchar, state, ptd);
198 }
199
wcrtomb(char * const destination,wchar_t const wchar,mbstate_t * const state)200 extern "C" size_t __cdecl wcrtomb(
201 char* const destination,
202 wchar_t const wchar,
203 mbstate_t* const state
204 )
205 {
206 size_t return_value = static_cast<size_t>(-1);
207 wcrtomb_s(&return_value, destination, (destination == nullptr ? 0 : MB_LEN_MAX), wchar, state);
208 return return_value;
209 }
210
211 /***
212 *errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte
213 * string
214 *
215 *Purpose:
216 *
217 *Entry:
218 *
219 *Exit:
220 *
221 *Exceptions:
222 *
223 *******************************************************************************/
224
225 /* Helper shared by secure and non-secure functions. */
226
_wcsrtombs_internal(_Pre_maybenull_ _Post_z_ char * destination,_Inout_ _Deref_prepost_z_ wchar_t const ** const source,_In_ size_t n,_Out_opt_ mbstate_t * const state,_Inout_ __crt_cached_ptd_host & ptd)227 static size_t __cdecl _wcsrtombs_internal(
228 _Pre_maybenull_ _Post_z_ char* destination,
229 _Inout_ _Deref_prepost_z_ wchar_t const** const source,
230 _In_ size_t n,
231 _Out_opt_ mbstate_t* const state,
232 _Inout_ __crt_cached_ptd_host& ptd
233 ) throw()
234 {
235 /* validation section */
236 _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1);
237
238 _locale_t const locale = ptd.get_locale();
239
240 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
241 {
242 return __wcsrtombs_utf8(destination, source, n, state, ptd);
243 }
244
245 char buf[MB_LEN_MAX];
246 int i = 0;
247 size_t nc = 0;
248 wchar_t const* wcs = *source;
249
250 if (!destination)
251 {
252 for (; ; nc += i, ++wcs)
253 {
254 /* translate but don't store */
255 _wcrtomb_internal(&i, buf, MB_LEN_MAX, *wcs, state, ptd);
256 if (i <= 0)
257 {
258 return static_cast<size_t>(-1);
259 }
260 else if (buf[i - 1] == '\0')
261 {
262 return nc + i - 1;
263 }
264 }
265 }
266
267 for (; 0 < n; nc += i, ++wcs, destination += i, n -= i)
268 {
269 /* translate and store */
270 char *t = nullptr;
271
272 if (n < (size_t)locale->locinfo->_public._locale_mb_cur_max)
273 {
274 t = buf;
275 }
276 else
277 {
278 t = destination;
279 }
280
281 _wcrtomb_internal(&i, t, MB_LEN_MAX, *wcs, state, ptd);
282 if (i <= 0)
283 {
284 /* encountered invalid sequence */
285 nc = (size_t)-1;
286 break;
287 }
288
289 if (destination == t)
290 {
291 /* do nothing */
292 }
293 else if (n < static_cast<size_t>(i))
294 {
295 break; // Won't all fit
296 }
297 else
298 {
299 memcpy_s(destination, n, buf, i);
300 }
301
302 if (destination[i - 1] == '\0')
303 {
304 // Encountered terminating null
305 *source = 0;
306 return nc + i - 1;
307 }
308 }
309
310 *source = wcs;
311 return nc;
312 }
313
wcsrtombs(char * const destination,wchar_t const ** const source,size_t const n,mbstate_t * const state)314 extern "C" size_t __cdecl wcsrtombs(
315 char* const destination,
316 wchar_t const** const source,
317 size_t const n,
318 mbstate_t* const state
319 )
320 {
321 __crt_cached_ptd_host ptd;
322 return _wcsrtombs_internal(destination, source, n, state, ptd);
323 }
324
325 /***
326 *errno_t wcsrtombs_s() - Convert wide char string to multibyte char string, restartably.
327 *
328 *Purpose:
329 * Convert a wide char string into the equivalent multibyte char string,
330 * according to the LC_CTYPE category of the current locale.
331 *
332 * The destination string is always null terminated.
333 *
334 *Entry:
335 * size_t *return_value = Number of bytes modified including the terminating null
336 * This pointer can be nullptr.
337 * char *destination = pointer to destination multibyte char string
338 * size_t destination_count = size of the destination buffer
339 * const wchar_t **source = pointer to a pointer to the source wide character string
340 * size_t n = maximum number of bytes to store in destination (not including the terminating null)
341 * mbstate_t *state = pointer to state
342 *
343 *Exit:
344 * The error code.
345 *
346 *Exceptions:
347 * Input parameters are validated. Refer to the validation section of the function.
348 *
349 *******************************************************************************/
350
wcsrtombs_s(size_t * const return_value,char * const destination,size_t const destination_count,wchar_t const ** const source,size_t const n,mbstate_t * const state)351 extern "C" errno_t __cdecl wcsrtombs_s(
352 size_t* const return_value,
353 char* const destination,
354 size_t const destination_count,
355 wchar_t const** const source,
356 size_t const n,
357 mbstate_t* const state
358 )
359 {
360 __crt_cached_ptd_host ptd;
361
362 if (return_value != nullptr)
363 {
364 *return_value = static_cast<size_t>(-1);
365 }
366
367 _UCRT_VALIDATE_RETURN_ERRCODE(
368 ptd,
369 (destination == nullptr && destination_count == 0) ||
370 (destination != nullptr && destination_count > 0),
371 EINVAL);
372
373 if (destination != nullptr)
374 {
375 _RESET_STRING(destination, destination_count);
376 }
377
378 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL);
379
380 size_t retsize = _wcsrtombs_internal(destination, source, (n > destination_count ? destination_count : n), state, ptd);
381 if (retsize == static_cast<size_t>(-1))
382 {
383 if (destination != nullptr)
384 {
385 _RESET_STRING(destination, destination_count);
386 }
387
388 return ptd.get_errno().value_or(0);
389 }
390
391 ++retsize; // Account for the null terminator
392
393 if (destination != nullptr)
394 {
395 // Return error if the string does not fit:
396 if (retsize > destination_count)
397 {
398 _RESET_STRING(destination, destination_count);
399 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE);
400 }
401
402 // Ensure the string is null terminated:
403 destination[retsize - 1] = '\0';
404 }
405
406 if (return_value != nullptr)
407 {
408 *return_value = retsize;
409 }
410
411 return 0;
412 }
413
414
415
416 // Converts a wide character into a one-byte character
wctob(wint_t const wchar)417 extern "C" int __cdecl wctob(wint_t const wchar)
418 {
419 __crt_cached_ptd_host ptd;
420
421 if (wchar == WEOF)
422 {
423 return EOF;
424 }
425
426 int return_value = -1;
427 char local_buffer[MB_LEN_MAX];
428
429 mbstate_t state{};
430 errno_t const e = _wcrtomb_internal(&return_value, local_buffer, MB_LEN_MAX, wchar, &state, ptd);
431 if (e == 0 && return_value == 1)
432 {
433 return local_buffer[0];
434 }
435
436 return EOF;
437 }
438
__wcsrtombs_utf8(char * dst,const wchar_t ** src,size_t len,mbstate_t * ps,__crt_cached_ptd_host & ptd)439 size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd)
440 {
441 const wchar_t* current_src = *src;
442 char buf[MB_LEN_MAX];
443
444 if (dst != nullptr)
445 {
446 char* current_dest = dst;
447
448 // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair)
449 // In case of a failure, keep track of where the current code point began, which might be the previous
450 // wchar for a surrogate pair
451 const wchar_t* start_of_code_point = current_src;
452 for (;;)
453 {
454 // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer
455 // the next char isn't guaranteed to fit, so put it into a temp buffer
456 char* temp;
457 if (len < 4)
458 {
459 temp = buf;
460 }
461 else
462 {
463 temp = current_dest;
464 }
465 const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd);
466
467 if (retval == __crt_mbstring::INVALID)
468 {
469 // Set src to the beginning of the invalid char
470 // If this was the second half of a surrogate pair, return the beginning of the surrogate pair
471 *src = start_of_code_point;
472 return retval;
473 }
474
475 if (temp == current_dest)
476 {
477 // We wrote in-place. Nothing to do.
478 }
479 else if (len < retval)
480 {
481 // Won't fit, so bail out
482 // If this was the second half of a surrogate pair, make sure we return that location
483 current_src = start_of_code_point;
484 break;
485 }
486 else
487 {
488 // Will fit in remaining buffer, so let's copy it over
489 memcpy(current_dest, temp, retval);
490 }
491
492 if (retval > 0 && current_dest[retval - 1] == '\0')
493 {
494 // Reached null terminator, so break out, but don't count that last terminating byte
495 current_src = nullptr;
496 current_dest += retval - 1;
497 break;
498 }
499
500 ++current_src;
501 if (retval > 0)
502 {
503 start_of_code_point = current_src;
504 }
505
506 len -= retval;
507 current_dest += retval;
508 }
509 *src = current_src;
510 return current_dest - dst;
511 }
512 else
513 {
514 size_t total_count = 0;
515 for (;;)
516 {
517 const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd);
518 if (retval == __crt_mbstring::INVALID)
519 {
520 return retval;
521 }
522 else if (retval > 0 && buf[retval - 1] == '\0')
523 {
524 // Hit null terminator. Don't count it in the return value.
525 total_count += retval - 1;
526 break;
527 }
528 total_count += retval;
529 ++current_src;
530 }
531 return total_count;
532 }
533 }
534