1 /*** 2 *wcrtomb.cpp - Convert wide character to multibyte character, with locale. 3 * 4 * Copyright (c) Microsoft Corporation. All rights reserved. 5 * 6 *Purpose: 7 * Convert a wide character into the equivalent multibyte character. 8 * 9 *******************************************************************************/ 10 #include <corecrt_internal_mbstring.h> 11 #include <corecrt_internal_ptd_propagation.h> 12 #include <corecrt_internal_securecrt.h> 13 #include <limits.h> 14 #include <locale.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <wchar.h> 19 20 using namespace __crt_mbstring; 21 22 /*** 23 *errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character. 24 * 25 *Purpose: 26 * Convert a wide character into the equivalent multi-byte character, 27 * according to the specified LC_CTYPE category, or the current locale. 28 * [ANSI]. 29 * 30 * NOTE: Currently, the C libraries support the "C" locale only. 31 * Non-C locale support now available under _INTL switch. 32 *Entry: 33 * int *return_value = the number of chars written (-1 in error case) 34 * char *destination = pointer to multibyte character 35 * size_t destination_count = size of the destinarion buffer 36 * wchar_t wchar = source wide character 37 * mbstate_t *state = pointer to state (not used) 38 * _locale_t locale = locale info 39 * 40 *Exit: 41 * Returns: 42 * Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case. 43 * 44 *Exceptions: 45 * 46 *******************************************************************************/ 47 48 _Success_(return == 0) 49 static errno_t __cdecl _wcrtomb_internal( 50 int* const return_value, 51 __out_bcount_z_opt(destination_count) char* const destination, 52 size_t const destination_count, 53 wchar_t const wchar, 54 mbstate_t* const state, 55 _Inout_ __crt_cached_ptd_host& ptd 56 ) 57 { 58 _ASSERTE(destination != nullptr && destination_count > 0); 59 60 _locale_t const locale = ptd.get_locale(); 61 62 _ASSERTE( 63 locale->locinfo->_public._locale_mb_cur_max == 1 || 64 locale->locinfo->_public._locale_mb_cur_max == 2 || 65 locale->locinfo->_public._locale_lc_codepage == CP_UTF8); 66 67 if (state) 68 { 69 state->_Wchar = 0; 70 } 71 72 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) 73 { 74 // Unlike c16rtomb. wctomb/wcrtomb have no ability to process a partial code point. 75 // So, we could call c16rtomb and check for a lone surrogate or other error, or for simplicity 76 // We can instead just call c32rtomb and check for any error. I choose the latter. 77 static mbstate_t local_state{}; 78 int result = static_cast<int>(__crt_mbstring::__c32rtomb_utf8(destination, static_cast<char32_t>(wchar), (state != nullptr ? state : &local_state), ptd)); 79 if (return_value != nullptr) 80 { 81 *return_value = result; 82 } 83 if (result <= 4) 84 { 85 return 0; 86 } 87 else 88 { 89 return ptd.get_errno().value_or(0); 90 } 91 } 92 93 if (!locale->locinfo->locale_name[LC_CTYPE]) 94 { 95 if (wchar > 255) // Validate high byte 96 { 97 if (return_value) 98 *return_value = -1; 99 100 return ptd.get_errno().set(EILSEQ); 101 } 102 103 *destination = static_cast<char>(wchar); 104 if (return_value) 105 { 106 *return_value = 1; 107 } 108 109 return 0; 110 } 111 112 BOOL default_used{}; 113 int const size = __acrt_WideCharToMultiByte( 114 locale->locinfo->_public._locale_lc_codepage, 115 0, 116 &wchar, 117 1, 118 destination, 119 static_cast<int>(destination_count), 120 nullptr, 121 &default_used); 122 123 if (size == 0 || default_used) 124 { 125 if (return_value) 126 { 127 *return_value = -1; 128 } 129 130 return ptd.get_errno().set(EILSEQ); 131 } 132 133 if (return_value) 134 { 135 *return_value = size; 136 } 137 138 return 0; 139 } 140 141 /*** 142 *errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably 143 * 144 *Purpose: 145 * 146 *Entry: 147 * 148 *Exit: 149 * 150 *Exceptions: 151 * 152 *******************************************************************************/ 153 154 static errno_t __cdecl wcrtomb_s_internal( 155 size_t* const return_value, 156 char* const destination, 157 size_t const destination_count, 158 wchar_t const wchar, 159 mbstate_t* const state, 160 __crt_cached_ptd_host& ptd 161 ) 162 { 163 // Note that we do not force destination_count > 0 in the destination != 164 // nullptr case because we do not need to add a null terminator, due to 165 // the fact that the destination will receive a character and not a string. 166 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL); 167 168 errno_t e = 0; 169 int int_return_value = -1; 170 if (destination == nullptr) 171 { 172 char buf[MB_LEN_MAX]; 173 e = _wcrtomb_internal(&int_return_value, buf, MB_LEN_MAX, wchar, state, ptd); 174 } 175 else 176 { 177 e = _wcrtomb_internal(&int_return_value, destination, destination_count, wchar, state, ptd); 178 } 179 180 if (return_value != nullptr) 181 { 182 *return_value = static_cast<size_t>(int_return_value); 183 } 184 185 return e; 186 } 187 188 extern "C" errno_t __cdecl wcrtomb_s( 189 size_t* const return_value, 190 char* const destination, 191 size_t const destination_count, 192 wchar_t const wchar, 193 mbstate_t* const state 194 ) 195 { 196 __crt_cached_ptd_host ptd; 197 return wcrtomb_s_internal(return_value, destination, destination_count, wchar, state, ptd); 198 } 199 200 extern "C" size_t __cdecl wcrtomb( 201 char* const destination, 202 wchar_t const wchar, 203 mbstate_t* const state 204 ) 205 { 206 size_t return_value = static_cast<size_t>(-1); 207 wcrtomb_s(&return_value, destination, (destination == nullptr ? 0 : MB_LEN_MAX), wchar, state); 208 return return_value; 209 } 210 211 /*** 212 *errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte 213 * string 214 * 215 *Purpose: 216 * 217 *Entry: 218 * 219 *Exit: 220 * 221 *Exceptions: 222 * 223 *******************************************************************************/ 224 225 /* Helper shared by secure and non-secure functions. */ 226 227 static size_t __cdecl _wcsrtombs_internal( 228 _Pre_maybenull_ _Post_z_ char* destination, 229 _Inout_ _Deref_prepost_z_ wchar_t const** const source, 230 _In_ size_t n, 231 _Out_opt_ mbstate_t* const state, 232 _Inout_ __crt_cached_ptd_host& ptd 233 ) throw() 234 { 235 /* validation section */ 236 _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1); 237 238 _locale_t const locale = ptd.get_locale(); 239 240 if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) 241 { 242 return __wcsrtombs_utf8(destination, source, n, state, ptd); 243 } 244 245 char buf[MB_LEN_MAX]; 246 int i = 0; 247 size_t nc = 0; 248 wchar_t const* wcs = *source; 249 250 if (!destination) 251 { 252 for (; ; nc += i, ++wcs) 253 { 254 /* translate but don't store */ 255 _wcrtomb_internal(&i, buf, MB_LEN_MAX, *wcs, state, ptd); 256 if (i <= 0) 257 { 258 return static_cast<size_t>(-1); 259 } 260 else if (buf[i - 1] == '\0') 261 { 262 return nc + i - 1; 263 } 264 } 265 } 266 267 for (; 0 < n; nc += i, ++wcs, destination += i, n -= i) 268 { 269 /* translate and store */ 270 char *t = nullptr; 271 272 if (n < (size_t)locale->locinfo->_public._locale_mb_cur_max) 273 { 274 t = buf; 275 } 276 else 277 { 278 t = destination; 279 } 280 281 _wcrtomb_internal(&i, t, MB_LEN_MAX, *wcs, state, ptd); 282 if (i <= 0) 283 { 284 /* encountered invalid sequence */ 285 nc = (size_t)-1; 286 break; 287 } 288 289 if (destination == t) 290 { 291 /* do nothing */ 292 } 293 else if (n < static_cast<size_t>(i)) 294 { 295 break; // Won't all fit 296 } 297 else 298 { 299 memcpy_s(destination, n, buf, i); 300 } 301 302 if (destination[i - 1] == '\0') 303 { 304 // Encountered terminating null 305 *source = 0; 306 return nc + i - 1; 307 } 308 } 309 310 *source = wcs; 311 return nc; 312 } 313 314 extern "C" size_t __cdecl wcsrtombs( 315 char* const destination, 316 wchar_t const** const source, 317 size_t const n, 318 mbstate_t* const state 319 ) 320 { 321 __crt_cached_ptd_host ptd; 322 return _wcsrtombs_internal(destination, source, n, state, ptd); 323 } 324 325 /*** 326 *errno_t wcstombs_s() - Convert wide char string to multibyte char string. 327 * 328 *Purpose: 329 * Convert a wide char string into the equivalent multibyte char string, 330 * according to the LC_CTYPE category of the current locale. 331 * 332 * The destination string is always null terminated. 333 * 334 *Entry: 335 * size_t *return_value = Number of bytes modified including the terminating nullptr 336 * This pointer can be nullptr. 337 * char *destination = pointer to destination multibyte char string 338 * size_t destination_count = size of the destination buffer 339 * const wchar_t *source = pointer to source wide character string 340 * size_t n = maximum number of bytes to store in s (not including the terminating nullptr) 341 * mbstate_t *state = pointer to state 342 * 343 *Exit: 344 * The error code. 345 * 346 *Exceptions: 347 * Input parameters are validated. Refer to the validation section of the function. 348 * 349 *******************************************************************************/ 350 351 extern "C" errno_t __cdecl wcsrtombs_s( 352 size_t* const return_value, 353 char* const destination, 354 size_t const destination_count, 355 wchar_t const** const source, 356 size_t const n, 357 mbstate_t* const state 358 ) 359 { 360 __crt_cached_ptd_host ptd; 361 362 if (return_value != nullptr) 363 { 364 *return_value = static_cast<size_t>(-1); 365 } 366 367 _UCRT_VALIDATE_RETURN_ERRCODE( 368 ptd, 369 (destination == nullptr && destination_count == 0) || 370 (destination != nullptr && destination_count > 0), 371 EINVAL); 372 373 if (destination != nullptr) 374 { 375 _RESET_STRING(destination, destination_count); 376 } 377 378 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL); 379 380 size_t retsize = _wcsrtombs_internal(destination, source, (n > destination_count ? destination_count : n), state, ptd); 381 if (retsize == static_cast<size_t>(-1)) 382 { 383 if (destination != nullptr) 384 { 385 _RESET_STRING(destination, destination_count); 386 } 387 388 return ptd.get_errno().value_or(0); 389 } 390 391 ++retsize; // Account for the null terminator 392 393 if (destination != nullptr) 394 { 395 // Return error if the string does not fit: 396 if (retsize > destination_count) 397 { 398 _RESET_STRING(destination, destination_count); 399 _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE); 400 } 401 402 // Ensure the string is null terminated: 403 destination[retsize - 1] = '\0'; 404 } 405 406 if (return_value != nullptr) 407 { 408 *return_value = retsize; 409 } 410 411 return 0; 412 } 413 414 415 416 // Converts a wide character into a one-byte character 417 extern "C" int __cdecl wctob(wint_t const wchar) 418 { 419 __crt_cached_ptd_host ptd; 420 421 if (wchar == WEOF) 422 { 423 return EOF; 424 } 425 426 int return_value = -1; 427 char local_buffer[MB_LEN_MAX]; 428 429 mbstate_t state{}; 430 errno_t const e = _wcrtomb_internal(&return_value, local_buffer, MB_LEN_MAX, wchar, &state, ptd); 431 if (e == 0 && return_value == 1) 432 { 433 return local_buffer[0]; 434 } 435 436 return EOF; 437 } 438 439 size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd) 440 { 441 const wchar_t* current_src = *src; 442 char buf[MB_LEN_MAX]; 443 444 if (dst != nullptr) 445 { 446 char* current_dest = dst; 447 448 // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair) 449 // In case of a failure, keep track of where the current code point began, which might be the previous 450 // wchar for a surrogate pair 451 const wchar_t* start_of_code_point = current_src; 452 for (;;) 453 { 454 // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer 455 // the next char isn't guaranteed to fit, so put it into a temp buffer 456 char* temp; 457 if (len < 4) 458 { 459 temp = buf; 460 } 461 else 462 { 463 temp = current_dest; 464 } 465 const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd); 466 467 if (retval == __crt_mbstring::INVALID) 468 { 469 // Set src to the beginning of the invalid char 470 // If this was the second half of a surrogate pair, return the beginning of the surrogate pair 471 *src = start_of_code_point; 472 return retval; 473 } 474 475 if (temp == current_dest) 476 { 477 // We wrote in-place. Nothing to do. 478 } 479 else if (len < retval) 480 { 481 // Won't fit, so bail out 482 // If this was the second half of a surrogate pair, make sure we return that location 483 current_src = start_of_code_point; 484 break; 485 } 486 else 487 { 488 // Will fit in remaining buffer, so let's copy it over 489 memcpy(current_dest, temp, retval); 490 } 491 492 if (retval > 0 && current_dest[retval - 1] == '\0') 493 { 494 // Reached null terminator, so break out, but don't count that last terminating byte 495 current_src = nullptr; 496 current_dest += retval - 1; 497 break; 498 } 499 500 ++current_src; 501 if (retval > 0) 502 { 503 start_of_code_point = current_src; 504 } 505 506 len -= retval; 507 current_dest += retval; 508 } 509 *src = current_src; 510 return current_dest - dst; 511 } 512 else 513 { 514 size_t total_count = 0; 515 for (;;) 516 { 517 const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd); 518 if (retval == __crt_mbstring::INVALID) 519 { 520 return retval; 521 } 522 else if (retval > 0 && buf[retval - 1] == '\0') 523 { 524 // Hit null terminator. Don't count it in the return value. 525 total_count += retval - 1; 526 break; 527 } 528 total_count += retval; 529 ++current_src; 530 } 531 return total_count; 532 } 533 } 534