xref: /reactos/sdk/lib/ucrt/convert/mbtowc.cpp (revision 04e0dc4a)
1 /***
2 *mbtowc.c - Convert multibyte char to wide char.
3 *
4 *       Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 *       Convert a multibyte character into the equivalent wide character.
8 *
9 *******************************************************************************/
10 #include <corecrt_internal_mbstring.h>
11 #include <corecrt_internal_ptd_propagation.h>
12 #include <locale.h>
13 #include <stdlib.h>
14 #include <wchar.h>
15 
16 using namespace __crt_mbstring;
17 
18 /***
19 *int mbtowc() - Convert multibyte char to wide character.
20 *
21 *Purpose:
22 *       Convert a multi-byte character into the equivalent wide character,
23 *       according to the LC_CTYPE category of the current locale.
24 *       [ANSI].
25 *
26 *       NOTE:  Currently, the C libraries support the "C" locale only.
27 *              Non-C locale support now available under _INTL switch.
28 *Entry:
29 *       wchar_t  *pwc = pointer to destination wide character
30 *       const char *s = pointer to multibyte character
31 *       size_t      n = maximum length of multibyte character to consider
32 *
33 *Exit:
*       If s = nullptr, returns 0, indicating we only use state-independent
*       character encodings.  If n = 0, also returns 0 (nothing to convert).
36 *       If s != nullptr, returns:  0 (if *s = null char)
37 *                               -1 (if the next n or fewer bytes not valid mbc)
38 *                               number of bytes comprising converted mbc
39 *
40 *Exceptions:
41 *       If errors are encountered, -1 is returned and errno is set to EILSEQ.
42 *
43 *******************************************************************************/
44 
_mbtowc_internal(wchar_t * pwc,const char * s,size_t n,__crt_cached_ptd_host & ptd)45 extern "C" int __cdecl _mbtowc_internal(
46     wchar_t *              pwc,
47     const char *           s,
48     size_t                 n,
49     __crt_cached_ptd_host& ptd
50     )
51 {
52     static mbstate_t internal_state{};
53     if (!s || n == 0)
54     {
55         /* indicate do not have state-dependent encodings,
56         handle zero length string */
57         internal_state = {};
58         return 0;
59     }
60 
61     if (!*s)
62     {
63         /* handle NULL char */
64         if (pwc)
65         {
66             *pwc = 0;
67         }
68         return 0;
69     }
70 
71     const _locale_t locale = ptd.get_locale();
72 
73     if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
74     {
75         int result = static_cast<int>(__mbrtowc_utf8(pwc, s, n, &internal_state, ptd));
76         if (result < 0)
77             result = -1;
78         return result;
79     }
80 
81     _ASSERTE(locale->locinfo->_public._locale_mb_cur_max == 1 ||
82              locale->locinfo->_public._locale_mb_cur_max == 2);
83 
84     if (locale->locinfo->locale_name[LC_CTYPE] == nullptr)
85     {
86         if (pwc)
87         {
88             *pwc = (wchar_t) (unsigned char) *s;
89         }
90         return sizeof(char);
91     }
92 
93     if (_isleadbyte_fast_internal((unsigned char) *s, locale))
94     {
95         _ASSERTE(locale->locinfo->_public._locale_lc_codepage != CP_UTF8 && L"UTF-8 isn't supported in this _mbtowc_l function yet!!!");
96 
97         /* multi-byte char */
98         // If this is a lead byte, then the codepage better be a multibyte codepage
99         _ASSERTE(locale->locinfo->_public._locale_mb_cur_max > 1);
100 
101         if ((locale->locinfo->_public._locale_mb_cur_max <= 1) || ((int) n < locale->locinfo->_public._locale_mb_cur_max) ||
102             (__acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
103             MB_PRECOMPOSED | MB_ERR_INVALID_CHARS,
104             s,
105             locale->locinfo->_public._locale_mb_cur_max,
106             pwc,
107             (pwc) ? 1 : 0) == 0))
108         {
109             /* validate high byte of mbcs char */
110             if ((n < (size_t) locale->locinfo->_public._locale_mb_cur_max) || (!*(s + 1)))
111             {
112                 ptd.get_errno().set(EILSEQ);
113                 return -1;
114             }
115         }
116         return locale->locinfo->_public._locale_mb_cur_max;
117     }
118     else {
119         /* single byte char */
120         if (__acrt_MultiByteToWideChar(locale->locinfo->_public._locale_lc_codepage,
121             MB_PRECOMPOSED | MB_ERR_INVALID_CHARS,
122             s,
123             1,
124             pwc,
125             (pwc) ? 1 : 0) == 0)
126         {
127             ptd.get_errno().set(EILSEQ);
128             return -1;
129         }
130         return sizeof(char);
131     }
132 }
133 
_mbtowc_l(wchar_t * pwc,const char * s,size_t n,_locale_t plocinfo)134 extern "C" int __cdecl _mbtowc_l(
135     wchar_t  *pwc,
136     const char *s,
137     size_t n,
138     _locale_t plocinfo
139     )
140 {
141     __crt_cached_ptd_host ptd(plocinfo);
142     return _mbtowc_internal(pwc, s, n, ptd);
143 }
144 
mbtowc(wchar_t * pwc,const char * s,size_t n)145 extern "C" int __cdecl mbtowc(
146     wchar_t  *pwc,
147     const char *s,
148     size_t n
149     )
150 {
151     __crt_cached_ptd_host ptd;
152     return _mbtowc_internal(pwc, s, n, ptd);
153 }
154