1 //
2 // mbrtoc16.cpp
3 //
4 // Copyright (c) Microsoft Corporation. All rights reserved.
5 //
6
7 #include <corecrt_internal_mbstring.h>
8 #include <corecrt_internal_ptd_propagation.h>
9 #include <errno.h>
10 #include <uchar.h>
11 #include <wchar.h>
12
13 using namespace __crt_mbstring;
14
15 namespace
16 {
begin_surrogate_state(char16_t * pc16,char32_t c32,size_t retval,mbstate_t * ps)17 inline size_t begin_surrogate_state(char16_t* pc16, char32_t c32, size_t retval, mbstate_t* ps)
18 {
19 ps->_Wchar = c32;
20 ps->_State = static_cast<decltype(ps->_State)>(-1);
21 if (pc16)
22 {
23 *pc16 = static_cast<char16_t>((((c32 - 0x10000) & 0xfffff) >> 10) | 0xd800);
24 }
25 return retval;
26 }
27
end_surrogate_state(char16_t * pc16,mbstate_t * ps)28 inline size_t end_surrogate_state(char16_t* pc16, mbstate_t* ps)
29 {
30 if (pc16)
31 {
32 *pc16 = ((ps->_Wchar - 0x10000) & 0x03ff) | 0xdc00;
33 }
34 return reset_and_return(static_cast<size_t>(-3), ps);
35 }
36
is_surrogate_state(const mbstate_t * ps)37 inline bool is_surrogate_state(const mbstate_t* ps)
38 {
39 return ps->_State == static_cast<decltype(ps->_State)>(-1);
40 }
41 }
42
mbrtoc16(char16_t * pc16,const char * s,size_t n,mbstate_t * ps)43 extern "C" size_t __cdecl mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps)
44 {
45 // TODO: Bug 13307590 says this is always assuming UTF-8.
46 __crt_cached_ptd_host ptd;
47 return __mbrtoc16_utf8(pc16, s, n, ps, ptd);
48 }
49
__mbrtoc16_utf8(char16_t * pc16,const char * s,size_t n,mbstate_t * ps,__crt_cached_ptd_host & ptd)50 size_t __cdecl __crt_mbstring::__mbrtoc16_utf8(char16_t* pc16, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd)
51 {
52 static mbstate_t internal_pst{};
53 if (ps == nullptr)
54 {
55 ps = &internal_pst;
56 }
57
58 if (is_surrogate_state(ps))
59 {
60 return end_surrogate_state(pc16, ps);
61 }
62
63 char32_t c32;
64 const size_t retval = __mbrtoc32_utf8(&c32, s, n, ps, ptd);
65 if (!s || retval == INVALID || retval == INCOMPLETE)
66 {
67 return retval;
68 }
69 else if (c32 > 0x10ffff)
70 {
71 // Input is out of range for UTF-16
72 return return_illegal_sequence(ps, ptd);
73 }
74
75 // Got a valid character
76 if (c32 <= 0xffff)
77 {
78 if (pc16)
79 {
80 *pc16 = static_cast<char16_t>(c32);
81 }
82 return reset_and_return(retval, ps);
83 }
84 else
85 {
86 return begin_surrogate_state(pc16, c32, retval, ps);
87 }
88 }
89