xref: /reactos/sdk/lib/ucrt/convert/c32rtomb.cpp (revision 04e0dc4a)
1 //
2 // c32rtomb.cpp
3 //
4 //      Copyright (c) Microsoft Corporation. All rights reserved.
5 //
6 
7 #include <corecrt_internal_mbstring.h>
8 #include <corecrt_internal_ptd_propagation.h>
9 #include <stdint.h>
10 #include <uchar.h>
11 
12 using namespace __crt_mbstring;
13 
c32rtomb(char * s,char32_t c32,mbstate_t * ps)14 extern "C" size_t __cdecl c32rtomb(char* s, char32_t c32, mbstate_t* ps)
15 {
16     // TODO: Bug 13307590 says this is always assuming UTF-8.
17     __crt_cached_ptd_host ptd;
18     return __c32rtomb_utf8(s, c32, ps, ptd);
19 }
20 
__c32rtomb_utf8(char * s,char32_t c32,mbstate_t * ps,__crt_cached_ptd_host & ptd)21 size_t __cdecl __crt_mbstring::__c32rtomb_utf8(char* s, char32_t c32, mbstate_t* ps, __crt_cached_ptd_host& ptd)
22 {
23     if (!s)
24     {
25         // Equivalent to c32rtomb(buf, U'\0', ps) for some internal buffer buf
26         *ps = {};
27         return 1;
28     }
29 
30     if (c32 == U'\0')
31     {
32         *s = '\0';
33         *ps = {};
34         return 1;
35     }
36 
37     // Fast path for ASCII
38     if ((c32 & ~0x7f) == 0)
39     {
40         *s = static_cast<char>(c32);
41         return 1;
42     }
43 
44     // Figure out how many trail bytes we need
45     size_t trail_bytes;
46     uint8_t lead_byte;
47     if ((c32 & ~0x7ff) == 0)
48     {
49         trail_bytes = 1;
50         lead_byte = 0xc0;
51     }
52     else if ((c32 & ~0xffff) == 0)
53     {
54         // high/low surrogates are only valid in UTF-16 encoded data
55         if (0xd800 <= c32 && c32 <= 0xdfff)
56         {
57             return return_illegal_sequence(ps, ptd);
58         }
59         trail_bytes = 2;
60         lead_byte = 0xe0;
61     }
62     else if ((c32 & ~0x001fffff) == 0)
63     {
64         // Unicode's max code point is 0x10ffff
65         if (0x10ffff < c32)
66         {
67             return return_illegal_sequence(ps, ptd);
68         }
69         trail_bytes = 3;
70         lead_byte = 0xf0;
71     }
72     else
73     {
74         return return_illegal_sequence(ps, ptd);
75     }
76     _ASSERTE(1 <= trail_bytes && trail_bytes <= 3);
77 
78     // Put six bits into each of the trail bytes
79     // Lowest bits are in the last UTF-8 byte.
80     // Filling back to front.
81     for (size_t i = trail_bytes; i > 0; --i)
82     {
83         s[i] = (c32 & 0x3f) | 0x80;
84         c32 >>= 6;
85     }
86 
87     // The first byte needs the upper (trail_bytes + 1) bits to store the length
88     // And the lower (7 - trail_bytes) to store the upper bits of the code point
89     _ASSERTE(c32 < (1u << (7 - trail_bytes)));
90     s[0] = static_cast<uint8_t>(c32) | lead_byte;
91 
92     return reset_and_return(trail_bytes + 1, ps);
93 }
94