1 //
2 // corecrt_internal_mbstring.h
3 //
4 //      Copyright (c) Microsoft Corporation. All rights reserved.
5 //
6 // This internal header defines internal utilities for working with the multibyte
7 // character and string library.
8 //
9 #pragma once
10 
11 #include <corecrt_internal.h>
12 #include <mbctype.h>
13 #include <mbstring.h>
14 #include <uchar.h>
15 
16 _CRT_BEGIN_C_HEADER
17 
18 
19 
// Number of entries in the multibyte full-width-latin upper/lower info table
// (the _MB*_MT accessors further down index entries 0 through 5 of it).
#define NUM_ULINFO 6

/* Internal-use single-byte case macros, since tolower/toupper are
 * locale-dependent.  They read the _mbctype / _mbcasemap objects directly
 * rather than taking a locale argument.
 *
 * Every use of the argument is parenthesized so that an arbitrary expression
 * can be passed.  NOTE: _c is evaluated more than once, so do not pass an
 * expression with side effects. */
#define _mbbisupper(_c) ((_mbctype.value()[(_c) + 1] & _SBUP) == _SBUP)
#define _mbbislower(_c) ((_mbctype.value()[(_c) + 1] & _SBLOW) == _SBLOW)

/* Map through _mbcasemap only when the byte is flagged as being of the
 * opposite case; otherwise the argument is yielded unchanged. */
#define _mbbtolower(_c) (_mbbisupper(_c) ? _mbcasemap.value()[(_c)] : (_c))
#define _mbbtoupper(_c) (_mbbislower(_c) ? _mbcasemap.value()[(_c)] : (_c))
29 
/* Locale-explicit variants of the single-byte macros.  'p' is a pointer to an
 * object whose mbcinfo member carries the mbctype / mbcasemap tables.
 *
 * 'p' is parenthesized at every use so that expressions such as '&loc' or
 * '*pp' expand correctly.  NOTE: _c and p may be evaluated more than once. */

/* True only when the _lb flag is clear and _ch is flagged as a lead byte
 * (_lb presumably records that the previous byte was itself a lead byte --
 * confirm against callers). */
#define _ismbbtruelead_l(_lb, _ch, p) (!(_lb) && _ismbblead_l((_ch), (p)))

#define _mbbisupper_l(_c, p) (((p)->mbcinfo->mbctype[(_c) + 1] & _SBUP) == _SBUP)
#define _mbbislower_l(_c, p) (((p)->mbcinfo->mbctype[(_c) + 1] & _SBLOW) == _SBLOW)

/* Map through the locale's mbcasemap only when the byte is flagged as being of
 * the opposite case; otherwise the argument is yielded unchanged. */
#define _mbbtolower_l(_c, p) (_mbbisupper_l((_c), (p)) ? (p)->mbcinfo->mbcasemap[(_c)] : (_c))
#define _mbbtoupper_l(_c, p) (_mbbislower_l((_c), (p)) ? (p)->mbcinfo->mbcasemap[(_c)] : (_c))
35 
/* Full-width-latin upper/lower ranges, read from the locale's mbulinfo table.
 * Entries 0-2 describe the first range (low bound, high bound, case
 * difference) and entries 3-5 the second range.
 *
 * Both the argument and the whole expansion are parenthesized so the macros
 * can be used with any pointer expression and inside any larger expression. */

#define _MBUPPERLOW1_MT(p)  ((p)->mbcinfo->mbulinfo[0])
#define _MBUPPERHIGH1_MT(p) ((p)->mbcinfo->mbulinfo[1])
#define _MBCASEDIFF1_MT(p)  ((p)->mbcinfo->mbulinfo[2])

#define _MBUPPERLOW2_MT(p)  ((p)->mbcinfo->mbulinfo[3])
#define _MBUPPERHIGH2_MT(p) ((p)->mbcinfo->mbulinfo[4])
#define _MBCASEDIFF2_MT(p)  ((p)->mbcinfo->mbulinfo[5])
45 
// Kanji-specific double-byte ranges.  Each LOW/HIGH pair bounds a range of
// two-byte character values; an EXCEPT value lies inside its range and is
// called out as an exception to it.

// Hiragana
#define _MBHIRALOW      0x829f
#define _MBHIRAHIGH     0x82f1

// Katakana
#define _MBKATALOW      0x8340
#define _MBKATAHIGH     0x8396
#define _MBKATAEXCEPT   0x837f  // exception within the katakana range

// Kanji punctuation
#define _MBKIGOULOW     0x8141
#define _MBKIGOUHIGH    0x81ac
#define _MBKIGOUEXCEPT  0x817f  // exception within the punctuation range
57 
// Building blocks for the byte classification functions.  Each macro yields a
// nonzero value when byte _c matches either the listed bits in the locale's
// _locale_pctype table or the listed bits in its mbctype table.
//
// The _locale_pctype accesses are internal; callers guard them with bounds
// checks.  _c is evaluated more than once.

// Alphanumeric: _ALPHA or _DIGIT, or flagged _MS in mbctype.
#define _ismbbalnum_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MS))

// Alphabetic: _ALPHA, or flagged _MS in mbctype.
#define _ismbbalpha_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_ALPHA)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MS))

// Graphic: _PUNCT, _ALPHA or _DIGIT, or flagged _MS / _MP in mbctype.
#define _ismbbgraph_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_PUNCT | _ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & (_MS | _MP)))

// Printable: as graphic, plus _BLANK.
#define _ismbbprint_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_BLANK | _PUNCT | _ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & (_MS | _MP)))

// Punctuation: _PUNCT, or flagged _MP in mbctype.
#define _ismbbpunct_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & _PUNCT) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MP))

// Blank: a tab always yields _BLANK; any other byte yields the result of the
// _BLANK / _MP table tests.
#define _ismbbblank_l(_c, pt) \
    (((_c) == '\t') \
        ? _BLANK \
        : ((((pt)->locinfo->_public._locale_pctype)[_c] & _BLANK) || \
           (((pt)->mbcinfo->mbctype + 1)[_c] & _MP)))
// Lead / trail byte tests: nonzero when byte _c carries the _M1 (lead) or _M2
// (trail) flag in the mbctype table of locale 'p'.
//
// Note that these are intended for double byte character sets (DBCS), so
// UTF-8 doesn't consider either to be true for any byte (for UTF-8 we never
// set _M1 or _M2 in this array).
//
// 'p' is parenthesized so that expressions such as '&loc' or '*pp' expand
// correctly.
#define _ismbblead_l(_c, p)   (((p)->mbcinfo->mbctype + 1)[_c] & _M1)
#define _ismbbtrail_l(_c, p)  (((p)->mbcinfo->mbctype + 1)[_c] & _M2)
80 
81 
82 
83 #ifdef __cplusplus
__dcrt_multibyte_check_type(unsigned int const c,_locale_t const locale,unsigned short const category_bits,bool const expected)84 extern "C" inline int __cdecl __dcrt_multibyte_check_type(
85     unsigned int   const c,
86     _locale_t      const locale,
87     unsigned short const category_bits,
88     bool           const expected
89     )
90 {
91     // Return false if we are not in a supported multibyte codepage:
92     if (!locale->mbcinfo->ismbcodepage)
93         return FALSE;
94 
95     int const code_page = locale->mbcinfo->mbcodepage;
96 
97     char const bytes[] = { static_cast<char>((c >> 8) & 0xff), static_cast<char>(c & 0xff) };
98 
99     // The 'c' "character" could be two one-byte multibyte characters, so we
100     // need room in the type array to handle this.  If 'c' is two one-byte
101     // multibyte characters, the second element in the type array will be
102     // nonzero.
103     unsigned short ctypes[2] = { };
104 
105     if (__acrt_GetStringTypeA(locale, CT_CTYPE1, bytes, _countof(bytes), ctypes, code_page, TRUE) == 0)
106         return FALSE;
107 
108     // Ensure 'c' is a single multibyte character:
109     if (ctypes[1] != 0)
110         return FALSE;
111 
112     // Test the category:
113     return static_cast<bool>((ctypes[0] & category_bits) != 0) == expected ? TRUE : FALSE;
114 }
115 #endif
116 
117 _Check_return_wat_
118 extern "C" errno_t __cdecl _wctomb_internal(
119     _Out_opt_                        int*                  _SizeConverted,
120     _Out_writes_opt_z_(_SizeInBytes) char*                 _MbCh,
121     _In_                             size_t                _SizeInBytes,
122     _In_                             wchar_t               _WCh,
123     _Inout_                         __crt_cached_ptd_host& _Ptd
124     );
125 
126 _Success_(return != -1)
127 extern "C" int __cdecl _mbtowc_internal(
128     _Pre_notnull_ _Post_z_               wchar_t*               _DstCh,
129     _In_reads_or_z_opt_(_SrcSizeInBytes) char const*            _SrcCh,
130     _In_                                 size_t                 _SrcSizeInBytes,
131     _Inout_                              __crt_cached_ptd_host& _Ptd
132     );
133 
134 _CRT_END_C_HEADER
135 
136 namespace __crt_mbstring
137 {
138     size_t __cdecl __c16rtomb_utf8(char* s, char16_t c16, mbstate_t* ps, __crt_cached_ptd_host& ptd);
139     size_t __cdecl __c32rtomb_utf8(char* s, char32_t c32, mbstate_t* ps, __crt_cached_ptd_host& ptd);
140     size_t __cdecl __mbrtoc16_utf8(char16_t* pc32, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
141     size_t __cdecl __mbrtoc32_utf8(char32_t* pc32, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
142 
143     size_t __cdecl __mbrtowc_utf8(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
144     size_t __cdecl __mbsrtowcs_utf8(wchar_t* dst, const char** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd);
145     size_t __cdecl __wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd);
146 
147     constexpr size_t INVALID = static_cast<size_t>(-1);
148     constexpr size_t INCOMPLETE = static_cast<size_t>(-2);
149 
150     size_t return_illegal_sequence(mbstate_t* ps, __crt_cached_ptd_host& ptd);
151     size_t reset_and_return(size_t retval, mbstate_t* ps);
152 }
153