1 //
2 // corecrt_internal_mbstring.h
3 //
4 // Copyright (c) Microsoft Corporation. All rights reserved.
5 //
6 // This internal header defines internal utilities for working with the multibyte
7 // character and string library.
8 //
9 #pragma once
10
11 #include <corecrt_internal.h>
12 #include <mbctype.h>
13 #include <mbstring.h>
14 #include <uchar.h>
15
16 _CRT_BEGIN_C_HEADER
17
18
19
// Number of entries of full-width-latin upper/lower case information.
// NOTE(review): presumably the element count of mbcinfo->mbulinfo, which the
// _MBUPPERLOWn_MT / _MBUPPERHIGHn_MT / _MBCASEDIFFn_MT accessors in this header
// index with 0 through 5 -- confirm against the mbcinfo declaration.
#define NUM_ULINFO 6
22
/*
 * Internal-use case macros (tolower/toupper are locale-dependent, so the
 * multibyte code uses these instead).
 *
 * _mbbisupper / _mbbislower test the _SBUP / _SBLOW bit of entry (_c) + 1 of
 * the table returned by _mbctype.value().
 * _mbbtolower / _mbbtoupper yield entry _c of the table returned by
 * _mbcasemap.value() when _c has the opposite case, and _c itself otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * expands the same way at each use. Note that _c is evaluated more than once.
 */
#define _mbbisupper(_c) ((_mbctype.value()[(_c) + 1] & _SBUP) == _SBUP)
#define _mbbislower(_c) ((_mbctype.value()[(_c) + 1] & _SBLOW) == _SBLOW)

#define _mbbtolower(_c) (_mbbisupper(_c) ? _mbcasemap.value()[(_c)] : (_c))
#define _mbbtoupper(_c) (_mbbislower(_c) ? _mbcasemap.value()[(_c)] : (_c))
29
// Locale-specific variants of the case macros. 'p' is a pointer whose mbcinfo
// member supplies the mbctype and mbcasemap tables; it is parenthesized in
// every expansion so that arguments such as '&loc' or 'a ? b : c' are
// dereferenced as a whole. Both _c and p may be evaluated more than once.
//
// _ismbbtruelead_l is nonzero only when _lb is zero and _ch is a lead byte
// according to _ismbblead_l.
// NOTE(review): _lb presumably records that the preceding byte was itself a
// lead byte -- confirm against callers.
#define _ismbbtruelead_l(_lb,_ch,p) (!(_lb) && _ismbblead_l((_ch), (p)))
#define _mbbisupper_l(_c, p) (((p)->mbcinfo->mbctype[(_c) + 1] & _SBUP) == _SBUP)
#define _mbbislower_l(_c, p) (((p)->mbcinfo->mbctype[(_c) + 1] & _SBLOW) == _SBLOW)
#define _mbbtolower_l(_c, p) (_mbbisupper_l(_c, p) ? (p)->mbcinfo->mbcasemap[(_c)] : (_c))
#define _mbbtoupper_l(_c, p) (_mbbislower_l(_c, p) ? (p)->mbcinfo->mbcasemap[(_c)] : (_c))
35
/*
 * Accessors for the full-width-latin upper/lower range information stored in
 * p->mbcinfo->mbulinfo. Entries 0-2 describe the first range and entries 3-5
 * the second: low bound, high bound and case difference, in that order.
 *
 * 'p' is parenthesized so that any pointer expression (for example '&loc')
 * can be passed; each expansion remains an lvalue.
 */

#define _MBUPPERLOW1_MT(p) ((p)->mbcinfo->mbulinfo[0])
#define _MBUPPERHIGH1_MT(p) ((p)->mbcinfo->mbulinfo[1])
#define _MBCASEDIFF1_MT(p) ((p)->mbcinfo->mbulinfo[2])

#define _MBUPPERLOW2_MT(p) ((p)->mbcinfo->mbulinfo[3])
#define _MBUPPERHIGH2_MT(p) ((p)->mbcinfo->mbulinfo[4])
#define _MBCASEDIFF2_MT(p) ((p)->mbcinfo->mbulinfo[5])
45
// Kanji-specific code ranges. Each group gives a low and a high bound; the
// *EXCEPT values are marked as exceptions for their group.
// NOTE(review): the exception codes presumably fall inside the range but are
// not members of the class -- confirm at the use sites.

// Hiragana
#define _MBHIRALOW 0x829F
#define _MBHIRAHIGH 0x82F1

// Katakana
#define _MBKATALOW 0x8340
#define _MBKATAHIGH 0x8396
#define _MBKATAEXCEPT 0x837F

// Kanji punctuation
#define _MBKIGOULOW 0x8141
#define _MBKIGOUHIGH 0x81AC
#define _MBKIGOUEXCEPT 0x817F
57
// Building blocks for the multibyte classification functions.
//
// Each macro is nonzero when byte _c has one of the listed ctype bits in the
// locale's _locale_pctype table, or one of the listed multibyte bits (_MS,
// _MP) in the locale's mbctype table, which is indexed from its second
// element. _ismbbblank_l additionally yields _BLANK for '\t' without
// consulting either table.
//
// The _locale_pctype accesses are internal; the users of these macros guard
// them with bounds checks.
#define _ismbbalnum_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MS))

#define _ismbbalpha_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_ALPHA)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MS))

#define _ismbbgraph_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_PUNCT | _ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & (_MS | _MP)))

#define _ismbbprint_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & (_BLANK | _PUNCT | _ALPHA | _DIGIT)) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & (_MS | _MP)))

#define _ismbbpunct_l(_c, pt) \
    ((((pt)->locinfo->_public._locale_pctype)[_c] & _PUNCT) || \
     (((pt)->mbcinfo->mbctype + 1)[_c] & _MP))

#define _ismbbblank_l(_c, pt) \
    (((_c) == '\t') \
        ? _BLANK \
        : (((pt)->locinfo->_public._locale_pctype)[_c] & _BLANK) || \
          (((pt)->mbcinfo->mbctype + 1)[_c] & _MP))
// Lead-byte / trail-byte tests: nonzero when the _M1 (lead) or _M2 (trail) bit
// is set for byte _c in the locale's mbctype table, which is indexed from its
// second element.
//
// Note that these are intended for double byte character sets (DBCS), so for
// UTF-8 neither is true for any byte (for UTF-8 we never set _M1 or _M2 in
// this array).
//
// 'p' is parenthesized so that any pointer expression (for example '&loc')
// is dereferenced as a whole.
#define _ismbblead_l(_c, p) (((p)->mbcinfo->mbctype + 1)[_c] & _M1)
#define _ismbbtrail_l(_c, p) (((p)->mbcinfo->mbctype + 1)[_c] & _M2)
80
81
82
83 #ifdef __cplusplus
__dcrt_multibyte_check_type(unsigned int const c,_locale_t const locale,unsigned short const category_bits,bool const expected)84 extern "C" inline int __cdecl __dcrt_multibyte_check_type(
85 unsigned int const c,
86 _locale_t const locale,
87 unsigned short const category_bits,
88 bool const expected
89 )
90 {
91 // Return false if we are not in a supported multibyte codepage:
92 if (!locale->mbcinfo->ismbcodepage)
93 return FALSE;
94
95 int const code_page = locale->mbcinfo->mbcodepage;
96
97 char const bytes[] = { static_cast<char>((c >> 8) & 0xff), static_cast<char>(c & 0xff) };
98
99 // The 'c' "character" could be two one-byte multibyte characters, so we
100 // need room in the type array to handle this. If 'c' is two one-byte
101 // multibyte characters, the second element in the type array will be
102 // nonzero.
103 unsigned short ctypes[2] = { };
104
105 if (__acrt_GetStringTypeA(locale, CT_CTYPE1, bytes, _countof(bytes), ctypes, code_page, TRUE) == 0)
106 return FALSE;
107
108 // Ensure 'c' is a single multibyte character:
109 if (ctypes[1] != 0)
110 return FALSE;
111
112 // Test the category:
113 return static_cast<bool>((ctypes[0] & category_bits) != 0) == expected ? TRUE : FALSE;
114 }
115 #endif
116
117 _Check_return_wat_
118 extern "C" errno_t __cdecl _wctomb_internal(
119 _Out_opt_ int* _SizeConverted,
120 _Out_writes_opt_z_(_SizeInBytes) char* _MbCh,
121 _In_ size_t _SizeInBytes,
122 _In_ wchar_t _WCh,
123 _Inout_ __crt_cached_ptd_host& _Ptd
124 );
125
126 _Success_(return != -1)
127 extern "C" int __cdecl _mbtowc_internal(
128 _Pre_notnull_ _Post_z_ wchar_t* _DstCh,
129 _In_reads_or_z_opt_(_SrcSizeInBytes) char const* _SrcCh,
130 _In_ size_t _SrcSizeInBytes,
131 _Inout_ __crt_cached_ptd_host& _Ptd
132 );
133
134 _CRT_END_C_HEADER
135
136 namespace __crt_mbstring
137 {
138 size_t __cdecl __c16rtomb_utf8(char* s, char16_t c16, mbstate_t* ps, __crt_cached_ptd_host& ptd);
139 size_t __cdecl __c32rtomb_utf8(char* s, char32_t c32, mbstate_t* ps, __crt_cached_ptd_host& ptd);
140 size_t __cdecl __mbrtoc16_utf8(char16_t* pc32, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
141 size_t __cdecl __mbrtoc32_utf8(char32_t* pc32, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
142
143 size_t __cdecl __mbrtowc_utf8(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps, __crt_cached_ptd_host& ptd);
144 size_t __cdecl __mbsrtowcs_utf8(wchar_t* dst, const char** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd);
145 size_t __cdecl __wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd);
146
147 constexpr size_t INVALID = static_cast<size_t>(-1);
148 constexpr size_t INCOMPLETE = static_cast<size_t>(-2);
149
150 size_t return_illegal_sequence(mbstate_t* ps, __crt_cached_ptd_host& ptd);
151 size_t reset_and_return(size_t retval, mbstate_t* ps);
152 }
153