xref: /reactos/sdk/lib/ucrt/mbstring/mbctype.cpp (revision e98e9000)
1 /***
2 *mbctype.c - MBCS table used by the functions that test for types of char
3 *
4 *       Copyright (c) Microsoft Corporation.  All rights reserved.
5 *
6 *Purpose:
7 *       table used to determine the type of char
8 *
9 *******************************************************************************/
10 #include <corecrt_internal.h>
11 #include <locale.h>
12 #include <corecrt_internal_mbstring.h>
13 #include <mbctype.h>
14 #include <winnls.h>
15 
16 #ifndef CRTDLL
17 
18 _CRT_LINKER_FORCE_INCLUDE(__acrt_multibyte_initializer);
19 
20 #endif  /* CRTDLL */
21 
/* Code page numbers used as keys in the built-in code page table in this file. */
#define _CHINESE_SIMP_CP    936
#define _KOREAN_WANGSUNG_CP 949
#define _CHINESE_TRAD_CP    950
#define _KOREAN_JOHAB_CP    1361

#define NUM_CHARS 257 /* -1 through 255 */

#define NUM_CTYPES 4 /* table contains 4 types of info */
#define MAX_RANGES 8 /* max number of ranges needed given languages so far */

/* character type info in ranges (pair of low/high), zeros indicate end */
typedef struct
{
    /* code page number this entry describes */
    int             code_page;
    /* full-width Latin upper-case ranges and case differences (copied into mbulinfo) */
    unsigned short  mbulinfo[NUM_ULINFO];
    /* one row of low/high byte pairs per character type; a zero pair ends the row */
    unsigned char   rgrange[NUM_CTYPES][MAX_RANGES];
} code_page_info;
39 
extern "C"
{
/*
 * Initial multibyte data. setSBCS() copies its mbctype and mbcasemap tables
 * into a structure that is being reset to the single-byte state, and the
 * reference-counting code in this file never frees this object.
 *
 * mbctype is indexed by (character + 1) because slot 0 stands for -1 (EOF):
 * entries for 'A'-'Z' hold 0x10 and entries for 'a'-'z' hold 0x20
 * (presumably _SBUP / _SBLOW -- confirm against mbctype.h).
 * mbcasemap is indexed by the character itself: 'A'-'Z' map to 'a'-'z' and
 * 'a'-'z' map to 'A'-'Z'; every other entry is zero.
 */
__crt_multibyte_data __acrt_initial_multibyte_data =
{
    0,                       /* refcount */
    CP_ACP,                  /* mbcodepage: _MB_CP_ANSI */
    0,                       /* ismbcodepage */
    { 0, 0, 0, 0, 0, 0 },    /* mbulinfo[6] */
    {                        /* mbctype[257] */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00 /* rest is zero */
    },
    {     /* mbcasemap[256] */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b,
    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00 /* rest is zero */
    },
    nullptr /* mblocalename */
};

/* Initializer for one row of _mbctypes; same values as the mbctype table above. */
#define _MBCTYPE_DEFAULT                                                        \
    {                                                                           \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, \
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, \
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, \
        0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00 /* rest is zero */       \
    }

/* MBCS ctype array */
/* One row per state index; the second row exists only with _CRT_GLOBAL_STATE_ISOLATION. */
static unsigned char _mbctypes[__crt_state_management::state_index_count][NUM_CHARS] =
{
    _MBCTYPE_DEFAULT
    #ifdef _CRT_GLOBAL_STATE_ISOLATION
    ,_MBCTYPE_DEFAULT
    #endif
};

/* Initializer for one row of _mbcasemaps; same values as the mbcasemap table above. */
#define _MBCASEMAP_DEFAULT                                                      \
    {                                                                           \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, \
        0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, \
        0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, \
        0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, \
        0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, \
        0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00 /* rest is zero */       \
    }

/* Case-map array; one row per state index, laid out like _mbctypes above. */
static unsigned char _mbcasemaps[__crt_state_management::state_index_count][256] =
{
    _MBCASEMAP_DEFAULT
    #ifdef _CRT_GLOBAL_STATE_ISOLATION
    ,_MBCASEMAP_DEFAULT
    #endif
};

/* global pointer to the multi-byte case type (i.e. upper or lower or n/a) */
__crt_state_management::dual_state_global<unsigned char*> _mbctype;

/* global pointer to the multi-byte casemap */
__crt_state_management::dual_state_global<unsigned char*> _mbcasemap;

/* global pointer to the current per-thread mbc information structure. */
__crt_state_management::dual_state_global<__crt_multibyte_data*> __acrt_current_multibyte_data;
}
133 
/*
 * Written by the non-enclave getSystemCP(): 1 when the request was one of the
 * _MB_CP_OEM / _MB_CP_ANSI / _MB_CP_LOCALE selectors, 0 otherwise.
 * _setmbcp_nolock() reads it to fall back to SBCS instead of failing.
 * NOTE(review): plain file-scope variable shared by all threads.
 */
static int fSystemSet;

/* Flag OR-ed into mbctype for each row of code_page_info::rgrange, in row order. */
static char __rgctypeflag[NUM_CTYPES] = { _MS, _MP, _M1, _M2 };
137 
/*
 * Built-in descriptions of the code pages handled without querying the OS.
 * _setmbcp_nolock() searches this table by code_page; for a match it ORs the
 * __rgctypeflag value of each row into mbctype for every byte in that row's
 * ranges and copies mbulinfo verbatim.
 */
static code_page_info __rgcode_page_info[] =
{
    {
      _KANJI_CP, /* Kanji (Japanese) Code Page */
      { 0x8260, 0x8279,   /* Full-Width Latin Upper Range 1 */
        0x8281 - 0x8260,  /* Full-Width Latin Case Difference 1 */

        0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
        0x0000            /* Full-Width Latin Case Difference 2 */
      },
      {
        { 0xA6, 0xDF, 0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
        { 0xA1, 0xA5, 0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
        { 0x81, 0x9F, 0xE0, 0xFC, 0,    0,    0, 0, }, /* Lead Byte Ranges */
        { 0x40, 0x7E, 0x80, 0xFC, 0,    0,    0, 0, }, /* Trail Byte Ranges */
      }
    },
    {
      _CHINESE_SIMP_CP, /* Chinese Simplified (PRC) Code Page */
      { 0xA3C1, 0xA3DA,   /* Full-Width Latin Upper Range 1 */
        0xA3E1 - 0xA3C1,  /* Full-Width Latin Case Difference 1 */

        0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
        0x0000            /* Full-Width Latin Case Difference 2 */
      },
      {
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
        { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
        { 0x40, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Trail Byte Ranges */
      }
    },
    {
      _KOREAN_WANGSUNG_CP, /* Wansung (Korean) Code Page */
      { 0xA3C1, 0xA3DA,   /* Full-Width Latin Upper Range 1 */
        0xA3E1 - 0xA3C1,  /* Full-Width Latin Case Difference 1 */

        0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
        0x0000            /* Full-Width Latin Case Difference 2 */
      },
      {
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
        { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
        { 0x41, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Trail Byte Ranges */
      }
    },
    {
      _CHINESE_TRAD_CP, /* Chinese Traditional (Taiwan) Code Page */
      { 0xA2CF, 0xA2E4,   /* Full-Width Latin Upper Range 1 */
        0xA2E9 - 0xA2CF,  /* Full-Width Latin Case Difference 1 */

        0xA2E5, 0xA2E8,   /* Full-Width Latin Upper Range 2 */
        0xA340 - 0XA2E5   /* Full-Width Latin Case Difference 2 */
      },
      {
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
        { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
        { 0x40, 0x7E, 0xA1, 0xFE, 0,    0,    0, 0, }, /* Trail Byte Ranges */
      }
    },
    {
      _KOREAN_JOHAB_CP, /* Johab (Korean) Code Page */
      { 0xDA51, 0xDA5E,   /* Full-Width Latin Upper Range 1 */
        0xDA71 - 0xDA51,  /* Full-Width Latin Case Difference 1 */

        0xDA5F, 0xDA6A,   /* Full-Width Latin Upper Range 2 */
        0xDA91 - 0xDA5F   /* Full-Width Latin Case Difference 2 */
      },
      {
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
        { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
        { 0x81, 0xD3, 0xD8, 0xDE, 0xE0, 0xF9, 0, 0, }, /* Lead Byte Ranges */
        { 0x31, 0x7E, 0x81, 0xFE, 0,    0,    0, 0, }, /* Trail Byte Ranges */
      }
    }
};
216 
/* Indices into _mb_locale_names below; the order of both lists must stay in sync. */
#define JAPANSE_DEFAULT_LOCALE_NAME_INDEX         0
#define CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX      1
#define KOREAN_DEFAULT_LOCALE_NAME_INDEX          2
#define CHINESE_TRADITIONAL_LOCALE_NAME_INDEX     3

/* Locale names returned by CPtoLocaleName(); static storage, never freed. */
const wchar_t* const _mb_locale_names[] =
{
    L"ja-JP",   /* JAPANSE_DEFAULT_LOCALE_NAME_INDEX     */
    L"zh-CN",   /* CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX  */
    L"ko-KR",   /* KOREAN_DEFAULT_LOCALE_NAME_INDEX      */
    L"zh-TW",   /* CHINESE_TRADITIONAL_LOCALE_NAME_INDEX */
};
229 
230 
231 
__p__mbctype()232 extern "C" unsigned char* __cdecl __p__mbctype()
233 {
234     return _mbctype.value();
235 }
236 
__p__mbcasemap()237 extern "C" unsigned char* __cdecl __p__mbcasemap()
238 {
239     return _mbcasemap.value();
240 }
241 
242 
243 
/* Forward declarations: both are defined further down and used by setmbcp_internal(). */
extern "C" int __cdecl _setmbcp_nolock(int, __crt_multibyte_data*);

static int getSystemCP (int);
247 
248 /***
249 *setSBCS() - Set MB code page to SBCS.
250 *
251 *Purpose:
252 *           Set MB code page to SBCS.
253 *Entry:
254 *
255 *Exit:
256 *
257 *Exceptions:
258 *
259 *******************************************************************************/
260 
setSBCS(__crt_multibyte_data * ptmbci)261 static void setSBCS (__crt_multibyte_data* ptmbci)
262 {
263     int i;
264 
265     /* set for single-byte code page */
266     for (i = 0; i < NUM_CHARS; i++)
267         ptmbci->mbctype[i] = 0;
268 
269     /* code page has changed, set global flag */
270     ptmbci->mbcodepage = 0;
271 
272     /* clear flag to indicate single-byte code */
273     ptmbci->ismbcodepage = 0;
274 
275     ptmbci->mblocalename = nullptr;
276 
277     for (i = 0; i < NUM_ULINFO; i++)
278         ptmbci->mbulinfo[i] = 0;
279 
280     for ( i = 0 ; i < 257 ; i++ )
281         ptmbci->mbctype[i] = __acrt_initial_multibyte_data.mbctype[i];
282 
283     for ( i = 0 ; i < 256 ; i++ )
284         ptmbci->mbcasemap[i] = __acrt_initial_multibyte_data.mbcasemap[i];
285 }
286 
287 /***
288 *__acrt_update_thread_multibyte_data() - refresh the thread's mbc info
289 *
290 *Purpose:
291 *       Update the current thread's reference to the multibyte character
292 *       information to match the current global mbc info. Decrement the
293 *       reference on the old mbc information struct and if this count is now
294 *       zero (so that no threads are using it), free it.
295 *
296 *Entry:
297 *
298 *Exit:
299 *       _getptd()->ptmbcinfo == current_multibyte_data (which should always be __acrt_current_multibyte_data)
300 *
301 *Exceptions:
302 *
303 *******************************************************************************/
304 
update_thread_multibyte_data_internal(__acrt_ptd * const ptd,__crt_multibyte_data ** const current_multibyte_data)305 static __crt_multibyte_data* __cdecl update_thread_multibyte_data_internal(
306     __acrt_ptd*           const ptd,
307     __crt_multibyte_data** const current_multibyte_data
308     ) throw()
309 {
310         __crt_multibyte_data* ptmbci = nullptr;
311 
312         if (__acrt_should_sync_with_global_locale(ptd) || ptd->_locale_info == nullptr)
313         {
314             __acrt_lock(__acrt_multibyte_cp_lock);
315             __try
316             {
317                 ptmbci = ptd->_multibyte_info;
318                 if (ptmbci != *current_multibyte_data)
319                 {
320                     /*
321                      * Decrement the reference count in the old mbc info structure
322                      * and free it, if necessary
323                      */
324                     if (ptmbci != nullptr &&
325                         InterlockedDecrement(&ptmbci->refcount) == 0 &&
326                         ptmbci != &__acrt_initial_multibyte_data)
327                     {
328                         /*
329                          * Free it
330                          */
331                         _free_crt(ptmbci);
332                     }
333 
334                     /*
335                      * Point to the current mbc info structure and increment its
336                      * reference count.
337                      */
338                     ptmbci = ptd->_multibyte_info = *current_multibyte_data;
339                     InterlockedIncrement(&ptmbci->refcount);
340                 }
341             }
342             __finally
343             {
344                 __acrt_unlock(__acrt_multibyte_cp_lock);
345             }
346             __endtry
347         }
348         else
349         {
350             ptmbci = ptd->_multibyte_info;
351         }
352 
353         if (!ptmbci)
354         {
355             abort();
356         }
357 
358         return ptmbci;
359 }
360 
__acrt_update_thread_multibyte_data()361 extern "C" __crt_multibyte_data* __cdecl __acrt_update_thread_multibyte_data()
362 {
363     return update_thread_multibyte_data_internal(__acrt_getptd(), &__acrt_current_multibyte_data.value());
364 }
365 
366 /***
367 *_setmbcp() - Set MBC data based on code page
368 *
369 *Purpose:
370 *       Init MBC character type tables based on code page number. If
371 *       given code page is supported, load that code page info into
372 *       mbctype table. If not, query OS to find the information,
373 *       otherwise set up table with single byte info.
374 *
375 *       Multithread Notes: First, allocate an mbc information struct. Set the
376 *       mbc info in the static vars and arrays as does the single-thread
377 *       version. Then, copy this info into the new allocated struct and set
378 *       the current mbc info pointer (__acrt_current_multibyte_data) to point to it.
379 *
380 *Entry:
381 *       codepage - code page to initialize MBC table
382 *           _MB_CP_OEM = use system OEM code page
383 *           _MB_CP_ANSI = use system ANSI code page
384 *           _MB_CP_SBCS = set to single byte 'code page'
385 *
386 *Exit:
387 *        0 = Success
388 *       -1 = Error, code page not changed.
389 *
390 *Exceptions:
391 *
392 *******************************************************************************/
393 
setmbcp_internal(int const requested_codepage,bool const is_for_crt_initialization,__acrt_ptd * const ptd,__crt_multibyte_data ** const current_multibyte_data)394 static int __cdecl setmbcp_internal(
395     int                    const requested_codepage,
396     bool                   const is_for_crt_initialization,
397     __acrt_ptd*            const ptd,
398     __crt_multibyte_data** const current_multibyte_data
399     ) throw()
400 {
401     update_thread_multibyte_data_internal(ptd, current_multibyte_data);
402     int const system_codepage = getSystemCP(requested_codepage);
403 
404     // If it's not a new codepage, just return success:
405     if (system_codepage == ptd->_multibyte_info->mbcodepage)
406     {
407         return 0;
408     }
409 
410     // Always allocate space so that we don't have to take a lock for any update:
411     __crt_unique_heap_ptr<__crt_multibyte_data> mb_data(_malloc_crt_t(__crt_multibyte_data, 1));
412     if (!mb_data)
413     {
414         return -1;
415     }
416 
417     // Initialize the new multibyte data structure from the current multibyte
418     // data structure for this thread, resetting the reference count (since it
419     // is not actually referenced by anything yet).
420     *mb_data.get() = *ptd->_multibyte_info;
421     mb_data.get()->refcount = 0;
422 
423     // Actually initialize the new multibyte data using the new codepage:
424     // CRT_REFACTOR TODO _setmbcp_nolock is a terrible name.
425     int const setmbcp_status = _setmbcp_nolock(system_codepage, mb_data.get());
426     if (setmbcp_status == -1)
427     {
428         errno = EINVAL;
429         return -1;
430     }
431 
432     // At this point, we have a valid, new set of multibyte data to swap in.  If
433     // this is not the initial codepage initialization during process startup,
434     // we need to toggle the locale-changed state:
435     if (!is_for_crt_initialization)
436     {
437         __acrt_set_locale_changed();
438     }
439 
440     if (InterlockedDecrement(&ptd->_multibyte_info->refcount) == 0 &&
441         ptd->_multibyte_info != &__acrt_initial_multibyte_data)
442     {
443         _free_crt(ptd->_multibyte_info);
444     }
445 
446     // Update the multibyte codepage for this thread:
447     mb_data.get()->refcount = 1;
448     ptd->_multibyte_info = mb_data.detach();
449 
450     // If this thread has its own locale, do not update the global codepage:
451     if (!__acrt_should_sync_with_global_locale(ptd))
452     {
453         return setmbcp_status;
454     }
455 
456     // Otherwise, update the global codepage:
457     __acrt_lock_and_call(__acrt_multibyte_cp_lock, [&]
458     {
459         memcpy_s(_mbctype.value(),   sizeof(_mbctypes[0]),   ptd->_multibyte_info->mbctype,   sizeof(ptd->_multibyte_info->mbctype));
460         memcpy_s(_mbcasemap.value(), sizeof(_mbcasemaps[0]), ptd->_multibyte_info->mbcasemap, sizeof(ptd->_multibyte_info->mbcasemap));
461 
462         if (InterlockedDecrement(&(*current_multibyte_data)->refcount) == 0 &&
463             (*current_multibyte_data) != &__acrt_initial_multibyte_data)
464         {
465             _free_crt(*current_multibyte_data);
466         }
467 
468         *current_multibyte_data = ptd->_multibyte_info;
469         InterlockedIncrement(&ptd->_multibyte_info->refcount);
470     });
471 
472     if (is_for_crt_initialization)
473     {
474         __acrt_initial_locale_pointers.mbcinfo = *current_multibyte_data;
475     }
476 
477     return setmbcp_status;
478 }
479 
480 /* Enclaves only support built-in CP_ACP */
481 #ifdef _UCRT_ENCLAVE_BUILD
482 
getSystemCP(int)483 static int getSystemCP(int)
484 {
485     return CP_ACP;
486 }
487 
488 
_setmbcp_nolock(int,__crt_multibyte_data * ptmbci)489 extern "C" int __cdecl _setmbcp_nolock(int, __crt_multibyte_data* ptmbci)
490 {
491     setSBCS(ptmbci);
492     return 0;
493 }
494 
495 #else /* ^^^ _UCRT_ENCLAVE_BUILD ^^^ // vvv !_UCRT_ENCLAVE_BUILD vvv */
496 
497     /***
498 *CPtoLocaleName() - Code page to locale name.
499 *
500 *Purpose:
501 *       Some API calls want a locale name, so convert MB CP to appropriate locale name,
502 *       and then API converts back to ANSI CP for that locale name.
503 *
504 *Entry:
505 *   codepage - code page to convert
506 *Exit:
507 *       returns appropriate locale name
508 *       Returned locale names are stored in static structs, so they must not be deleted.
509 *
510 *Exceptions:
511 *
512 *******************************************************************************/
513 
CPtoLocaleName(int codepage)514 static const wchar_t* CPtoLocaleName (int codepage)
515 {
516     switch (codepage) {
517     case 932:
518         return _mb_locale_names[JAPANSE_DEFAULT_LOCALE_NAME_INDEX];
519     case 936:
520         return _mb_locale_names[CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX];
521     case 949:
522         return _mb_locale_names[KOREAN_DEFAULT_LOCALE_NAME_INDEX];
523     case 950:
524         return _mb_locale_names[CHINESE_TRADITIONAL_LOCALE_NAME_INDEX];
525     }
526 
527     return 0;
528 }
529 
530 /***
531 *getSystemCP - Get system default CP if requested.
532 *
533 *Purpose:
534 *       Get system default CP if requested.
535 *
536 *Entry:
537 *       codepage - user requested code page/world script
538 *
539 *       Docs specify:
540 *          _MB_CP_SBCS    0 - Use a single byte codepage
541 *          _MB_CP_OEM    -2 - use the OEMCP
542 *          _MB_CP_ANSI   -3 - use the ACP
543 *          _MB_CP_LOCALE -4 - use the codepage for a previous setlocale call
544 *          Codepage #       - use the specified codepage (UTF-7 is disallowed)
545 *                           - 54936 and other interesting stateful codepages aren't
546 *                           - explicitly disallowed but I can't imagine them working right.
547 *
548 *Exit:
549 *       requested code page
550 *
551 *Exceptions:
552 *
553 *******************************************************************************/
getSystemCP(int codepage)554 static int getSystemCP(int codepage)
555 {
556     _locale_t plocinfo = nullptr;
557     _LocaleUpdate _loc_update(plocinfo);
558     fSystemSet = 0;
559 
560     /* get system code page values if requested */
561 
562     if (codepage == _MB_CP_OEM)
563     {
564         fSystemSet = 1;
565         return GetOEMCP();
566     }
567     else if (codepage == _MB_CP_ANSI)
568     {
569         fSystemSet = 1;
570         return GetACP();
571     }
572     else if (codepage == _MB_CP_LOCALE)
573     {
574         fSystemSet = 1;
575         return _loc_update.GetLocaleT()->locinfo->_public._locale_lc_codepage;
576     }
577 
578     return codepage;
579 }
580 
581 /***
582 *setSBUpLow() - Set single byte range upper/lower mappings
583 *
584 *Purpose:
585 *           Set single byte mapping for tolower/toupper.
586 *           Basically this is ASCII-mapping plus a few if you're a lucky
587 *           SBCS codepage.
588 *           DBCS + UTF ranges > 0x7f are basically ignored.
589 *
590 *Entry:
591 *
592 *Exit:
593 *
594 *Exceptions:
595 *
596 *******************************************************************************/
597 
setSBUpLow(__crt_multibyte_data * ptmbci)598 static void setSBUpLow (__crt_multibyte_data* ptmbci)
599 {
600     BYTE *  pbPair;
601     UINT    ich;
602     CPINFO  cpInfo;
603     UCHAR   sbVector[256];
604     UCHAR   upVector[256];
605     UCHAR   lowVector[256];
606     USHORT  wVector[512];
607 
608     //    test if codepage exists
609     if (ptmbci->mbcodepage != CP_UTF8 && GetCPInfo(ptmbci->mbcodepage, &cpInfo) != 0)
610     {
611         // This code attempts to generate casing tables for characters 0-255
612         // For DBCS codepages that will be basically ASCII casing but won't help DBCS mapping.
613         // For SBCS codepages that will include the codepage-specific characters.
614         // Mappings do not appear to include Turkish-i variations.
615 
616         //  if so, create vector 0-255
617         for (ich = 0; ich < 256; ich++)
618             sbVector[ich] = (UCHAR) ich;
619 
620         //  set byte 0 and any leading byte value to non-alpha char ' '
621         sbVector[0] = (UCHAR)' ';
622         for (pbPair = &cpInfo.LeadByte[0]; *pbPair; pbPair += 2)
623             // make sure ich within a valid range
624             for (ich = *pbPair; ich <= *(pbPair + 1) && ich < 256; ich++)
625                 sbVector[ich] = (UCHAR)' ';
626 
627         //  get char type for character vector
628 
629         __acrt_GetStringTypeA(nullptr, CT_CTYPE1, (LPCSTR)sbVector, 256, wVector,
630                             ptmbci->mbcodepage, FALSE);
631 
632         //  get lower case mappings for character vector
633 
634         __acrt_LCMapStringA(nullptr, ptmbci->mblocalename, LCMAP_LOWERCASE, (LPCSTR)sbVector, 256,
635                                     (LPSTR)lowVector, 256, ptmbci->mbcodepage, FALSE);
636 
637         //  get upper case mappings for character vector
638 
639         __acrt_LCMapStringA(nullptr, ptmbci->mblocalename, LCMAP_UPPERCASE, (LPCSTR)sbVector, 256,
640                                     (LPSTR)upVector, 256, ptmbci->mbcodepage, FALSE);
641 
642         //  set _SBUP, _SBLOW in ptmbci->mbctype if type is upper. lower
643         //  set mapping array with lower or upper mapping value
644 
645         for (ich = 0; ich < 256; ich++)
646         {
647             if (wVector[ich] & _UPPER)
648             {
649                 // WARNING: +1 because the mbctype array starts with a -1 EOF character
650                 ptmbci->mbctype[ich + 1] |= _SBUP;
651                 ptmbci->mbcasemap[ich] = lowVector[ich];
652             }
653             else if (wVector[ich] & _LOWER)
654             {
655                 // WARNING: +1 because the mbctype array starts with a -1 EOF character
656                 ptmbci->mbctype[ich + 1] |= _SBLOW;
657                 ptmbci->mbcasemap[ich] = upVector[ich];
658             }
659             else
660                 ptmbci->mbcasemap[ich] = 0;
661         }
662     }
663     else
664     {
665         //  Either no codepage or UTF-8 (which looks a lot like ASCII in the lower bits)
666         //  Set 'A'-'Z' as upper, 'a'-'z' as lower (eg: ASCII casing)
667         for (ich = 0; ich < 256; ich++)
668         {
669             if (ich >= (UINT)'A' && ich <= (UINT)'Z')
670             {
671                 // WARNING: +1 because the mbctype array starts with a -1 EOF character
672                 ptmbci->mbctype[ich + 1] |= _SBUP;
673                 ptmbci->mbcasemap[ich] = static_cast<unsigned char>(ich + ('a' - 'A'));
674             }
675             else if (ich >= (UINT)'a' && ich <= (UINT)'z')
676             {
677                 // WARNING: +1 because the mbctype array starts with a -1 EOF character
678                 ptmbci->mbctype[ich + 1] |= _SBLOW;
679                 ptmbci->mbcasemap[ich] = static_cast<unsigned char>(ich - ('a' - 'A'));
680             }
681             else
682                 ptmbci->mbcasemap[ich] = 0;
683         }
684     }
685 }
686 
_setmbcp(int const codepage)687 extern "C" int __cdecl _setmbcp(int const codepage)
688 {
689     return setmbcp_internal(codepage, false, __acrt_getptd(), &__acrt_current_multibyte_data.value());
690 }
691 
_setmbcp_nolock(int codepage,__crt_multibyte_data * ptmbci)692 extern "C" int __cdecl _setmbcp_nolock(int codepage, __crt_multibyte_data* ptmbci)
693 {
694         unsigned int icp;
695         unsigned int irg;
696         unsigned int ich;
697         unsigned char *rgptr;
698         CPINFO cpInfo;
699 
700         codepage = getSystemCP(codepage);
701 
702         /* user wants 'single-byte' MB code page */
703         if (codepage == _MB_CP_SBCS)
704         {
705             setSBCS(ptmbci);
706             return 0;
707         }
708 
709         /* check for CRT code page info */
710         for (icp = 0;
711             icp < (sizeof(__rgcode_page_info) / sizeof(code_page_info));
712             icp++)
713         {
714             /* see if we have info for this code page */
715             if (__rgcode_page_info[icp].code_page == codepage)
716             {
717                 /* clear the table */
718                 for (ich = 0; ich < NUM_CHARS; ich++)
719                     ptmbci->mbctype[ich] = 0;
720 
721                 /* for each type of info, load table */
722                 for (irg = 0; irg < NUM_CTYPES; irg++)
723                 {
724                     /* go through all the ranges for each type of info */
725                     for (rgptr = (unsigned char *)__rgcode_page_info[icp].rgrange[irg];
726                         rgptr[0] && rgptr[1];
727                         rgptr += 2)
728                     {
729                         /* set the type for every character in range */
730                         for (ich = rgptr[0]; ich <= rgptr[1] && ich < 256; ich++)
731                             ptmbci->mbctype[ich + 1] |= __rgctypeflag[irg];
732                     }
733                 }
734                 /* code page has changed */
735                 ptmbci->mbcodepage = codepage;
736                 /* all the code pages we keep info for are truly multibyte */
737                 ptmbci->ismbcodepage = 1;
738                 ptmbci->mblocalename = CPtoLocaleName(ptmbci->mbcodepage);
739                 for (irg = 0; irg < NUM_ULINFO; irg++)
740                 {
741                     ptmbci->mbulinfo[irg] = __rgcode_page_info[icp].mbulinfo[irg];
742                 }
743 
744                 /* return success */
745                 setSBUpLow(ptmbci);
746                 return 0;
747             }
748         }
749 
750         /*  verify codepage validity */
751         // Unclear why UTF7 is excluded yet stateful and other complex encodings are not
752         if (codepage == 0 || codepage == CP_UTF7 || !IsValidCodePage((WORD)codepage))
753         {
754             /* return failure, code page not changed */
755             return -1;
756         }
757 
758         // Special case for UTF-8
759         if (codepage == CP_UTF8)
760         {
761             ptmbci->mbcodepage = CP_UTF8;
762             ptmbci->mblocalename = nullptr;
763 
764             // UTF-8 does not have lead or trail bytes in the terms
765             // the CRT thinks of it for DBCS codepages, so we'll
766             // clear the flags for all bytes.
767             // Note that this array is 257 bytes because there's a
768             // "-1" that is used someplaces for EOF.  So this array
769             // is actually -1 based.
770             for (ich = 0; ich < NUM_ULINFO; ich++)
771             {
772                 ptmbci->mbctype[ich] = 0;
773             }
774 
775             // not really a multibyte code page, we'll have to test
776             // ptmbci->mbcodepage == CP_UTF8 when we use this structure.
777             ptmbci->ismbcodepage = 0;
778 
779             // CJK encodings have some full-width mappings, but not here.
780             for (irg = 0; irg < NUM_ULINFO; irg++)
781             {
782                 ptmbci->mbulinfo[irg] = 0;
783             }
784 
785             setSBUpLow(ptmbci);
786 
787             // return success
788             return 0;
789         }
790         /* code page not supported by CRT, try the OS */\
791         else if (GetCPInfo(codepage, &cpInfo) != 0)
792         {
793             BYTE *lbptr;
794 
795             /* clear the table */
796             for (ich = 0; ich < NUM_CHARS; ich++)
797             {
798                 ptmbci->mbctype[ich] = 0;
799             }
800 
801             ptmbci->mbcodepage = codepage;
802             ptmbci->mblocalename = nullptr;
803 
804             // Special case for DBCS where we know there may be a leadbyte/trailbyte pattern
805             if (cpInfo.MaxCharSize == 2)
806             {
807                 /* LeadByte range always terminated by two 0's */
808                 for (lbptr = cpInfo.LeadByte; *lbptr && *(lbptr + 1); lbptr += 2)
809                 {
810                     for (ich = *lbptr; ich <= *(lbptr + 1); ich++)
811                         ptmbci->mbctype[ich + 1] |= _M1;
812                 }
813 
814                 /* All chars > 1 must be considered valid trail bytes */
815                 for (ich = 0x01; ich < 0xFF; ich++)
816                 {
817                     ptmbci->mbctype[ich + 1] |= _M2;
818                 }
819 
820                 /* code page has changed */
821                 ptmbci->mblocalename = CPtoLocaleName(ptmbci->mbcodepage);
822 
823                 /* really a multibyte code page */
824                 ptmbci->ismbcodepage = 1;
825             }
826             else
827             {
828                 /* single-byte code page */
829                 ptmbci->ismbcodepage = 0;
830             }
831 
832             for (irg = 0; irg < NUM_ULINFO; irg++)
833             {
834                 ptmbci->mbulinfo[irg] = 0;
835             }
836 
837             setSBUpLow(ptmbci);
838             /* return success */
839             return 0;
840         }
841 
842 
843         /* If system default call, don't fail - set to SBCS */
844         if (fSystemSet)
845         {
846             setSBCS(ptmbci);
847             return 0;
848         }
849 
850         /* return failure, code page not changed */
851         return -1;
852 }
853 
854 #endif /* _UCRT_ENCLAVE_BUILD */
855 
856 /***
857 *_getmbcp() - Get the current MBC code page
858 *
859 *Purpose:
860 *           Get code page value.
861 *Entry:
862 *       none.
863 *Exit:
864 *           return current MB codepage value.
865 *
866 *Exceptions:
867 *
868 *******************************************************************************/
869 
_getmbcp()870 extern "C" int __cdecl _getmbcp()
871 {
872     _locale_t plocinfo = nullptr;
873     _LocaleUpdate _loc_update(plocinfo);
874     if ( _loc_update.GetLocaleT()->mbcinfo->ismbcodepage )
875         return _loc_update.GetLocaleT()->mbcinfo->mbcodepage;
876     else
877         return 0;
878 }
879 
880 
/***
*__acrt_initialize_multibyte() - Set MB ctype table to initial default value.
*
*Purpose:
*       Initialization.
*Entry:
*       none.
*Exit:
*       Always returns true.
*Exceptions:
*
*******************************************************************************/
893 
__acrt_initialize_multibyte()894 extern "C" bool __cdecl __acrt_initialize_multibyte()
895 {
896     static bool initialized = false;
897 
898     // Synchronization note:  it is not possible for a data race to occur here.
899     // In the CRT DLLs, this function is called during CRT startup, befor any
900     // user code using the CRT may run.  In the static CRT, this function is
901     // called by a CRT initializer (at the top of this file), so 'initialized'
902     // will be true before any user code can enter the CRT.
903     //
904     // CRT_REFACTOR TODO We should split this function into two parts:  one that
905     // does the initialization (without any check), and one that does nothing,
906     // but can be used to cause this object to be linked in.
907     if (!initialized)
908     {
909         // initialize global pointer to the current per-thread mbc information structure
910         __acrt_current_multibyte_data.initialize(&__acrt_initial_multibyte_data);
911 
912         // initialize mbc pointers
913         _mbcasemap.initialize_from_array(_mbcasemaps);
914         _mbctype  .initialize_from_array(_mbctypes);
915 
916         // initialize the multibyte globals
917         __acrt_ptd* const ptd_head = __acrt_getptd_head();
918         for (size_t i = 0; i != __crt_state_management::state_index_count; ++i)
919         {
920             setmbcp_internal(_MB_CP_ANSI, true, ptd_head + i, &__acrt_current_multibyte_data.dangerous_get_state_array()[i]);
921         }
922 
923         initialized = 1;
924     }
925 
926     return true;
927 }
928 
929 
930 /************************ Code Page info from NT/Win95 ********************
931 
932 
933 *** Code Page 932 ***
934 
935 0x824f  ;Fullwidth Digit Zero
936 0x8250  ;Fullwidth Digit One
937 0x8251  ;Fullwidth Digit Two
938 0x8252  ;Fullwidth Digit Three
939 0x8253  ;Fullwidth Digit Four
940 0x8254  ;Fullwidth Digit Five
941 0x8255  ;Fullwidth Digit Six
942 0x8256  ;Fullwidth Digit Seven
943 0x8257  ;Fullwidth Digit Eight
944 0x8258  ;Fullwidth Digit Nine
945 
946 0x8281  0x8260  ;Fullwidth Small A -> Fullwidth Capital A
947 0x8282  0x8261  ;Fullwidth Small B -> Fullwidth Capital B
948 0x8283  0x8262  ;Fullwidth Small C -> Fullwidth Capital C
949 0x8284  0x8263  ;Fullwidth Small D -> Fullwidth Capital D
950 0x8285  0x8264  ;Fullwidth Small E -> Fullwidth Capital E
951 0x8286  0x8265  ;Fullwidth Small F -> Fullwidth Capital F
952 0x8287  0x8266  ;Fullwidth Small G -> Fullwidth Capital G
953 0x8288  0x8267  ;Fullwidth Small H -> Fullwidth Capital H
954 0x8289  0x8268  ;Fullwidth Small I -> Fullwidth Capital I
955 0x828a  0x8269  ;Fullwidth Small J -> Fullwidth Capital J
956 0x828b  0x826a  ;Fullwidth Small K -> Fullwidth Capital K
957 0x828c  0x826b  ;Fullwidth Small L -> Fullwidth Capital L
958 0x828d  0x826c  ;Fullwidth Small M -> Fullwidth Capital M
959 0x828e  0x826d  ;Fullwidth Small N -> Fullwidth Capital N
960 0x828f  0x826e  ;Fullwidth Small O -> Fullwidth Capital O
961 0x8290  0x826f  ;Fullwidth Small P -> Fullwidth Capital P
962 0x8291  0x8270  ;Fullwidth Small Q -> Fullwidth Capital Q
963 0x8292  0x8271  ;Fullwidth Small R -> Fullwidth Capital R
964 0x8293  0x8272  ;Fullwidth Small S -> Fullwidth Capital S
965 0x8294  0x8273  ;Fullwidth Small T -> Fullwidth Capital T
966 0x8295  0x8274  ;Fullwidth Small U -> Fullwidth Capital U
967 0x8296  0x8275  ;Fullwidth Small V -> Fullwidth Capital V
968 0x8297  0x8276  ;Fullwidth Small W -> Fullwidth Capital W
969 0x8298  0x8277  ;Fullwidth Small X -> Fullwidth Capital X
970 0x8299  0x8278  ;Fullwidth Small Y -> Fullwidth Capital Y
971 0x829a  0x8279  ;Fullwidth Small Z -> Fullwidth Capital Z
972 
973 
974 *** Code Page 936 ***
975 
976 0xa3b0  ;Fullwidth Digit Zero
977 0xa3b1  ;Fullwidth Digit One
978 0xa3b2  ;Fullwidth Digit Two
979 0xa3b3  ;Fullwidth Digit Three
980 0xa3b4  ;Fullwidth Digit Four
981 0xa3b5  ;Fullwidth Digit Five
982 0xa3b6  ;Fullwidth Digit Six
983 0xa3b7  ;Fullwidth Digit Seven
984 0xa3b8  ;Fullwidth Digit Eight
985 0xa3b9  ;Fullwidth Digit Nine
986 
987 0xa3e1  0xa3c1  ;Fullwidth Small A -> Fullwidth Capital A
988 0xa3e2  0xa3c2  ;Fullwidth Small B -> Fullwidth Capital B
989 0xa3e3  0xa3c3  ;Fullwidth Small C -> Fullwidth Capital C
990 0xa3e4  0xa3c4  ;Fullwidth Small D -> Fullwidth Capital D
991 0xa3e5  0xa3c5  ;Fullwidth Small E -> Fullwidth Capital E
992 0xa3e6  0xa3c6  ;Fullwidth Small F -> Fullwidth Capital F
993 0xa3e7  0xa3c7  ;Fullwidth Small G -> Fullwidth Capital G
994 0xa3e8  0xa3c8  ;Fullwidth Small H -> Fullwidth Capital H
995 0xa3e9  0xa3c9  ;Fullwidth Small I -> Fullwidth Capital I
996 0xa3ea  0xa3ca  ;Fullwidth Small J -> Fullwidth Capital J
997 0xa3eb  0xa3cb  ;Fullwidth Small K -> Fullwidth Capital K
998 0xa3ec  0xa3cc  ;Fullwidth Small L -> Fullwidth Capital L
999 0xa3ed  0xa3cd  ;Fullwidth Small M -> Fullwidth Capital M
1000 0xa3ee  0xa3ce  ;Fullwidth Small N -> Fullwidth Capital N
1001 0xa3ef  0xa3cf  ;Fullwidth Small O -> Fullwidth Capital O
1002 0xa3f0  0xa3d0  ;Fullwidth Small P -> Fullwidth Capital P
1003 0xa3f1  0xa3d1  ;Fullwidth Small Q -> Fullwidth Capital Q
1004 0xa3f2  0xa3d2  ;Fullwidth Small R -> Fullwidth Capital R
1005 0xa3f3  0xa3d3  ;Fullwidth Small S -> Fullwidth Capital S
1006 0xa3f4  0xa3d4  ;Fullwidth Small T -> Fullwidth Capital T
1007 0xa3f5  0xa3d5  ;Fullwidth Small U -> Fullwidth Capital U
1008 0xa3f6  0xa3d6  ;Fullwidth Small V -> Fullwidth Capital V
1009 0xa3f7  0xa3d7  ;Fullwidth Small W -> Fullwidth Capital W
1010 0xa3f8  0xa3d8  ;Fullwidth Small X -> Fullwidth Capital X
1011 0xa3f9  0xa3d9  ;Fullwidth Small Y -> Fullwidth Capital Y
1012 0xa3fa  0xa3da  ;Fullwidth Small Z -> Fullwidth Capital Z
1013 
1014 
1015 *** Code Page 949 ***
1016 
1017 0xa3b0  ;Fullwidth Digit Zero
1018 0xa3b1  ;Fullwidth Digit One
1019 0xa3b2  ;Fullwidth Digit Two
1020 0xa3b3  ;Fullwidth Digit Three
1021 0xa3b4  ;Fullwidth Digit Four
1022 0xa3b5  ;Fullwidth Digit Five
1023 0xa3b6  ;Fullwidth Digit Six
1024 0xa3b7  ;Fullwidth Digit Seven
1025 0xa3b8  ;Fullwidth Digit Eight
1026 0xa3b9  ;Fullwidth Digit Nine
1027 
1028 0xa3e1  0xa3c1  ;Fullwidth Small A -> Fullwidth Capital A
1029 0xa3e2  0xa3c2  ;Fullwidth Small B -> Fullwidth Capital B
1030 0xa3e3  0xa3c3  ;Fullwidth Small C -> Fullwidth Capital C
1031 0xa3e4  0xa3c4  ;Fullwidth Small D -> Fullwidth Capital D
1032 0xa3e5  0xa3c5  ;Fullwidth Small E -> Fullwidth Capital E
1033 0xa3e6  0xa3c6  ;Fullwidth Small F -> Fullwidth Capital F
1034 0xa3e7  0xa3c7  ;Fullwidth Small G -> Fullwidth Capital G
1035 0xa3e8  0xa3c8  ;Fullwidth Small H -> Fullwidth Capital H
1036 0xa3e9  0xa3c9  ;Fullwidth Small I -> Fullwidth Capital I
1037 0xa3ea  0xa3ca  ;Fullwidth Small J -> Fullwidth Capital J
1038 0xa3eb  0xa3cb  ;Fullwidth Small K -> Fullwidth Capital K
1039 0xa3ec  0xa3cc  ;Fullwidth Small L -> Fullwidth Capital L
1040 0xa3ed  0xa3cd  ;Fullwidth Small M -> Fullwidth Capital M
1041 0xa3ee  0xa3ce  ;Fullwidth Small N -> Fullwidth Capital N
1042 0xa3ef  0xa3cf  ;Fullwidth Small O -> Fullwidth Capital O
1043 0xa3f0  0xa3d0  ;Fullwidth Small P -> Fullwidth Capital P
1044 0xa3f1  0xa3d1  ;Fullwidth Small Q -> Fullwidth Capital Q
1045 0xa3f2  0xa3d2  ;Fullwidth Small R -> Fullwidth Capital R
1046 0xa3f3  0xa3d3  ;Fullwidth Small S -> Fullwidth Capital S
1047 0xa3f4  0xa3d4  ;Fullwidth Small T -> Fullwidth Capital T
1048 0xa3f5  0xa3d5  ;Fullwidth Small U -> Fullwidth Capital U
1049 0xa3f6  0xa3d6  ;Fullwidth Small V -> Fullwidth Capital V
1050 0xa3f7  0xa3d7  ;Fullwidth Small W -> Fullwidth Capital W
1051 0xa3f8  0xa3d8  ;Fullwidth Small X -> Fullwidth Capital X
1052 0xa3f9  0xa3d9  ;Fullwidth Small Y -> Fullwidth Capital Y
1053 0xa3fa  0xa3da  ;Fullwidth Small Z -> Fullwidth Capital Z
1054 
1055 
1056 *** Code Page 950 ***
1057 
1058 0xa2af  ;Fullwidth Digit Zero
1059 0xa2b0  ;Fullwidth Digit One
1060 0xa2b1  ;Fullwidth Digit Two
1061 0xa2b2  ;Fullwidth Digit Three
1062 0xa2b3  ;Fullwidth Digit Four
1063 0xa2b4  ;Fullwidth Digit Five
1064 0xa2b5  ;Fullwidth Digit Six
1065 0xa2b6  ;Fullwidth Digit Seven
1066 0xa2b7  ;Fullwidth Digit Eight
1067 0xa2b8  ;Fullwidth Digit Nine
1068 
1069 0xa2e9  0xa2cf  ;Fullwidth Small A -> Fullwidth Capital A
1070 0xa2ea  0xa2d0  ;Fullwidth Small B -> Fullwidth Capital B
1071 0xa2eb  0xa2d1  ;Fullwidth Small C -> Fullwidth Capital C
1072 0xa2ec  0xa2d2  ;Fullwidth Small D -> Fullwidth Capital D
1073 0xa2ed  0xa2d3  ;Fullwidth Small E -> Fullwidth Capital E
1074 0xa2ee  0xa2d4  ;Fullwidth Small F -> Fullwidth Capital F
1075 0xa2ef  0xa2d5  ;Fullwidth Small G -> Fullwidth Capital G
1076 0xa2f0  0xa2d6  ;Fullwidth Small H -> Fullwidth Capital H
1077 0xa2f1  0xa2d7  ;Fullwidth Small I -> Fullwidth Capital I
1078 0xa2f2  0xa2d8  ;Fullwidth Small J -> Fullwidth Capital J
1079 0xa2f3  0xa2d9  ;Fullwidth Small K -> Fullwidth Capital K
1080 0xa2f4  0xa2da  ;Fullwidth Small L -> Fullwidth Capital L
1081 0xa2f5  0xa2db  ;Fullwidth Small M -> Fullwidth Capital M
1082 0xa2f6  0xa2dc  ;Fullwidth Small N -> Fullwidth Capital N
1083 0xa2f7  0xa2dd  ;Fullwidth Small O -> Fullwidth Capital O
1084 0xa2f8  0xa2de  ;Fullwidth Small P -> Fullwidth Capital P
1085 0xa2f9  0xa2df  ;Fullwidth Small Q -> Fullwidth Capital Q
1086 0xa2fa  0xa2e0  ;Fullwidth Small R -> Fullwidth Capital R
1087 0xa2fb  0xa2e1  ;Fullwidth Small S -> Fullwidth Capital S
1088 0xa2fc  0xa2e2  ;Fullwidth Small T -> Fullwidth Capital T
1089 0xa2fd  0xa2e3  ;Fullwidth Small U -> Fullwidth Capital U
1090 0xa2fe  0xa2e4  ;Fullwidth Small V -> Fullwidth Capital V
1091 
1092 ...Note break in sequence...
1093 
1094 0xa340  0xa2e5  ;Fullwidth Small W -> Fullwidth Capital W
1095 0xa341  0xa2e6  ;Fullwidth Small X -> Fullwidth Capital X
1096 0xa342  0xa2e7  ;Fullwidth Small Y -> Fullwidth Capital Y
1097 0xa343  0xa2e8  ;Fullwidth Small Z -> Fullwidth Capital Z
1098 
1099 
1100 *** Code Page 1361 ***
1101 
1102 Not yet available (05/17/94)
1103 
1104 
1105 
1106 ****************************************************************************/
1107