1 /***
2 *mbctype.c - MBCS table used by the functions that test for types of char
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 * table used to determine the type of char
8 *
9 *******************************************************************************/
10 #include <corecrt_internal.h>
11 #include <locale.h>
12 #include <corecrt_internal_mbstring.h>
13 #include <mbctype.h>
14 #include <winnls.h>
15
16 #ifndef CRTDLL
17
18 _CRT_LINKER_FORCE_INCLUDE(__acrt_multibyte_initializer);
19
20 #endif /* CRTDLL */
21
22 #define _CHINESE_SIMP_CP 936
23 #define _KOREAN_WANGSUNG_CP 949
24 #define _CHINESE_TRAD_CP 950
25 #define _KOREAN_JOHAB_CP 1361
26
27 #define NUM_CHARS 257 /* -1 through 255 */
28
29 #define NUM_CTYPES 4 /* table contains 4 types of info */
30 #define MAX_RANGES 8 /* max number of ranges needed given languages so far */
31
32 /* character type info in ranges (pair of low/high), zeros indicate end */
33 typedef struct
34 {
35 int code_page;
36 unsigned short mbulinfo[NUM_ULINFO];
37 unsigned char rgrange[NUM_CTYPES][MAX_RANGES];
38 } code_page_info;
39
40 extern "C"
41 {
/*
 * Statically initialized multibyte data.  __acrt_initialize_multibyte() uses
 * its address as the starting value of __acrt_current_multibyte_data, and
 * setSBCS() copies its mbctype / mbcasemap tables into the structure it resets.
 * The reference-counting code in this file never frees this object: every
 * _free_crt() call is guarded by a comparison against its address.
 *
 * Table layout:
 *   mbctype   - entry [c + 1] describes byte c; entry [0] is the slot for
 *               EOF (-1).  0x10 is set for 'A'-'Z' and 0x20 for 'a'-'z'
 *               (presumably _SBUP / _SBLOW - confirm against mbctype.h).
 *   mbcasemap - entry [c] holds the opposite-case ASCII letter for byte c
 *               ('A' -> 'a', 'a' -> 'A') and 0 for every other byte.
 * Entries past the last listed initializer are zero-initialized.
 */
__crt_multibyte_data __acrt_initial_multibyte_data =
{
    0,                       /* refcount */
    CP_ACP,                  /* mbcodepage: _MB_CP_ANSI */
    0,                       /* ismbcodepage */
    { 0, 0, 0, 0, 0, 0 },    /* mbulinfo[6] */
    {   /* mbctype[257] */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00 /* rest is zero */
    },
    {   /* mbcasemap[256] */
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
        0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
        0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b,
        0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
        0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00 /* rest is zero */
    },
    nullptr                  /* mblocalename */
};
76
/*
 * Brace-enclosed initializer for one row of _mbctypes.  The values are the
 * same as the mbctype initializer of __acrt_initial_multibyte_data: entry
 * [c + 1] describes byte c, 0x10 marks 'A'-'Z' and 0x20 marks 'a'-'z'.
 * It is a macro so the same row can be repeated for each state index.
 */
#define _MBCTYPE_DEFAULT                                                        \
{                                                                               \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,     \
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,     \
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,     \
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,     \
    0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00 /* rest is zero */           \
}

/*
 * MBCS ctype array: one NUM_CHARS-entry row per state index.  A second row
 * is supplied only when _CRT_GLOBAL_STATE_ISOLATION is defined.  The array is
 * handed to _mbctype.initialize_from_array() in __acrt_initialize_multibyte().
 */
static unsigned char _mbctypes[__crt_state_management::state_index_count][NUM_CHARS] =
{
    _MBCTYPE_DEFAULT
#ifdef _CRT_GLOBAL_STATE_ISOLATION
    ,_MBCTYPE_DEFAULT
#endif
};
100
/*
 * Brace-enclosed initializer for one row of _mbcasemaps.  The values are the
 * same as the mbcasemap initializer of __acrt_initial_multibyte_data: entry
 * [c] holds the opposite-case ASCII letter for byte c and 0 for non-letters.
 */
#define _MBCASEMAP_DEFAULT                                                      \
{                                                                               \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,     \
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,     \
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00,     \
    0x00, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b,     \
    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,     \
    0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00 /* rest is zero */           \
}

/*
 * MBCS case-map array: one 256-entry row per state index.  A second row is
 * supplied only when _CRT_GLOBAL_STATE_ISOLATION is defined.  The array is
 * handed to _mbcasemap.initialize_from_array() in __acrt_initialize_multibyte().
 */
static unsigned char _mbcasemaps[__crt_state_management::state_index_count][256] =
{
    _MBCASEMAP_DEFAULT
#ifdef _CRT_GLOBAL_STATE_ISOLATION
    ,_MBCASEMAP_DEFAULT
#endif
};
123
/*
 * global pointer to the multi-byte case type (i.e. upper or lower or n/a).
 * Initialized from _mbctypes in __acrt_initialize_multibyte(); the table it
 * points at is overwritten by setmbcp_internal() when the global code page
 * changes.
 */
__crt_state_management::dual_state_global<unsigned char*> _mbctype;

/*
 * global pointer to the multi-byte casemap.
 * Initialized from _mbcasemaps in __acrt_initialize_multibyte(); the table it
 * points at is overwritten by setmbcp_internal() when the global code page
 * changes.
 */
__crt_state_management::dual_state_global<unsigned char*> _mbcasemap;

/*
 * global pointer to the current per-thread mbc information structure.
 * Starts out pointing at __acrt_initial_multibyte_data and is replaced, under
 * __acrt_multibyte_cp_lock, by setmbcp_internal().
 */
__crt_state_management::dual_state_global<__crt_multibyte_data*> __acrt_current_multibyte_data;
132 }
133
/*
 * Set by the non-enclave getSystemCP(): 1 when the caller asked for
 * _MB_CP_OEM, _MB_CP_ANSI or _MB_CP_LOCALE, 0 otherwise.  _setmbcp_nolock()
 * reads it to decide whether an unsupported code page falls back to SBCS
 * instead of failing.
 * NOTE(review): this is a plain static shared by every caller and no
 * synchronization is visible in this file - confirm callers serialize access.
 */
static int fSystemSet;

/*
 * mbctype flag applied for each row of code_page_info::rgrange, in row order:
 * single byte, punctuation, lead byte, trail byte.
 */
static char __rgctypeflag[NUM_CTYPES] = { _MS, _MP, _M1, _M2 };
137
/*
 * Built-in classification data for the double-byte code pages the CRT handles
 * without asking the OS.  _setmbcp_nolock() searches this table first.
 *
 * For each entry:
 *   mbulinfo - two groups of three values: first and last code of a
 *              full-width Latin upper-case range, then the offset from an
 *              upper-case code to its lower-case code.  An all-zero group
 *              means the range is unused.
 *   rgrange  - four rows (single byte, punctuation, lead byte, trail byte) of
 *              inclusive (low, high) byte pairs; a zero pair ends the row.
 */
static code_page_info __rgcode_page_info[] =
{
    {
        _KANJI_CP, /* Kanji (Japanese) Code Page */
        { 0x8260, 0x8279,   /* Full-Width Latin Upper Range 1 */
          0x8281 - 0x8260,  /* Full-Width Latin Case Difference 1 */

          0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
          0x0000            /* Full-Width Latin Case Difference 2 */
        },
        {
            { 0xA6, 0xDF, 0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
            { 0xA1, 0xA5, 0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
            { 0x81, 0x9F, 0xE0, 0xFC, 0,    0,    0, 0, }, /* Lead Byte Ranges */
            { 0x40, 0x7E, 0x80, 0xFC, 0,    0,    0, 0, }, /* Trail Byte Ranges */
        }
    },
    {
        _CHINESE_SIMP_CP, /* Chinese Simplified (PRC) Code Page */
        { 0xA3C1, 0xA3DA,   /* Full-Width Latin Upper Range 1 */
          0xA3E1 - 0xA3C1,  /* Full-Width Latin Case Difference 1 */

          0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
          0x0000            /* Full-Width Latin Case Difference 2 */
        },
        {
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
            { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
            { 0x40, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Trail Byte Ranges */
        }
    },
    {
        _KOREAN_WANGSUNG_CP, /* Wangsung (Korean) Code Page */
        { 0xA3C1, 0xA3DA,   /* Full-Width Latin Upper Range 1 */
          0xA3E1 - 0xA3C1,  /* Full-Width Latin Case Difference 1 */

          0x0000, 0x0000,   /* Full-Width Latin Upper Range 2 */
          0x0000            /* Full-Width Latin Case Difference 2 */
        },
        {
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
            { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
            { 0x41, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Trail Byte Ranges */
        }
    },
    {
        _CHINESE_TRAD_CP, /* Chinese Traditional (Taiwan) Code Page */
        { 0xA2CF, 0xA2E4,   /* Full-Width Latin Upper Range 1 */
          0xA2E9 - 0xA2CF,  /* Full-Width Latin Case Difference 1 */

          0xA2E5, 0xA2E8,   /* Full-Width Latin Upper Range 2 */
          0xA340 - 0XA2E5   /* Full-Width Latin Case Difference 2 */
        },
        {
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
            { 0x81, 0xFE, 0,    0,    0,    0,    0, 0, }, /* Lead Byte Ranges */
            { 0x40, 0x7E, 0xA1, 0xFE, 0,    0,    0, 0, }, /* Trail Byte Ranges */
        }
    },
    {
        _KOREAN_JOHAB_CP, /* Johab (Korean) Code Page */
        { 0xDA51, 0xDA5E,   /* Full-Width Latin Upper Range 1 */
          0xDA71 - 0xDA51,  /* Full-Width Latin Case Difference 1 */

          0xDA5F, 0xDA6A,   /* Full-Width Latin Upper Range 2 */
          0xDA91 - 0xDA5F   /* Full-Width Latin Case Difference 2 */
        },
        {
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Single Byte Ranges */
            { 0,    0,    0,    0,    0,    0,    0, 0, }, /* Punctuation Ranges */
            { 0x81, 0xD3, 0xD8, 0xDE, 0xE0, 0xF9, 0, 0, }, /* Lead Byte Ranges */
            { 0x31, 0x7E, 0x81, 0xFE, 0,    0,    0, 0, }, /* Trail Byte Ranges */
        }
    }
};
216
/*
 * Indices into _mb_locale_names; each value must match the position of the
 * corresponding string in the array below.
 */
#define JAPANSE_DEFAULT_LOCALE_NAME_INDEX     0
#define CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX  1
#define KOREAN_DEFAULT_LOCALE_NAME_INDEX      2
#define CHINESE_TRADITIONAL_LOCALE_NAME_INDEX 3

/*
 * Locale names returned by CPtoLocaleName().  The strings are static, so
 * callers must not free them.
 */
const wchar_t* const _mb_locale_names[] =
{
    L"ja-JP", /* JAPANSE_DEFAULT_LOCALE_NAME_INDEX */
    L"zh-CN", /* CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX */
    L"ko-KR", /* KOREAN_DEFAULT_LOCALE_NAME_INDEX */
    L"zh-TW", /* CHINESE_TRADITIONAL_LOCALE_NAME_INDEX */
};
229
230
231
__p__mbctype()232 extern "C" unsigned char* __cdecl __p__mbctype()
233 {
234 return _mbctype.value();
235 }
236
__p__mbcasemap()237 extern "C" unsigned char* __cdecl __p__mbcasemap()
238 {
239 return _mbcasemap.value();
240 }
241
242
243
244 extern "C" int __cdecl _setmbcp_nolock(int, __crt_multibyte_data*);
245
246 static int getSystemCP (int);
247
248 /***
249 *setSBCS() - Set MB code page to SBCS.
250 *
251 *Purpose:
252 * Set MB code page to SBCS.
253 *Entry:
254 *
255 *Exit:
256 *
257 *Exceptions:
258 *
259 *******************************************************************************/
260
setSBCS(__crt_multibyte_data * ptmbci)261 static void setSBCS (__crt_multibyte_data* ptmbci)
262 {
263 int i;
264
265 /* set for single-byte code page */
266 for (i = 0; i < NUM_CHARS; i++)
267 ptmbci->mbctype[i] = 0;
268
269 /* code page has changed, set global flag */
270 ptmbci->mbcodepage = 0;
271
272 /* clear flag to indicate single-byte code */
273 ptmbci->ismbcodepage = 0;
274
275 ptmbci->mblocalename = nullptr;
276
277 for (i = 0; i < NUM_ULINFO; i++)
278 ptmbci->mbulinfo[i] = 0;
279
280 for ( i = 0 ; i < 257 ; i++ )
281 ptmbci->mbctype[i] = __acrt_initial_multibyte_data.mbctype[i];
282
283 for ( i = 0 ; i < 256 ; i++ )
284 ptmbci->mbcasemap[i] = __acrt_initial_multibyte_data.mbcasemap[i];
285 }
286
287 /***
288 *__acrt_update_thread_multibyte_data() - refresh the thread's mbc info
289 *
290 *Purpose:
291 * Update the current thread's reference to the multibyte character
292 * information to match the current global mbc info. Decrement the
293 * reference on the old mbc information struct and if this count is now
294 * zero (so that no threads are using it), free it.
295 *
296 *Entry:
297 *
298 *Exit:
299 * _getptd()->ptmbcinfo == current_multibyte_data (which should always be __acrt_current_multibyte_data)
300 *
301 *Exceptions:
302 *
303 *******************************************************************************/
304
/*
 * Brings ptd->_multibyte_info in line with *current_multibyte_data when the
 * thread follows the global locale (or has no locale info yet), and returns
 * the multibyte data the thread should use.  Aborts the process if the result
 * would be null.
 *
 *   ptd                    - per-thread data whose _multibyte_info is updated
 *   current_multibyte_data - address of the global "current" pointer to sync to
 */
static __crt_multibyte_data* __cdecl update_thread_multibyte_data_internal(
    __acrt_ptd*            const ptd,
    __crt_multibyte_data** const current_multibyte_data
    ) throw()
{
    __crt_multibyte_data* ptmbci = nullptr;

    if (__acrt_should_sync_with_global_locale(ptd) || ptd->_locale_info == nullptr)
    {
        /* The compare, release and re-acquire below all happen under the lock. */
        __acrt_lock(__acrt_multibyte_cp_lock);
        __try
        {
            ptmbci = ptd->_multibyte_info;
            if (ptmbci != *current_multibyte_data)
            {
                /*
                 * Decrement the reference count in the old mbc info structure
                 * and free it, if necessary
                 */
                if (ptmbci != nullptr &&
                    InterlockedDecrement(&ptmbci->refcount) == 0 &&
                    ptmbci != &__acrt_initial_multibyte_data)
                {
                    /*
                     * Free it (the statically allocated initial data is
                     * excluded by the address check above)
                     */
                    _free_crt(ptmbci);
                }

                /*
                 * Point to the current mbc info structure and increment its
                 * reference count.
                 */
                ptmbci = ptd->_multibyte_info = *current_multibyte_data;
                InterlockedIncrement(&ptmbci->refcount);
            }
        }
        __finally
        {
            /* Release the lock on every exit path from the guarded region. */
            __acrt_unlock(__acrt_multibyte_cp_lock);
        }
        __endtry
    }
    else
    {
        /* The thread does not follow the global locale: keep its own data. */
        ptmbci = ptd->_multibyte_info;
    }

    /* A thread without multibyte data cannot continue. */
    if (!ptmbci)
    {
        abort();
    }

    return ptmbci;
}
360
__acrt_update_thread_multibyte_data()361 extern "C" __crt_multibyte_data* __cdecl __acrt_update_thread_multibyte_data()
362 {
363 return update_thread_multibyte_data_internal(__acrt_getptd(), &__acrt_current_multibyte_data.value());
364 }
365
366 /***
367 *_setmbcp() - Set MBC data based on code page
368 *
369 *Purpose:
370 * Init MBC character type tables based on code page number. If
371 * given code page is supported, load that code page info into
372 * mbctype table. If not, query OS to find the information,
373 * otherwise set up table with single byte info.
374 *
375 * Multithread Notes: First, allocate an mbc information struct. Set the
376 * mbc info in the static vars and arrays as does the single-thread
377 * version. Then, copy this info into the new allocated struct and set
378 * the current mbc info pointer (__acrt_current_multibyte_data) to point to it.
379 *
380 *Entry:
381 * codepage - code page to initialize MBC table
382 * _MB_CP_OEM = use system OEM code page
383 * _MB_CP_ANSI = use system ANSI code page
384 * _MB_CP_SBCS = set to single byte 'code page'
385 *
386 *Exit:
387 * 0 = Success
388 * -1 = Error, code page not changed.
389 *
390 *Exceptions:
391 *
392 *******************************************************************************/
393
/*
 * Worker behind _setmbcp() and CRT startup.
 *
 *   requested_codepage        - code page number or one of the _MB_CP_* values
 *   is_for_crt_initialization - true when called from __acrt_initialize_multibyte()
 *   ptd                       - per-thread data to update
 *   current_multibyte_data    - address of the global "current" pointer
 *
 * Returns 0 when the resolved code page is already the thread's code page,
 * the _setmbcp_nolock() status on success, and -1 on failure.  errno is set
 * to EINVAL only when _setmbcp_nolock() rejects the code page; the
 * allocation-failure path returns -1 without touching errno here.
 */
static int __cdecl setmbcp_internal(
    int                    const requested_codepage,
    bool                   const is_for_crt_initialization,
    __acrt_ptd*            const ptd,
    __crt_multibyte_data** const current_multibyte_data
    ) throw()
{
    // Make sure the thread's multibyte data is up to date before comparing:
    update_thread_multibyte_data_internal(ptd, current_multibyte_data);
    int const system_codepage = getSystemCP(requested_codepage);

    // If it's not a new codepage, just return success:
    if (system_codepage == ptd->_multibyte_info->mbcodepage)
    {
        return 0;
    }

    // Always allocate space so that we don't have to take a lock for any update:
    __crt_unique_heap_ptr<__crt_multibyte_data> mb_data(_malloc_crt_t(__crt_multibyte_data, 1));
    if (!mb_data)
    {
        return -1;
    }

    // Initialize the new multibyte data structure from the current multibyte
    // data structure for this thread, resetting the reference count (since it
    // is not actually referenced by anything yet).
    *mb_data.get() = *ptd->_multibyte_info;
    mb_data.get()->refcount = 0;

    // Actually initialize the new multibyte data using the new codepage:
    // CRT_REFACTOR TODO _setmbcp_nolock is a terrible name.
    int const setmbcp_status = _setmbcp_nolock(system_codepage, mb_data.get());
    if (setmbcp_status == -1)
    {
        // mb_data still owns the allocation here, so it is released on return.
        errno = EINVAL;
        return -1;
    }

    // At this point, we have a valid, new set of multibyte data to swap in.  If
    // this is not the initial codepage initialization during process startup,
    // we need to toggle the locale-changed state:
    if (!is_for_crt_initialization)
    {
        __acrt_set_locale_changed();
    }

    // Drop this thread's reference to its old data; free it when this was the
    // last reference and it is not the static initial data:
    if (InterlockedDecrement(&ptd->_multibyte_info->refcount) == 0 &&
        ptd->_multibyte_info != &__acrt_initial_multibyte_data)
    {
        _free_crt(ptd->_multibyte_info);
    }

    // Update the multibyte codepage for this thread (ownership of the
    // allocation moves from mb_data to the ptd):
    mb_data.get()->refcount = 1;
    ptd->_multibyte_info = mb_data.detach();

    // If this thread has its own locale, do not update the global codepage:
    if (!__acrt_should_sync_with_global_locale(ptd))
    {
        return setmbcp_status;
    }

    // Otherwise, update the global codepage:
    __acrt_lock_and_call(__acrt_multibyte_cp_lock, [&]
    {
        // Publish the new tables through the global _mbctype / _mbcasemap pointers:
        memcpy_s(_mbctype.value(),   sizeof(_mbctypes[0]),   ptd->_multibyte_info->mbctype,   sizeof(ptd->_multibyte_info->mbctype));
        memcpy_s(_mbcasemap.value(), sizeof(_mbcasemaps[0]), ptd->_multibyte_info->mbcasemap, sizeof(ptd->_multibyte_info->mbcasemap));

        // Release the global reference to the previous data:
        if (InterlockedDecrement(&(*current_multibyte_data)->refcount) == 0 &&
            (*current_multibyte_data) != &__acrt_initial_multibyte_data)
        {
            _free_crt(*current_multibyte_data);
        }

        // The global pointer now shares this thread's data, so it takes a reference:
        *current_multibyte_data = ptd->_multibyte_info;
        InterlockedIncrement(&ptd->_multibyte_info->refcount);
    });

    if (is_for_crt_initialization)
    {
        __acrt_initial_locale_pointers.mbcinfo = *current_multibyte_data;
    }

    return setmbcp_status;
}
479
480 /* Enclaves only support built-in CP_ACP */
481 #ifdef _UCRT_ENCLAVE_BUILD
482
getSystemCP(int)483 static int getSystemCP(int)
484 {
485 return CP_ACP;
486 }
487
488
_setmbcp_nolock(int,__crt_multibyte_data * ptmbci)489 extern "C" int __cdecl _setmbcp_nolock(int, __crt_multibyte_data* ptmbci)
490 {
491 setSBCS(ptmbci);
492 return 0;
493 }
494
495 #else /* ^^^ _UCRT_ENCLAVE_BUILD ^^^ // vvv !_UCRT_ENCLAVE_BUILD vvv */
496
497 /***
498 *CPtoLocaleName() - Code page to locale name.
499 *
500 *Purpose:
501 * Some API calls want a locale name, so convert MB CP to appropriate locale name,
502 * and then API converts back to ANSI CP for that locale name.
503 *
504 *Entry:
505 * codepage - code page to convert
506 *Exit:
507 * returns appropriate locale name
508 * Returned locale names are stored in static structs, so they must not be deleted.
509 *
510 *Exceptions:
511 *
512 *******************************************************************************/
513
CPtoLocaleName(int codepage)514 static const wchar_t* CPtoLocaleName (int codepage)
515 {
516 switch (codepage) {
517 case 932:
518 return _mb_locale_names[JAPANSE_DEFAULT_LOCALE_NAME_INDEX];
519 case 936:
520 return _mb_locale_names[CHINESE_SIMPLIFIED_LOCALE_NAME_INDEX];
521 case 949:
522 return _mb_locale_names[KOREAN_DEFAULT_LOCALE_NAME_INDEX];
523 case 950:
524 return _mb_locale_names[CHINESE_TRADITIONAL_LOCALE_NAME_INDEX];
525 }
526
527 return 0;
528 }
529
530 /***
531 *getSystemCP - Get system default CP if requested.
532 *
533 *Purpose:
534 * Get system default CP if requested.
535 *
536 *Entry:
537 * codepage - user requested code page/world script
538 *
539 * Docs specify:
540 * _MB_CP_SBCS 0 - Use a single byte codepage
541 * _MB_CP_OEM -2 - use the OEMCP
542 * _MB_CP_ANSI -3 - use the ACP
543 * _MB_CP_LOCALE -4 - use the codepage for a previous setlocale call
544 * Codepage # - use the specified codepage (UTF-7 is disallowed)
545 * - 54936 and other interesting stateful codepages aren't
546 * - explicitly disallowed but I can't imagine them working right.
547 *
548 *Exit:
549 * requested code page
550 *
551 *Exceptions:
552 *
553 *******************************************************************************/
getSystemCP(int codepage)554 static int getSystemCP(int codepage)
555 {
556 _locale_t plocinfo = nullptr;
557 _LocaleUpdate _loc_update(plocinfo);
558 fSystemSet = 0;
559
560 /* get system code page values if requested */
561
562 if (codepage == _MB_CP_OEM)
563 {
564 fSystemSet = 1;
565 return GetOEMCP();
566 }
567 else if (codepage == _MB_CP_ANSI)
568 {
569 fSystemSet = 1;
570 return GetACP();
571 }
572 else if (codepage == _MB_CP_LOCALE)
573 {
574 fSystemSet = 1;
575 return _loc_update.GetLocaleT()->locinfo->_public._locale_lc_codepage;
576 }
577
578 return codepage;
579 }
580
581 /***
582 *setSBUpLow() - Set single byte range upper/lower mappings
583 *
584 *Purpose:
585 * Set single byte mapping for tolower/toupper.
586 * Basically this is ASCII-mapping plus a few if you're a lucky
587 * SBCS codepage.
588 * DBCS + UTF ranges > 0x7f are basically ignored.
589 *
590 *Entry:
591 *
592 *Exit:
593 *
594 *Exceptions:
595 *
596 *******************************************************************************/
597
setSBUpLow(__crt_multibyte_data * ptmbci)598 static void setSBUpLow (__crt_multibyte_data* ptmbci)
599 {
600 BYTE * pbPair;
601 UINT ich;
602 CPINFO cpInfo;
603 UCHAR sbVector[256];
604 UCHAR upVector[256];
605 UCHAR lowVector[256];
606 USHORT wVector[512];
607
608 // test if codepage exists
609 if (ptmbci->mbcodepage != CP_UTF8 && GetCPInfo(ptmbci->mbcodepage, &cpInfo) != 0)
610 {
611 // This code attempts to generate casing tables for characters 0-255
612 // For DBCS codepages that will be basically ASCII casing but won't help DBCS mapping.
613 // For SBCS codepages that will include the codepage-specific characters.
614 // Mappings do not appear to include Turkish-i variations.
615
616 // if so, create vector 0-255
617 for (ich = 0; ich < 256; ich++)
618 sbVector[ich] = (UCHAR) ich;
619
620 // set byte 0 and any leading byte value to non-alpha char ' '
621 sbVector[0] = (UCHAR)' ';
622 for (pbPair = &cpInfo.LeadByte[0]; *pbPair; pbPair += 2)
623 // make sure ich within a valid range
624 for (ich = *pbPair; ich <= *(pbPair + 1) && ich < 256; ich++)
625 sbVector[ich] = (UCHAR)' ';
626
627 // get char type for character vector
628
629 __acrt_GetStringTypeA(nullptr, CT_CTYPE1, (LPCSTR)sbVector, 256, wVector,
630 ptmbci->mbcodepage, FALSE);
631
632 // get lower case mappings for character vector
633
634 __acrt_LCMapStringA(nullptr, ptmbci->mblocalename, LCMAP_LOWERCASE, (LPCSTR)sbVector, 256,
635 (LPSTR)lowVector, 256, ptmbci->mbcodepage, FALSE);
636
637 // get upper case mappings for character vector
638
639 __acrt_LCMapStringA(nullptr, ptmbci->mblocalename, LCMAP_UPPERCASE, (LPCSTR)sbVector, 256,
640 (LPSTR)upVector, 256, ptmbci->mbcodepage, FALSE);
641
642 // set _SBUP, _SBLOW in ptmbci->mbctype if type is upper. lower
643 // set mapping array with lower or upper mapping value
644
645 for (ich = 0; ich < 256; ich++)
646 {
647 if (wVector[ich] & _UPPER)
648 {
649 // WARNING: +1 because the mbctype array starts with a -1 EOF character
650 ptmbci->mbctype[ich + 1] |= _SBUP;
651 ptmbci->mbcasemap[ich] = lowVector[ich];
652 }
653 else if (wVector[ich] & _LOWER)
654 {
655 // WARNING: +1 because the mbctype array starts with a -1 EOF character
656 ptmbci->mbctype[ich + 1] |= _SBLOW;
657 ptmbci->mbcasemap[ich] = upVector[ich];
658 }
659 else
660 ptmbci->mbcasemap[ich] = 0;
661 }
662 }
663 else
664 {
665 // Either no codepage or UTF-8 (which looks a lot like ASCII in the lower bits)
666 // Set 'A'-'Z' as upper, 'a'-'z' as lower (eg: ASCII casing)
667 for (ich = 0; ich < 256; ich++)
668 {
669 if (ich >= (UINT)'A' && ich <= (UINT)'Z')
670 {
671 // WARNING: +1 because the mbctype array starts with a -1 EOF character
672 ptmbci->mbctype[ich + 1] |= _SBUP;
673 ptmbci->mbcasemap[ich] = static_cast<unsigned char>(ich + ('a' - 'A'));
674 }
675 else if (ich >= (UINT)'a' && ich <= (UINT)'z')
676 {
677 // WARNING: +1 because the mbctype array starts with a -1 EOF character
678 ptmbci->mbctype[ich + 1] |= _SBLOW;
679 ptmbci->mbcasemap[ich] = static_cast<unsigned char>(ich - ('a' - 'A'));
680 }
681 else
682 ptmbci->mbcasemap[ich] = 0;
683 }
684 }
685 }
686
_setmbcp(int const codepage)687 extern "C" int __cdecl _setmbcp(int const codepage)
688 {
689 return setmbcp_internal(codepage, false, __acrt_getptd(), &__acrt_current_multibyte_data.value());
690 }
691
_setmbcp_nolock(int codepage,__crt_multibyte_data * ptmbci)692 extern "C" int __cdecl _setmbcp_nolock(int codepage, __crt_multibyte_data* ptmbci)
693 {
694 unsigned int icp;
695 unsigned int irg;
696 unsigned int ich;
697 unsigned char *rgptr;
698 CPINFO cpInfo;
699
700 codepage = getSystemCP(codepage);
701
702 /* user wants 'single-byte' MB code page */
703 if (codepage == _MB_CP_SBCS)
704 {
705 setSBCS(ptmbci);
706 return 0;
707 }
708
709 /* check for CRT code page info */
710 for (icp = 0;
711 icp < (sizeof(__rgcode_page_info) / sizeof(code_page_info));
712 icp++)
713 {
714 /* see if we have info for this code page */
715 if (__rgcode_page_info[icp].code_page == codepage)
716 {
717 /* clear the table */
718 for (ich = 0; ich < NUM_CHARS; ich++)
719 ptmbci->mbctype[ich] = 0;
720
721 /* for each type of info, load table */
722 for (irg = 0; irg < NUM_CTYPES; irg++)
723 {
724 /* go through all the ranges for each type of info */
725 for (rgptr = (unsigned char *)__rgcode_page_info[icp].rgrange[irg];
726 rgptr[0] && rgptr[1];
727 rgptr += 2)
728 {
729 /* set the type for every character in range */
730 for (ich = rgptr[0]; ich <= rgptr[1] && ich < 256; ich++)
731 ptmbci->mbctype[ich + 1] |= __rgctypeflag[irg];
732 }
733 }
734 /* code page has changed */
735 ptmbci->mbcodepage = codepage;
736 /* all the code pages we keep info for are truly multibyte */
737 ptmbci->ismbcodepage = 1;
738 ptmbci->mblocalename = CPtoLocaleName(ptmbci->mbcodepage);
739 for (irg = 0; irg < NUM_ULINFO; irg++)
740 {
741 ptmbci->mbulinfo[irg] = __rgcode_page_info[icp].mbulinfo[irg];
742 }
743
744 /* return success */
745 setSBUpLow(ptmbci);
746 return 0;
747 }
748 }
749
750 /* verify codepage validity */
751 // Unclear why UTF7 is excluded yet stateful and other complex encodings are not
752 if (codepage == 0 || codepage == CP_UTF7 || !IsValidCodePage((WORD)codepage))
753 {
754 /* return failure, code page not changed */
755 return -1;
756 }
757
758 // Special case for UTF-8
759 if (codepage == CP_UTF8)
760 {
761 ptmbci->mbcodepage = CP_UTF8;
762 ptmbci->mblocalename = nullptr;
763
764 // UTF-8 does not have lead or trail bytes in the terms
765 // the CRT thinks of it for DBCS codepages, so we'll
766 // clear the flags for all bytes.
767 // Note that this array is 257 bytes because there's a
768 // "-1" that is used someplaces for EOF. So this array
769 // is actually -1 based.
770 for (ich = 0; ich < NUM_ULINFO; ich++)
771 {
772 ptmbci->mbctype[ich] = 0;
773 }
774
775 // not really a multibyte code page, we'll have to test
776 // ptmbci->mbcodepage == CP_UTF8 when we use this structure.
777 ptmbci->ismbcodepage = 0;
778
779 // CJK encodings have some full-width mappings, but not here.
780 for (irg = 0; irg < NUM_ULINFO; irg++)
781 {
782 ptmbci->mbulinfo[irg] = 0;
783 }
784
785 setSBUpLow(ptmbci);
786
787 // return success
788 return 0;
789 }
790 /* code page not supported by CRT, try the OS */\
791 else if (GetCPInfo(codepage, &cpInfo) != 0)
792 {
793 BYTE *lbptr;
794
795 /* clear the table */
796 for (ich = 0; ich < NUM_CHARS; ich++)
797 {
798 ptmbci->mbctype[ich] = 0;
799 }
800
801 ptmbci->mbcodepage = codepage;
802 ptmbci->mblocalename = nullptr;
803
804 // Special case for DBCS where we know there may be a leadbyte/trailbyte pattern
805 if (cpInfo.MaxCharSize == 2)
806 {
807 /* LeadByte range always terminated by two 0's */
808 for (lbptr = cpInfo.LeadByte; *lbptr && *(lbptr + 1); lbptr += 2)
809 {
810 for (ich = *lbptr; ich <= *(lbptr + 1); ich++)
811 ptmbci->mbctype[ich + 1] |= _M1;
812 }
813
814 /* All chars > 1 must be considered valid trail bytes */
815 for (ich = 0x01; ich < 0xFF; ich++)
816 {
817 ptmbci->mbctype[ich + 1] |= _M2;
818 }
819
820 /* code page has changed */
821 ptmbci->mblocalename = CPtoLocaleName(ptmbci->mbcodepage);
822
823 /* really a multibyte code page */
824 ptmbci->ismbcodepage = 1;
825 }
826 else
827 {
828 /* single-byte code page */
829 ptmbci->ismbcodepage = 0;
830 }
831
832 for (irg = 0; irg < NUM_ULINFO; irg++)
833 {
834 ptmbci->mbulinfo[irg] = 0;
835 }
836
837 setSBUpLow(ptmbci);
838 /* return success */
839 return 0;
840 }
841
842
843 /* If system default call, don't fail - set to SBCS */
844 if (fSystemSet)
845 {
846 setSBCS(ptmbci);
847 return 0;
848 }
849
850 /* return failure, code page not changed */
851 return -1;
852 }
853
854 #endif /* _UCRT_ENCLAVE_BUILD */
855
856 /***
857 *_getmbcp() - Get the current MBC code page
858 *
859 *Purpose:
860 * Get code page value.
861 *Entry:
862 * none.
863 *Exit:
864 * return current MB codepage value.
865 *
866 *Exceptions:
867 *
868 *******************************************************************************/
869
_getmbcp()870 extern "C" int __cdecl _getmbcp()
871 {
872 _locale_t plocinfo = nullptr;
873 _LocaleUpdate _loc_update(plocinfo);
874 if ( _loc_update.GetLocaleT()->mbcinfo->ismbcodepage )
875 return _loc_update.GetLocaleT()->mbcinfo->mbcodepage;
876 else
877 return 0;
878 }
879
880
/***
*__acrt_initialize_multibyte() - Set MB ctype table to initial default value.
*
*Purpose:
*       One-time initialization of the multibyte globals.
*Entry:
*       none.
*Exit:
*       Always returns true; the function has no failure return.
*Exceptions:
*
*******************************************************************************/
893
__acrt_initialize_multibyte()894 extern "C" bool __cdecl __acrt_initialize_multibyte()
895 {
896 static bool initialized = false;
897
898 // Synchronization note: it is not possible for a data race to occur here.
899 // In the CRT DLLs, this function is called during CRT startup, befor any
900 // user code using the CRT may run. In the static CRT, this function is
901 // called by a CRT initializer (at the top of this file), so 'initialized'
902 // will be true before any user code can enter the CRT.
903 //
904 // CRT_REFACTOR TODO We should split this function into two parts: one that
905 // does the initialization (without any check), and one that does nothing,
906 // but can be used to cause this object to be linked in.
907 if (!initialized)
908 {
909 // initialize global pointer to the current per-thread mbc information structure
910 __acrt_current_multibyte_data.initialize(&__acrt_initial_multibyte_data);
911
912 // initialize mbc pointers
913 _mbcasemap.initialize_from_array(_mbcasemaps);
914 _mbctype .initialize_from_array(_mbctypes);
915
916 // initialize the multibyte globals
917 __acrt_ptd* const ptd_head = __acrt_getptd_head();
918 for (size_t i = 0; i != __crt_state_management::state_index_count; ++i)
919 {
920 setmbcp_internal(_MB_CP_ANSI, true, ptd_head + i, &__acrt_current_multibyte_data.dangerous_get_state_array()[i]);
921 }
922
923 initialized = 1;
924 }
925
926 return true;
927 }
928
929
930 /************************ Code Page info from NT/Win95 ********************
931
932
933 *** Code Page 932 ***
934
935 0x824f ;Fullwidth Digit Zero
936 0x8250 ;Fullwidth Digit One
937 0x8251 ;Fullwidth Digit Two
938 0x8252 ;Fullwidth Digit Three
939 0x8253 ;Fullwidth Digit Four
940 0x8254 ;Fullwidth Digit Five
941 0x8255 ;Fullwidth Digit Six
942 0x8256 ;Fullwidth Digit Seven
943 0x8257 ;Fullwidth Digit Eight
944 0x8258 ;Fullwidth Digit Nine
945
946 0x8281 0x8260 ;Fullwidth Small A -> Fullwidth Capital A
947 0x8282 0x8261 ;Fullwidth Small B -> Fullwidth Capital B
948 0x8283 0x8262 ;Fullwidth Small C -> Fullwidth Capital C
949 0x8284 0x8263 ;Fullwidth Small D -> Fullwidth Capital D
950 0x8285 0x8264 ;Fullwidth Small E -> Fullwidth Capital E
951 0x8286 0x8265 ;Fullwidth Small F -> Fullwidth Capital F
952 0x8287 0x8266 ;Fullwidth Small G -> Fullwidth Capital G
953 0x8288 0x8267 ;Fullwidth Small H -> Fullwidth Capital H
954 0x8289 0x8268 ;Fullwidth Small I -> Fullwidth Capital I
955 0x828a 0x8269 ;Fullwidth Small J -> Fullwidth Capital J
956 0x828b 0x826a ;Fullwidth Small K -> Fullwidth Capital K
957 0x828c 0x826b ;Fullwidth Small L -> Fullwidth Capital L
958 0x828d 0x826c ;Fullwidth Small M -> Fullwidth Capital M
959 0x828e 0x826d ;Fullwidth Small N -> Fullwidth Capital N
960 0x828f 0x826e ;Fullwidth Small O -> Fullwidth Capital O
961 0x8290 0x826f ;Fullwidth Small P -> Fullwidth Capital P
962 0x8291 0x8270 ;Fullwidth Small Q -> Fullwidth Capital Q
963 0x8292 0x8271 ;Fullwidth Small R -> Fullwidth Capital R
964 0x8293 0x8272 ;Fullwidth Small S -> Fullwidth Capital S
965 0x8294 0x8273 ;Fullwidth Small T -> Fullwidth Capital T
966 0x8295 0x8274 ;Fullwidth Small U -> Fullwidth Capital U
967 0x8296 0x8275 ;Fullwidth Small V -> Fullwidth Capital V
968 0x8297 0x8276 ;Fullwidth Small W -> Fullwidth Capital W
969 0x8298 0x8277 ;Fullwidth Small X -> Fullwidth Capital X
970 0x8299 0x8278 ;Fullwidth Small Y -> Fullwidth Capital Y
971 0x829a 0x8279 ;Fullwidth Small Z -> Fullwidth Capital Z
972
973
974 *** Code Page 936 ***
975
976 0xa3b0 ;Fullwidth Digit Zero
977 0xa3b1 ;Fullwidth Digit One
978 0xa3b2 ;Fullwidth Digit Two
979 0xa3b3 ;Fullwidth Digit Three
980 0xa3b4 ;Fullwidth Digit Four
981 0xa3b5 ;Fullwidth Digit Five
982 0xa3b6 ;Fullwidth Digit Six
983 0xa3b7 ;Fullwidth Digit Seven
984 0xa3b8 ;Fullwidth Digit Eight
985 0xa3b9 ;Fullwidth Digit Nine
986
987 0xa3e1 0xa3c1 ;Fullwidth Small A -> Fullwidth Capital A
988 0xa3e2 0xa3c2 ;Fullwidth Small B -> Fullwidth Capital B
989 0xa3e3 0xa3c3 ;Fullwidth Small C -> Fullwidth Capital C
990 0xa3e4 0xa3c4 ;Fullwidth Small D -> Fullwidth Capital D
991 0xa3e5 0xa3c5 ;Fullwidth Small E -> Fullwidth Capital E
992 0xa3e6 0xa3c6 ;Fullwidth Small F -> Fullwidth Capital F
993 0xa3e7 0xa3c7 ;Fullwidth Small G -> Fullwidth Capital G
994 0xa3e8 0xa3c8 ;Fullwidth Small H -> Fullwidth Capital H
995 0xa3e9 0xa3c9 ;Fullwidth Small I -> Fullwidth Capital I
996 0xa3ea 0xa3ca ;Fullwidth Small J -> Fullwidth Capital J
997 0xa3eb 0xa3cb ;Fullwidth Small K -> Fullwidth Capital K
998 0xa3ec 0xa3cc ;Fullwidth Small L -> Fullwidth Capital L
999 0xa3ed 0xa3cd ;Fullwidth Small M -> Fullwidth Capital M
1000 0xa3ee 0xa3ce ;Fullwidth Small N -> Fullwidth Capital N
1001 0xa3ef 0xa3cf ;Fullwidth Small O -> Fullwidth Capital O
1002 0xa3f0 0xa3d0 ;Fullwidth Small P -> Fullwidth Capital P
1003 0xa3f1 0xa3d1 ;Fullwidth Small Q -> Fullwidth Capital Q
1004 0xa3f2 0xa3d2 ;Fullwidth Small R -> Fullwidth Capital R
1005 0xa3f3 0xa3d3 ;Fullwidth Small S -> Fullwidth Capital S
1006 0xa3f4 0xa3d4 ;Fullwidth Small T -> Fullwidth Capital T
1007 0xa3f5 0xa3d5 ;Fullwidth Small U -> Fullwidth Capital U
1008 0xa3f6 0xa3d6 ;Fullwidth Small V -> Fullwidth Capital V
1009 0xa3f7 0xa3d7 ;Fullwidth Small W -> Fullwidth Capital W
1010 0xa3f8 0xa3d8 ;Fullwidth Small X -> Fullwidth Capital X
1011 0xa3f9 0xa3d9 ;Fullwidth Small Y -> Fullwidth Capital Y
1012 0xa3fa 0xa3da ;Fullwidth Small Z -> Fullwidth Capital Z
1013
1014
1015 *** Code Page 949 ***
1016
1017 0xa3b0 ;Fullwidth Digit Zero
1018 0xa3b1 ;Fullwidth Digit One
1019 0xa3b2 ;Fullwidth Digit Two
1020 0xa3b3 ;Fullwidth Digit Three
1021 0xa3b4 ;Fullwidth Digit Four
1022 0xa3b5 ;Fullwidth Digit Five
1023 0xa3b6 ;Fullwidth Digit Six
1024 0xa3b7 ;Fullwidth Digit Seven
1025 0xa3b8 ;Fullwidth Digit Eight
1026 0xa3b9 ;Fullwidth Digit Nine
1027
1028 0xa3e1 0xa3c1 ;Fullwidth Small A -> Fullwidth Capital A
1029 0xa3e2 0xa3c2 ;Fullwidth Small B -> Fullwidth Capital B
1030 0xa3e3 0xa3c3 ;Fullwidth Small C -> Fullwidth Capital C
1031 0xa3e4 0xa3c4 ;Fullwidth Small D -> Fullwidth Capital D
1032 0xa3e5 0xa3c5 ;Fullwidth Small E -> Fullwidth Capital E
1033 0xa3e6 0xa3c6 ;Fullwidth Small F -> Fullwidth Capital F
1034 0xa3e7 0xa3c7 ;Fullwidth Small G -> Fullwidth Capital G
1035 0xa3e8 0xa3c8 ;Fullwidth Small H -> Fullwidth Capital H
1036 0xa3e9 0xa3c9 ;Fullwidth Small I -> Fullwidth Capital I
1037 0xa3ea 0xa3ca ;Fullwidth Small J -> Fullwidth Capital J
1038 0xa3eb 0xa3cb ;Fullwidth Small K -> Fullwidth Capital K
1039 0xa3ec 0xa3cc ;Fullwidth Small L -> Fullwidth Capital L
1040 0xa3ed 0xa3cd ;Fullwidth Small M -> Fullwidth Capital M
1041 0xa3ee 0xa3ce ;Fullwidth Small N -> Fullwidth Capital N
1042 0xa3ef 0xa3cf ;Fullwidth Small O -> Fullwidth Capital O
1043 0xa3f0 0xa3d0 ;Fullwidth Small P -> Fullwidth Capital P
1044 0xa3f1 0xa3d1 ;Fullwidth Small Q -> Fullwidth Capital Q
1045 0xa3f2 0xa3d2 ;Fullwidth Small R -> Fullwidth Capital R
1046 0xa3f3 0xa3d3 ;Fullwidth Small S -> Fullwidth Capital S
1047 0xa3f4 0xa3d4 ;Fullwidth Small T -> Fullwidth Capital T
1048 0xa3f5 0xa3d5 ;Fullwidth Small U -> Fullwidth Capital U
1049 0xa3f6 0xa3d6 ;Fullwidth Small V -> Fullwidth Capital V
1050 0xa3f7 0xa3d7 ;Fullwidth Small W -> Fullwidth Capital W
1051 0xa3f8 0xa3d8 ;Fullwidth Small X -> Fullwidth Capital X
1052 0xa3f9 0xa3d9 ;Fullwidth Small Y -> Fullwidth Capital Y
1053 0xa3fa 0xa3da ;Fullwidth Small Z -> Fullwidth Capital Z
1054
1055
1056 *** Code Page 950 ***
1057
1058 0xa2af ;Fullwidth Digit Zero
1059 0xa2b0 ;Fullwidth Digit One
1060 0xa2b1 ;Fullwidth Digit Two
1061 0xa2b2 ;Fullwidth Digit Three
1062 0xa2b3 ;Fullwidth Digit Four
1063 0xa2b4 ;Fullwidth Digit Five
1064 0xa2b5 ;Fullwidth Digit Six
1065 0xa2b6 ;Fullwidth Digit Seven
1066 0xa2b7 ;Fullwidth Digit Eight
1067 0xa2b8 ;Fullwidth Digit Nine
1068
1069 0xa2e9 0xa2cf ;Fullwidth Small A -> Fullwidth Capital A
1070 0xa2ea 0xa2d0 ;Fullwidth Small B -> Fullwidth Capital B
1071 0xa2eb 0xa2d1 ;Fullwidth Small C -> Fullwidth Capital C
1072 0xa2ec 0xa2d2 ;Fullwidth Small D -> Fullwidth Capital D
1073 0xa2ed 0xa2d3 ;Fullwidth Small E -> Fullwidth Capital E
1074 0xa2ee 0xa2d4 ;Fullwidth Small F -> Fullwidth Capital F
1075 0xa2ef 0xa2d5 ;Fullwidth Small G -> Fullwidth Capital G
1076 0xa2f0 0xa2d6 ;Fullwidth Small H -> Fullwidth Capital H
1077 0xa2f1 0xa2d7 ;Fullwidth Small I -> Fullwidth Capital I
1078 0xa2f2 0xa2d8 ;Fullwidth Small J -> Fullwidth Capital J
1079 0xa2f3 0xa2d9 ;Fullwidth Small K -> Fullwidth Capital K
1080 0xa2f4 0xa2da ;Fullwidth Small L -> Fullwidth Capital L
1081 0xa2f5 0xa2db ;Fullwidth Small M -> Fullwidth Capital M
1082 0xa2f6 0xa2dc ;Fullwidth Small N -> Fullwidth Capital N
1083 0xa2f7 0xa2dd ;Fullwidth Small O -> Fullwidth Capital O
1084 0xa2f8 0xa2de ;Fullwidth Small P -> Fullwidth Capital P
1085 0xa2f9 0xa2df ;Fullwidth Small Q -> Fullwidth Capital Q
1086 0xa2fa 0xa2e0 ;Fullwidth Small R -> Fullwidth Capital R
1087 0xa2fb 0xa2e1 ;Fullwidth Small S -> Fullwidth Capital S
1088 0xa2fc 0xa2e2 ;Fullwidth Small T -> Fullwidth Capital T
1089 0xa2fd 0xa2e3 ;Fullwidth Small U -> Fullwidth Capital U
1090 0xa2fe 0xa2e4 ;Fullwidth Small V -> Fullwidth Capital V
1091
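1092 ...Note break in sequence: Small V ends at 0xa2fe, so Small W-Z resume at 0xa340 (next lead byte); the Capital column stays contiguous...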
1093
1094 0xa340 0xa2e5 ;Fullwidth Small W -> Fullwidth Capital W
1095 0xa341 0xa2e6 ;Fullwidth Small X -> Fullwidth Capital X
1096 0xa342 0xa2e7 ;Fullwidth Small Y -> Fullwidth Capital Y
1097 0xa343 0xa2e8 ;Fullwidth Small Z -> Fullwidth Capital Z
1098
1099
1100 *** Code Page 1361 ***
1101
1102 Not yet available (05/17/94)
1103
1104
1105
1106 ****************************************************************************/
1107