/*
 * Copyright (c) 2000-2010, Yandex
 *
 * This file is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 * You should have received a copy of the GNU Lesser Public License
 * along with Pire.  If not, see <http://www.gnu.org/licenses>.
 */
16 
17 
18 #ifndef PIRE_STUB_UNIDATA_H_H
19 #define PIRE_STUB_UNIDATA_H_H
20 
21 
22 
// Layout of one unicode_types entry (the word returned by wc_info):
//   bits 0..5   character category (one of the values below, see CCL_MASK)
//   bits 6..15  flag bits (TO_* and IS_*)
//   bits 16..19 BIDI class (read by the is_bidi_* predicates)
//   bits 22..   signed value: case-mapping offset or digit value
enum WC_TYPE {
	// Category           // DefaultChar
	Lu_UPPER     =  1, // 'Ъ'
	Ll_LOWER     =  2, // 'ъ'
	Lt_TITLE     =  3, // 'Ъ'
	Lm_EXTENDER  =  4, // '-'
	Lm_LETTER    =  5, // 'ъ'
	Lo_OTHER     =  6, // '?'
	Lo_IDEOGRAPH =  7, // '?'
	Lo_KATAKANA  =  8, // '?'
	Lo_HIRAGANA  =  9, // '?'
	Lo_LEADING   = 10, // '?'
	Lo_VOWEL     = 11, // '?'
	Lo_TRAILING  = 12, // '?'

	Mn_NONSPACING= 13, // '`'
	Me_ENCLOSING = 14, // '`'
	Mc_SPACING   = 15, // '`'

	Nd_DIGIT     = 16, // '9'           // convert to digit
	Nl_LETTER    = 17, // 'X'           // X,V,C,L,I ...
	Nl_IDEOGRAPH = 18, // '?'
	No_OTHER     = 19, // '9'

	Zs_SPACE     = 20, // ' ' [\40\240] SPACE ... NO-BREAK SPACE (00A0)
	Zs_ZWSPACE   = 21, // ' '           // nothing ?
	Zl_LINE      = 22, // '\n'
	Zp_PARAGRAPH = 23, // '\n'

	Cc_ASCII     = 24, // '\x1A'        // can not happen
	Cc_SPACE     = 25, // '\x1A'        // can not happen
	Cc_SEPARATOR = 26, // '\x1A'        // can not happen

	Cf_FORMAT    = 27, // '\x1A'        // nothing ?
	Cf_JOIN      = 28, // '\x1A'        // nothing ?
	Cf_BIDI      = 29, // '\x1A'        // nothing ?
	Cf_ZWNBSP    = 30, // '\x1A'        // nothing ?

	// Unassigned and private-use share category 0; they are told apart
	// by the IS_PRIVATE flag (see is_unassigned / is_private).
	Cn_UNASSIGNED=  0, // '?'
	Co_PRIVATE   =  0, // '?'
	Cs_LOW       = 31, // '?'
	Cs_HIGH      = 32, // '?'

	Pd_DASH      = 33, // '-'
	Pd_HYPHEN    = 34, // '-' [-]       HYPHEN-MINUS
	Ps_START     = 35, // '(' [([{]     LEFT PARENTHESIS ... LEFT CURLY BRACKET
	Ps_QUOTE     = 36, // '"'
	Pe_END       = 37, // ')' [)]}]     RIGHT PARENTHESIS ... RIGHT CURLY BRACKET
	Pe_QUOTE     = 38, // '"'
	Pi_QUOTE     = 39, // '"'
	Pf_QUOTE     = 40, // '"'
	Pc_CONNECTOR = 41, // '_' [_]       LOW LINE
	Po_OTHER     = 42, // '*' [#%&*/@\] NUMBER SIGN ... REVERSE SOLIDUS
	Po_QUOTE     = 43, // '"' ["]       QUOTATION MARK
	Po_TERMINAL  = 44, // '.' [!,.:;?]  EXCLAMATION MARK ... QUESTION MARK
	Po_EXTENDER  = 45, // '-' [·]       MIDDLE DOT (00B7)
	Po_HYPHEN    = 46, // '-'

	Sm_MATH      = 47, // '=' [+<=>|~]  PLUS SIGN ... TILDE
	Sm_MINUS     = 48, // '-'
	Sc_CURRENCY  = 49, // '$' [$]       DOLLAR SIGN
	Sk_MODIFIER  = 50, // '`' [^`]      CIRCUMFLEX ACCENT ... GRAVE ACCENT
	So_OTHER     = 51, // '°' [°]       DEGREE SIGN (00B0)

	Ps_SINGLE_QUOTE = 52, // '\'' [']   OPENING SINGLE QUOTE
	Pe_SINGLE_QUOTE = 53, // '\'' [']   CLOSING SINGLE QUOTE
	Pi_SINGLE_QUOTE = 54, // '\'' [']   INITIAL SINGLE QUOTE
	Pf_SINGLE_QUOTE = 55, // '\'' [']   FINAL SINGLE QUOTE
	Po_SINGLE_QUOTE = 56, // '\'' [']   APOSTROPHE and PRIME

	CCL_NUM      = 57,   // number of category values (0..56)
	CCL_MASK     = 0x3F, // extracts the category from a unicode_types entry

	// Case-mapping flags: when set, the signed value at SVAL_OFFSET is the
	// distance to the mapped character (see to_lower / to_upper / to_title).
	TO_LOWER     = 1<< 6,
	TO_UPPER     = 1<< 7,
	TO_TITLE     = 1<< 8,

	IS_XDIGIT    = 1<< 9,
	IS_DIGIT     = 1<<10, // the signed value at SVAL_OFFSET is the digit value (see to_digit)
	IS_NONBREAK  = 1<<11,

	IS_PRIVATE   = 1<<12,
	IS_ORDERED   = 1<<13,

	IS_COMPAT    = 1<<14,
	IS_CANON     = 1<<15,

	BIDI_OFFSET   =  16, // shift of the 4-bit BIDI class field
	SVAL_OFFSET   =  22, // shift of the signed value field
};
113 
114 const size_t DEFCHAR_BUF = 58; // CCL_NUM + 1
115 
116 extern const ui32 unicode_types[];
117 extern const wchar32 decomp_mapping[];
118 extern const ui32 *unicode_pages[];
119 
120 extern const unsigned DECOMP_OFFSET;
121 extern const unsigned DECOMP_MASK;
122 extern const unsigned LENGTH_OFFSET;
123 extern const unsigned LENGTH_MASK;
124 extern const unsigned TYPES_OFFSET;
125 extern const unsigned TYPES_MASK;
126 
// _(i): 64-bit mask with only bit i set. Used below to build the category
// sets passed to wc_istype(); removed again with #undef at the end of the
// header. ULL is expected to be provided by the including code.
#define _(i) (ULL(1)<<(i))
128 
_runeinfo(wchar32 ch)129 ui32 _runeinfo(wchar32 ch)
130 {
131 	if (ch > 0xFFFF)
132 		return _runeinfo(0xE001);//as characters from Private Use Zone
133 	return unicode_pages[(ch>>5)&0x7FF][ch&0x1F];
134 }
wc_info(wchar32 ch)135 ui32 wc_info(wchar32 ch)
136 {
137 	return unicode_types[(_runeinfo(ch)>>TYPES_OFFSET) & TYPES_MASK];
138 }
wc_type(wchar32 ch)139 WC_TYPE wc_type(wchar32 ch)
140 {
141 	return (WC_TYPE)(wc_info(ch) & CCL_MASK);
142 }
get_decomp_mapping(wchar32 ch,const wchar32 * & decomp_p,unsigned & decomp_len)143 unsigned get_decomp_mapping(wchar32 ch, const wchar32 *&decomp_p, unsigned &decomp_len)
144 {
145 	ui32 info = _runeinfo(ch);
146 	decomp_len = (info>>LENGTH_OFFSET)&LENGTH_MASK;
147 	decomp_p = &decomp_mapping[(info>>DECOMP_OFFSET) & DECOMP_MASK];
148 	return decomp_len;
149 }
wc_istype(wchar32 ch,ui64 type_bits)150 bool wc_istype(wchar32 ch, ui64 type_bits)
151 {
152 	return (_(wc_type(ch)) & type_bits) != 0;
153 }
154 
// all useful properties
156 
is_unicode_space(wchar32 ch)157 bool is_unicode_space(wchar32 ch) // is_space without \n,\r,\v,\f,\40,\t
158 {
159 	return wc_istype(ch, _(Zs_SPACE)|_(Zs_ZWSPACE)|_(Zl_LINE)|_(Zp_PARAGRAPH));
160 }
is_whitespace(wchar32 ch)161 bool is_whitespace(wchar32 ch)
162 {
163 	return wc_istype(ch, _(Cc_SPACE)|_(Zs_SPACE)|_(Zs_ZWSPACE)|_(Zl_LINE)|_(Zp_PARAGRAPH));
164 }
is_ascii_cntrl(wchar32 ch)165 bool is_ascii_cntrl(wchar32 ch)
166 {
167 	return wc_istype(ch, _(Cc_ASCII)|_(Cc_SPACE)|_(Cc_SEPARATOR));
168 }
is_bidi_cntrl(wchar32 ch)169 bool is_bidi_cntrl(wchar32 ch)
170 {
171 	return wc_istype(ch, _(Cf_BIDI));
172 }
is_join_cntrl(wchar32 ch)173 bool is_join_cntrl(wchar32 ch)
174 {
175 	return wc_istype(ch, _(Cf_JOIN));
176 }
is_format_cntrl(wchar32 ch)177 bool is_format_cntrl(wchar32 ch)
178 {
179 	return wc_istype(ch, _(Cf_FORMAT));
180 }
is_ignorable_cntrl(wchar32 ch)181 bool is_ignorable_cntrl(wchar32 ch)
182 {
183 	return wc_istype(ch, _(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP));
184 }
is_cntrl(wchar32 ch)185 bool is_cntrl(wchar32 ch)
186 {
187 	return wc_istype(ch,
188 		_(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP)|
189 		_(Cc_ASCII)|_(Cc_SPACE)|_(Cc_SEPARATOR)
190 	);
191 }
is_zerowidth(wchar32 ch)192 bool is_zerowidth(wchar32 ch)
193 {
194 	return wc_istype(ch, _(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP)|_(Zs_ZWSPACE));
195 }
is_line_sep(wchar32 ch)196 bool is_line_sep(wchar32 ch)
197 {
198 	return wc_istype(ch, _(Zl_LINE));
199 }
is_para_sep(wchar32 ch)200 bool is_para_sep(wchar32 ch)
201 {
202 	return wc_istype(ch, _(Zp_PARAGRAPH));
203 }
is_dash(wchar32 ch)204 bool is_dash(wchar32 ch)
205 {
206 	return wc_istype(ch, _(Pd_DASH)|_(Pd_HYPHEN)|_(Sm_MINUS));
207 }
is_hyphen(wchar32 ch)208 bool is_hyphen(wchar32 ch)
209 {
210 	return wc_istype(ch, _(Pd_HYPHEN)|_(Po_HYPHEN));
211 }
is_quotation(wchar32 ch)212 bool is_quotation(wchar32 ch)
213 {
214 	return wc_istype(ch, _(Po_QUOTE)|_(Ps_QUOTE)|_(Pe_QUOTE)|_(Pi_QUOTE)|_(Pf_QUOTE)|
215 		_(Po_SINGLE_QUOTE)|_(Ps_SINGLE_QUOTE)|_(Pe_SINGLE_QUOTE)|_(Pi_SINGLE_QUOTE)|_(Pf_SINGLE_QUOTE));
216 
217 }
is_terminal(wchar32 ch)218 bool is_terminal(wchar32 ch)
219 {
220 	return wc_istype(ch, _(Po_TERMINAL));
221 }
is_paired_punct(wchar32 ch)222 bool is_paired_punct(wchar32 ch)
223 {
224 	return wc_istype(ch, _(Ps_START)|_(Pe_END) |
225 		_(Ps_QUOTE)|_(Pe_QUOTE)|_(Pi_QUOTE)|_(Pf_QUOTE)|
226 		_(Ps_SINGLE_QUOTE)|_(Pe_SINGLE_QUOTE)|_(Pi_SINGLE_QUOTE)|_(Pf_SINGLE_QUOTE));
227 }
is_left_punct(wchar32 ch)228 bool is_left_punct(wchar32 ch)
229 {
230 	return wc_istype(ch, _(Ps_START)|_(Ps_QUOTE)|_(Ps_SINGLE_QUOTE));
231 }
is_right_punct(wchar32 ch)232 bool is_right_punct(wchar32 ch)
233 {
234 	return wc_istype(ch, _(Pe_END)|_(Pe_QUOTE)|_(Pe_SINGLE_QUOTE));
235 }
is_combining(wchar32 ch)236 bool is_combining(wchar32 ch)
237 {
238 	return wc_istype(ch, _(Mc_SPACING)|_(Mn_NONSPACING)|_(Me_ENCLOSING));
239 }
is_nonspacing(wchar32 ch)240 bool is_nonspacing(wchar32 ch)
241 {
242 	return wc_istype(ch, _(Mn_NONSPACING)|_(Me_ENCLOSING));
243 }
is_alphabetic(wchar32 ch)244 bool is_alphabetic(wchar32 ch)
245 {
246 	return wc_istype(ch, _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|
247 		_(Lm_EXTENDER)|_(Lm_LETTER)|_(Lo_OTHER)|
248 		_(Nl_LETTER)
249 	);
250 }
is_ideographic(wchar32 ch)251 bool is_ideographic(wchar32 ch)
252 {
253 	return wc_istype(ch, _(Lo_IDEOGRAPH)|_(Nl_IDEOGRAPH));
254 }
is_katakana(wchar32 ch)255 bool is_katakana(wchar32 ch)
256 {
257 	return wc_istype(ch, _(Lo_KATAKANA));
258 }
is_hiragana(wchar32 ch)259 bool is_hiragana(wchar32 ch)
260 {
261 	return wc_istype(ch, _(Lo_HIRAGANA));
262 }
is_hangul_leading(wchar32 ch)263 bool is_hangul_leading(wchar32 ch)
264 {
265 	return wc_istype(ch, _(Lo_LEADING));
266 }
is_hangul_vowel(wchar32 ch)267 bool is_hangul_vowel(wchar32 ch)
268 {
269 	return wc_istype(ch, _(Lo_VOWEL));
270 }
is_hangul_trailing(wchar32 ch)271 bool is_hangul_trailing(wchar32 ch)
272 {
273 	return wc_istype(ch, _(Lo_TRAILING));
274 }
is_hexdigit(wchar32 ch)275 bool is_hexdigit(wchar32 ch)
276 {
277 	return (wc_info(ch) & IS_XDIGIT) != 0;
278 }
is_decdigit(wchar32 ch)279 bool is_decdigit(wchar32 ch)
280 {
281 	return wc_istype(ch, _(Nd_DIGIT));
282 }
is_numeric(wchar32 ch)283 bool is_numeric(wchar32 ch)
284 {
285 	return wc_istype(ch, _(Nd_DIGIT)|_(Nl_LETTER)|_(Nl_IDEOGRAPH)|_(No_OTHER));
286 }
is_currency(wchar32 ch)287 bool is_currency(wchar32 ch)
288 {
289 	return wc_istype(ch, _(Sc_CURRENCY));
290 }
is_math(wchar32 ch)291 bool is_math(wchar32 ch)
292 {
293 	return wc_istype(ch, _(Sm_MATH));
294 }
is_symbol(wchar32 ch)295 bool is_symbol(wchar32 ch)
296 {
297 	return wc_istype(ch, _(Sm_MATH)|_(Sm_MINUS)|_(Sc_CURRENCY)|_(Sk_MODIFIER)|_(So_OTHER));
298 }
is_idstart(wchar32 ch)299 bool is_idstart(wchar32 ch) // unicode
300 {
301 	return wc_istype(ch,
302 		_(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_EXTENDER)|_(Lm_LETTER)|
303 		_(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
304 		_(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
305 		_(Nl_LETTER)
306 	);
307 }
is_idignorable(wchar32 ch)308 bool is_idignorable(wchar32 ch)
309 {
310 	return is_ignorable_cntrl(ch);
311 }
is_idpart(wchar32 ch)312 bool is_idpart(wchar32 ch) // unicode
313 {
314 	return is_idignorable(ch) || is_idstart(ch) || wc_istype(ch,
315 		_(Mn_NONSPACING)|_(Mc_SPACING)|_(Nd_DIGIT)|_(Pc_CONNECTOR)
316 	);
317 }
is_nmstart(wchar32 ch)318 bool is_nmstart(wchar32 ch) // xml
319 {
320 	return ch == ':' || ch == '_' || ((wc_info(ch) & IS_COMPAT) == 0 &&
321 		wc_istype(ch,
322 			_(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|
323 			_(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
324 			_(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
325 			_(Nl_LETTER)
326 		));
327 }
is_nmchar(wchar32 ch)328 int is_nmchar(wchar32 ch) // xml
329 {
330 	return is_nmstart(ch) || ch == '.' || ch == '-' ||
331 		((wc_info(ch) & IS_COMPAT) == 0 &&
332 			wc_istype(ch,
333 				_(Lm_EXTENDER)|_(Po_EXTENDER)|
334 				_(Mc_SPACING)|_(Mn_NONSPACING)|_(Nd_DIGIT)|
335 				_(Nl_IDEOGRAPH)
336 			));
337 }
is_low_surrogate(wchar32 ch)338 bool is_low_surrogate(wchar32 ch)
339 {
340 	return wc_istype(ch, _(Cs_LOW));
341 }
is_high_surrogate(wchar32 ch)342 bool is_high_surrogate(wchar32 ch)
343 {
344 	return wc_istype(ch, _(Cs_HIGH));
345 }
is_nonbreak(wchar32 ch)346 bool is_nonbreak(wchar32 ch)
347 {
348 	return (wc_info(ch) & IS_NONBREAK) != 0;
349 }
is_private(wchar32 ch)350 bool is_private(wchar32 ch)
351 {
352 	return (wc_info(ch) & IS_PRIVATE) && !wc_istype(ch, _(Cs_HIGH));
353 }
is_unassigned(wchar32 ch)354 bool is_unassigned(wchar32 ch)
355 {
356 	int i = wc_info(ch);
357 	return ((i & 0x3F) == 0) && !(i & IS_PRIVATE);
358 }
is_private_high_surrogate(wchar32 ch)359 bool is_private_high_surrogate(wchar32 ch)
360 {
361 	return wc_istype(ch, _(Cs_HIGH)) && (wc_info(ch) & IS_PRIVATE);
362 }
is_composed(wchar32 ch)363 bool is_composed(wchar32 ch)
364 {
365 	return wc_info(ch) & (IS_COMPAT|IS_CANON) ? true : false;
366 }
is_canon_composed(wchar32 ch)367 bool is_canon_composed(wchar32 ch)
368 {
369 	return wc_info(ch) & IS_CANON ? true : false;
370 }
371 
// transformations: case mapping and digit value
373 
to_lower(wchar32 ch)374 wchar32 to_lower(wchar32 ch)
375 {
376 	i32 i = wc_info(ch);
377 	return (wchar32)(ch + ((i & TO_LOWER) ? (i >> SVAL_OFFSET) : 0));
378 }
to_upper(wchar32 ch)379 wchar32 to_upper(wchar32 ch)
380 {
381 	i32 i = wc_info(ch);
382 	return (wchar32)(ch - ((i & TO_UPPER) ? (i >> SVAL_OFFSET) : 0));
383 }
to_title(wchar32 ch)384 wchar32 to_title(wchar32 ch)
385 {
386 	i32 i = wc_info(ch);
387 	wchar32 ret = ch;
388 	if (i & TO_TITLE) {
389 		if (wc_istype(ch, _(Lu_UPPER)))
390 			ret++;
391 		else if (wc_istype(ch, _(Ll_LOWER)))
392 			ret--;
393 	} else if (i & TO_UPPER) {
394 		ret = (wchar32)(ret - (i >> SVAL_OFFSET));
395 	}
396 	return ret;
397 }
to_digit(wchar32 ch)398 int to_digit(wchar32 ch)
399 {
400 	i32 i = wc_info(ch);
401 	return (i & IS_DIGIT) ? (i >> SVAL_OFFSET) : -1;
402 }
403 
// BIDI (bidirectional class) properties (C2_...)
405 
is_bidi_left(wchar32 ch)406 int is_bidi_left(wchar32 ch)    {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 1;}
is_bidi_right(wchar32 ch)407 int is_bidi_right(wchar32 ch)   {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 2;}
is_bidi_euronum(wchar32 ch)408 int is_bidi_euronum(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 3;}
is_bidi_eurosep(wchar32 ch)409 int is_bidi_eurosep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 4;}
is_bidi_euroterm(wchar32 ch)410 int is_bidi_euroterm(wchar32 ch){return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 5;}
is_bidi_arabnum(wchar32 ch)411 int is_bidi_arabnum(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 6;}
is_bidi_commsep(wchar32 ch)412 int is_bidi_commsep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 7;}
is_bidi_blocksep(wchar32 ch)413 int is_bidi_blocksep(wchar32 ch){return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 8;}
is_bidi_segmsep(wchar32 ch)414 int is_bidi_segmsep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 9;}
is_bidi_space(wchar32 ch)415 int is_bidi_space(wchar32 ch)   {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 10;}
is_bidi_neutral(wchar32 ch)416 int is_bidi_neutral(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 11;}
is_bidi_notappl(wchar32 ch)417 int is_bidi_notappl(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 0;}
418 
// C-library style properties (C1_...)
420 
is_space(wchar32 ch)421 bool is_space(wchar32 ch) // == is_whitespace
422 {
423 	return is_whitespace(ch);
424 }
is_lower(wchar32 ch)425 bool is_lower(wchar32 ch)
426 {
427 	return wc_istype(ch, _(Ll_LOWER));
428 }
is_upper(wchar32 ch)429 bool is_upper(wchar32 ch)
430 {
431 	return wc_istype(ch, _(Lu_UPPER));
432 }
is_alpha(wchar32 ch)433 bool is_alpha(wchar32 ch)
434 {
435 	return wc_istype(ch,
436 		_(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|_(Lm_EXTENDER)|
437 		_(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
438 		_(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)
439 	);
440 }
is_alnum(wchar32 ch)441 bool is_alnum(wchar32 ch)
442 {
443 	return wc_istype(ch,
444 		_(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|_(Lm_EXTENDER)|
445 		_(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
446 		_(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
447 		_(Nd_DIGIT)|_(Nl_LETTER)|_(Nl_IDEOGRAPH)|_(No_OTHER)
448 	);
449 }
is_punct(wchar32 ch)450 bool is_punct(wchar32 ch)
451 {
452 	return wc_istype(ch,
453 		_(Pd_DASH)|
454 		_(Pd_HYPHEN)|_(Ps_START)|_(Ps_QUOTE)|_(Pe_END)|_(Pe_QUOTE)|_(Pc_CONNECTOR)|
455 		_(Po_OTHER)|_(Po_QUOTE)|_(Po_TERMINAL)|_(Po_EXTENDER)|_(Po_HYPHEN)|
456 		_(Pi_QUOTE)|_(Pf_QUOTE)
457 	);
458 }
is_xdigit(wchar32 ch)459 bool is_xdigit(wchar32 ch) {return is_hexdigit(ch);}
is_digit(wchar32 ch)460 bool is_digit(wchar32 ch) {return is_decdigit(ch);}
is_graph(wchar32 ch)461 bool is_graph(wchar32 ch) {return is_alnum(ch)||is_punct(ch)||is_symbol(ch);}
is_blank(wchar32 ch)462 bool is_blank(wchar32 ch)
463 {
464 	return wc_istype(ch, _(Zs_SPACE)|_(Zs_ZWSPACE)) || ch == '\t';
465 }
is_print(wchar32 ch)466 bool is_print(wchar32 ch) {return is_alnum(ch)||is_punct(ch)||is_symbol(ch)||is_blank(ch);}
467 
// The category-mask helper is local to this header.
#undef _


// U+046C CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS.
// NOTE(review): presumably the stand-in used where a surrogate appears in
// UCS-2 text; its users are not visible in this header.
#define UCS2_SURROGATE_CHAR 0x046C // CAPITAL IOTIFIED BIG YUS
472 
473 #endif
474