1 /*
2 * Copyright (c) 2000-2010, Yandex
3 *
4 * This file is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This file is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser Public License for more details.
13 * You should have received a copy of the GNU Lesser Public License
14 * along with Pire. If not, see <http://www.gnu.org/licenses>.
15 */
16
17
18 #ifndef PIRE_STUB_UNIDATA_H_H
19 #define PIRE_STUB_UNIDATA_H_H
20
21
22
// Character category codes and property flags stored in the per-character
// info word returned by wc_info().
//
// Layout, as used by the accessors in this header:
//   low 6 bits (CCL_MASK)   category code, extracted by wc_type()
//   bits 6..15              single-bit property flags (TO_LOWER .. IS_CANON)
//   from bit BIDI_OFFSET    BIDI class, read as a 4-bit field by is_bidi_*()
//   from bit SVAL_OFFSET    value field: case-mapping delta (to_lower,
//                           to_upper, to_title) or digit value (to_digit)
enum WC_TYPE {
    // Category      // DefaultChar
    Lu_UPPER     = 1, // 'Ъ'
    Ll_LOWER     = 2, // 'ъ'
    Lt_TITLE     = 3, // 'Ъ'
    Lm_EXTENDER  = 4, // '-'
    Lm_LETTER    = 5, // 'ъ'
    Lo_OTHER     = 6, // '?'
    Lo_IDEOGRAPH = 7, // '?'
    Lo_KATAKANA  = 8, // '?'
    Lo_HIRAGANA  = 9, // '?'
    Lo_LEADING   = 10, // '?'
    Lo_VOWEL     = 11, // '?'
    Lo_TRAILING  = 12, // '?'

    Mn_NONSPACING= 13, // '`'
    Me_ENCLOSING = 14, // '`'
    Mc_SPACING   = 15, // '`'

    Nd_DIGIT     = 16, // '9' // convert to digit
    Nl_LETTER    = 17, // 'X' // X,V,C,L,I ...
    Nl_IDEOGRAPH = 18, // '?'
    No_OTHER     = 19, // '9'

    Zs_SPACE     = 20, // ' ' [\40\240] SPACE ... NO-BREAK SPACE (00A0)
    Zs_ZWSPACE   = 21, // ' ' // nothing ?
    Zl_LINE      = 22, // '\n'
    Zp_PARAGRAPH = 23, // '\n'

    Cc_ASCII     = 24, // '\x1A' // can not happen
    Cc_SPACE     = 25, // '\x1A' // can not happen
    Cc_SEPARATOR = 26, // '\x1A' // can not happen

    Cf_FORMAT    = 27, // '\x1A' // nothing ?
    Cf_JOIN      = 28, // '\x1A' // nothing ?
    Cf_BIDI      = 29, // '\x1A' // nothing ?
    Cf_ZWNBSP    = 30, // '\x1A' // nothing ?

    // Unassigned and private-use share code 0; is_unassigned() and
    // is_private() tell them apart through the IS_PRIVATE flag.
    Cn_UNASSIGNED= 0, // '?'
    Co_PRIVATE   = 0, // '?'
    Cs_LOW       = 31, // '?'
    Cs_HIGH      = 32, // '?'

    Pd_DASH      = 33, // '-'
    Pd_HYPHEN    = 34, // '-' [-] HYPHEN-MINUS
    Ps_START     = 35, // '(' [([{] LEFT PARENTHESIS ... LEFT CURLY BRACKET
    Ps_QUOTE     = 36, // '"'
    Pe_END       = 37, // ')' [)]}] RIGHT PARENTHESIS ... RIGHT CURLY BRACKET
    Pe_QUOTE     = 38, // '"'
    Pi_QUOTE     = 39, // '"'
    Pf_QUOTE     = 40, // '"'
    Pc_CONNECTOR = 41, // '_' [_] LOW LINE
    Po_OTHER     = 42, // '*' [#%&*/@\] NUMBER SIGN ... REVERSE SOLIDUS
    Po_QUOTE     = 43, // '"' ["] QUOTATION MARK
    Po_TERMINAL  = 44, // '.' [!,.:;?] EXCLAMATION MARK ... QUESTION MARK
    Po_EXTENDER  = 45, // '-' [·] MIDDLE DOT (00B7)
    Po_HYPHEN    = 46, // '-'

    Sm_MATH      = 47, // '=' [+<=>|~] PLUS SIGN ... TILDE
    Sm_MINUS     = 48, // '-'
    Sc_CURRENCY  = 49, // '$' [$] DOLLAR SIGN
    Sk_MODIFIER  = 50, // '`' [^`] CIRCUMFLEX ACCENT ... GRAVE ACCENT
    So_OTHER     = 51, // '°' [°] DEGREE SIGN (00B0)

    Ps_SINGLE_QUOTE = 52, // '\'' ['] OPENING SINGLE QUOTE
    Pe_SINGLE_QUOTE = 53, // '\'' ['] CLOSING SINGLE QUOTE
    Pi_SINGLE_QUOTE = 54, // '\'' ['] INITIAL SINGLE QUOTE
    Pf_SINGLE_QUOTE = 55, // '\'' ['] FINAL SINGLE QUOTE
    Po_SINGLE_QUOTE = 56, // '\'' ['] APOSTROPHE and PRIME

    CCL_NUM      = 57,   // one past the highest category code above
    CCL_MASK     = 0x3F, // mask selecting the category code from an info word

    // Case-mapping flags, tested by to_lower() / to_upper() / to_title().
    TO_LOWER     = 1<< 6,
    TO_UPPER     = 1<< 7,
    TO_TITLE     = 1<< 8,

    IS_XDIGIT    = 1<< 9,  // tested by is_hexdigit()
    IS_DIGIT     = 1<<10,  // tested by to_digit()
    IS_NONBREAK  = 1<<11,  // tested by is_nonbreak()

    IS_PRIVATE   = 1<<12,  // tested by is_private() and is_unassigned()
    IS_ORDERED   = 1<<13,

    IS_COMPAT    = 1<<14,  // tested by is_composed(), is_nmstart(), is_nmchar()
    IS_CANON     = 1<<15,  // tested by is_composed(), is_canon_composed()

    BIDI_OFFSET  = 16, // shift to the BIDI class field
    SVAL_OFFSET  = 22, // shift to the value field (case delta / digit value)
};
113
// Must stay equal to CCL_NUM + 1 (CCL_NUM is 57 in WC_TYPE).
const size_t DEFCHAR_BUF = 58; // CCL_NUM + 1

// Lookup tables; only declared here.
// unicode_types: info words, indexed by the type index taken from _runeinfo().
extern const ui32 unicode_types[];
// decomp_mapping: decomposition data, addressed by get_decomp_mapping().
extern const wchar32 decomp_mapping[];
// unicode_pages: page pointers indexed by (ch >> 5) & 0x7FF; each page is
// indexed by the low five bits of the code point (see _runeinfo()).
extern const ui32 *unicode_pages[];

// Shift/mask pairs used to split the packed entry returned by _runeinfo():
// DECOMP_* and LENGTH_* are used by get_decomp_mapping(), TYPES_* by wc_info().
extern const unsigned DECOMP_OFFSET;
extern const unsigned DECOMP_MASK;
extern const unsigned LENGTH_OFFSET;
extern const unsigned LENGTH_MASK;
extern const unsigned TYPES_OFFSET;
extern const unsigned TYPES_MASK;
126
// Builds a mask with only bit i set, used to combine category codes for
// wc_istype(). ULL is defined elsewhere (presumably an unsigned 64-bit
// literal helper -- confirm). The macro is #undef'ed at the end of this header.
#define _(i) (ULL(1)<<(i))
128
_runeinfo(wchar32 ch)129 ui32 _runeinfo(wchar32 ch)
130 {
131 if (ch > 0xFFFF)
132 return _runeinfo(0xE001);//as characters from Private Use Zone
133 return unicode_pages[(ch>>5)&0x7FF][ch&0x1F];
134 }
wc_info(wchar32 ch)135 ui32 wc_info(wchar32 ch)
136 {
137 return unicode_types[(_runeinfo(ch)>>TYPES_OFFSET) & TYPES_MASK];
138 }
wc_type(wchar32 ch)139 WC_TYPE wc_type(wchar32 ch)
140 {
141 return (WC_TYPE)(wc_info(ch) & CCL_MASK);
142 }
get_decomp_mapping(wchar32 ch,const wchar32 * & decomp_p,unsigned & decomp_len)143 unsigned get_decomp_mapping(wchar32 ch, const wchar32 *&decomp_p, unsigned &decomp_len)
144 {
145 ui32 info = _runeinfo(ch);
146 decomp_len = (info>>LENGTH_OFFSET)&LENGTH_MASK;
147 decomp_p = &decomp_mapping[(info>>DECOMP_OFFSET) & DECOMP_MASK];
148 return decomp_len;
149 }
wc_istype(wchar32 ch,ui64 type_bits)150 bool wc_istype(wchar32 ch, ui64 type_bits)
151 {
152 return (_(wc_type(ch)) & type_bits) != 0;
153 }
154
// all useful properties
156
is_unicode_space(wchar32 ch)157 bool is_unicode_space(wchar32 ch) // is_space without \n,\r,\v,\f,\40,\t
158 {
159 return wc_istype(ch, _(Zs_SPACE)|_(Zs_ZWSPACE)|_(Zl_LINE)|_(Zp_PARAGRAPH));
160 }
is_whitespace(wchar32 ch)161 bool is_whitespace(wchar32 ch)
162 {
163 return wc_istype(ch, _(Cc_SPACE)|_(Zs_SPACE)|_(Zs_ZWSPACE)|_(Zl_LINE)|_(Zp_PARAGRAPH));
164 }
is_ascii_cntrl(wchar32 ch)165 bool is_ascii_cntrl(wchar32 ch)
166 {
167 return wc_istype(ch, _(Cc_ASCII)|_(Cc_SPACE)|_(Cc_SEPARATOR));
168 }
is_bidi_cntrl(wchar32 ch)169 bool is_bidi_cntrl(wchar32 ch)
170 {
171 return wc_istype(ch, _(Cf_BIDI));
172 }
is_join_cntrl(wchar32 ch)173 bool is_join_cntrl(wchar32 ch)
174 {
175 return wc_istype(ch, _(Cf_JOIN));
176 }
is_format_cntrl(wchar32 ch)177 bool is_format_cntrl(wchar32 ch)
178 {
179 return wc_istype(ch, _(Cf_FORMAT));
180 }
is_ignorable_cntrl(wchar32 ch)181 bool is_ignorable_cntrl(wchar32 ch)
182 {
183 return wc_istype(ch, _(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP));
184 }
is_cntrl(wchar32 ch)185 bool is_cntrl(wchar32 ch)
186 {
187 return wc_istype(ch,
188 _(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP)|
189 _(Cc_ASCII)|_(Cc_SPACE)|_(Cc_SEPARATOR)
190 );
191 }
is_zerowidth(wchar32 ch)192 bool is_zerowidth(wchar32 ch)
193 {
194 return wc_istype(ch, _(Cf_FORMAT)|_(Cf_JOIN)|_(Cf_BIDI)|_(Cf_ZWNBSP)|_(Zs_ZWSPACE));
195 }
is_line_sep(wchar32 ch)196 bool is_line_sep(wchar32 ch)
197 {
198 return wc_istype(ch, _(Zl_LINE));
199 }
is_para_sep(wchar32 ch)200 bool is_para_sep(wchar32 ch)
201 {
202 return wc_istype(ch, _(Zp_PARAGRAPH));
203 }
is_dash(wchar32 ch)204 bool is_dash(wchar32 ch)
205 {
206 return wc_istype(ch, _(Pd_DASH)|_(Pd_HYPHEN)|_(Sm_MINUS));
207 }
is_hyphen(wchar32 ch)208 bool is_hyphen(wchar32 ch)
209 {
210 return wc_istype(ch, _(Pd_HYPHEN)|_(Po_HYPHEN));
211 }
is_quotation(wchar32 ch)212 bool is_quotation(wchar32 ch)
213 {
214 return wc_istype(ch, _(Po_QUOTE)|_(Ps_QUOTE)|_(Pe_QUOTE)|_(Pi_QUOTE)|_(Pf_QUOTE)|
215 _(Po_SINGLE_QUOTE)|_(Ps_SINGLE_QUOTE)|_(Pe_SINGLE_QUOTE)|_(Pi_SINGLE_QUOTE)|_(Pf_SINGLE_QUOTE));
216
217 }
is_terminal(wchar32 ch)218 bool is_terminal(wchar32 ch)
219 {
220 return wc_istype(ch, _(Po_TERMINAL));
221 }
is_paired_punct(wchar32 ch)222 bool is_paired_punct(wchar32 ch)
223 {
224 return wc_istype(ch, _(Ps_START)|_(Pe_END) |
225 _(Ps_QUOTE)|_(Pe_QUOTE)|_(Pi_QUOTE)|_(Pf_QUOTE)|
226 _(Ps_SINGLE_QUOTE)|_(Pe_SINGLE_QUOTE)|_(Pi_SINGLE_QUOTE)|_(Pf_SINGLE_QUOTE));
227 }
is_left_punct(wchar32 ch)228 bool is_left_punct(wchar32 ch)
229 {
230 return wc_istype(ch, _(Ps_START)|_(Ps_QUOTE)|_(Ps_SINGLE_QUOTE));
231 }
is_right_punct(wchar32 ch)232 bool is_right_punct(wchar32 ch)
233 {
234 return wc_istype(ch, _(Pe_END)|_(Pe_QUOTE)|_(Pe_SINGLE_QUOTE));
235 }
is_combining(wchar32 ch)236 bool is_combining(wchar32 ch)
237 {
238 return wc_istype(ch, _(Mc_SPACING)|_(Mn_NONSPACING)|_(Me_ENCLOSING));
239 }
is_nonspacing(wchar32 ch)240 bool is_nonspacing(wchar32 ch)
241 {
242 return wc_istype(ch, _(Mn_NONSPACING)|_(Me_ENCLOSING));
243 }
is_alphabetic(wchar32 ch)244 bool is_alphabetic(wchar32 ch)
245 {
246 return wc_istype(ch, _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|
247 _(Lm_EXTENDER)|_(Lm_LETTER)|_(Lo_OTHER)|
248 _(Nl_LETTER)
249 );
250 }
is_ideographic(wchar32 ch)251 bool is_ideographic(wchar32 ch)
252 {
253 return wc_istype(ch, _(Lo_IDEOGRAPH)|_(Nl_IDEOGRAPH));
254 }
is_katakana(wchar32 ch)255 bool is_katakana(wchar32 ch)
256 {
257 return wc_istype(ch, _(Lo_KATAKANA));
258 }
is_hiragana(wchar32 ch)259 bool is_hiragana(wchar32 ch)
260 {
261 return wc_istype(ch, _(Lo_HIRAGANA));
262 }
is_hangul_leading(wchar32 ch)263 bool is_hangul_leading(wchar32 ch)
264 {
265 return wc_istype(ch, _(Lo_LEADING));
266 }
is_hangul_vowel(wchar32 ch)267 bool is_hangul_vowel(wchar32 ch)
268 {
269 return wc_istype(ch, _(Lo_VOWEL));
270 }
is_hangul_trailing(wchar32 ch)271 bool is_hangul_trailing(wchar32 ch)
272 {
273 return wc_istype(ch, _(Lo_TRAILING));
274 }
is_hexdigit(wchar32 ch)275 bool is_hexdigit(wchar32 ch)
276 {
277 return (wc_info(ch) & IS_XDIGIT) != 0;
278 }
is_decdigit(wchar32 ch)279 bool is_decdigit(wchar32 ch)
280 {
281 return wc_istype(ch, _(Nd_DIGIT));
282 }
is_numeric(wchar32 ch)283 bool is_numeric(wchar32 ch)
284 {
285 return wc_istype(ch, _(Nd_DIGIT)|_(Nl_LETTER)|_(Nl_IDEOGRAPH)|_(No_OTHER));
286 }
is_currency(wchar32 ch)287 bool is_currency(wchar32 ch)
288 {
289 return wc_istype(ch, _(Sc_CURRENCY));
290 }
is_math(wchar32 ch)291 bool is_math(wchar32 ch)
292 {
293 return wc_istype(ch, _(Sm_MATH));
294 }
is_symbol(wchar32 ch)295 bool is_symbol(wchar32 ch)
296 {
297 return wc_istype(ch, _(Sm_MATH)|_(Sm_MINUS)|_(Sc_CURRENCY)|_(Sk_MODIFIER)|_(So_OTHER));
298 }
is_idstart(wchar32 ch)299 bool is_idstart(wchar32 ch) // unicode
300 {
301 return wc_istype(ch,
302 _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_EXTENDER)|_(Lm_LETTER)|
303 _(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
304 _(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
305 _(Nl_LETTER)
306 );
307 }
is_idignorable(wchar32 ch)308 bool is_idignorable(wchar32 ch)
309 {
310 return is_ignorable_cntrl(ch);
311 }
is_idpart(wchar32 ch)312 bool is_idpart(wchar32 ch) // unicode
313 {
314 return is_idignorable(ch) || is_idstart(ch) || wc_istype(ch,
315 _(Mn_NONSPACING)|_(Mc_SPACING)|_(Nd_DIGIT)|_(Pc_CONNECTOR)
316 );
317 }
is_nmstart(wchar32 ch)318 bool is_nmstart(wchar32 ch) // xml
319 {
320 return ch == ':' || ch == '_' || ((wc_info(ch) & IS_COMPAT) == 0 &&
321 wc_istype(ch,
322 _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|
323 _(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
324 _(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
325 _(Nl_LETTER)
326 ));
327 }
is_nmchar(wchar32 ch)328 int is_nmchar(wchar32 ch) // xml
329 {
330 return is_nmstart(ch) || ch == '.' || ch == '-' ||
331 ((wc_info(ch) & IS_COMPAT) == 0 &&
332 wc_istype(ch,
333 _(Lm_EXTENDER)|_(Po_EXTENDER)|
334 _(Mc_SPACING)|_(Mn_NONSPACING)|_(Nd_DIGIT)|
335 _(Nl_IDEOGRAPH)
336 ));
337 }
is_low_surrogate(wchar32 ch)338 bool is_low_surrogate(wchar32 ch)
339 {
340 return wc_istype(ch, _(Cs_LOW));
341 }
is_high_surrogate(wchar32 ch)342 bool is_high_surrogate(wchar32 ch)
343 {
344 return wc_istype(ch, _(Cs_HIGH));
345 }
is_nonbreak(wchar32 ch)346 bool is_nonbreak(wchar32 ch)
347 {
348 return (wc_info(ch) & IS_NONBREAK) != 0;
349 }
is_private(wchar32 ch)350 bool is_private(wchar32 ch)
351 {
352 return (wc_info(ch) & IS_PRIVATE) && !wc_istype(ch, _(Cs_HIGH));
353 }
is_unassigned(wchar32 ch)354 bool is_unassigned(wchar32 ch)
355 {
356 int i = wc_info(ch);
357 return ((i & 0x3F) == 0) && !(i & IS_PRIVATE);
358 }
is_private_high_surrogate(wchar32 ch)359 bool is_private_high_surrogate(wchar32 ch)
360 {
361 return wc_istype(ch, _(Cs_HIGH)) && (wc_info(ch) & IS_PRIVATE);
362 }
is_composed(wchar32 ch)363 bool is_composed(wchar32 ch)
364 {
365 return wc_info(ch) & (IS_COMPAT|IS_CANON) ? true : false;
366 }
is_canon_composed(wchar32 ch)367 bool is_canon_composed(wchar32 ch)
368 {
369 return wc_info(ch) & IS_CANON ? true : false;
370 }
371
372 // transformations
373
to_lower(wchar32 ch)374 wchar32 to_lower(wchar32 ch)
375 {
376 i32 i = wc_info(ch);
377 return (wchar32)(ch + ((i & TO_LOWER) ? (i >> SVAL_OFFSET) : 0));
378 }
to_upper(wchar32 ch)379 wchar32 to_upper(wchar32 ch)
380 {
381 i32 i = wc_info(ch);
382 return (wchar32)(ch - ((i & TO_UPPER) ? (i >> SVAL_OFFSET) : 0));
383 }
to_title(wchar32 ch)384 wchar32 to_title(wchar32 ch)
385 {
386 i32 i = wc_info(ch);
387 wchar32 ret = ch;
388 if (i & TO_TITLE) {
389 if (wc_istype(ch, _(Lu_UPPER)))
390 ret++;
391 else if (wc_istype(ch, _(Ll_LOWER)))
392 ret--;
393 } else if (i & TO_UPPER) {
394 ret = (wchar32)(ret - (i >> SVAL_OFFSET));
395 }
396 return ret;
397 }
to_digit(wchar32 ch)398 int to_digit(wchar32 ch)
399 {
400 i32 i = wc_info(ch);
401 return (i & IS_DIGIT) ? (i >> SVAL_OFFSET) : -1;
402 }
403
404 // BIDI properties (C2_...)
405
is_bidi_left(wchar32 ch)406 int is_bidi_left(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 1;}
is_bidi_right(wchar32 ch)407 int is_bidi_right(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 2;}
is_bidi_euronum(wchar32 ch)408 int is_bidi_euronum(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 3;}
is_bidi_eurosep(wchar32 ch)409 int is_bidi_eurosep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 4;}
is_bidi_euroterm(wchar32 ch)410 int is_bidi_euroterm(wchar32 ch){return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 5;}
is_bidi_arabnum(wchar32 ch)411 int is_bidi_arabnum(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 6;}
is_bidi_commsep(wchar32 ch)412 int is_bidi_commsep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 7;}
is_bidi_blocksep(wchar32 ch)413 int is_bidi_blocksep(wchar32 ch){return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 8;}
is_bidi_segmsep(wchar32 ch)414 int is_bidi_segmsep(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 9;}
is_bidi_space(wchar32 ch)415 int is_bidi_space(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 10;}
is_bidi_neutral(wchar32 ch)416 int is_bidi_neutral(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 11;}
is_bidi_notappl(wchar32 ch)417 int is_bidi_notappl(wchar32 ch) {return ((wc_info(ch) >> BIDI_OFFSET) & 15) == 0;}
418
419 // C properties (C1_...)
420
is_space(wchar32 ch)421 bool is_space(wchar32 ch) // == is_whitespace
422 {
423 return is_whitespace(ch);
424 }
is_lower(wchar32 ch)425 bool is_lower(wchar32 ch)
426 {
427 return wc_istype(ch, _(Ll_LOWER));
428 }
is_upper(wchar32 ch)429 bool is_upper(wchar32 ch)
430 {
431 return wc_istype(ch, _(Lu_UPPER));
432 }
is_alpha(wchar32 ch)433 bool is_alpha(wchar32 ch)
434 {
435 return wc_istype(ch,
436 _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|_(Lm_EXTENDER)|
437 _(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
438 _(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)
439 );
440 }
is_alnum(wchar32 ch)441 bool is_alnum(wchar32 ch)
442 {
443 return wc_istype(ch,
444 _(Lu_UPPER)|_(Ll_LOWER)|_(Lt_TITLE)|_(Lm_LETTER)|_(Lm_EXTENDER)|
445 _(Lo_OTHER)|_(Lo_IDEOGRAPH)|_(Lo_KATAKANA)|_(Lo_HIRAGANA)|
446 _(Lo_LEADING)|_(Lo_VOWEL)|_(Lo_TRAILING)|
447 _(Nd_DIGIT)|_(Nl_LETTER)|_(Nl_IDEOGRAPH)|_(No_OTHER)
448 );
449 }
is_punct(wchar32 ch)450 bool is_punct(wchar32 ch)
451 {
452 return wc_istype(ch,
453 _(Pd_DASH)|
454 _(Pd_HYPHEN)|_(Ps_START)|_(Ps_QUOTE)|_(Pe_END)|_(Pe_QUOTE)|_(Pc_CONNECTOR)|
455 _(Po_OTHER)|_(Po_QUOTE)|_(Po_TERMINAL)|_(Po_EXTENDER)|_(Po_HYPHEN)|
456 _(Pi_QUOTE)|_(Pf_QUOTE)
457 );
458 }
is_xdigit(wchar32 ch)459 bool is_xdigit(wchar32 ch) {return is_hexdigit(ch);}
is_digit(wchar32 ch)460 bool is_digit(wchar32 ch) {return is_decdigit(ch);}
is_graph(wchar32 ch)461 bool is_graph(wchar32 ch) {return is_alnum(ch)||is_punct(ch)||is_symbol(ch);}
is_blank(wchar32 ch)462 bool is_blank(wchar32 ch)
463 {
464 return wc_istype(ch, _(Zs_SPACE)|_(Zs_ZWSPACE)) || ch == '\t';
465 }
is_print(wchar32 ch)466 bool is_print(wchar32 ch) {return is_alnum(ch)||is_punct(ch)||is_symbol(ch)||is_blank(ch);}
467
468 #undef _
469
470
// NOTE(review): not referenced in this header; presumably a stand-in code
// point for surrogates in UCS-2 data -- confirm at the use sites.
#define UCS2_SURROGATE_CHAR 0x046C // CAPITAL IOTIFIED BIG YUS
472
473 #endif
474