1 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
2 /* Unicode character classification and properties.
3    Copyright (C) 2002, 2005-2018 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or
6    modify it under the terms of either:
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at your
10        option) any later version.
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at your
16        option) any later version.
17 
18    or both in parallel, as here.
19    This program is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    Lesser General Public License for more details.
23 
24    You should have received a copy of the GNU Lesser General Public License
25    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
26 
27 #ifndef _UNICTYPE_H
28 #define _UNICTYPE_H
29 
30 #include "unitypes.h"
31 
32 /* Get LIBUNISTRING_DLL_VARIABLE.  */
33 #include <unistring/woe32dll.h>
34 
35 /* Get bool.  */
36 #include <unistring/stdbool.h>
37 
38 /* Get size_t.  */
39 #include <stddef.h>
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 
45 /* ========================================================================= */
46 
47 /* Field 1 of Unicode Character Database: Character name.
48    See "uniname.h".  */
49 
50 /* ========================================================================= */
51 
52 /* Field 2 of Unicode Character Database: General category.  */
53 
54 /* Data type denoting a General category value.  This is not just a bitmask,
55    but rather a bitmask and a pointer to the lookup table, so that programs
56    that use only the predefined bitmasks (i.e. don't combine bitmasks with &
57    and |) don't have a link-time dependency towards the big general table.  */
58 typedef struct
59 {
60   uint32_t bitmask : 31;             /* category bits (presumably UC_CATEGORY_MASK_* values - TODO confirm) */
61   /*bool*/ unsigned int generic : 1; /* discriminator: tells which member of 'lookup' is valid */
62   union
63   {
64     const void *table;                               /* when generic is 0 */
65     bool (*lookup_fn) (ucs4_t uc, uint32_t bitmask); /* when generic is 1 */
66   } lookup;                          /* lookup data: an opaque table, or a function taking (uc, bitmask) */
67 }
68 uc_general_category_t;
69 
70 /* Bits and bit masks denoting General category values.  UnicodeData-3.2.0.html
71    says a 32-bit integer will always suffice to represent them.
72    These bit masks can only be used with the uc_is_general_category_withtable
73    function.  A one-letter mask (and LC) is the OR of its subcategory bits.  */
74 enum
75 {
76   UC_CATEGORY_MASK_L  = 0x0000001f, /* Lu | Ll | Lt | Lm | Lo */
77   UC_CATEGORY_MASK_LC = 0x00000007, /* Lu | Ll | Lt */
78   UC_CATEGORY_MASK_Lu = 0x00000001,
79   UC_CATEGORY_MASK_Ll = 0x00000002,
80   UC_CATEGORY_MASK_Lt = 0x00000004,
81   UC_CATEGORY_MASK_Lm = 0x00000008,
82   UC_CATEGORY_MASK_Lo = 0x00000010,
83   UC_CATEGORY_MASK_M  = 0x000000e0, /* Mn | Mc | Me */
84   UC_CATEGORY_MASK_Mn = 0x00000020,
85   UC_CATEGORY_MASK_Mc = 0x00000040,
86   UC_CATEGORY_MASK_Me = 0x00000080,
87   UC_CATEGORY_MASK_N  = 0x00000700, /* Nd | Nl | No */
88   UC_CATEGORY_MASK_Nd = 0x00000100,
89   UC_CATEGORY_MASK_Nl = 0x00000200,
90   UC_CATEGORY_MASK_No = 0x00000400,
91   UC_CATEGORY_MASK_P  = 0x0003f800, /* Pc | Pd | Ps | Pe | Pi | Pf | Po */
92   UC_CATEGORY_MASK_Pc = 0x00000800,
93   UC_CATEGORY_MASK_Pd = 0x00001000,
94   UC_CATEGORY_MASK_Ps = 0x00002000,
95   UC_CATEGORY_MASK_Pe = 0x00004000,
96   UC_CATEGORY_MASK_Pi = 0x00008000,
97   UC_CATEGORY_MASK_Pf = 0x00010000,
98   UC_CATEGORY_MASK_Po = 0x00020000,
99   UC_CATEGORY_MASK_S  = 0x003c0000, /* Sm | Sc | Sk | So */
100   UC_CATEGORY_MASK_Sm = 0x00040000,
101   UC_CATEGORY_MASK_Sc = 0x00080000,
102   UC_CATEGORY_MASK_Sk = 0x00100000,
103   UC_CATEGORY_MASK_So = 0x00200000,
104   UC_CATEGORY_MASK_Z  = 0x01c00000, /* Zs | Zl | Zp */
105   UC_CATEGORY_MASK_Zs = 0x00400000,
106   UC_CATEGORY_MASK_Zl = 0x00800000,
107   UC_CATEGORY_MASK_Zp = 0x01000000,
108   UC_CATEGORY_MASK_C  = 0x3e000000, /* Cc | Cf | Cs | Co | Cn */
109   UC_CATEGORY_MASK_Cc = 0x02000000,
110   UC_CATEGORY_MASK_Cf = 0x04000000,
111   UC_CATEGORY_MASK_Cs = 0x08000000,
112   UC_CATEGORY_MASK_Co = 0x10000000,
113   UC_CATEGORY_MASK_Cn = 0x20000000
114 };
115 
116 /* Predefined General category values, one object per UC_CATEGORY_MASK_* name.  */
117 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_L;
118 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_LC;
119 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lu;
120 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ll;
121 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lt;
122 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lm;
123 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Lo;
124 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_M;
125 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mn;
126 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Mc;
127 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Me;
128 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_N;
129 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nd;
130 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Nl;
131 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_No;
132 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_P;
133 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pc;
134 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pd;
135 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Ps;
136 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pe;
137 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pi;
138 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Pf;
139 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Po;
140 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_S;
141 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sm;
142 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sc;
143 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Sk;
144 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_So;
145 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Z;
146 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zs;
147 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zl;
148 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Zp;
149 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_C;
150 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cc;
151 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cf;
152 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cs;
153 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Co;
154 extern LIBUNISTRING_DLL_VARIABLE const uc_general_category_t UC_CATEGORY_Cn;
155 /* Non-public; unlike the values above, declared without LIBUNISTRING_DLL_VARIABLE.  */
156 extern const uc_general_category_t _UC_CATEGORY_NONE;
157 
158 /* Descriptive alias names for the predefined UC_CATEGORY_xx General category values.  */
159 #define UC_LETTER                    UC_CATEGORY_L
160 #define UC_CASED_LETTER              UC_CATEGORY_LC
161 #define UC_UPPERCASE_LETTER          UC_CATEGORY_Lu
162 #define UC_LOWERCASE_LETTER          UC_CATEGORY_Ll
163 #define UC_TITLECASE_LETTER          UC_CATEGORY_Lt
164 #define UC_MODIFIER_LETTER           UC_CATEGORY_Lm
165 #define UC_OTHER_LETTER              UC_CATEGORY_Lo
166 #define UC_MARK                      UC_CATEGORY_M
167 #define UC_NON_SPACING_MARK          UC_CATEGORY_Mn
168 #define UC_COMBINING_SPACING_MARK    UC_CATEGORY_Mc
169 #define UC_ENCLOSING_MARK            UC_CATEGORY_Me
170 #define UC_NUMBER                    UC_CATEGORY_N
171 #define UC_DECIMAL_DIGIT_NUMBER      UC_CATEGORY_Nd
172 #define UC_LETTER_NUMBER             UC_CATEGORY_Nl
173 #define UC_OTHER_NUMBER              UC_CATEGORY_No
174 #define UC_PUNCTUATION               UC_CATEGORY_P
175 #define UC_CONNECTOR_PUNCTUATION     UC_CATEGORY_Pc
176 #define UC_DASH_PUNCTUATION          UC_CATEGORY_Pd
177 #define UC_OPEN_PUNCTUATION          UC_CATEGORY_Ps /* a.k.a. UC_START_PUNCTUATION */
178 #define UC_CLOSE_PUNCTUATION         UC_CATEGORY_Pe /* a.k.a. UC_END_PUNCTUATION */
179 #define UC_INITIAL_QUOTE_PUNCTUATION UC_CATEGORY_Pi
180 #define UC_FINAL_QUOTE_PUNCTUATION   UC_CATEGORY_Pf
181 #define UC_OTHER_PUNCTUATION         UC_CATEGORY_Po
182 #define UC_SYMBOL                    UC_CATEGORY_S
183 #define UC_MATH_SYMBOL               UC_CATEGORY_Sm
184 #define UC_CURRENCY_SYMBOL           UC_CATEGORY_Sc
185 #define UC_MODIFIER_SYMBOL           UC_CATEGORY_Sk
186 #define UC_OTHER_SYMBOL              UC_CATEGORY_So
187 #define UC_SEPARATOR                 UC_CATEGORY_Z
188 #define UC_SPACE_SEPARATOR           UC_CATEGORY_Zs
189 #define UC_LINE_SEPARATOR            UC_CATEGORY_Zl
190 #define UC_PARAGRAPH_SEPARATOR       UC_CATEGORY_Zp
191 #define UC_OTHER                     UC_CATEGORY_C
192 #define UC_CONTROL                   UC_CATEGORY_Cc
193 #define UC_FORMAT                    UC_CATEGORY_Cf
194 #define UC_SURROGATE                 UC_CATEGORY_Cs /* all of them are invalid characters */
195 #define UC_PRIVATE_USE               UC_CATEGORY_Co
196 #define UC_UNASSIGNED                UC_CATEGORY_Cn /* some of them are invalid characters */
197 
198 /* Return the union of the two general categories CATEGORY1 and CATEGORY2.
199    This corresponds to the union of the two sets of characters.  */
200 extern uc_general_category_t
201        uc_general_category_or (uc_general_category_t category1,
202                                uc_general_category_t category2);
203 
204 /* Return the intersection of CATEGORY1 and CATEGORY2, taken as bit masks.
205    This *does*not* correspond to the intersection of the two sets of
206    characters.  */
207 extern uc_general_category_t
208        uc_general_category_and (uc_general_category_t category1,
209                                 uc_general_category_t category2);
210 
211 /* Return the intersection of CATEGORY1 with the complement of CATEGORY2,
212    both taken as bit masks.
213    This *does*not* correspond to the intersection with complement, when
214    viewing the categories as sets of characters.  */
215 extern uc_general_category_t
216        uc_general_category_and_not (uc_general_category_t category1,
217                                     uc_general_category_t category2);
218 
219 /* Return the name of the general category CATEGORY.  */
220 extern const char *
221        uc_general_category_name (uc_general_category_t category)
222        _UC_ATTRIBUTE_PURE;
223 
224 /* Return the long name of the general category CATEGORY.  */
225 extern const char *
226        uc_general_category_long_name (uc_general_category_t category)
227        _UC_ATTRIBUTE_PURE;
228 
229 /* Return the general category named CATEGORY_NAME, which is either a name,
230    e.g. "Lu", or a long name, e.g. "Uppercase Letter".  (Result for an unknown name: not stated here - TODO confirm.)  */
231 extern uc_general_category_t
232        uc_general_category_byname (const char *category_name)
233        _UC_ATTRIBUTE_PURE;
234 
235 /* Return the general category of the Unicode character UC.  */
236 extern uc_general_category_t
237        uc_general_category (ucs4_t uc)
238        _UC_ATTRIBUTE_PURE;
239 
240 /* Test whether the Unicode character UC belongs to the category CATEGORY.
241    The CATEGORY argument can be the combination of several predefined
242    general categories.  */
243 extern bool
244        uc_is_general_category (ucs4_t uc, uc_general_category_t category)
245        _UC_ATTRIBUTE_PURE;
246 /* Likewise, with BITMASK built from UC_CATEGORY_MASK_* bits.  This function uses a big table comprising all categories.  */
247 extern bool
248        uc_is_general_category_withtable (ucs4_t uc, uint32_t bitmask)
249        _UC_ATTRIBUTE_CONST;
250 
251 /* ========================================================================= */
252 
253 /* Field 3 of Unicode Character Database: Canonical combining class.  */
254 
255 /* The possible results of uc_combining_class (0..255) are described in
256    UCD.html.  The list here is not definitive; more values can be added
257    in future versions.  Each comment below gives the class's long name.  */
258 enum
259 {
260   UC_CCC_NR   =   0, /* Not Reordered */
261   UC_CCC_OV   =   1, /* Overlay */
262   UC_CCC_NK   =   7, /* Nukta */
263   UC_CCC_KV   =   8, /* Kana Voicing */
264   UC_CCC_VR   =   9, /* Virama */
265   UC_CCC_ATBL = 200, /* Attached Below Left */
266   UC_CCC_ATB  = 202, /* Attached Below */
267   UC_CCC_ATA  = 214, /* Attached Above */
268   UC_CCC_ATAR = 216, /* Attached Above Right */
269   UC_CCC_BL   = 218, /* Below Left */
270   UC_CCC_B    = 220, /* Below */
271   UC_CCC_BR   = 222, /* Below Right */
272   UC_CCC_L    = 224, /* Left */
273   UC_CCC_R    = 226, /* Right */
274   UC_CCC_AL   = 228, /* Above Left */
275   UC_CCC_A    = 230, /* Above */
276   UC_CCC_AR   = 232, /* Above Right */
277   UC_CCC_DB   = 233, /* Double Below */
278   UC_CCC_DA   = 234, /* Double Above */
279   UC_CCC_IS   = 240  /* Iota Subscript */
280 };
281 
282 /* Return the canonical combining class of the Unicode character UC.  */
283 extern int
284        uc_combining_class (ucs4_t uc)
285        _UC_ATTRIBUTE_CONST;
286 
287 /* Return the name of the canonical combining class CCC.  */
288 extern const char *
289        uc_combining_class_name (int ccc)
290        _UC_ATTRIBUTE_CONST;
291 
292 /* Return the long name of the canonical combining class CCC.  */
293 extern const char *
294        uc_combining_class_long_name (int ccc)
295        _UC_ATTRIBUTE_CONST;
296 
297 /* Return the canonical combining class named CCC_NAME, either a name, e.g. "BL", or
298    a long name, e.g. "Below Left".  (Result for an unknown name: not stated here - TODO confirm.)  */
299 extern int
300        uc_combining_class_byname (const char *ccc_name)
301        _UC_ATTRIBUTE_PURE;
302 
303 /* ========================================================================= */
304 
305 /* Field 4 of Unicode Character Database: Bidi class.
306    Before Unicode 4.0, this field was called "Bidirectional category".  */
307 
308 enum /* Bidi class values, implicitly numbered from UC_BIDI_L = 0 to UC_BIDI_PDI = 22.  */
309 {
310   UC_BIDI_L,   /* Left-to-Right */
311   UC_BIDI_LRE, /* Left-to-Right Embedding */
312   UC_BIDI_LRO, /* Left-to-Right Override */
313   UC_BIDI_R,   /* Right-to-Left */
314   UC_BIDI_AL,  /* Right-to-Left Arabic */
315   UC_BIDI_RLE, /* Right-to-Left Embedding */
316   UC_BIDI_RLO, /* Right-to-Left Override */
317   UC_BIDI_PDF, /* Pop Directional Format */
318   UC_BIDI_EN,  /* European Number */
319   UC_BIDI_ES,  /* European Number Separator */
320   UC_BIDI_ET,  /* European Number Terminator */
321   UC_BIDI_AN,  /* Arabic Number */
322   UC_BIDI_CS,  /* Common Number Separator */
323   UC_BIDI_NSM, /* Non-Spacing Mark */
324   UC_BIDI_BN,  /* Boundary Neutral */
325   UC_BIDI_B,   /* Paragraph Separator */
326   UC_BIDI_S,   /* Segment Separator */
327   UC_BIDI_WS,  /* Whitespace */
328   UC_BIDI_ON,  /* Other Neutral */
329   UC_BIDI_LRI, /* Left-to-Right Isolate */
330   UC_BIDI_RLI, /* Right-to-Left Isolate */
331   UC_BIDI_FSI, /* First Strong Isolate */
332   UC_BIDI_PDI  /* Pop Directional Isolate */
333 };
334 
335 /* Return the name of the bidi class BIDI_CLASS.  */
336 extern const char *
337        uc_bidi_class_name (int bidi_class)
338        _UC_ATTRIBUTE_CONST;
339 /* Same; obsolete function name.  */
340 extern const char *
341        uc_bidi_category_name (int category)
342        _UC_ATTRIBUTE_CONST;
343 
344 /* Return the long name of the bidi class BIDI_CLASS.  */
345 extern const char *
346        uc_bidi_class_long_name (int bidi_class)
347        _UC_ATTRIBUTE_CONST;
348 
349 /* Return the bidi class named BIDI_CLASS_NAME, either a name, e.g. "LRE", or a long name, e.g.
350    "Left-to-Right Embedding".  (Result for an unknown name: not stated here - TODO confirm.)  */
351 extern int
352        uc_bidi_class_byname (const char *bidi_class_name)
353        _UC_ATTRIBUTE_PURE;
354 /* Same; obsolete function name.  */
355 extern int
356        uc_bidi_category_byname (const char *category_name)
357        _UC_ATTRIBUTE_PURE;
358 
359 /* Return the bidi class of the Unicode character UC.  */
360 extern int
361        uc_bidi_class (ucs4_t uc)
362        _UC_ATTRIBUTE_CONST;
363 /* Same; obsolete function name.  */
364 extern int
365        uc_bidi_category (ucs4_t uc)
366        _UC_ATTRIBUTE_CONST;
367 
368 /* Test whether the Unicode character UC belongs to the bidi class BIDI_CLASS.  */
369 extern bool
370        uc_is_bidi_class (ucs4_t uc, int bidi_class)
371        _UC_ATTRIBUTE_CONST;
372 /* Same; obsolete function name.  */
373 extern bool
374        uc_is_bidi_category (ucs4_t uc, int category)
375        _UC_ATTRIBUTE_CONST;
376 
377 /* ========================================================================= */
378 
379 /* Field 5 of Unicode Character Database: Character decomposition mapping.
380    See "uninorm.h".  */
381 
382 /* ========================================================================= */
383 
384 /* Field 6 of Unicode Character Database: Decimal digit value.  */
385 
386 /* Return the decimal digit value of the Unicode character UC.  (Result when UC has no such value: not stated here - TODO confirm.)  */
387 extern int
388        uc_decimal_value (ucs4_t uc)
389        _UC_ATTRIBUTE_CONST;
390 
391 /* ========================================================================= */
392 
393 /* Field 7 of Unicode Character Database: Digit value.  */
394 
395 /* Return the digit value of the Unicode character UC.  (Result when UC has no such value: not stated here - TODO confirm.)  */
396 extern int
397        uc_digit_value (ucs4_t uc)
398        _UC_ATTRIBUTE_CONST;
399 
400 /* ========================================================================= */
401 
402 /* Field 8 of Unicode Character Database: Numeric value.  */
403 
404 /* Fraction type (a numerator and a denominator), and the function that returns the numeric value of the Unicode character UC as such a fraction.  */
405 typedef struct
406 {
407   int numerator;
408   int denominator;
409 }
410 uc_fraction_t;
411 extern uc_fraction_t
412        uc_numeric_value (ucs4_t uc)
413        _UC_ATTRIBUTE_CONST;
414 
415 /* ========================================================================= */
416 
417 /* Field 9 of Unicode Character Database: Mirrored.  */
418 
419 /* Store the mirrored character of the Unicode character UC in *PUC.  NOTE(review): the meaning of the bool result is not stated here - presumably whether UC has a mirrored counterpart; confirm.  */
420 extern bool
421        uc_mirror_char (ucs4_t uc, ucs4_t *puc);
422 
423 /* ========================================================================= */
424 
425 /* Field 10 of Unicode Character Database: Unicode 1.0 Name.
426    Not available in this library.  */
427 
428 /* ========================================================================= */
429 
430 /* Field 11 of Unicode Character Database: ISO 10646 comment.
431    Not available in this library.  */
432 
433 /* ========================================================================= */
434 
435 /* Fields 12, 13, 14 of Unicode Character Database: Uppercase mapping,
436    lowercase mapping, titlecase mapping.  See "unicase.h".  */
437 
438 /* ========================================================================= */
439 
440 /* Field 2 of the file ArabicShaping.txt in the Unicode Character Database.  */
441 
442 /* Possible joining types, implicitly numbered from UC_JOINING_TYPE_U = 0 to UC_JOINING_TYPE_D = 5.  */
443 enum
444 {
445   UC_JOINING_TYPE_U, /* Non_Joining */
446   UC_JOINING_TYPE_T, /* Transparent */
447   UC_JOINING_TYPE_C, /* Join_Causing */
448   UC_JOINING_TYPE_L, /* Left_Joining */
449   UC_JOINING_TYPE_R, /* Right_Joining */
450   UC_JOINING_TYPE_D  /* Dual_Joining */
451 };
452 
453 /* Return the name of the joining type JOINING_TYPE.  */
454 extern const char *
455        uc_joining_type_name (int joining_type)
456        _UC_ATTRIBUTE_CONST;
457 
458 /* Return the long name of the joining type JOINING_TYPE.  */
459 extern const char *
460        uc_joining_type_long_name (int joining_type)
461        _UC_ATTRIBUTE_CONST;
462 
463 /* Return the joining type named JOINING_TYPE_NAME, either a name, e.g. "D", or a long name, e.g.
464    "Dual Joining".  (Result for an unknown name: not stated here - TODO confirm.)  */
465 extern int
466        uc_joining_type_byname (const char *joining_type_name)
467        _UC_ATTRIBUTE_PURE;
468 
469 /* Return the joining type of the Unicode character UC.  */
470 extern int
471        uc_joining_type (ucs4_t uc)
472        _UC_ATTRIBUTE_CONST;
473 
474 /* ========================================================================= */
475 
476 /* Field 3 of the file ArabicShaping.txt in the Unicode Character Database.  */
477 
478 /* Possible joining groups.
479    This enumeration may be extended in the future.  Values count up from UC_JOINING_GROUP_NONE = 0.  */
480 enum
481 {
482   UC_JOINING_GROUP_NONE,                  /* No_Joining_Group */
483   UC_JOINING_GROUP_AIN,                   /* Ain */
484   UC_JOINING_GROUP_ALAPH,                 /* Alaph */
485   UC_JOINING_GROUP_ALEF,                  /* Alef */
486   UC_JOINING_GROUP_BEH,                   /* Beh */
487   UC_JOINING_GROUP_BETH,                  /* Beth */
488   UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE, /* Burushaski_Yeh_Barree */
489   UC_JOINING_GROUP_DAL,                   /* Dal */
490   UC_JOINING_GROUP_DALATH_RISH,           /* Dalath_Rish */
491   UC_JOINING_GROUP_E,                     /* E */
492   UC_JOINING_GROUP_FARSI_YEH,             /* Farsi_Yeh */
493   UC_JOINING_GROUP_FE,                    /* Fe */
494   UC_JOINING_GROUP_FEH,                   /* Feh */
495   UC_JOINING_GROUP_FINAL_SEMKATH,         /* Final_Semkath */
496   UC_JOINING_GROUP_GAF,                   /* Gaf */
497   UC_JOINING_GROUP_GAMAL,                 /* Gamal */
498   UC_JOINING_GROUP_HAH,                   /* Hah */
499   UC_JOINING_GROUP_HE,                    /* He */
500   UC_JOINING_GROUP_HEH,                   /* Heh */
501   UC_JOINING_GROUP_HEH_GOAL,              /* Heh_Goal */
502   UC_JOINING_GROUP_HETH,                  /* Heth */
503   UC_JOINING_GROUP_KAF,                   /* Kaf */
504   UC_JOINING_GROUP_KAPH,                  /* Kaph */
505   UC_JOINING_GROUP_KHAPH,                 /* Khaph */
506   UC_JOINING_GROUP_KNOTTED_HEH,           /* Knotted_Heh */
507   UC_JOINING_GROUP_LAM,                   /* Lam */
508   UC_JOINING_GROUP_LAMADH,                /* Lamadh */
509   UC_JOINING_GROUP_MEEM,                  /* Meem */
510   UC_JOINING_GROUP_MIM,                   /* Mim */
511   UC_JOINING_GROUP_NOON,                  /* Noon */
512   UC_JOINING_GROUP_NUN,                   /* Nun */
513   UC_JOINING_GROUP_NYA,                   /* Nya */
514   UC_JOINING_GROUP_PE,                    /* Pe */
515   UC_JOINING_GROUP_QAF,                   /* Qaf */
516   UC_JOINING_GROUP_QAPH,                  /* Qaph */
517   UC_JOINING_GROUP_REH,                   /* Reh */
518   UC_JOINING_GROUP_REVERSED_PE,           /* Reversed_Pe */
519   UC_JOINING_GROUP_SAD,                   /* Sad */
520   UC_JOINING_GROUP_SADHE,                 /* Sadhe */
521   UC_JOINING_GROUP_SEEN,                  /* Seen */
522   UC_JOINING_GROUP_SEMKATH,               /* Semkath */
523   UC_JOINING_GROUP_SHIN,                  /* Shin */
524   UC_JOINING_GROUP_SWASH_KAF,             /* Swash_Kaf */
525   UC_JOINING_GROUP_SYRIAC_WAW,            /* Syriac_Waw */
526   UC_JOINING_GROUP_TAH,                   /* Tah */
527   UC_JOINING_GROUP_TAW,                   /* Taw */
528   UC_JOINING_GROUP_TEH_MARBUTA,           /* Teh_Marbuta */
529   UC_JOINING_GROUP_TEH_MARBUTA_GOAL,      /* Teh_Marbuta_Goal */
530   UC_JOINING_GROUP_TETH,                  /* Teth */
531   UC_JOINING_GROUP_WAW,                   /* Waw */
532   UC_JOINING_GROUP_YEH,                   /* Yeh */
533   UC_JOINING_GROUP_YEH_BARREE,            /* Yeh_Barree */
534   UC_JOINING_GROUP_YEH_WITH_TAIL,         /* Yeh_With_Tail */
535   UC_JOINING_GROUP_YUDH,                  /* Yudh */
536   UC_JOINING_GROUP_YUDH_HE,               /* Yudh_He */
537   UC_JOINING_GROUP_ZAIN,                  /* Zain */
538   UC_JOINING_GROUP_ZHAIN,                 /* Zhain */
539   UC_JOINING_GROUP_ROHINGYA_YEH,          /* Rohingya_Yeh */
540   UC_JOINING_GROUP_STRAIGHT_WAW,          /* Straight_Waw */
541   UC_JOINING_GROUP_MANICHAEAN_ALEPH,      /* Manichaean_Aleph */
542   UC_JOINING_GROUP_MANICHAEAN_BETH,       /* Manichaean_Beth */
543   UC_JOINING_GROUP_MANICHAEAN_GIMEL,      /* Manichaean_Gimel */
544   UC_JOINING_GROUP_MANICHAEAN_DALETH,     /* Manichaean_Daleth */
545   UC_JOINING_GROUP_MANICHAEAN_WAW,        /* Manichaean_Waw */
546   UC_JOINING_GROUP_MANICHAEAN_ZAYIN,      /* Manichaean_Zayin */
547   UC_JOINING_GROUP_MANICHAEAN_HETH,       /* Manichaean_Heth */
548   UC_JOINING_GROUP_MANICHAEAN_TETH,       /* Manichaean_Teth */
549   UC_JOINING_GROUP_MANICHAEAN_YODH,       /* Manichaean_Yodh */
550   UC_JOINING_GROUP_MANICHAEAN_KAPH,       /* Manichaean_Kaph */
551   UC_JOINING_GROUP_MANICHAEAN_LAMEDH,     /* Manichaean_Lamedh */
552   UC_JOINING_GROUP_MANICHAEAN_DHAMEDH,    /* Manichaean_Dhamedh */
553   UC_JOINING_GROUP_MANICHAEAN_THAMEDH,    /* Manichaean_Thamedh */
554   UC_JOINING_GROUP_MANICHAEAN_MEM,        /* Manichaean_Mem */
555   UC_JOINING_GROUP_MANICHAEAN_NUN,        /* Manichaean_Nun */
556   UC_JOINING_GROUP_MANICHAEAN_SAMEKH,     /* Manichaean_Samekh */
557   UC_JOINING_GROUP_MANICHAEAN_AYIN,       /* Manichaean_Ayin */
558   UC_JOINING_GROUP_MANICHAEAN_PE,         /* Manichaean_Pe */
559   UC_JOINING_GROUP_MANICHAEAN_SADHE,      /* Manichaean_Sadhe */
560   UC_JOINING_GROUP_MANICHAEAN_QOPH,       /* Manichaean_Qoph */
561   UC_JOINING_GROUP_MANICHAEAN_RESH,       /* Manichaean_Resh */
562   UC_JOINING_GROUP_MANICHAEAN_TAW,        /* Manichaean_Taw */
563   UC_JOINING_GROUP_MANICHAEAN_ONE,        /* Manichaean_One */
564   UC_JOINING_GROUP_MANICHAEAN_FIVE,       /* Manichaean_Five */
565   UC_JOINING_GROUP_MANICHAEAN_TEN,        /* Manichaean_Ten */
566   UC_JOINING_GROUP_MANICHAEAN_TWENTY,     /* Manichaean_Twenty */
567   UC_JOINING_GROUP_MANICHAEAN_HUNDRED,    /* Manichaean_Hundred */
568   UC_JOINING_GROUP_AFRICAN_FEH,           /* African_Feh */
569   UC_JOINING_GROUP_AFRICAN_QAF,           /* African_Qaf */
570   UC_JOINING_GROUP_AFRICAN_NOON           /* African_Noon */
571 };
572 
573 /* Return the name of the joining group JOINING_GROUP.  */
574 extern const char *
575        uc_joining_group_name (int joining_group)
576        _UC_ATTRIBUTE_CONST;
577 
578 /* Return the joining group named JOINING_GROUP_NAME, e.g. "Teh_Marbuta".  (Result for an unknown name: not stated here - TODO confirm.)  */
579 extern int
580        uc_joining_group_byname (const char *joining_group_name)
581        _UC_ATTRIBUTE_PURE;
582 
583 /* Return the joining group of the Unicode character UC.  */
584 extern int
585        uc_joining_group (ucs4_t uc)
586        _UC_ATTRIBUTE_CONST;
587 
588 /* ========================================================================= */
589 
590 /* Common API for properties.  */
591 
592 /* Data type denoting a property.  This is not just a number, but rather a
593    pointer to the test functions, so that programs that use only few of the
594    properties don't have a link-time dependency towards all the tables.  */
595 typedef struct
596 {
597   bool (*test_fn) (ucs4_t uc); /* test function for this property, called with a character UC */
598 }
599 uc_property_t;
600 
601 /* Predefined properties.  */
602 /* General.  */
603 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_WHITE_SPACE;
604 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ALPHABETIC;
605 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ALPHABETIC;
606 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NOT_A_CHARACTER;
607 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT;
608 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
609 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DEPRECATED;
610 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOGICAL_ORDER_EXCEPTION;
611 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_VARIATION_SELECTOR;
612 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PRIVATE_USE;
613 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNASSIGNED_CODE_VALUE;
614 /* Case.  */
615 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UPPERCASE;
616 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_UPPERCASE;
617 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LOWERCASE;
618 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_LOWERCASE;
619 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TITLECASE;
620 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASED;
621 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CASE_IGNORABLE;
622 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_LOWERCASED;
623 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_UPPERCASED;
624 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_TITLECASED;
625 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEFOLDED;
626 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CHANGES_WHEN_CASEMAPPED;
627 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SOFT_DOTTED;
628 /* Identifiers.  */
629 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_START;
630 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_START;
631 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ID_CONTINUE;
632 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_ID_CONTINUE;
633 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_START;
634 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_XID_CONTINUE;
635 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_WHITE_SPACE;
636 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PATTERN_SYNTAX;
637 /* Shaping and rendering.  */
638 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_JOIN_CONTROL;
639 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_BASE;
640 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_EXTEND;
641 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_GRAPHEME_EXTEND;
642 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_GRAPHEME_LINK;
643 /* Bidi.  */
644 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_CONTROL;
645 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_LEFT_TO_RIGHT;
646 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT;
647 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT;
648 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUROPEAN_DIGIT;
649 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR;
650 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR;
651 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_ARABIC_DIGIT;
652 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_COMMON_SEPARATOR;
653 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BLOCK_SEPARATOR;
654 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_SEGMENT_SEPARATOR;
655 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_WHITESPACE;
656 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_NON_SPACING_MARK;
657 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL;
658 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_PDF;
659 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE;
660 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_BIDI_OTHER_NEUTRAL;
661 /* Numeric.  */
662 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HEX_DIGIT;
663 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ASCII_HEX_DIGIT;
664 /* CJK.  */
665 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDEOGRAPHIC;
666 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_UNIFIED_IDEOGRAPH;
667 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_RADICAL;
668 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_BINARY_OPERATOR;
669 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IDS_TRINARY_OPERATOR;
670 /* Misc.  */
671 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ZERO_WIDTH;
672 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SPACE;
673 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NON_BREAK;
674 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_ISO_CONTROL;
675 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_FORMAT_CONTROL;
676 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DASH;
677 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_HYPHEN;
678 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PUNCTUATION;
679 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LINE_SEPARATOR;
680 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PARAGRAPH_SEPARATOR;
681 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_QUOTATION_MARK;
682 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_SENTENCE_TERMINAL;
683 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_TERMINAL_PUNCTUATION;
684 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_CURRENCY_SYMBOL;
685 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_MATH;
686 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_OTHER_MATH;
687 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_PAIRED_PUNCTUATION;
688 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_LEFT_OF_PAIR;
689 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMBINING;
690 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_COMPOSITE;
691 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DECIMAL_DIGIT;
692 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_NUMERIC;
693 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_DIACRITIC;
694 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_EXTENDER;
695 extern LIBUNISTRING_DLL_VARIABLE const uc_property_t UC_PROPERTY_IGNORABLE_CONTROL;
696 
697 /* Return the property given by name, e.g. "White space".  */
698 extern uc_property_t
699        uc_property_byname (const char *property_name);
700 
/* Evaluate to nonzero if PROPERTY is valid, that is, if its test_fn member
   is not NULL.  PROPERTY is evaluated exactly once.  */
#define uc_property_is_valid(property) \
  ((property).test_fn != NULL)
703 
704 /* Test whether a Unicode character has a given property.  */
705 extern bool
706        uc_is_property (ucs4_t uc, uc_property_t property);
707 extern bool uc_is_property_white_space (ucs4_t uc)
708        _UC_ATTRIBUTE_CONST;
709 extern bool uc_is_property_alphabetic (ucs4_t uc)
710        _UC_ATTRIBUTE_CONST;
711 extern bool uc_is_property_other_alphabetic (ucs4_t uc)
712        _UC_ATTRIBUTE_CONST;
713 extern bool uc_is_property_not_a_character (ucs4_t uc)
714        _UC_ATTRIBUTE_CONST;
715 extern bool uc_is_property_default_ignorable_code_point (ucs4_t uc)
716        _UC_ATTRIBUTE_CONST;
717 extern bool uc_is_property_other_default_ignorable_code_point (ucs4_t uc)
718        _UC_ATTRIBUTE_CONST;
719 extern bool uc_is_property_deprecated (ucs4_t uc)
720        _UC_ATTRIBUTE_CONST;
721 extern bool uc_is_property_logical_order_exception (ucs4_t uc)
722        _UC_ATTRIBUTE_CONST;
723 extern bool uc_is_property_variation_selector (ucs4_t uc)
724        _UC_ATTRIBUTE_CONST;
725 extern bool uc_is_property_private_use (ucs4_t uc)
726        _UC_ATTRIBUTE_CONST;
727 extern bool uc_is_property_unassigned_code_value (ucs4_t uc)
728        _UC_ATTRIBUTE_CONST;
729 extern bool uc_is_property_uppercase (ucs4_t uc)
730        _UC_ATTRIBUTE_CONST;
731 extern bool uc_is_property_other_uppercase (ucs4_t uc)
732        _UC_ATTRIBUTE_CONST;
733 extern bool uc_is_property_lowercase (ucs4_t uc)
734        _UC_ATTRIBUTE_CONST;
735 extern bool uc_is_property_other_lowercase (ucs4_t uc)
736        _UC_ATTRIBUTE_CONST;
737 extern bool uc_is_property_titlecase (ucs4_t uc)
738        _UC_ATTRIBUTE_CONST;
739 extern bool uc_is_property_cased (ucs4_t uc)
740        _UC_ATTRIBUTE_CONST;
741 extern bool uc_is_property_case_ignorable (ucs4_t uc)
742        _UC_ATTRIBUTE_CONST;
743 extern bool uc_is_property_changes_when_lowercased (ucs4_t uc)
744        _UC_ATTRIBUTE_CONST;
745 extern bool uc_is_property_changes_when_uppercased (ucs4_t uc)
746        _UC_ATTRIBUTE_CONST;
747 extern bool uc_is_property_changes_when_titlecased (ucs4_t uc)
748        _UC_ATTRIBUTE_CONST;
749 extern bool uc_is_property_changes_when_casefolded (ucs4_t uc)
750        _UC_ATTRIBUTE_CONST;
751 extern bool uc_is_property_changes_when_casemapped (ucs4_t uc)
752        _UC_ATTRIBUTE_CONST;
753 extern bool uc_is_property_soft_dotted (ucs4_t uc)
754        _UC_ATTRIBUTE_CONST;
755 extern bool uc_is_property_id_start (ucs4_t uc)
756        _UC_ATTRIBUTE_CONST;
757 extern bool uc_is_property_other_id_start (ucs4_t uc)
758        _UC_ATTRIBUTE_CONST;
759 extern bool uc_is_property_id_continue (ucs4_t uc)
760        _UC_ATTRIBUTE_CONST;
761 extern bool uc_is_property_other_id_continue (ucs4_t uc)
762        _UC_ATTRIBUTE_CONST;
763 extern bool uc_is_property_xid_start (ucs4_t uc)
764        _UC_ATTRIBUTE_CONST;
765 extern bool uc_is_property_xid_continue (ucs4_t uc)
766        _UC_ATTRIBUTE_CONST;
767 extern bool uc_is_property_pattern_white_space (ucs4_t uc)
768        _UC_ATTRIBUTE_CONST;
769 extern bool uc_is_property_pattern_syntax (ucs4_t uc)
770        _UC_ATTRIBUTE_CONST;
771 extern bool uc_is_property_join_control (ucs4_t uc)
772        _UC_ATTRIBUTE_CONST;
773 extern bool uc_is_property_grapheme_base (ucs4_t uc)
774        _UC_ATTRIBUTE_CONST;
775 extern bool uc_is_property_grapheme_extend (ucs4_t uc)
776        _UC_ATTRIBUTE_CONST;
777 extern bool uc_is_property_other_grapheme_extend (ucs4_t uc)
778        _UC_ATTRIBUTE_CONST;
779 extern bool uc_is_property_grapheme_link (ucs4_t uc)
780        _UC_ATTRIBUTE_CONST;
781 extern bool uc_is_property_bidi_control (ucs4_t uc)
782        _UC_ATTRIBUTE_CONST;
783 extern bool uc_is_property_bidi_left_to_right (ucs4_t uc)
784        _UC_ATTRIBUTE_CONST;
785 extern bool uc_is_property_bidi_hebrew_right_to_left (ucs4_t uc)
786        _UC_ATTRIBUTE_CONST;
787 extern bool uc_is_property_bidi_arabic_right_to_left (ucs4_t uc)
788        _UC_ATTRIBUTE_CONST;
789 extern bool uc_is_property_bidi_european_digit (ucs4_t uc)
790        _UC_ATTRIBUTE_CONST;
791 extern bool uc_is_property_bidi_eur_num_separator (ucs4_t uc)
792        _UC_ATTRIBUTE_CONST;
793 extern bool uc_is_property_bidi_eur_num_terminator (ucs4_t uc)
794        _UC_ATTRIBUTE_CONST;
795 extern bool uc_is_property_bidi_arabic_digit (ucs4_t uc)
796        _UC_ATTRIBUTE_CONST;
797 extern bool uc_is_property_bidi_common_separator (ucs4_t uc)
798        _UC_ATTRIBUTE_CONST;
799 extern bool uc_is_property_bidi_block_separator (ucs4_t uc)
800        _UC_ATTRIBUTE_CONST;
801 extern bool uc_is_property_bidi_segment_separator (ucs4_t uc)
802        _UC_ATTRIBUTE_CONST;
803 extern bool uc_is_property_bidi_whitespace (ucs4_t uc)
804        _UC_ATTRIBUTE_CONST;
805 extern bool uc_is_property_bidi_non_spacing_mark (ucs4_t uc)
806        _UC_ATTRIBUTE_CONST;
807 extern bool uc_is_property_bidi_boundary_neutral (ucs4_t uc)
808        _UC_ATTRIBUTE_CONST;
809 extern bool uc_is_property_bidi_pdf (ucs4_t uc)
810        _UC_ATTRIBUTE_CONST;
811 extern bool uc_is_property_bidi_embedding_or_override (ucs4_t uc)
812        _UC_ATTRIBUTE_CONST;
813 extern bool uc_is_property_bidi_other_neutral (ucs4_t uc)
814        _UC_ATTRIBUTE_CONST;
815 extern bool uc_is_property_hex_digit (ucs4_t uc)
816        _UC_ATTRIBUTE_CONST;
817 extern bool uc_is_property_ascii_hex_digit (ucs4_t uc)
818        _UC_ATTRIBUTE_CONST;
819 extern bool uc_is_property_ideographic (ucs4_t uc)
820        _UC_ATTRIBUTE_CONST;
821 extern bool uc_is_property_unified_ideograph (ucs4_t uc)
822        _UC_ATTRIBUTE_CONST;
823 extern bool uc_is_property_radical (ucs4_t uc)
824        _UC_ATTRIBUTE_CONST;
825 extern bool uc_is_property_ids_binary_operator (ucs4_t uc)
826        _UC_ATTRIBUTE_CONST;
827 extern bool uc_is_property_ids_trinary_operator (ucs4_t uc)
828        _UC_ATTRIBUTE_CONST;
829 extern bool uc_is_property_zero_width (ucs4_t uc)
830        _UC_ATTRIBUTE_CONST;
831 extern bool uc_is_property_space (ucs4_t uc)
832        _UC_ATTRIBUTE_CONST;
833 extern bool uc_is_property_non_break (ucs4_t uc)
834        _UC_ATTRIBUTE_CONST;
835 extern bool uc_is_property_iso_control (ucs4_t uc)
836        _UC_ATTRIBUTE_CONST;
837 extern bool uc_is_property_format_control (ucs4_t uc)
838        _UC_ATTRIBUTE_CONST;
839 extern bool uc_is_property_dash (ucs4_t uc)
840        _UC_ATTRIBUTE_CONST;
841 extern bool uc_is_property_hyphen (ucs4_t uc)
842        _UC_ATTRIBUTE_CONST;
843 extern bool uc_is_property_punctuation (ucs4_t uc)
844        _UC_ATTRIBUTE_CONST;
845 extern bool uc_is_property_line_separator (ucs4_t uc)
846        _UC_ATTRIBUTE_CONST;
847 extern bool uc_is_property_paragraph_separator (ucs4_t uc)
848        _UC_ATTRIBUTE_CONST;
849 extern bool uc_is_property_quotation_mark (ucs4_t uc)
850        _UC_ATTRIBUTE_CONST;
851 extern bool uc_is_property_sentence_terminal (ucs4_t uc)
852        _UC_ATTRIBUTE_CONST;
853 extern bool uc_is_property_terminal_punctuation (ucs4_t uc)
854        _UC_ATTRIBUTE_CONST;
855 extern bool uc_is_property_currency_symbol (ucs4_t uc)
856        _UC_ATTRIBUTE_CONST;
857 extern bool uc_is_property_math (ucs4_t uc)
858        _UC_ATTRIBUTE_CONST;
859 extern bool uc_is_property_other_math (ucs4_t uc)
860        _UC_ATTRIBUTE_CONST;
861 extern bool uc_is_property_paired_punctuation (ucs4_t uc)
862        _UC_ATTRIBUTE_CONST;
863 extern bool uc_is_property_left_of_pair (ucs4_t uc)
864        _UC_ATTRIBUTE_CONST;
865 extern bool uc_is_property_combining (ucs4_t uc)
866        _UC_ATTRIBUTE_CONST;
867 extern bool uc_is_property_composite (ucs4_t uc)
868        _UC_ATTRIBUTE_CONST;
869 extern bool uc_is_property_decimal_digit (ucs4_t uc)
870        _UC_ATTRIBUTE_CONST;
871 extern bool uc_is_property_numeric (ucs4_t uc)
872        _UC_ATTRIBUTE_CONST;
873 extern bool uc_is_property_diacritic (ucs4_t uc)
874        _UC_ATTRIBUTE_CONST;
875 extern bool uc_is_property_extender (ucs4_t uc)
876        _UC_ATTRIBUTE_CONST;
877 extern bool uc_is_property_ignorable_control (ucs4_t uc)
878        _UC_ATTRIBUTE_CONST;
879 
880 /* ========================================================================= */
881 
882 /* Subdivision of the Unicode characters into scripts.  */
883 
/* One entry of an interval table, as referenced by uc_script_t.  */
typedef struct
{
  /* Code point value.  21 bits hold every value up to 0x1FFFFF.  */
  unsigned int code : 21;
  /* One-bit flags.
     NOTE(review): presumably they tell whether CODE starts and/or ends an
     interval -- confirm against the code that builds these tables.  */
  unsigned int start : 1;
  unsigned int end : 1;
}
uc_interval_t;
891 typedef struct
892 {
893   unsigned int nintervals;
894   const uc_interval_t *intervals;
895   const char *name;
896 }
897 uc_script_t;
898 
899 /* Return the script of a Unicode character.  */
900 extern const uc_script_t *
901        uc_script (ucs4_t uc)
902        _UC_ATTRIBUTE_CONST;
903 
904 /* Return the script given by name, e.g. "HAN".  */
905 extern const uc_script_t *
906        uc_script_byname (const char *script_name)
907        _UC_ATTRIBUTE_PURE;
908 
909 /* Test whether a Unicode character belongs to a given script.  */
910 extern bool
911        uc_is_script (ucs4_t uc, const uc_script_t *script)
912        _UC_ATTRIBUTE_PURE;
913 
914 /* Get the list of all scripts.  */
915 extern void
916        uc_all_scripts (const uc_script_t **scripts, size_t *count);
917 
918 /* ========================================================================= */
919 
920 /* Subdivision of the Unicode character range into blocks.  */
921 
922 typedef struct
923 {
924   ucs4_t start;
925   ucs4_t end;
926   const char *name;
927 }
928 uc_block_t;
929 
930 /* Return the block a character belongs to.  */
931 extern const uc_block_t *
932        uc_block (ucs4_t uc)
933        _UC_ATTRIBUTE_CONST;
934 
935 /* Test whether a Unicode character belongs to a given block.  */
936 extern bool
937        uc_is_block (ucs4_t uc, const uc_block_t *block)
938        _UC_ATTRIBUTE_PURE;
939 
940 /* Get the list of all blocks.  */
941 extern void
942        uc_all_blocks (const uc_block_t **blocks, size_t *count);
943 
944 /* ========================================================================= */
945 
946 /* Properties taken from language standards.  */
947 
948 /* Test whether a Unicode character is considered whitespace in ISO C 99.  */
949 extern bool
950        uc_is_c_whitespace (ucs4_t uc)
951        _UC_ATTRIBUTE_CONST;
952 
953 /* Test whether a Unicode character is considered whitespace in Java.  */
954 extern bool
955        uc_is_java_whitespace (ucs4_t uc)
956        _UC_ATTRIBUTE_CONST;
957 
/* Categories of a character with respect to an identifier syntax (see
   uc_c_ident_category and uc_java_ident_category).  The numeric values are
   spelled out so that they stay fixed if enumerators are ever added.  */
enum
{
  /* Valid as first or subsequent character.  */
  UC_IDENTIFIER_START = 0,
  /* Valid as subsequent character only.  */
  UC_IDENTIFIER_VALID = 1,
  /* Not valid.  */
  UC_IDENTIFIER_INVALID = 2,
  /* Ignorable (Java only).  */
  UC_IDENTIFIER_IGNORABLE = 3
};
965 
966 /* Return the categorization of a Unicode character w.r.t. the ISO C 99
967    identifier syntax.  */
968 extern int
969        uc_c_ident_category (ucs4_t uc)
970        _UC_ATTRIBUTE_CONST;
971 
972 /* Return the categorization of a Unicode character w.r.t. the Java
973    identifier syntax.  */
974 extern int
975        uc_java_ident_category (ucs4_t uc)
976        _UC_ATTRIBUTE_CONST;
977 
978 /* ========================================================================= */
979 
980 /* Like ISO C <ctype.h> and <wctype.h>.  These functions are deprecated,
981    because this set of functions was designed with ASCII in mind and cannot
982    reflect the more diverse reality of the Unicode character set.  But they
983    can be a quick-and-dirty porting aid when migrating from wchar_t APIs
984    to Unicode strings.  */
985 
986 /* Test for any character for which 'uc_is_alpha' or 'uc_is_digit' is true.  */
987 extern bool
988        uc_is_alnum (ucs4_t uc)
989        _UC_ATTRIBUTE_CONST;
990 
991 /* Test for any character for which 'uc_is_upper' or 'uc_is_lower' is true,
992    or any character that is one of a locale-specific set of characters for
993    which none of 'uc_is_cntrl', 'uc_is_digit', 'uc_is_punct', or 'uc_is_space'
994    is true.  */
995 extern bool
996        uc_is_alpha (ucs4_t uc)
997        _UC_ATTRIBUTE_CONST;
998 
999 /* Test for any control character.  */
1000 extern bool
1001        uc_is_cntrl (ucs4_t uc)
1002        _UC_ATTRIBUTE_CONST;
1003 
1004 /* Test for any character that corresponds to a decimal-digit character.  */
1005 extern bool
1006        uc_is_digit (ucs4_t uc)
1007        _UC_ATTRIBUTE_CONST;
1008 
1009 /* Test for any character for which 'uc_is_print' is true and 'uc_is_space'
1010    is false.  */
1011 extern bool
1012        uc_is_graph (ucs4_t uc)
1013        _UC_ATTRIBUTE_CONST;
1014 
1015 /* Test for any character that corresponds to a lowercase letter or is one
1016    of a locale-specific set of characters for which none of 'uc_is_cntrl',
1017    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1018 extern bool
1019        uc_is_lower (ucs4_t uc)
1020        _UC_ATTRIBUTE_CONST;
1021 
1022 /* Test for any printing character.  */
1023 extern bool
1024        uc_is_print (ucs4_t uc)
1025        _UC_ATTRIBUTE_CONST;
1026 
1027 /* Test for any printing character that is one of a locale-specific set of
1028    characters for which neither 'uc_is_space' nor 'uc_is_alnum' is true.  */
1029 extern bool
1030        uc_is_punct (ucs4_t uc)
1031        _UC_ATTRIBUTE_CONST;
1032 
1033 /* Test for any character that corresponds to a locale-specific set of
1034    characters for which none of 'uc_is_alnum', 'uc_is_graph', or 'uc_is_punct'
1035    is true.  */
1036 extern bool
1037        uc_is_space (ucs4_t uc)
1038        _UC_ATTRIBUTE_CONST;
1039 
1040 /* Test for any character that corresponds to an uppercase letter or is one
1041    of a locale-specific set of character for which none of 'uc_is_cntrl',
1042    'uc_is_digit', 'uc_is_punct', or 'uc_is_space' is true.  */
1043 extern bool
1044        uc_is_upper (ucs4_t uc)
1045        _UC_ATTRIBUTE_CONST;
1046 
1047 /* Test for any character that corresponds to a hexadecimal-digit
1048    character.  */
1049 extern bool
1050        uc_is_xdigit (ucs4_t uc)
1051        _UC_ATTRIBUTE_CONST;
1052 
1053 /* GNU extension. */
1054 /* Test for any character that corresponds to a standard blank character or
1055    a locale-specific set of characters for which 'uc_is_alnum' is false.  */
1056 extern bool
1057        uc_is_blank (ucs4_t uc)
1058        _UC_ATTRIBUTE_CONST;
1059 
1060 /* ========================================================================= */
1061 
1062 #ifdef __cplusplus
1063 }
1064 #endif
1065 
1066 #endif /* _UNICTYPE_H */
1067