1 /*=============================================================================
2     Copyright (c) 2001-2011 Joel de Guzman
3 
4     Distributed under the Boost Software License, Version 1.0. (See accompanying
5     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 
7     Autogenerated by MultiStageTable.py (Unicode multi-stage
8     table builder) (c) Peter Kankowski, 2008
9 ==============================================================================*/
10 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
11 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
12 
13 #include <boost/cstdint.hpp>
14 
15 # include "category_table.hpp"
16 # include "script_table.hpp"
17 # include "lowercase_table.hpp"
18 # include "uppercase_table.hpp"
19 
20 namespace boost { namespace spirit { namespace ucd
21 {
22     // This header provides Basic (Level 1) Unicode Support
23     // See http://unicode.org/reports/tr18/ for details
24 
25     struct properties
26     {
27         // bit pattern: xxMMMCCC
28         // MMM: major_category
29         // CCC: category
30 
31         enum major_category
32         {
33             letter,
34             mark,
35             number,
36             separator,
37             other,
38             punctuation,
39             symbol
40         };
41 
42         enum category
43         {
44             uppercase_letter = 0,   // [Lu] an uppercase letter
45             lowercase_letter,       // [Ll] a lowercase letter
46             titlecase_letter,       // [Lt] a digraphic character, with first part uppercase
47             modifier_letter,        // [Lm] a modifier letter
48             other_letter,           // [Lo] other letters, including syllables and ideographs
49 
50             nonspacing_mark = 8,    // [Mn] a nonspacing combining mark (zero advance width)
51             enclosing_mark,         // [Me] an enclosing combining mark
52             spacing_mark,           // [Mc] a spacing combining mark (positive advance width)
53 
54             decimal_number = 16,    // [Nd] a decimal digit
55             letter_number,          // [Nl] a letterlike numeric character
56             other_number,           // [No] a numeric character of other type
57 
58             space_separator = 24,   // [Zs] a space character (of various non-zero widths)
59             line_separator,         // [Zl] U+2028 LINE SEPARATOR only
60             paragraph_separator,    // [Zp] U+2029 PARAGRAPH SEPARATOR only
61 
62             control = 32,           // [Cc] a C0 or C1 control code
63             format,                 // [Cf] a format control character
64             private_use,            // [Co] a private-use character
65             surrogate,              // [Cs] a surrogate code point
66             unassigned,             // [Cn] a reserved unassigned code point or a noncharacter
67 
68             dash_punctuation = 40,  // [Pd] a dash or hyphen punctuation mark
69             open_punctuation,       // [Ps] an opening punctuation mark (of a pair)
70             close_punctuation,      // [Pe] a closing punctuation mark (of a pair)
71             connector_punctuation,  // [Pc] a connecting punctuation mark, like a tie
72             other_punctuation,      // [Po] a punctuation mark of other type
73             initial_punctuation,    // [Pi] an initial quotation mark
74             final_punctuation,      // [Pf] a final quotation mark
75 
76             math_symbol = 48,       // [Sm] a symbol of primarily mathematical use
77             currency_symbol,        // [Sc] a currency sign
78             modifier_symbol,        // [Sk] a non-letterlike modifier symbol
79             other_symbol            // [So] a symbol of other type
80         };
81 
82         enum derived_properties
83         {
84             alphabetic = 64,
85             uppercase = 128,
86             lowercase = 256,
87             white_space = 512,
88             hex_digit = 1024,
89             noncharacter_code_point = 2048,
90             default_ignorable_code_point = 4096
91         };
92 
93         enum script
94         {
95             arabic = 0,
96             imperial_aramaic = 1,
97             armenian = 2,
98             avestan = 3,
99             balinese = 4,
100             bamum = 5,
101             bengali = 6,
102             bopomofo = 7,
103             braille = 8,
104             buginese = 9,
105             buhid = 10,
106             canadian_aboriginal = 11,
107             carian = 12,
108             cham = 13,
109             cherokee = 14,
110             coptic = 15,
111             cypriot = 16,
112             cyrillic = 17,
113             devanagari = 18,
114             deseret = 19,
115             egyptian_hieroglyphs = 20,
116             ethiopic = 21,
117             georgian = 22,
118             glagolitic = 23,
119             gothic = 24,
120             greek = 25,
121             gujarati = 26,
122             gurmukhi = 27,
123             hangul = 28,
124             han = 29,
125             hanunoo = 30,
126             hebrew = 31,
127             hiragana = 32,
128             katakana_or_hiragana = 33,
129             old_italic = 34,
130             javanese = 35,
131             kayah_li = 36,
132             katakana = 37,
133             kharoshthi = 38,
134             khmer = 39,
135             kannada = 40,
136             kaithi = 41,
137             tai_tham = 42,
138             lao = 43,
139             latin = 44,
140             lepcha = 45,
141             limbu = 46,
142             linear_b = 47,
143             lisu = 48,
144             lycian = 49,
145             lydian = 50,
146             malayalam = 51,
147             mongolian = 52,
148             meetei_mayek = 53,
149             myanmar = 54,
150             nko = 55,
151             ogham = 56,
152             ol_chiki = 57,
153             old_turkic = 58,
154             oriya = 59,
155             osmanya = 60,
156             phags_pa = 61,
157             inscriptional_pahlavi = 62,
158             phoenician = 63,
159             inscriptional_parthian = 64,
160             rejang = 65,
161             runic = 66,
162             samaritan = 67,
163             old_south_arabian = 68,
164             saurashtra = 69,
165             shavian = 70,
166             sinhala = 71,
167             sundanese = 72,
168             syloti_nagri = 73,
169             syriac = 74,
170             tagbanwa = 75,
171             tai_le = 76,
172             new_tai_lue = 77,
173             tamil = 78,
174             tai_viet = 79,
175             telugu = 80,
176             tifinagh = 81,
177             tagalog = 82,
178             thaana = 83,
179             thai = 84,
180             tibetan = 85,
181             ugaritic = 86,
182             vai = 87,
183             old_persian = 88,
184             cuneiform = 89,
185             yi = 90,
186             inherited = 91,
187             common = 92,
188             unknown = 93
189         };
190     };
191 
get_category(::boost::uint32_t ch)192     inline properties::category get_category(::boost::uint32_t ch)
193     {
194         return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
195     }
196 
get_major_category(::boost::uint32_t ch)197     inline properties::major_category get_major_category(::boost::uint32_t ch)
198     {
199         return static_cast<properties::major_category>(get_category(ch) >> 3);
200     }
201 
is_punctuation(::boost::uint32_t ch)202     inline bool is_punctuation(::boost::uint32_t ch)
203     {
204         return get_major_category(ch) == properties::punctuation;
205     }
206 
is_decimal_number(::boost::uint32_t ch)207     inline bool is_decimal_number(::boost::uint32_t ch)
208     {
209         return get_category(ch) == properties::decimal_number;
210     }
211 
is_hex_digit(::boost::uint32_t ch)212     inline bool is_hex_digit(::boost::uint32_t ch)
213     {
214         return (detail::category_lookup(ch) & properties::hex_digit) != 0;
215     }
216 
is_control(::boost::uint32_t ch)217     inline bool is_control(::boost::uint32_t ch)
218     {
219         return get_category(ch) == properties::control;
220     }
221 
is_alphabetic(::boost::uint32_t ch)222     inline bool is_alphabetic(::boost::uint32_t ch)
223     {
224         return (detail::category_lookup(ch) & properties::alphabetic) != 0;
225     }
226 
is_alphanumeric(::boost::uint32_t ch)227     inline bool is_alphanumeric(::boost::uint32_t ch)
228     {
229         return is_decimal_number(ch) || is_alphabetic(ch);
230     }
231 
is_uppercase(::boost::uint32_t ch)232     inline bool is_uppercase(::boost::uint32_t ch)
233     {
234         return (detail::category_lookup(ch) & properties::uppercase) != 0;
235     }
236 
is_lowercase(::boost::uint32_t ch)237     inline bool is_lowercase(::boost::uint32_t ch)
238     {
239         return (detail::category_lookup(ch) & properties::lowercase) != 0;
240     }
241 
is_white_space(::boost::uint32_t ch)242     inline bool is_white_space(::boost::uint32_t ch)
243     {
244         return (detail::category_lookup(ch) & properties::white_space) != 0;
245     }
246 
is_blank(::boost::uint32_t ch)247     inline bool is_blank(::boost::uint32_t ch)
248     {
249         switch (ch)
250         {
251             case '\n': case '\v': case '\f': case '\r':
252                 return false;
253             default:
254                 return is_white_space(ch)
255                 && !(   get_category(ch) == properties::line_separator
256                     ||  get_category(ch) == properties::paragraph_separator
257                     );
258         }
259     }
260 
is_graph(::boost::uint32_t ch)261     inline bool is_graph(::boost::uint32_t ch)
262     {
263         return !(   is_white_space(ch)
264                 ||  get_category(ch) == properties::control
265                 ||  get_category(ch) == properties::surrogate
266                 ||  get_category(ch) == properties::unassigned
267                 );
268     }
269 
is_print(::boost::uint32_t ch)270     inline bool is_print(::boost::uint32_t ch)
271     {
272         return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
273     }
274 
is_noncharacter_code_point(::boost::uint32_t ch)275     inline bool is_noncharacter_code_point(::boost::uint32_t ch)
276     {
277         return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
278     }
279 
is_default_ignorable_code_point(::boost::uint32_t ch)280     inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
281     {
282         return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
283     }
284 
get_script(::boost::uint32_t ch)285     inline properties::script get_script(::boost::uint32_t ch)
286     {
287         return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F);
288     }
289 
to_lowercase(::boost::uint32_t ch)290     inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
291     {
292         // The table returns 0 to signal that this code maps to itself
293         ::boost::uint32_t r = detail::lowercase_lookup(ch);
294         return (r == 0)? ch : r;
295     }
296 
to_uppercase(::boost::uint32_t ch)297     inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
298     {
299         // The table returns 0 to signal that this code maps to itself
300         ::boost::uint32_t r = detail::uppercase_lookup(ch);
301         return (r == 0)? ch : r;
302     }
303 }}}
304 
305 #endif
306