1 /*=============================================================================
2     Copyright (c) 2001-2011 Hartmut Kaiser
3     Copyright (c) 2001-2011 Joel de Guzman
4 
5     Distributed under the Boost Software License, Version 1.0. (See accompanying
6     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
10 
11 #if defined(_MSC_VER)
12 #pragma once
13 #endif
14 
15 #include <climits>
16 #include <boost/assert.hpp>
17 #include <boost/cstdint.hpp>
18 
19 ///////////////////////////////////////////////////////////////////////////////
20 // constants used to classify the single characters
21 ///////////////////////////////////////////////////////////////////////////////
// Every class flag occupies its own bit, so one table entry can carry
// several classes OR-ed together (e.g. BOOST_CC_CTRL|BOOST_CC_SPACE).
#define BOOST_CC_DIGIT    (1 << 0)  // decimal digit
#define BOOST_CC_XDIGIT   (1 << 1)  // hexadecimal digit
#define BOOST_CC_ALPHA    (1 << 2)  // letter
#define BOOST_CC_CTRL     (1 << 3)  // control character
#define BOOST_CC_LOWER    (1 << 4)  // lower-case letter
#define BOOST_CC_UPPER    (1 << 5)  // upper-case letter
#define BOOST_CC_SPACE    (1 << 6)  // white space
#define BOOST_CC_PUNCT    (1 << 7)  // punctuation
30 
31 namespace boost { namespace spirit { namespace char_encoding
32 {
33     // The detection of isgraph(), isprint() and isblank() is done programmatically
34     // to keep the character type table small. Additionally, these functions are
35     // rather seldom used and the programmatic detection is very simple.
36 
37     ///////////////////////////////////////////////////////////////////////////
38     // ASCII character classification table
39     ///////////////////////////////////////////////////////////////////////////
40     const unsigned char ascii_char_types[] =
41     {
42         /* NUL   0   0 */   BOOST_CC_CTRL,
43         /* SOH   1   1 */   BOOST_CC_CTRL,
44         /* STX   2   2 */   BOOST_CC_CTRL,
45         /* ETX   3   3 */   BOOST_CC_CTRL,
46         /* EOT   4   4 */   BOOST_CC_CTRL,
47         /* ENQ   5   5 */   BOOST_CC_CTRL,
48         /* ACK   6   6 */   BOOST_CC_CTRL,
49         /* BEL   7   7 */   BOOST_CC_CTRL,
50         /* BS    8   8 */   BOOST_CC_CTRL,
51         /* HT    9   9 */   BOOST_CC_CTRL|BOOST_CC_SPACE,
52         /* NL   10   a */   BOOST_CC_CTRL|BOOST_CC_SPACE,
53         /* VT   11   b */   BOOST_CC_CTRL|BOOST_CC_SPACE,
54         /* NP   12   c */   BOOST_CC_CTRL|BOOST_CC_SPACE,
55         /* CR   13   d */   BOOST_CC_CTRL|BOOST_CC_SPACE,
56         /* SO   14   e */   BOOST_CC_CTRL,
57         /* SI   15   f */   BOOST_CC_CTRL,
58         /* DLE  16  10 */   BOOST_CC_CTRL,
59         /* DC1  17  11 */   BOOST_CC_CTRL,
60         /* DC2  18  12 */   BOOST_CC_CTRL,
61         /* DC3  19  13 */   BOOST_CC_CTRL,
62         /* DC4  20  14 */   BOOST_CC_CTRL,
63         /* NAK  21  15 */   BOOST_CC_CTRL,
64         /* SYN  22  16 */   BOOST_CC_CTRL,
65         /* ETB  23  17 */   BOOST_CC_CTRL,
66         /* CAN  24  18 */   BOOST_CC_CTRL,
67         /* EM   25  19 */   BOOST_CC_CTRL,
68         /* SUB  26  1a */   BOOST_CC_CTRL,
69         /* ESC  27  1b */   BOOST_CC_CTRL,
70         /* FS   28  1c */   BOOST_CC_CTRL,
71         /* GS   29  1d */   BOOST_CC_CTRL,
72         /* RS   30  1e */   BOOST_CC_CTRL,
73         /* US   31  1f */   BOOST_CC_CTRL,
74         /* SP   32  20 */   BOOST_CC_SPACE,
75         /*  !   33  21 */   BOOST_CC_PUNCT,
76         /*  "   34  22 */   BOOST_CC_PUNCT,
77         /*  #   35  23 */   BOOST_CC_PUNCT,
78         /*  $   36  24 */   BOOST_CC_PUNCT,
79         /*  %   37  25 */   BOOST_CC_PUNCT,
80         /*  &   38  26 */   BOOST_CC_PUNCT,
81         /*  '   39  27 */   BOOST_CC_PUNCT,
82         /*  (   40  28 */   BOOST_CC_PUNCT,
83         /*  )   41  29 */   BOOST_CC_PUNCT,
84         /*  *   42  2a */   BOOST_CC_PUNCT,
85         /*  +   43  2b */   BOOST_CC_PUNCT,
86         /*  ,   44  2c */   BOOST_CC_PUNCT,
87         /*  -   45  2d */   BOOST_CC_PUNCT,
88         /*  .   46  2e */   BOOST_CC_PUNCT,
89         /*  /   47  2f */   BOOST_CC_PUNCT,
90         /*  0   48  30 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
91         /*  1   49  31 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
92         /*  2   50  32 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
93         /*  3   51  33 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
94         /*  4   52  34 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
95         /*  5   53  35 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
96         /*  6   54  36 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
97         /*  7   55  37 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
98         /*  8   56  38 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
99         /*  9   57  39 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
100         /*  :   58  3a */   BOOST_CC_PUNCT,
101         /*  ;   59  3b */   BOOST_CC_PUNCT,
102         /*  <   60  3c */   BOOST_CC_PUNCT,
103         /*  =   61  3d */   BOOST_CC_PUNCT,
104         /*  >   62  3e */   BOOST_CC_PUNCT,
105         /*  ?   63  3f */   BOOST_CC_PUNCT,
106         /*  @   64  40 */   BOOST_CC_PUNCT,
107         /*  A   65  41 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
108         /*  B   66  42 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
109         /*  C   67  43 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
110         /*  D   68  44 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
111         /*  E   69  45 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
112         /*  F   70  46 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
113         /*  G   71  47 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
114         /*  H   72  48 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
115         /*  I   73  49 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
116         /*  J   74  4a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
117         /*  K   75  4b */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
118         /*  L   76  4c */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
119         /*  M   77  4d */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
120         /*  N   78  4e */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
121         /*  O   79  4f */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
122         /*  P   80  50 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
123         /*  Q   81  51 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
124         /*  R   82  52 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
125         /*  S   83  53 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
126         /*  T   84  54 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
127         /*  U   85  55 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
128         /*  V   86  56 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
129         /*  W   87  57 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
130         /*  X   88  58 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
131         /*  Y   89  59 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
132         /*  Z   90  5a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
133         /*  [   91  5b */   BOOST_CC_PUNCT,
134         /*  \   92  5c */   BOOST_CC_PUNCT,
135         /*  ]   93  5d */   BOOST_CC_PUNCT,
136         /*  ^   94  5e */   BOOST_CC_PUNCT,
137         /*  _   95  5f */   BOOST_CC_PUNCT,
138         /*  `   96  60 */   BOOST_CC_PUNCT,
139         /*  a   97  61 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
140         /*  b   98  62 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
141         /*  c   99  63 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
142         /*  d  100  64 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
143         /*  e  101  65 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
144         /*  f  102  66 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
145         /*  g  103  67 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
146         /*  h  104  68 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
147         /*  i  105  69 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
148         /*  j  106  6a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
149         /*  k  107  6b */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
150         /*  l  108  6c */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
151         /*  m  109  6d */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
152         /*  n  110  6e */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
153         /*  o  111  6f */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
154         /*  p  112  70 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
155         /*  q  113  71 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
156         /*  r  114  72 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
157         /*  s  115  73 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
158         /*  t  116  74 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
159         /*  u  117  75 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
160         /*  v  118  76 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
161         /*  w  119  77 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
162         /*  x  120  78 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
163         /*  y  121  79 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
164         /*  z  122  7a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
165         /*  {  123  7b */   BOOST_CC_PUNCT,
166         /*  |  124  7c */   BOOST_CC_PUNCT,
167         /*  }  125  7d */   BOOST_CC_PUNCT,
168         /*  ~  126  7e */   BOOST_CC_PUNCT,
169         /* DEL 127  7f */   BOOST_CC_CTRL,
170         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178     };
179 
180     ///////////////////////////////////////////////////////////////////////////
181     //  Test characters for specified conditions (using ASCII)
182     ///////////////////////////////////////////////////////////////////////////
183     struct ascii
184     {
185         typedef char char_type;
186 
187         static bool
isascii_boost::spirit::char_encoding::ascii188         isascii_(int ch)
189         {
190             return 0 == (ch & ~0x7f);
191         }
192 
193         static bool
ischarboost::spirit::char_encoding::ascii194         ischar(int ch)
195         {
196             return isascii_(ch);
197         }
198 
199         static bool
isalnumboost::spirit::char_encoding::ascii200         isalnum(int ch)
201         {
202             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
203             return (ascii_char_types[ch] & BOOST_CC_ALPHA)
204                 || (ascii_char_types[ch] & BOOST_CC_DIGIT);
205         }
206 
207         static bool
isalphaboost::spirit::char_encoding::ascii208         isalpha(int ch)
209         {
210             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
211             return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
212         }
213 
214         static bool
isdigitboost::spirit::char_encoding::ascii215         isdigit(int ch)
216         {
217             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
218             return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
219         }
220 
221         static bool
isxdigitboost::spirit::char_encoding::ascii222         isxdigit(int ch)
223         {
224             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
225             return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
226         }
227 
228         static bool
iscntrlboost::spirit::char_encoding::ascii229         iscntrl(int ch)
230         {
231             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
232             return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
233         }
234 
235         static bool
isgraphboost::spirit::char_encoding::ascii236         isgraph(int ch)
237         {
238             return ('\x21' <= ch && ch <= '\x7e');
239         }
240 
241         static bool
islowerboost::spirit::char_encoding::ascii242         islower(int ch)
243         {
244             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
245             return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
246         }
247 
248         static bool
isprintboost::spirit::char_encoding::ascii249         isprint(int ch)
250         {
251             return ('\x20' <= ch && ch <= '\x7e');
252         }
253 
254         static bool
ispunctboost::spirit::char_encoding::ascii255         ispunct(int ch)
256         {
257             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
258             return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
259         }
260 
261         static bool
isspaceboost::spirit::char_encoding::ascii262         isspace(int ch)
263         {
264             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
265             return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
266         }
267 
268         static bool
BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii269         isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
270         {
271             return ('\x09' == ch || '\x20' == ch);
272         }
273 
274         static bool
isupperboost::spirit::char_encoding::ascii275         isupper(int ch)
276         {
277             BOOST_ASSERT(0 == (ch & ~UCHAR_MAX));
278             return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
279         }
280 
281         ///////////////////////////////////////////////////////////////////////
282         //  Simple character conversions
283         ///////////////////////////////////////////////////////////////////////
284 
285         static int
tolowerboost::spirit::char_encoding::ascii286         tolower(int ch)
287         {
288             return isupper(ch) ? (ch - 'A' + 'a') : ch;
289         }
290 
291         static int
toupperboost::spirit::char_encoding::ascii292         toupper(int ch)
293         {
294             return islower(ch) ? (ch - 'a' + 'A') : ch;
295         }
296 
297         static ::boost::uint32_t
toucs4boost::spirit::char_encoding::ascii298         toucs4(int ch)
299         {
300             return ch;
301         }
302     };
303 
304 }}}
305 
306 ///////////////////////////////////////////////////////////////////////////////
307 // undefine macros
308 ///////////////////////////////////////////////////////////////////////////////
// The BOOST_CC_* flags are only needed while this header is being compiled;
// remove them again, in the order they were defined.
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
317 
318 #endif
319