1 /*=============================================================================
2     Copyright (c) 2001-2011 Hartmut Kaiser
3     Copyright (c) 2001-2011 Joel de Guzman
4 
5     Distributed under the Boost Software License, Version 1.0. (See accompanying
6     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
10 
11 #if defined(_MSC_VER)
12 #pragma once
13 #endif
14 
15 #include <climits>
16 #include <boost/assert.hpp>
17 #include <boost/cstdint.hpp>
18 
19 ///////////////////////////////////////////////////////////////////////////////
20 // constants used to classify the single characters
21 ///////////////////////////////////////////////////////////////////////////////
// One bit per character class. All eight flags fit in a single byte, which
// is what lets the classification table store them as unsigned char.
#define BOOST_CC_DIGIT    (1 << 0)  // decimal digit
#define BOOST_CC_XDIGIT   (1 << 1)  // hexadecimal digit
#define BOOST_CC_ALPHA    (1 << 2)  // letter
#define BOOST_CC_CTRL     (1 << 3)  // control character
#define BOOST_CC_LOWER    (1 << 4)  // lower-case letter
#define BOOST_CC_UPPER    (1 << 5)  // upper-case letter
#define BOOST_CC_SPACE    (1 << 6)  // white space
#define BOOST_CC_PUNCT    (1 << 7)  // punctuation
30 
31 namespace boost { namespace spirit { namespace char_encoding
32 {
33     // The detection of isgraph(), isprint() and isblank() is done programmatically
34     // to keep the character type table small. Additionally, these functions are
35     // rather seldom used and the programmatic detection is very simple.
36 
37     ///////////////////////////////////////////////////////////////////////////
38     // ASCII character classification table
39     ///////////////////////////////////////////////////////////////////////////
40     const unsigned char ascii_char_types[] =
41     {
42         /* NUL   0   0 */   BOOST_CC_CTRL,
43         /* SOH   1   1 */   BOOST_CC_CTRL,
44         /* STX   2   2 */   BOOST_CC_CTRL,
45         /* ETX   3   3 */   BOOST_CC_CTRL,
46         /* EOT   4   4 */   BOOST_CC_CTRL,
47         /* ENQ   5   5 */   BOOST_CC_CTRL,
48         /* ACK   6   6 */   BOOST_CC_CTRL,
49         /* BEL   7   7 */   BOOST_CC_CTRL,
50         /* BS    8   8 */   BOOST_CC_CTRL,
51         /* HT    9   9 */   BOOST_CC_CTRL|BOOST_CC_SPACE,
52         /* NL   10   a */   BOOST_CC_CTRL|BOOST_CC_SPACE,
53         /* VT   11   b */   BOOST_CC_CTRL|BOOST_CC_SPACE,
54         /* NP   12   c */   BOOST_CC_CTRL|BOOST_CC_SPACE,
55         /* CR   13   d */   BOOST_CC_CTRL|BOOST_CC_SPACE,
56         /* SO   14   e */   BOOST_CC_CTRL,
57         /* SI   15   f */   BOOST_CC_CTRL,
58         /* DLE  16  10 */   BOOST_CC_CTRL,
59         /* DC1  17  11 */   BOOST_CC_CTRL,
60         /* DC2  18  12 */   BOOST_CC_CTRL,
61         /* DC3  19  13 */   BOOST_CC_CTRL,
62         /* DC4  20  14 */   BOOST_CC_CTRL,
63         /* NAK  21  15 */   BOOST_CC_CTRL,
64         /* SYN  22  16 */   BOOST_CC_CTRL,
65         /* ETB  23  17 */   BOOST_CC_CTRL,
66         /* CAN  24  18 */   BOOST_CC_CTRL,
67         /* EM   25  19 */   BOOST_CC_CTRL,
68         /* SUB  26  1a */   BOOST_CC_CTRL,
69         /* ESC  27  1b */   BOOST_CC_CTRL,
70         /* FS   28  1c */   BOOST_CC_CTRL,
71         /* GS   29  1d */   BOOST_CC_CTRL,
72         /* RS   30  1e */   BOOST_CC_CTRL,
73         /* US   31  1f */   BOOST_CC_CTRL,
74         /* SP   32  20 */   BOOST_CC_SPACE,
75         /*  !   33  21 */   BOOST_CC_PUNCT,
76         /*  "   34  22 */   BOOST_CC_PUNCT,
77         /*  #   35  23 */   BOOST_CC_PUNCT,
78         /*  $   36  24 */   BOOST_CC_PUNCT,
79         /*  %   37  25 */   BOOST_CC_PUNCT,
80         /*  &   38  26 */   BOOST_CC_PUNCT,
81         /*  '   39  27 */   BOOST_CC_PUNCT,
82         /*  (   40  28 */   BOOST_CC_PUNCT,
83         /*  )   41  29 */   BOOST_CC_PUNCT,
84         /*  *   42  2a */   BOOST_CC_PUNCT,
85         /*  +   43  2b */   BOOST_CC_PUNCT,
86         /*  ,   44  2c */   BOOST_CC_PUNCT,
87         /*  -   45  2d */   BOOST_CC_PUNCT,
88         /*  .   46  2e */   BOOST_CC_PUNCT,
89         /*  /   47  2f */   BOOST_CC_PUNCT,
90         /*  0   48  30 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
91         /*  1   49  31 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
92         /*  2   50  32 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
93         /*  3   51  33 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
94         /*  4   52  34 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
95         /*  5   53  35 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
96         /*  6   54  36 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
97         /*  7   55  37 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
98         /*  8   56  38 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
99         /*  9   57  39 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
100         /*  :   58  3a */   BOOST_CC_PUNCT,
101         /*  ;   59  3b */   BOOST_CC_PUNCT,
102         /*  <   60  3c */   BOOST_CC_PUNCT,
103         /*  =   61  3d */   BOOST_CC_PUNCT,
104         /*  >   62  3e */   BOOST_CC_PUNCT,
105         /*  ?   63  3f */   BOOST_CC_PUNCT,
106         /*  @   64  40 */   BOOST_CC_PUNCT,
107         /*  A   65  41 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
108         /*  B   66  42 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
109         /*  C   67  43 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
110         /*  D   68  44 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
111         /*  E   69  45 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
112         /*  F   70  46 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
113         /*  G   71  47 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
114         /*  H   72  48 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
115         /*  I   73  49 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
116         /*  J   74  4a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
117         /*  K   75  4b */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
118         /*  L   76  4c */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
119         /*  M   77  4d */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
120         /*  N   78  4e */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
121         /*  O   79  4f */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
122         /*  P   80  50 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
123         /*  Q   81  51 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
124         /*  R   82  52 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
125         /*  S   83  53 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
126         /*  T   84  54 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
127         /*  U   85  55 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
128         /*  V   86  56 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
129         /*  W   87  57 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
130         /*  X   88  58 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
131         /*  Y   89  59 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
132         /*  Z   90  5a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
133         /*  [   91  5b */   BOOST_CC_PUNCT,
134         /*  \   92  5c */   BOOST_CC_PUNCT,
135         /*  ]   93  5d */   BOOST_CC_PUNCT,
136         /*  ^   94  5e */   BOOST_CC_PUNCT,
137         /*  _   95  5f */   BOOST_CC_PUNCT,
138         /*  `   96  60 */   BOOST_CC_PUNCT,
139         /*  a   97  61 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
140         /*  b   98  62 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
141         /*  c   99  63 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
142         /*  d  100  64 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
143         /*  e  101  65 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
144         /*  f  102  66 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
145         /*  g  103  67 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
146         /*  h  104  68 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
147         /*  i  105  69 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
148         /*  j  106  6a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
149         /*  k  107  6b */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
150         /*  l  108  6c */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
151         /*  m  109  6d */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
152         /*  n  110  6e */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
153         /*  o  111  6f */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
154         /*  p  112  70 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
155         /*  q  113  71 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
156         /*  r  114  72 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
157         /*  s  115  73 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
158         /*  t  116  74 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
159         /*  u  117  75 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
160         /*  v  118  76 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
161         /*  w  119  77 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
162         /*  x  120  78 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
163         /*  y  121  79 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
164         /*  z  122  7a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
165         /*  {  123  7b */   BOOST_CC_PUNCT,
166         /*  |  124  7c */   BOOST_CC_PUNCT,
167         /*  }  125  7d */   BOOST_CC_PUNCT,
168         /*  ~  126  7e */   BOOST_CC_PUNCT,
169         /* DEL 127  7f */   BOOST_CC_CTRL,
170         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
171         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
172         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
173         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178     };
179 
180     ///////////////////////////////////////////////////////////////////////////
181     //  Test characters for specified conditions (using ASCII)
182     ///////////////////////////////////////////////////////////////////////////
183     struct ascii
184     {
185         typedef char char_type;
186         typedef unsigned char classify_type;
187 
188         static bool
isascii_boost::spirit::char_encoding::ascii189         isascii_(int ch)
190         {
191             return 0 == (ch & ~0x7f);
192         }
193 
194         static bool
ischarboost::spirit::char_encoding::ascii195         ischar(int ch)
196         {
197             return isascii_(ch);
198         }
199 
200         // *** Note on assertions: The precondition is that the calls to
201         // these functions do not violate the required range of ch (type int)
202         // which is that strict_ischar(ch) should be true. It is the
203         // responsibility of the caller to make sure this precondition is not
204         // violated.
205 
206         static bool
strict_ischarboost::spirit::char_encoding::ascii207         strict_ischar(int ch)
208         {
209             return ch >= 0 && ch <= 127;
210         }
211 
212         static bool
isalnumboost::spirit::char_encoding::ascii213         isalnum(int ch)
214         {
215             BOOST_ASSERT(strict_ischar(ch));
216             return (ascii_char_types[ch] & BOOST_CC_ALPHA)
217                 || (ascii_char_types[ch] & BOOST_CC_DIGIT);
218         }
219 
220         static bool
isalphaboost::spirit::char_encoding::ascii221         isalpha(int ch)
222         {
223             BOOST_ASSERT(strict_ischar(ch));
224             return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
225         }
226 
227         static bool
isdigitboost::spirit::char_encoding::ascii228         isdigit(int ch)
229         {
230             BOOST_ASSERT(strict_ischar(ch));
231             return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
232         }
233 
234         static bool
isxdigitboost::spirit::char_encoding::ascii235         isxdigit(int ch)
236         {
237             BOOST_ASSERT(strict_ischar(ch));
238             return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
239         }
240 
241         static bool
iscntrlboost::spirit::char_encoding::ascii242         iscntrl(int ch)
243         {
244             BOOST_ASSERT(strict_ischar(ch));
245             return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
246         }
247 
248         static bool
isgraphboost::spirit::char_encoding::ascii249         isgraph(int ch)
250         {
251             BOOST_ASSERT(strict_ischar(ch));
252             return ('\x21' <= ch && ch <= '\x7e');
253         }
254 
255         static bool
islowerboost::spirit::char_encoding::ascii256         islower(int ch)
257         {
258             BOOST_ASSERT(strict_ischar(ch));
259             return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
260         }
261 
262         static bool
isprintboost::spirit::char_encoding::ascii263         isprint(int ch)
264         {
265             BOOST_ASSERT(strict_ischar(ch));
266             return ('\x20' <= ch && ch <= '\x7e');
267         }
268 
269         static bool
ispunctboost::spirit::char_encoding::ascii270         ispunct(int ch)
271         {
272             BOOST_ASSERT(strict_ischar(ch));
273             return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
274         }
275 
276         static bool
isspaceboost::spirit::char_encoding::ascii277         isspace(int ch)
278         {
279             BOOST_ASSERT(strict_ischar(ch));
280             return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
281         }
282 
283         static bool
BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii284         isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
285         {
286             BOOST_ASSERT(strict_ischar(ch));
287             return ('\x09' == ch || '\x20' == ch);
288         }
289 
290         static bool
isupperboost::spirit::char_encoding::ascii291         isupper(int ch)
292         {
293             BOOST_ASSERT(strict_ischar(ch));
294             return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
295         }
296 
297         ///////////////////////////////////////////////////////////////////////
298         //  Simple character conversions
299         ///////////////////////////////////////////////////////////////////////
300 
301         static int
tolowerboost::spirit::char_encoding::ascii302         tolower(int ch)
303         {
304             BOOST_ASSERT(strict_ischar(ch));
305             return isupper(ch) ? (ch - 'A' + 'a') : ch;
306         }
307 
308         static int
toupperboost::spirit::char_encoding::ascii309         toupper(int ch)
310         {
311             BOOST_ASSERT(strict_ischar(ch));
312             return islower(ch) ? (ch - 'a' + 'A') : ch;
313         }
314 
315         static ::boost::uint32_t
toucs4boost::spirit::char_encoding::ascii316         toucs4(int ch)
317         {
318             BOOST_ASSERT(strict_ischar(ch));
319             return ch;
320         }
321     };
322 
323 }}}
324 
325 ///////////////////////////////////////////////////////////////////////////////
326 // undefine macros
327 ///////////////////////////////////////////////////////////////////////////////
// The classification flags are only needed while this header is being
// processed; remove them (in definition order) so they do not leak into
// code that includes it.
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
336 
337 #endif
338