1 /*=============================================================================
2     Copyright (c) 2001-2011 Hartmut Kaiser
3     Copyright (c) 2001-2011 Joel de Guzman
4 
5     Distributed under the Boost Software License, Version 1.0. (See accompanying
6     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
10 
11 #if defined(_MSC_VER)
12 #pragma once
13 #endif
14 
15 #include <climits>
16 #include <boost/assert.hpp>
17 #include <boost/cstdint.hpp>
18 
///////////////////////////////////////////////////////////////////////////////
// constants used to classify the single characters
//
// Each constant is a distinct single-bit flag, so the classes a character
// belongs to can be combined with '|' and tested with '&'. Together the
// eight flags occupy exactly the low eight bits (0x00ff).
///////////////////////////////////////////////////////////////////////////////
#define BOOST_CC_DIGIT    0x0001
#define BOOST_CC_XDIGIT   0x0002
#define BOOST_CC_ALPHA    0x0004
#define BOOST_CC_CTRL     0x0008
#define BOOST_CC_LOWER    0x0010
#define BOOST_CC_UPPER    0x0020
#define BOOST_CC_SPACE    0x0040
#define BOOST_CC_PUNCT    0x0080
30 
31 namespace boost { namespace spirit { namespace char_encoding
32 {
// isgraph(), isprint() and isblank() are detected programmatically rather
// than through the table, which keeps the character type table small.
// These functions are also rarely used, and the programmatic checks are
// very simple.
36 
37     ///////////////////////////////////////////////////////////////////////////
38     // ASCII character classification table
39     ///////////////////////////////////////////////////////////////////////////
40     const unsigned char ascii_char_types[] =
41     {
42         /* NUL   0   0 */   BOOST_CC_CTRL,
43         /* SOH   1   1 */   BOOST_CC_CTRL,
44         /* STX   2   2 */   BOOST_CC_CTRL,
45         /* ETX   3   3 */   BOOST_CC_CTRL,
46         /* EOT   4   4 */   BOOST_CC_CTRL,
47         /* ENQ   5   5 */   BOOST_CC_CTRL,
48         /* ACK   6   6 */   BOOST_CC_CTRL,
49         /* BEL   7   7 */   BOOST_CC_CTRL,
50         /* BS    8   8 */   BOOST_CC_CTRL,
51         /* HT    9   9 */   BOOST_CC_CTRL|BOOST_CC_SPACE,
52         /* NL   10   a */   BOOST_CC_CTRL|BOOST_CC_SPACE,
53         /* VT   11   b */   BOOST_CC_CTRL|BOOST_CC_SPACE,
54         /* NP   12   c */   BOOST_CC_CTRL|BOOST_CC_SPACE,
55         /* CR   13   d */   BOOST_CC_CTRL|BOOST_CC_SPACE,
56         /* SO   14   e */   BOOST_CC_CTRL,
57         /* SI   15   f */   BOOST_CC_CTRL,
58         /* DLE  16  10 */   BOOST_CC_CTRL,
59         /* DC1  17  11 */   BOOST_CC_CTRL,
60         /* DC2  18  12 */   BOOST_CC_CTRL,
61         /* DC3  19  13 */   BOOST_CC_CTRL,
62         /* DC4  20  14 */   BOOST_CC_CTRL,
63         /* NAK  21  15 */   BOOST_CC_CTRL,
64         /* SYN  22  16 */   BOOST_CC_CTRL,
65         /* ETB  23  17 */   BOOST_CC_CTRL,
66         /* CAN  24  18 */   BOOST_CC_CTRL,
67         /* EM   25  19 */   BOOST_CC_CTRL,
68         /* SUB  26  1a */   BOOST_CC_CTRL,
69         /* ESC  27  1b */   BOOST_CC_CTRL,
70         /* FS   28  1c */   BOOST_CC_CTRL,
71         /* GS   29  1d */   BOOST_CC_CTRL,
72         /* RS   30  1e */   BOOST_CC_CTRL,
73         /* US   31  1f */   BOOST_CC_CTRL,
74         /* SP   32  20 */   BOOST_CC_SPACE,
75         /*  !   33  21 */   BOOST_CC_PUNCT,
76         /*  "   34  22 */   BOOST_CC_PUNCT,
77         /*  #   35  23 */   BOOST_CC_PUNCT,
78         /*  $   36  24 */   BOOST_CC_PUNCT,
79         /*  %   37  25 */   BOOST_CC_PUNCT,
80         /*  &   38  26 */   BOOST_CC_PUNCT,
81         /*  '   39  27 */   BOOST_CC_PUNCT,
82         /*  (   40  28 */   BOOST_CC_PUNCT,
83         /*  )   41  29 */   BOOST_CC_PUNCT,
84         /*  *   42  2a */   BOOST_CC_PUNCT,
85         /*  +   43  2b */   BOOST_CC_PUNCT,
86         /*  ,   44  2c */   BOOST_CC_PUNCT,
87         /*  -   45  2d */   BOOST_CC_PUNCT,
88         /*  .   46  2e */   BOOST_CC_PUNCT,
89         /*  /   47  2f */   BOOST_CC_PUNCT,
90         /*  0   48  30 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
91         /*  1   49  31 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
92         /*  2   50  32 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
93         /*  3   51  33 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
94         /*  4   52  34 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
95         /*  5   53  35 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
96         /*  6   54  36 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
97         /*  7   55  37 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
98         /*  8   56  38 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
99         /*  9   57  39 */   BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
100         /*  :   58  3a */   BOOST_CC_PUNCT,
101         /*  ;   59  3b */   BOOST_CC_PUNCT,
102         /*  <   60  3c */   BOOST_CC_PUNCT,
103         /*  =   61  3d */   BOOST_CC_PUNCT,
104         /*  >   62  3e */   BOOST_CC_PUNCT,
105         /*  ?   63  3f */   BOOST_CC_PUNCT,
106         /*  @   64  40 */   BOOST_CC_PUNCT,
107         /*  A   65  41 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
108         /*  B   66  42 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
109         /*  C   67  43 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
110         /*  D   68  44 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
111         /*  E   69  45 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
112         /*  F   70  46 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
113         /*  G   71  47 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
114         /*  H   72  48 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
115         /*  I   73  49 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
116         /*  J   74  4a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
117         /*  K   75  4b */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
118         /*  L   76  4c */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
119         /*  M   77  4d */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
120         /*  N   78  4e */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
121         /*  O   79  4f */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
122         /*  P   80  50 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
123         /*  Q   81  51 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
124         /*  R   82  52 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
125         /*  S   83  53 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
126         /*  T   84  54 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
127         /*  U   85  55 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
128         /*  V   86  56 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
129         /*  W   87  57 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
130         /*  X   88  58 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
131         /*  Y   89  59 */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
132         /*  Z   90  5a */   BOOST_CC_ALPHA|BOOST_CC_UPPER,
133         /*  [   91  5b */   BOOST_CC_PUNCT,
134         /*  \   92  5c */   BOOST_CC_PUNCT,
135         /*  ]   93  5d */   BOOST_CC_PUNCT,
136         /*  ^   94  5e */   BOOST_CC_PUNCT,
137         /*  _   95  5f */   BOOST_CC_PUNCT,
138         /*  `   96  60 */   BOOST_CC_PUNCT,
139         /*  a   97  61 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
140         /*  b   98  62 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
141         /*  c   99  63 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
142         /*  d  100  64 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
143         /*  e  101  65 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
144         /*  f  102  66 */   BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
145         /*  g  103  67 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
146         /*  h  104  68 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
147         /*  i  105  69 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
148         /*  j  106  6a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
149         /*  k  107  6b */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
150         /*  l  108  6c */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
151         /*  m  109  6d */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
152         /*  n  110  6e */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
153         /*  o  111  6f */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
154         /*  p  112  70 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
155         /*  q  113  71 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
156         /*  r  114  72 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
157         /*  s  115  73 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
158         /*  t  116  74 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
159         /*  u  117  75 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
160         /*  v  118  76 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
161         /*  w  119  77 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
162         /*  x  120  78 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
163         /*  y  121  79 */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
164         /*  z  122  7a */   BOOST_CC_ALPHA|BOOST_CC_LOWER,
165         /*  {  123  7b */   BOOST_CC_PUNCT,
166         /*  |  124  7c */   BOOST_CC_PUNCT,
167         /*  }  125  7d */   BOOST_CC_PUNCT,
168         /*  ~  126  7e */   BOOST_CC_PUNCT,
169         /* DEL 127  7f */   BOOST_CC_CTRL,
170     };
171 
172     ///////////////////////////////////////////////////////////////////////////
173     //  Test characters for specified conditions (using ASCII)
174     ///////////////////////////////////////////////////////////////////////////
175     struct ascii
176     {
177         typedef char char_type;
178 
179         static bool
isascii_boost::spirit::char_encoding::ascii180         isascii_(int ch)
181         {
182             return 0 == (ch & ~0x7f);
183         }
184 
185         static bool
ischarboost::spirit::char_encoding::ascii186         ischar(int ch)
187         {
188             return isascii_(ch);
189         }
190 
191         static bool
isalnumboost::spirit::char_encoding::ascii192         isalnum(int ch)
193         {
194             BOOST_ASSERT(isascii_(ch));
195             return (ascii_char_types[ch] & BOOST_CC_ALPHA)
196                 || (ascii_char_types[ch] & BOOST_CC_DIGIT);
197         }
198 
199         static bool
isalphaboost::spirit::char_encoding::ascii200         isalpha(int ch)
201         {
202             BOOST_ASSERT(isascii_(ch));
203             return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
204         }
205 
206         static bool
isdigitboost::spirit::char_encoding::ascii207         isdigit(int ch)
208         {
209             BOOST_ASSERT(isascii_(ch));
210             return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
211         }
212 
213         static bool
isxdigitboost::spirit::char_encoding::ascii214         isxdigit(int ch)
215         {
216             BOOST_ASSERT(isascii_(ch));
217             return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
218         }
219 
220         static bool
iscntrlboost::spirit::char_encoding::ascii221         iscntrl(int ch)
222         {
223             BOOST_ASSERT(isascii_(ch));
224             return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
225         }
226 
227         static bool
isgraphboost::spirit::char_encoding::ascii228         isgraph(int ch)
229         {
230             return ('\x21' <= ch && ch <= '\x7e');
231         }
232 
233         static bool
islowerboost::spirit::char_encoding::ascii234         islower(int ch)
235         {
236             BOOST_ASSERT(isascii_(ch));
237             return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
238         }
239 
240         static bool
isprintboost::spirit::char_encoding::ascii241         isprint(int ch)
242         {
243             return ('\x20' <= ch && ch <= '\x7e');
244         }
245 
246         static bool
ispunctboost::spirit::char_encoding::ascii247         ispunct(int ch)
248         {
249             BOOST_ASSERT(isascii_(ch));
250             return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
251         }
252 
253         static bool
isspaceboost::spirit::char_encoding::ascii254         isspace(int ch)
255         {
256             BOOST_ASSERT(isascii_(ch));
257             return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
258         }
259 
260         static bool
BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii261         isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
262         {
263             return ('\x09' == ch || '\x20' == ch);
264         }
265 
266         static bool
isupperboost::spirit::char_encoding::ascii267         isupper(int ch)
268         {
269             BOOST_ASSERT(isascii_(ch));
270             return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
271         }
272 
273         ///////////////////////////////////////////////////////////////////////
274         //  Simple character conversions
275         ///////////////////////////////////////////////////////////////////////
276 
277         static int
tolowerboost::spirit::char_encoding::ascii278         tolower(int ch)
279         {
280             BOOST_ASSERT(isascii_(ch));
281             return isupper(ch) ? (ch - 'A' + 'a') : ch;
282         }
283 
284         static int
toupperboost::spirit::char_encoding::ascii285         toupper(int ch)
286         {
287             BOOST_ASSERT(isascii_(ch));
288             return islower(ch) ? (ch - 'a' + 'A') : ch;
289         }
290 
291         static ::boost::uint32_t
toucs4boost::spirit::char_encoding::ascii292         toucs4(int ch)
293         {
294             return ch;
295         }
296     };
297 
298 }}}
299 
///////////////////////////////////////////////////////////////////////////////
// undefine macros
//
// The BOOST_CC_* flags are only needed while this header is being processed;
// removing them keeps the names from leaking into code that includes it.
///////////////////////////////////////////////////////////////////////////////
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
311 
312 #endif
313 
314