1 /*============================================================================= 2 Copyright (c) 2001-2011 Hartmut Kaiser 3 Copyright (c) 2001-2011 Joel de Guzman 4 5 Distributed under the Boost Software License, Version 1.0. (See accompanying 6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 =============================================================================*/ 8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) 9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM 10 11 #if defined(_MSC_VER) 12 #pragma once 13 #endif 14 15 #include <climits> 16 #include <boost/assert.hpp> 17 #include <boost/cstdint.hpp> 18 19 /////////////////////////////////////////////////////////////////////////////// 20 // constants used to classify the single characters 21 /////////////////////////////////////////////////////////////////////////////// 22 #define BOOST_CC_DIGIT 0x0001 23 #define BOOST_CC_XDIGIT 0x0002 24 #define BOOST_CC_ALPHA 0x0004 25 #define BOOST_CC_CTRL 0x0008 26 #define BOOST_CC_LOWER 0x0010 27 #define BOOST_CC_UPPER 0x0020 28 #define BOOST_CC_SPACE 0x0040 29 #define BOOST_CC_PUNCT 0x0080 30 31 namespace boost { namespace spirit { namespace char_encoding 32 { 33 // The detection of isgraph(), isprint() and isblank() is done programmatically 34 // to keep the character type table small. Additionally, these functions are 35 // rather seldom used and the programmatic detection is very simple. 36 37 /////////////////////////////////////////////////////////////////////////// 38 // ASCII character classification table 39 /////////////////////////////////////////////////////////////////////////// 40 const unsigned char ascii_char_types[] = 41 { 42 /* NUL 0 0 */ BOOST_CC_CTRL, 43 /* SOH 1 1 */ BOOST_CC_CTRL, 44 /* STX 2 2 */ BOOST_CC_CTRL, 45 /* ETX 3 3 */ BOOST_CC_CTRL, 46 /* EOT 4 4 */ BOOST_CC_CTRL, 47 /* ENQ 5 5 */ BOOST_CC_CTRL, 48 /* ACK 6 6 */ BOOST_CC_CTRL, 49 /* BEL 7 7 */ BOOST_CC_CTRL, 50 /* BS 8 8 */ BOOST_CC_CTRL, 51 /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, 52 /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, 53 /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, 54 /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, 55 /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, 56 /* SO 14 e */ BOOST_CC_CTRL, 57 /* SI 15 f */ BOOST_CC_CTRL, 58 /* DLE 16 10 */ BOOST_CC_CTRL, 59 /* DC1 17 11 */ BOOST_CC_CTRL, 60 /* DC2 18 12 */ BOOST_CC_CTRL, 61 /* DC3 19 13 */ BOOST_CC_CTRL, 62 /* DC4 20 14 */ BOOST_CC_CTRL, 63 /* NAK 21 15 */ BOOST_CC_CTRL, 64 /* SYN 22 16 */ BOOST_CC_CTRL, 65 /* ETB 23 17 */ BOOST_CC_CTRL, 66 /* CAN 24 18 */ BOOST_CC_CTRL, 67 /* EM 25 19 */ BOOST_CC_CTRL, 68 /* SUB 26 1a */ BOOST_CC_CTRL, 69 /* ESC 27 1b */ BOOST_CC_CTRL, 70 /* FS 28 1c */ BOOST_CC_CTRL, 71 /* GS 29 1d */ BOOST_CC_CTRL, 72 /* RS 30 1e */ BOOST_CC_CTRL, 73 /* US 31 1f */ BOOST_CC_CTRL, 74 /* SP 32 20 */ BOOST_CC_SPACE, 75 /* ! 33 21 */ BOOST_CC_PUNCT, 76 /* " 34 22 */ BOOST_CC_PUNCT, 77 /* # 35 23 */ BOOST_CC_PUNCT, 78 /* $ 36 24 */ BOOST_CC_PUNCT, 79 /* % 37 25 */ BOOST_CC_PUNCT, 80 /* & 38 26 */ BOOST_CC_PUNCT, 81 /* ' 39 27 */ BOOST_CC_PUNCT, 82 /* ( 40 28 */ BOOST_CC_PUNCT, 83 /* ) 41 29 */ BOOST_CC_PUNCT, 84 /* * 42 2a */ BOOST_CC_PUNCT, 85 /* + 43 2b */ BOOST_CC_PUNCT, 86 /* , 44 2c */ BOOST_CC_PUNCT, 87 /* - 45 2d */ BOOST_CC_PUNCT, 88 /* . 46 2e */ BOOST_CC_PUNCT, 89 /* / 47 2f */ BOOST_CC_PUNCT, 90 /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 91 /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 92 /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 93 /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 94 /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 95 /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 96 /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 97 /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 98 /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 99 /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 100 /* : 58 3a */ BOOST_CC_PUNCT, 101 /* ; 59 3b */ BOOST_CC_PUNCT, 102 /* < 60 3c */ BOOST_CC_PUNCT, 103 /* = 61 3d */ BOOST_CC_PUNCT, 104 /* > 62 3e */ BOOST_CC_PUNCT, 105 /* ? 63 3f */ BOOST_CC_PUNCT, 106 /* @ 64 40 */ BOOST_CC_PUNCT, 107 /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 108 /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 109 /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 110 /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 111 /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 112 /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 113 /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 114 /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 115 /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 116 /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 117 /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 118 /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 119 /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 120 /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 121 /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 122 /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 123 /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 124 /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 125 /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 126 /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 127 /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 128 /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 129 /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 130 /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 131 /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 132 /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 133 /* [ 91 5b */ BOOST_CC_PUNCT, 134 /* \ 92 5c */ BOOST_CC_PUNCT, 135 /* ] 93 5d */ BOOST_CC_PUNCT, 136 /* ^ 94 5e */ BOOST_CC_PUNCT, 137 /* _ 95 5f */ BOOST_CC_PUNCT, 138 /* ` 96 60 */ BOOST_CC_PUNCT, 139 /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 140 /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 141 /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 142 /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 143 /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 144 /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 145 /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 146 /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 147 /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 148 /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 149 /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 150 /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 151 /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 152 /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 153 /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 154 /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 155 /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 156 /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 157 /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 158 /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 159 /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 160 /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 161 /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 162 /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 163 /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 164 /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 165 /* { 123 7b */ BOOST_CC_PUNCT, 166 /* | 124 7c */ BOOST_CC_PUNCT, 167 /* } 125 7d */ BOOST_CC_PUNCT, 168 /* ~ 126 7e */ BOOST_CC_PUNCT, 169 /* DEL 127 7f */ BOOST_CC_CTRL, 170 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 }; 179 180 /////////////////////////////////////////////////////////////////////////// 181 // Test characters for specified conditions (using ASCII) 182 /////////////////////////////////////////////////////////////////////////// 183 struct ascii 184 { 185 typedef char char_type; 186 typedef unsigned char classify_type; 187 188 static bool isascii_boost::spirit::char_encoding::ascii189 isascii_(int ch) 190 { 191 return 0 == (ch & ~0x7f); 192 } 193 194 static bool ischarboost::spirit::char_encoding::ascii195 ischar(int ch) 196 { 197 return isascii_(ch); 198 } 199 200 // *** Note on assertions: The precondition is that the calls to 201 // these functions do not violate the required range of ch (type int) 202 // which is that strict_ischar(ch) should be true. It is the 203 // responsibility of the caller to make sure this precondition is not 204 // violated. 205 206 static bool strict_ischarboost::spirit::char_encoding::ascii207 strict_ischar(int ch) 208 { 209 return ch >= 0 && ch <= 127; 210 } 211 212 static bool isalnumboost::spirit::char_encoding::ascii213 isalnum(int ch) 214 { 215 BOOST_ASSERT(strict_ischar(ch)); 216 return (ascii_char_types[ch] & BOOST_CC_ALPHA) 217 || (ascii_char_types[ch] & BOOST_CC_DIGIT); 218 } 219 220 static bool isalphaboost::spirit::char_encoding::ascii221 isalpha(int ch) 222 { 223 BOOST_ASSERT(strict_ischar(ch)); 224 return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; 225 } 226 227 static bool isdigitboost::spirit::char_encoding::ascii228 isdigit(int ch) 229 { 230 BOOST_ASSERT(strict_ischar(ch)); 231 return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; 232 } 233 234 static bool isxdigitboost::spirit::char_encoding::ascii235 isxdigit(int ch) 236 { 237 BOOST_ASSERT(strict_ischar(ch)); 238 return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; 239 } 240 241 static bool iscntrlboost::spirit::char_encoding::ascii242 iscntrl(int ch) 243 { 244 BOOST_ASSERT(strict_ischar(ch)); 245 return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; 246 } 247 248 static bool isgraphboost::spirit::char_encoding::ascii249 isgraph(int ch) 250 { 251 BOOST_ASSERT(strict_ischar(ch)); 252 return ('\x21' <= ch && ch <= '\x7e'); 253 } 254 255 static bool islowerboost::spirit::char_encoding::ascii256 islower(int ch) 257 { 258 BOOST_ASSERT(strict_ischar(ch)); 259 return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; 260 } 261 262 static bool isprintboost::spirit::char_encoding::ascii263 isprint(int ch) 264 { 265 BOOST_ASSERT(strict_ischar(ch)); 266 return ('\x20' <= ch && ch <= '\x7e'); 267 } 268 269 static bool ispunctboost::spirit::char_encoding::ascii270 ispunct(int ch) 271 { 272 BOOST_ASSERT(strict_ischar(ch)); 273 return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; 274 } 275 276 static bool isspaceboost::spirit::char_encoding::ascii277 isspace(int ch) 278 { 279 BOOST_ASSERT(strict_ischar(ch)); 280 return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; 281 } 282 283 static bool BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii284 isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) 285 { 286 BOOST_ASSERT(strict_ischar(ch)); 287 return ('\x09' == ch || '\x20' == ch); 288 } 289 290 static bool isupperboost::spirit::char_encoding::ascii291 isupper(int ch) 292 { 293 BOOST_ASSERT(strict_ischar(ch)); 294 return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; 295 } 296 297 /////////////////////////////////////////////////////////////////////// 298 // Simple character conversions 299 /////////////////////////////////////////////////////////////////////// 300 301 static int tolowerboost::spirit::char_encoding::ascii302 tolower(int ch) 303 { 304 BOOST_ASSERT(strict_ischar(ch)); 305 return isupper(ch) ? (ch - 'A' + 'a') : ch; 306 } 307 308 static int toupperboost::spirit::char_encoding::ascii309 toupper(int ch) 310 { 311 BOOST_ASSERT(strict_ischar(ch)); 312 return islower(ch) ? (ch - 'a' + 'A') : ch; 313 } 314 315 static ::boost::uint32_t toucs4boost::spirit::char_encoding::ascii316 toucs4(int ch) 317 { 318 BOOST_ASSERT(strict_ischar(ch)); 319 return ch; 320 } 321 }; 322 323 }}} 324 325 /////////////////////////////////////////////////////////////////////////////// 326 // undefine macros 327 /////////////////////////////////////////////////////////////////////////////// 328 #undef BOOST_CC_DIGIT 329 #undef BOOST_CC_XDIGIT 330 #undef BOOST_CC_ALPHA 331 #undef BOOST_CC_CTRL 332 #undef BOOST_CC_LOWER 333 #undef BOOST_CC_UPPER 334 #undef BOOST_CC_PUNCT 335 #undef BOOST_CC_SPACE 336 337 #endif 338