1 /*============================================================================= 2 Copyright (c) 2001-2011 Hartmut Kaiser 3 Copyright (c) 2001-2011 Joel de Guzman 4 5 Distributed under the Boost Software License, Version 1.0. (See accompanying 6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 =============================================================================*/ 8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) 9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM 10 11 #if defined(_MSC_VER) 12 #pragma once 13 #endif 14 15 #include <climits> 16 #include <boost/assert.hpp> 17 #include <boost/cstdint.hpp> 18 19 /////////////////////////////////////////////////////////////////////////////// 20 // constants used to classify the single characters 21 /////////////////////////////////////////////////////////////////////////////// 22 #define BOOST_CC_DIGIT 0x0001 23 #define BOOST_CC_XDIGIT 0x0002 24 #define BOOST_CC_ALPHA 0x0004 25 #define BOOST_CC_CTRL 0x0008 26 #define BOOST_CC_LOWER 0x0010 27 #define BOOST_CC_UPPER 0x0020 28 #define BOOST_CC_SPACE 0x0040 29 #define BOOST_CC_PUNCT 0x0080 30 31 namespace boost { namespace spirit { namespace char_encoding 32 { 33 // The detection of isgraph(), isprint() and isblank() is done programmatically 34 // to keep the character type table small. Additionally, these functions are 35 // rather seldom used and the programmatic detection is very simple. 36 37 /////////////////////////////////////////////////////////////////////////// 38 // ASCII character classification table 39 /////////////////////////////////////////////////////////////////////////// 40 const unsigned char ascii_char_types[] = 41 { 42 /* NUL 0 0 */ BOOST_CC_CTRL, 43 /* SOH 1 1 */ BOOST_CC_CTRL, 44 /* STX 2 2 */ BOOST_CC_CTRL, 45 /* ETX 3 3 */ BOOST_CC_CTRL, 46 /* EOT 4 4 */ BOOST_CC_CTRL, 47 /* ENQ 5 5 */ BOOST_CC_CTRL, 48 /* ACK 6 6 */ BOOST_CC_CTRL, 49 /* BEL 7 7 */ BOOST_CC_CTRL, 50 /* BS 8 8 */ BOOST_CC_CTRL, 51 /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, 52 /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, 53 /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, 54 /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, 55 /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, 56 /* SO 14 e */ BOOST_CC_CTRL, 57 /* SI 15 f */ BOOST_CC_CTRL, 58 /* DLE 16 10 */ BOOST_CC_CTRL, 59 /* DC1 17 11 */ BOOST_CC_CTRL, 60 /* DC2 18 12 */ BOOST_CC_CTRL, 61 /* DC3 19 13 */ BOOST_CC_CTRL, 62 /* DC4 20 14 */ BOOST_CC_CTRL, 63 /* NAK 21 15 */ BOOST_CC_CTRL, 64 /* SYN 22 16 */ BOOST_CC_CTRL, 65 /* ETB 23 17 */ BOOST_CC_CTRL, 66 /* CAN 24 18 */ BOOST_CC_CTRL, 67 /* EM 25 19 */ BOOST_CC_CTRL, 68 /* SUB 26 1a */ BOOST_CC_CTRL, 69 /* ESC 27 1b */ BOOST_CC_CTRL, 70 /* FS 28 1c */ BOOST_CC_CTRL, 71 /* GS 29 1d */ BOOST_CC_CTRL, 72 /* RS 30 1e */ BOOST_CC_CTRL, 73 /* US 31 1f */ BOOST_CC_CTRL, 74 /* SP 32 20 */ BOOST_CC_SPACE, 75 /* ! 33 21 */ BOOST_CC_PUNCT, 76 /* " 34 22 */ BOOST_CC_PUNCT, 77 /* # 35 23 */ BOOST_CC_PUNCT, 78 /* $ 36 24 */ BOOST_CC_PUNCT, 79 /* % 37 25 */ BOOST_CC_PUNCT, 80 /* & 38 26 */ BOOST_CC_PUNCT, 81 /* ' 39 27 */ BOOST_CC_PUNCT, 82 /* ( 40 28 */ BOOST_CC_PUNCT, 83 /* ) 41 29 */ BOOST_CC_PUNCT, 84 /* * 42 2a */ BOOST_CC_PUNCT, 85 /* + 43 2b */ BOOST_CC_PUNCT, 86 /* , 44 2c */ BOOST_CC_PUNCT, 87 /* - 45 2d */ BOOST_CC_PUNCT, 88 /* . 46 2e */ BOOST_CC_PUNCT, 89 /* / 47 2f */ BOOST_CC_PUNCT, 90 /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 91 /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 92 /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 93 /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 94 /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 95 /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 96 /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 97 /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 98 /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 99 /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 100 /* : 58 3a */ BOOST_CC_PUNCT, 101 /* ; 59 3b */ BOOST_CC_PUNCT, 102 /* < 60 3c */ BOOST_CC_PUNCT, 103 /* = 61 3d */ BOOST_CC_PUNCT, 104 /* > 62 3e */ BOOST_CC_PUNCT, 105 /* ? 63 3f */ BOOST_CC_PUNCT, 106 /* @ 64 40 */ BOOST_CC_PUNCT, 107 /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 108 /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 109 /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 110 /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 111 /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 112 /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 113 /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 114 /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 115 /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 116 /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 117 /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 118 /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 119 /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 120 /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 121 /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 122 /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 123 /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 124 /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 125 /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 126 /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 127 /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 128 /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 129 /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 130 /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 131 /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 132 /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 133 /* [ 91 5b */ BOOST_CC_PUNCT, 134 /* \ 92 5c */ BOOST_CC_PUNCT, 135 /* ] 93 5d */ BOOST_CC_PUNCT, 136 /* ^ 94 5e */ BOOST_CC_PUNCT, 137 /* _ 95 5f */ BOOST_CC_PUNCT, 138 /* ` 96 60 */ BOOST_CC_PUNCT, 139 /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 140 /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 141 /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 142 /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 143 /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 144 /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 145 /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 146 /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 147 /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 148 /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 149 /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 150 /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 151 /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 152 /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 153 /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 154 /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 155 /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 156 /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 157 /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 158 /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 159 /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 160 /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 161 /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 162 /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 163 /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 164 /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 165 /* { 123 7b */ BOOST_CC_PUNCT, 166 /* | 124 7c */ BOOST_CC_PUNCT, 167 /* } 125 7d */ BOOST_CC_PUNCT, 168 /* ~ 126 7e */ BOOST_CC_PUNCT, 169 /* DEL 127 7f */ BOOST_CC_CTRL, 170 }; 171 172 /////////////////////////////////////////////////////////////////////////// 173 // Test characters for specified conditions (using ASCII) 174 /////////////////////////////////////////////////////////////////////////// 175 struct ascii 176 { 177 typedef char char_type; 178 179 static bool isascii_boost::spirit::char_encoding::ascii180 isascii_(int ch) 181 { 182 return 0 == (ch & ~0x7f); 183 } 184 185 static bool ischarboost::spirit::char_encoding::ascii186 ischar(int ch) 187 { 188 return isascii_(ch); 189 } 190 191 static bool isalnumboost::spirit::char_encoding::ascii192 isalnum(int ch) 193 { 194 BOOST_ASSERT(isascii_(ch)); 195 return (ascii_char_types[ch] & BOOST_CC_ALPHA) 196 || (ascii_char_types[ch] & BOOST_CC_DIGIT); 197 } 198 199 static bool isalphaboost::spirit::char_encoding::ascii200 isalpha(int ch) 201 { 202 BOOST_ASSERT(isascii_(ch)); 203 return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; 204 } 205 206 static bool isdigitboost::spirit::char_encoding::ascii207 isdigit(int ch) 208 { 209 BOOST_ASSERT(isascii_(ch)); 210 return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; 211 } 212 213 static bool isxdigitboost::spirit::char_encoding::ascii214 isxdigit(int ch) 215 { 216 BOOST_ASSERT(isascii_(ch)); 217 return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; 218 } 219 220 static bool iscntrlboost::spirit::char_encoding::ascii221 iscntrl(int ch) 222 { 223 BOOST_ASSERT(isascii_(ch)); 224 return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; 225 } 226 227 static bool isgraphboost::spirit::char_encoding::ascii228 isgraph(int ch) 229 { 230 return ('\x21' <= ch && ch <= '\x7e'); 231 } 232 233 static bool islowerboost::spirit::char_encoding::ascii234 islower(int ch) 235 { 236 BOOST_ASSERT(isascii_(ch)); 237 return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; 238 } 239 240 static bool isprintboost::spirit::char_encoding::ascii241 isprint(int ch) 242 { 243 return ('\x20' <= ch && ch <= '\x7e'); 244 } 245 246 static bool ispunctboost::spirit::char_encoding::ascii247 ispunct(int ch) 248 { 249 BOOST_ASSERT(isascii_(ch)); 250 return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; 251 } 252 253 static bool isspaceboost::spirit::char_encoding::ascii254 isspace(int ch) 255 { 256 BOOST_ASSERT(isascii_(ch)); 257 return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; 258 } 259 260 static bool BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii261 isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) 262 { 263 return ('\x09' == ch || '\x20' == ch); 264 } 265 266 static bool isupperboost::spirit::char_encoding::ascii267 isupper(int ch) 268 { 269 BOOST_ASSERT(isascii_(ch)); 270 return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; 271 } 272 273 /////////////////////////////////////////////////////////////////////// 274 // Simple character conversions 275 /////////////////////////////////////////////////////////////////////// 276 277 static int tolowerboost::spirit::char_encoding::ascii278 tolower(int ch) 279 { 280 BOOST_ASSERT(isascii_(ch)); 281 return isupper(ch) ? (ch - 'A' + 'a') : ch; 282 } 283 284 static int toupperboost::spirit::char_encoding::ascii285 toupper(int ch) 286 { 287 BOOST_ASSERT(isascii_(ch)); 288 return islower(ch) ? (ch - 'a' + 'A') : ch; 289 } 290 291 static ::boost::uint32_t toucs4boost::spirit::char_encoding::ascii292 toucs4(int ch) 293 { 294 return ch; 295 } 296 }; 297 298 }}} 299 300 /////////////////////////////////////////////////////////////////////////////// 301 // undefine macros 302 /////////////////////////////////////////////////////////////////////////////// 303 #undef BOOST_CC_DIGIT 304 #undef BOOST_CC_XDIGIT 305 #undef BOOST_CC_ALPHA 306 #undef BOOST_CC_CTRL 307 #undef BOOST_CC_LOWER 308 #undef BOOST_CC_UPPER 309 #undef BOOST_CC_PUNCT 310 #undef BOOST_CC_SPACE 311 312 #endif 313 314