1 /*============================================================================= 2 Copyright (c) 2001-2011 Hartmut Kaiser 3 Copyright (c) 2001-2011 Joel de Guzman 4 5 Distributed under the Boost Software License, Version 1.0. (See accompanying 6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 =============================================================================*/ 8 #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) 9 #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM 10 11 #if defined(_MSC_VER) 12 #pragma once 13 #endif 14 15 #include <climits> 16 #include <boost/assert.hpp> 17 #include <boost/cstdint.hpp> 18 19 /////////////////////////////////////////////////////////////////////////////// 20 // constants used to classify the single characters 21 /////////////////////////////////////////////////////////////////////////////// 22 #define BOOST_CC_DIGIT 0x0001 23 #define BOOST_CC_XDIGIT 0x0002 24 #define BOOST_CC_ALPHA 0x0004 25 #define BOOST_CC_CTRL 0x0008 26 #define BOOST_CC_LOWER 0x0010 27 #define BOOST_CC_UPPER 0x0020 28 #define BOOST_CC_SPACE 0x0040 29 #define BOOST_CC_PUNCT 0x0080 30 31 namespace boost { namespace spirit { namespace char_encoding 32 { 33 // The detection of isgraph(), isprint() and isblank() is done programmatically 34 // to keep the character type table small. Additionally, these functions are 35 // rather seldom used and the programmatic detection is very simple. 36 37 /////////////////////////////////////////////////////////////////////////// 38 // ASCII character classification table 39 /////////////////////////////////////////////////////////////////////////// 40 const unsigned char ascii_char_types[] = 41 { 42 /* NUL 0 0 */ BOOST_CC_CTRL, 43 /* SOH 1 1 */ BOOST_CC_CTRL, 44 /* STX 2 2 */ BOOST_CC_CTRL, 45 /* ETX 3 3 */ BOOST_CC_CTRL, 46 /* EOT 4 4 */ BOOST_CC_CTRL, 47 /* ENQ 5 5 */ BOOST_CC_CTRL, 48 /* ACK 6 6 */ BOOST_CC_CTRL, 49 /* BEL 7 7 */ BOOST_CC_CTRL, 50 /* BS 8 8 */ BOOST_CC_CTRL, 51 /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, 52 /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, 53 /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, 54 /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, 55 /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, 56 /* SO 14 e */ BOOST_CC_CTRL, 57 /* SI 15 f */ BOOST_CC_CTRL, 58 /* DLE 16 10 */ BOOST_CC_CTRL, 59 /* DC1 17 11 */ BOOST_CC_CTRL, 60 /* DC2 18 12 */ BOOST_CC_CTRL, 61 /* DC3 19 13 */ BOOST_CC_CTRL, 62 /* DC4 20 14 */ BOOST_CC_CTRL, 63 /* NAK 21 15 */ BOOST_CC_CTRL, 64 /* SYN 22 16 */ BOOST_CC_CTRL, 65 /* ETB 23 17 */ BOOST_CC_CTRL, 66 /* CAN 24 18 */ BOOST_CC_CTRL, 67 /* EM 25 19 */ BOOST_CC_CTRL, 68 /* SUB 26 1a */ BOOST_CC_CTRL, 69 /* ESC 27 1b */ BOOST_CC_CTRL, 70 /* FS 28 1c */ BOOST_CC_CTRL, 71 /* GS 29 1d */ BOOST_CC_CTRL, 72 /* RS 30 1e */ BOOST_CC_CTRL, 73 /* US 31 1f */ BOOST_CC_CTRL, 74 /* SP 32 20 */ BOOST_CC_SPACE, 75 /* ! 33 21 */ BOOST_CC_PUNCT, 76 /* " 34 22 */ BOOST_CC_PUNCT, 77 /* # 35 23 */ BOOST_CC_PUNCT, 78 /* $ 36 24 */ BOOST_CC_PUNCT, 79 /* % 37 25 */ BOOST_CC_PUNCT, 80 /* & 38 26 */ BOOST_CC_PUNCT, 81 /* ' 39 27 */ BOOST_CC_PUNCT, 82 /* ( 40 28 */ BOOST_CC_PUNCT, 83 /* ) 41 29 */ BOOST_CC_PUNCT, 84 /* * 42 2a */ BOOST_CC_PUNCT, 85 /* + 43 2b */ BOOST_CC_PUNCT, 86 /* , 44 2c */ BOOST_CC_PUNCT, 87 /* - 45 2d */ BOOST_CC_PUNCT, 88 /* . 46 2e */ BOOST_CC_PUNCT, 89 /* / 47 2f */ BOOST_CC_PUNCT, 90 /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 91 /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 92 /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 93 /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 94 /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 95 /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 96 /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 97 /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 98 /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 99 /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, 100 /* : 58 3a */ BOOST_CC_PUNCT, 101 /* ; 59 3b */ BOOST_CC_PUNCT, 102 /* < 60 3c */ BOOST_CC_PUNCT, 103 /* = 61 3d */ BOOST_CC_PUNCT, 104 /* > 62 3e */ BOOST_CC_PUNCT, 105 /* ? 63 3f */ BOOST_CC_PUNCT, 106 /* @ 64 40 */ BOOST_CC_PUNCT, 107 /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 108 /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 109 /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 110 /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 111 /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 112 /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, 113 /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 114 /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 115 /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 116 /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 117 /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 118 /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 119 /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 120 /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 121 /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 122 /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 123 /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 124 /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 125 /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 126 /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 127 /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 128 /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 129 /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 130 /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 131 /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 132 /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, 133 /* [ 91 5b */ BOOST_CC_PUNCT, 134 /* \ 92 5c */ BOOST_CC_PUNCT, 135 /* ] 93 5d */ BOOST_CC_PUNCT, 136 /* ^ 94 5e */ BOOST_CC_PUNCT, 137 /* _ 95 5f */ BOOST_CC_PUNCT, 138 /* ` 96 60 */ BOOST_CC_PUNCT, 139 /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 140 /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 141 /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 142 /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 143 /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 144 /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, 145 /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 146 /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 147 /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 148 /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 149 /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 150 /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 151 /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 152 /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 153 /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 154 /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 155 /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 156 /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 157 /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 158 /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 159 /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 160 /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 161 /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 162 /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 163 /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 164 /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, 165 /* { 123 7b */ BOOST_CC_PUNCT, 166 /* | 124 7c */ BOOST_CC_PUNCT, 167 /* } 125 7d */ BOOST_CC_PUNCT, 168 /* ~ 126 7e */ BOOST_CC_PUNCT, 169 /* DEL 127 7f */ BOOST_CC_CTRL, 170 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 171 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 }; 179 180 /////////////////////////////////////////////////////////////////////////// 181 // Test characters for specified conditions (using ASCII) 182 /////////////////////////////////////////////////////////////////////////// 183 struct ascii 184 { 185 typedef char char_type; 186 187 static bool isascii_boost::spirit::char_encoding::ascii188 isascii_(int ch) 189 { 190 return 0 == (ch & ~0x7f); 191 } 192 193 static bool ischarboost::spirit::char_encoding::ascii194 ischar(int ch) 195 { 196 return isascii_(ch); 197 } 198 199 static bool isalnumboost::spirit::char_encoding::ascii200 isalnum(int ch) 201 { 202 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 203 return (ascii_char_types[ch] & BOOST_CC_ALPHA) 204 || (ascii_char_types[ch] & BOOST_CC_DIGIT); 205 } 206 207 static bool isalphaboost::spirit::char_encoding::ascii208 isalpha(int ch) 209 { 210 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 211 return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; 212 } 213 214 static bool isdigitboost::spirit::char_encoding::ascii215 isdigit(int ch) 216 { 217 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 218 return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; 219 } 220 221 static bool isxdigitboost::spirit::char_encoding::ascii222 isxdigit(int ch) 223 { 224 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 225 return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; 226 } 227 228 static bool iscntrlboost::spirit::char_encoding::ascii229 iscntrl(int ch) 230 { 231 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 232 return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; 233 } 234 235 static bool isgraphboost::spirit::char_encoding::ascii236 isgraph(int ch) 237 { 238 return ('\x21' <= ch && ch <= '\x7e'); 239 } 240 241 static bool islowerboost::spirit::char_encoding::ascii242 islower(int ch) 243 { 244 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 245 return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; 246 } 247 248 static bool isprintboost::spirit::char_encoding::ascii249 isprint(int ch) 250 { 251 return ('\x20' <= ch && ch <= '\x7e'); 252 } 253 254 static bool ispunctboost::spirit::char_encoding::ascii255 ispunct(int ch) 256 { 257 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 258 return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; 259 } 260 261 static bool isspaceboost::spirit::char_encoding::ascii262 isspace(int ch) 263 { 264 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 265 return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; 266 } 267 268 static bool BOOST_PREVENT_MACRO_SUBSTITUTIONboost::spirit::char_encoding::ascii269 isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) 270 { 271 return ('\x09' == ch || '\x20' == ch); 272 } 273 274 static bool isupperboost::spirit::char_encoding::ascii275 isupper(int ch) 276 { 277 BOOST_ASSERT(0 == (ch & ~UCHAR_MAX)); 278 return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; 279 } 280 281 /////////////////////////////////////////////////////////////////////// 282 // Simple character conversions 283 /////////////////////////////////////////////////////////////////////// 284 285 static int tolowerboost::spirit::char_encoding::ascii286 tolower(int ch) 287 { 288 return isupper(ch) ? (ch - 'A' + 'a') : ch; 289 } 290 291 static int toupperboost::spirit::char_encoding::ascii292 toupper(int ch) 293 { 294 return islower(ch) ? (ch - 'a' + 'A') : ch; 295 } 296 297 static ::boost::uint32_t toucs4boost::spirit::char_encoding::ascii298 toucs4(int ch) 299 { 300 return ch; 301 } 302 }; 303 304 }}} 305 306 /////////////////////////////////////////////////////////////////////////////// 307 // undefine macros 308 /////////////////////////////////////////////////////////////////////////////// 309 #undef BOOST_CC_DIGIT 310 #undef BOOST_CC_XDIGIT 311 #undef BOOST_CC_ALPHA 312 #undef BOOST_CC_CTRL 313 #undef BOOST_CC_LOWER 314 #undef BOOST_CC_UPPER 315 #undef BOOST_CC_PUNCT 316 #undef BOOST_CC_SPACE 317 318 #endif 319