1 // tokenize.cpp 2 3 #ifdef _MSC_VER 4 #pragma warning ( disable : 4786 ) 5 #endif//_MSC_VER 6 7 #include <string> 8 #include <vector> 9 #include <conio.h> 10 11 #include "assert.h" 12 #include "tokenize.h" 13 #include "skip_ws.h" 14 15 using std::string; 16 using std::vector; 17 18 void tokenize ( const string& text, vector<string>& tokens ) 19 { 20 tokens.resize ( 0 ); 21 string s ( text ); 22 char* p = &s[0]; 23 while ( *p ) 24 { 25 // skip whitespace 26 p = skip_ws ( p ); 27 // check for literal string 28 if ( *p == '\"' ) 29 { 30 // skip initial quote 31 char* end = p + 1; 32 for ( ;; ) 33 { 34 if ( *end == '\\' ) 35 { 36 end++; 37 switch ( *end ) 38 { 39 case 'x': 40 case 'X': 41 ASSERT(0); // come back to this.... 42 break; 43 case '0': 44 ASSERT(0); 45 break; 46 default: 47 end++; 48 break; 49 } 50 } 51 else if ( *end == '\"' ) 52 { 53 end++; 54 break; 55 } 56 else 57 end++; 58 } 59 tokens.push_back ( string ( p, end-p ) ); 60 p = end; 61 } 62 else if ( __iscsymf(*p) ) 63 { 64 char* end = p + 1; 65 while ( __iscsym ( *end ) ) 66 end++; 67 tokens.push_back ( string ( p, end-p ) ); 68 p = end; 69 } 70 else if ( isdigit(*p) || *p == '.' ) 71 { 72 char* end = p; 73 while ( isdigit(*end) ) 74 end++; 75 bool f = false; 76 if ( *end == '.' ) 77 { 78 end++; 79 while ( isdigit(*end) ) 80 end++; 81 f = true; 82 } 83 if ( *end == 'f' || *end == 'F' ) 84 end++; 85 else if ( !f && ( *end == 'l' || *end == 'L' ) ) 86 end++; 87 tokens.push_back ( string ( p, end-p ) ); 88 p = end; 89 } 90 else switch ( *p ) 91 { 92 case '.': 93 tokens.push_back ( "." ); 94 p++; 95 break; 96 case ',': 97 tokens.push_back ( "," ); 98 p++; 99 break; 100 case '(': 101 tokens.push_back ( "(" ); 102 p++; 103 break; 104 case ')': 105 tokens.push_back ( ")" ); 106 p++; 107 break; 108 case '{': 109 tokens.push_back ( "{" ); 110 p++; 111 break; 112 case '}': 113 tokens.push_back ( "}" ); 114 p++; 115 break; 116 case '[': 117 tokens.push_back ( "[" ); 118 p++; 119 break; 120 case ']': 121 tokens.push_back ( "]" ); 122 p++; 123 break; 124 case ';': 125 tokens.push_back ( ";" ); 126 p++; 127 break; 128 case '\\': 129 switch ( p[1] ) 130 { 131 case '\n': 132 tokens.push_back ( string ( p, 2 ) ); 133 p += 2; 134 break; 135 default: 136 ASSERT(0); // shouldn't hit here, I think 137 tokens.push_back ( "\\" ); 138 p++; 139 break; 140 } 141 break; 142 case '|': 143 switch ( p[1] ) 144 { 145 case '|': 146 tokens.push_back ( string ( p, 2 ) ); 147 p += 2; 148 break; 149 default: 150 tokens.push_back ( "|" ); 151 p++; 152 break; 153 } 154 break; 155 case '&': 156 switch ( p[1] ) 157 { 158 case '&': 159 tokens.push_back ( string ( p, 2 ) ); 160 p += 2; 161 break; 162 default: 163 tokens.push_back ( "&" ); 164 p++; 165 break; 166 } 167 break; 168 case '<': 169 switch ( p[1] ) 170 { 171 case '<': 172 if ( p[2] == '=' ) 173 tokens.push_back ( string ( p, 3 ) ), p += 3; 174 else 175 tokens.push_back ( string ( p, 2 ) ), p += 2; 176 break; 177 case '=': 178 tokens.push_back ( string ( p, 2 ) ); 179 p += 2; 180 break; 181 default: 182 tokens.push_back ( "<" ); 183 p++; 184 break; 185 } 186 break; 187 case '>': 188 switch ( p[1] ) 189 { 190 case '>': 191 if ( p[2] == '=' ) 192 tokens.push_back ( string ( p, 3 ) ), p += 3; 193 else 194 tokens.push_back ( string ( p, 2 ) ), p += 2; 195 break; 196 case '=': 197 tokens.push_back ( string ( p, 2 ) ); 198 p += 2; 199 break; 200 default: 201 tokens.push_back ( ">" ); 202 p++; 203 break; 204 } 205 break; 206 case '!': 207 switch ( p[1] ) 208 { 209 case '=': 210 tokens.push_back ( string ( p, 2 ) ); 211 p += 2; 212 break; 213 default: 214 tokens.push_back ( "!" ); 215 p++; 216 break; 217 } 218 break; 219 case '=': 220 switch ( p[1] ) 221 { 222 case '=': 223 tokens.push_back ( string ( p, 2 ) ); 224 p += 2; 225 break; 226 default: 227 tokens.push_back ( "=" ); 228 p++; 229 break; 230 } 231 break; 232 case ':': 233 switch ( p[1] ) 234 { 235 case ':': 236 tokens.push_back ( string ( p, 2 ) ); 237 p += 2; 238 break; 239 default: 240 tokens.push_back ( ":" ); 241 p++; 242 break; 243 } 244 break; 245 case '*': 246 switch ( p[1] ) 247 { 248 case '=': 249 tokens.push_back ( string ( p, 2 ) ); 250 p += 2; 251 break; 252 default: 253 tokens.push_back ( "*" ); 254 p++; 255 break; 256 } 257 break; 258 case '/': 259 switch ( p[1] ) 260 { 261 case '=': 262 tokens.push_back ( string ( p, 2 ) ); 263 p += 2; 264 break; 265 default: 266 tokens.push_back ( "/" ); 267 p++; 268 break; 269 } 270 break; 271 case '+': 272 switch ( p[1] ) 273 { 274 case '+': 275 case '=': 276 tokens.push_back ( string ( p, 2 ) ); 277 p += 2; 278 break; 279 default: 280 tokens.push_back ( "+" ); 281 p++; 282 break; 283 } 284 break; 285 case '-': 286 switch ( p[1] ) 287 { 288 case '-': 289 case '=': 290 tokens.push_back ( string ( p, 2 ) ); 291 p += 2; 292 break; 293 default: 294 tokens.push_back ( "-" ); 295 p++; 296 break; 297 } 298 break; 299 case '#': 300 while ( *p && *p != '\n' ) 301 p++; 302 break; 303 case 0: 304 break; 305 default: 306 printf ( "choked on '%c' in tokenize() - press any key to continue\n", *p ); 307 getch(); 308 p++; 309 break; 310 } 311 } 312 } 313