1 /* 2 * Copyright 2011 Jacek Caban for CodeWeavers 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "config.h" 20 #include "wine/port.h" 21 22 #include <assert.h> 23 #include <limits.h> 24 25 #include "vbscript.h" 26 #include "parse.h" 27 #include "parser.tab.h" 28 29 #include "wine/debug.h" 30 31 WINE_DEFAULT_DEBUG_CHANNEL(vbscript); 32 33 static const WCHAR andW[] = {'a','n','d',0}; 34 static const WCHAR byrefW[] = {'b','y','r','e','f',0}; 35 static const WCHAR byvalW[] = {'b','y','v','a','l',0}; 36 static const WCHAR callW[] = {'c','a','l','l',0}; 37 static const WCHAR caseW[] = {'c','a','s','e',0}; 38 static const WCHAR classW[] = {'c','l','a','s','s',0}; 39 static const WCHAR constW[] = {'c','o','n','s','t',0}; 40 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0}; 41 static const WCHAR dimW[] = {'d','i','m',0}; 42 static const WCHAR doW[] = {'d','o',0}; 43 static const WCHAR eachW[] = {'e','a','c','h',0}; 44 static const WCHAR elseW[] = {'e','l','s','e',0}; 45 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0}; 46 static const WCHAR emptyW[] = {'e','m','p','t','y',0}; 47 static const WCHAR endW[] = {'e','n','d',0}; 48 static const WCHAR eqvW[] = {'e','q','v',0}; 49 static const WCHAR errorW[] = {'e','r','r','o','r',0}; 50 static const WCHAR exitW[] = {'e','x','i','t',0}; 51 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0}; 52 static const WCHAR falseW[] = {'f','a','l','s','e',0}; 53 static const WCHAR forW[] = {'f','o','r',0}; 54 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0}; 55 static const WCHAR getW[] = {'g','e','t',0}; 56 static const WCHAR gotoW[] = {'g','o','t','o',0}; 57 static const WCHAR ifW[] = {'i','f',0}; 58 static const WCHAR impW[] = {'i','m','p',0}; 59 static const WCHAR inW[] = {'i','n',0}; 60 static const WCHAR isW[] = {'i','s',0}; 61 static const WCHAR letW[] = {'l','e','t',0}; 62 static const WCHAR loopW[] = {'l','o','o','p',0}; 63 static const WCHAR meW[] = {'m','e',0}; 64 static const WCHAR modW[] = {'m','o','d',0}; 65 static const WCHAR newW[] = {'n','e','w',0}; 66 static const WCHAR nextW[] = {'n','e','x','t',0}; 67 static const WCHAR notW[] = {'n','o','t',0}; 68 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0}; 69 static const WCHAR nullW[] = {'n','u','l','l',0}; 70 static const WCHAR onW[] = {'o','n',0}; 71 static const WCHAR optionW[] = {'o','p','t','i','o','n',0}; 72 static const WCHAR orW[] = {'o','r',0}; 73 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0}; 74 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0}; 75 static const WCHAR publicW[] = {'p','u','b','l','i','c',0}; 76 static const WCHAR remW[] = {'r','e','m',0}; 77 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0}; 78 static const WCHAR selectW[] = {'s','e','l','e','c','t',0}; 79 static const WCHAR setW[] = {'s','e','t',0}; 80 static const WCHAR stepW[] = {'s','t','e','p',0}; 81 static const WCHAR stopW[] = {'s','t','o','p',0}; 82 static const WCHAR subW[] = {'s','u','b',0}; 83 static const WCHAR thenW[] = {'t','h','e','n',0}; 84 static const WCHAR toW[] = {'t','o',0}; 85 static const WCHAR trueW[] = {'t','r','u','e',0}; 86 static const WCHAR untilW[] = {'u','n','t','i','l',0}; 87 static const WCHAR wendW[] = {'w','e','n','d',0}; 88 static const WCHAR whileW[] = {'w','h','i','l','e',0}; 89 static const WCHAR xorW[] = {'x','o','r',0}; 90 91 static const struct { 92 const WCHAR *word; 93 int token; 94 } keywords[] = { 95 {andW, tAND}, 96 {byrefW, tBYREF}, 97 {byvalW, tBYVAL}, 98 {callW, tCALL}, 99 {caseW, tCASE}, 100 {classW, tCLASS}, 101 {constW, tCONST}, 102 {defaultW, tDEFAULT}, 103 {dimW, tDIM}, 104 {doW, tDO}, 105 {eachW, tEACH}, 106 {elseW, tELSE}, 107 {elseifW, tELSEIF}, 108 {emptyW, tEMPTY}, 109 {endW, tEND}, 110 {eqvW, tEQV}, 111 {errorW, tERROR}, 112 {exitW, tEXIT}, 113 {explicitW, tEXPLICIT}, 114 {falseW, tFALSE}, 115 {forW, tFOR}, 116 {functionW, tFUNCTION}, 117 {getW, tGET}, 118 {gotoW, tGOTO}, 119 {ifW, tIF}, 120 {impW, tIMP}, 121 {inW, tIN}, 122 {isW, tIS}, 123 {letW, tLET}, 124 {loopW, tLOOP}, 125 {meW, tME}, 126 {modW, tMOD}, 127 {newW, tNEW}, 128 {nextW, tNEXT}, 129 {notW, tNOT}, 130 {nothingW, tNOTHING}, 131 {nullW, tNULL}, 132 {onW, tON}, 133 {optionW, tOPTION}, 134 {orW, tOR}, 135 {privateW, tPRIVATE}, 136 {propertyW, tPROPERTY}, 137 {publicW, tPUBLIC}, 138 {remW, tREM}, 139 {resumeW, tRESUME}, 140 {selectW, tSELECT}, 141 {setW, tSET}, 142 {stepW, tSTEP}, 143 {stopW, tSTOP}, 144 {subW, tSUB}, 145 {thenW, tTHEN}, 146 {toW, tTO}, 147 {trueW, tTRUE}, 148 {untilW, tUNTIL}, 149 {wendW, tWEND}, 150 {whileW, tWHILE}, 151 {xorW, tXOR} 152 }; 153 154 static inline BOOL is_identifier_char(WCHAR c) 155 { 156 return isalnumW(c) || c == '_'; 157 } 158 159 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word) 160 { 161 const WCHAR *p1 = ctx->ptr; 162 const WCHAR *p2 = word; 163 WCHAR c; 164 165 while(p1 < ctx->end && *p2) { 166 c = tolowerW(*p1); 167 if(c != *p2) 168 return c - *p2; 169 p1++; 170 p2++; 171 } 172 173 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1))) 174 return 1; 175 176 ctx->ptr = p1; 177 return 0; 178 } 179 180 static int check_keywords(parser_ctx_t *ctx) 181 { 182 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i; 183 184 while(min <= max) { 185 i = (min+max)/2; 186 187 r = check_keyword(ctx, keywords[i].word); 188 if(!r) 189 return keywords[i].token; 190 191 if(r > 0) 192 min = i+1; 193 else 194 max = i-1; 195 } 196 197 return 0; 198 } 199 200 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret) 201 { 202 const WCHAR *ptr = ctx->ptr++; 203 WCHAR *str; 204 int len; 205 206 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) 207 ctx->ptr++; 208 len = ctx->ptr-ptr; 209 210 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 211 if(!str) 212 return 0; 213 214 memcpy(str, ptr, (len+1)*sizeof(WCHAR)); 215 str[len] = 0; 216 *ret = str; 217 return tIdentifier; 218 } 219 220 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret) 221 { 222 const WCHAR *ptr = ++ctx->ptr; 223 WCHAR *rptr; 224 int len = 0; 225 226 while(ctx->ptr < ctx->end) { 227 if(*ctx->ptr == '\n') { 228 FIXME("newline inside string literal\n"); 229 return 0; 230 } 231 232 if(*ctx->ptr == '"') { 233 if(ctx->ptr[1] != '"') 234 break; 235 len--; 236 ctx->ptr++; 237 } 238 ctx->ptr++; 239 } 240 241 if(ctx->ptr == ctx->end) { 242 FIXME("unterminated string literal\n"); 243 return 0; 244 } 245 246 len += ctx->ptr-ptr; 247 248 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 249 if(!rptr) 250 return 0; 251 252 while(ptr < ctx->ptr) { 253 if(*ptr == '"') 254 ptr++; 255 *rptr++ = *ptr++; 256 } 257 258 *rptr = 0; 259 ctx->ptr++; 260 return tString; 261 } 262 263 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret) 264 { 265 BOOL use_int = TRUE; 266 LONGLONG d = 0, hlp; 267 int exp = 0; 268 double r; 269 270 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.') 271 return *ctx->ptr++; 272 273 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 274 hlp = d*10 + *(ctx->ptr++) - '0'; 275 if(d>MAXLONGLONG/10 || hlp<0) { 276 exp++; 277 break; 278 } 279 else 280 d = hlp; 281 } 282 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 283 exp++; 284 ctx->ptr++; 285 } 286 287 if(*ctx->ptr == '.') { 288 use_int = FALSE; 289 ctx->ptr++; 290 291 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 292 hlp = d*10 + *(ctx->ptr++) - '0'; 293 if(d>MAXLONGLONG/10 || hlp<0) 294 break; 295 296 d = hlp; 297 exp--; 298 } 299 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) 300 ctx->ptr++; 301 } 302 303 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') { 304 int e = 0, sign = 1; 305 306 if(*++ctx->ptr == '-') { 307 ctx->ptr++; 308 sign = -1; 309 } 310 311 if(!isdigitW(*ctx->ptr)) { 312 FIXME("Invalid numeric literal\n"); 313 return 0; 314 } 315 316 use_int = FALSE; 317 318 do { 319 e = e*10 + *(ctx->ptr++) - '0'; 320 if(sign == -1 && -e+exp < -(INT_MAX/100)) { 321 /* The literal will be rounded to 0 anyway. */ 322 while(isdigitW(*ctx->ptr)) 323 ctx->ptr++; 324 *(double*)ret = 0; 325 return tDouble; 326 } 327 328 if(sign*e + exp > INT_MAX/100) { 329 FIXME("Invalid numeric literal\n"); 330 return 0; 331 } 332 } while(isdigitW(*ctx->ptr)); 333 334 exp += sign*e; 335 } 336 337 if(use_int && (LONG)d == d) { 338 LONG l = d; 339 *(LONG*)ret = l; 340 return (short)l == l ? tShort : tLong; 341 } 342 343 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp); 344 if(isinf(r)) { 345 FIXME("Invalid numeric literal\n"); 346 return 0; 347 } 348 349 *(double*)ret = r; 350 return tDouble; 351 } 352 353 static int hex_to_int(WCHAR c) 354 { 355 if('0' <= c && c <= '9') 356 return c-'0'; 357 if('a' <= c && c <= 'f') 358 return c+10-'a'; 359 if('A' <= c && c <= 'F') 360 return c+10-'A'; 361 return -1; 362 } 363 364 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret) 365 { 366 const WCHAR *begin = ctx->ptr; 367 LONG l = 0, d; 368 369 while((d = hex_to_int(*++ctx->ptr)) != -1) 370 l = l*16 + d; 371 372 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) { 373 FIXME("invalid literal\n"); 374 return 0; 375 } 376 377 if(*ctx->ptr == '&') 378 ctx->ptr++; 379 380 *ret = l; 381 return (short)l == l ? tShort : tLong; 382 } 383 384 static void skip_spaces(parser_ctx_t *ctx) 385 { 386 while(*ctx->ptr == ' ' || *ctx->ptr == '\t') 387 ctx->ptr++; 388 } 389 390 static int comment_line(parser_ctx_t *ctx) 391 { 392 static const WCHAR newlineW[] = {'\n','\r',0}; 393 ctx->ptr = strpbrkW(ctx->ptr, newlineW); 394 if(ctx->ptr) 395 ctx->ptr++; 396 else 397 ctx->ptr = ctx->end; 398 return tNL; 399 } 400 401 static int parse_next_token(void *lval, parser_ctx_t *ctx) 402 { 403 WCHAR c; 404 405 skip_spaces(ctx); 406 if(ctx->ptr == ctx->end) 407 return ctx->last_token == tNL ? tEOF : tNL; 408 409 c = *ctx->ptr; 410 411 if('0' <= c && c <= '9') 412 return parse_numeric_literal(ctx, lval); 413 414 if(isalphaW(c)) { 415 int ret = check_keywords(ctx); 416 if(!ret) 417 return parse_identifier(ctx, lval); 418 if(ret != tREM) 419 return ret; 420 c = '\''; 421 } 422 423 switch(c) { 424 case '\n': 425 case '\r': 426 ctx->ptr++; 427 return tNL; 428 case '\'': 429 return comment_line(ctx); 430 case ':': 431 case ')': 432 case ',': 433 case '=': 434 case '+': 435 case '*': 436 case '/': 437 case '^': 438 case '\\': 439 case '.': 440 case '_': 441 return *ctx->ptr++; 442 case '-': 443 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>') 444 return comment_line(ctx); 445 ctx->ptr++; 446 return '-'; 447 case '(': 448 /* NOTE: 449 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts 450 * in call statement special case |f()| without 'call' keyword 451 */ 452 ctx->ptr++; 453 skip_spaces(ctx); 454 if(*ctx->ptr == ')') { 455 ctx->ptr++; 456 return tEMPTYBRACKETS; 457 } 458 return '('; 459 case '"': 460 return parse_string_literal(ctx, lval); 461 case '&': 462 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H') 463 return parse_hex_literal(ctx, lval); 464 return '&'; 465 case '<': 466 switch(*++ctx->ptr) { 467 case '>': 468 ctx->ptr++; 469 return tNEQ; 470 case '=': 471 ctx->ptr++; 472 return tLTEQ; 473 case '!': 474 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-') 475 return comment_line(ctx); 476 } 477 return '<'; 478 case '>': 479 if(*++ctx->ptr == '=') { 480 ctx->ptr++; 481 return tGTEQ; 482 } 483 return '>'; 484 default: 485 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr)); 486 } 487 488 return 0; 489 } 490 491 int parser_lex(void *lval, parser_ctx_t *ctx) 492 { 493 int ret; 494 495 while(1) { 496 ret = parse_next_token(lval, ctx); 497 if(ret == '_') { 498 skip_spaces(ctx); 499 if(*ctx->ptr != '\n') { 500 FIXME("'_' not followed by newline\n"); 501 return 0; 502 } 503 ctx->ptr++; 504 continue; 505 } 506 if(ret != tNL || ctx->last_token != tNL) 507 break; 508 509 ctx->last_nl = ctx->ptr-ctx->code; 510 } 511 512 return (ctx->last_token = ret); 513 } 514