1 /* 2 * Copyright 2011 Jacek Caban for CodeWeavers 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #ifdef __REACTOS__ 20 #include <wine/config.h> 21 #include <wine/port.h> 22 #endif 23 #include <assert.h> 24 #include <limits.h> 25 #include <math.h> 26 27 #include "vbscript.h" 28 #include "parse.h" 29 #include "parser.tab.h" 30 31 #include "wine/debug.h" 32 33 WINE_DEFAULT_DEBUG_CHANNEL(vbscript); 34 35 static const WCHAR andW[] = {'a','n','d',0}; 36 static const WCHAR byrefW[] = {'b','y','r','e','f',0}; 37 static const WCHAR byvalW[] = {'b','y','v','a','l',0}; 38 static const WCHAR callW[] = {'c','a','l','l',0}; 39 static const WCHAR caseW[] = {'c','a','s','e',0}; 40 static const WCHAR classW[] = {'c','l','a','s','s',0}; 41 static const WCHAR constW[] = {'c','o','n','s','t',0}; 42 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0}; 43 static const WCHAR dimW[] = {'d','i','m',0}; 44 static const WCHAR doW[] = {'d','o',0}; 45 static const WCHAR eachW[] = {'e','a','c','h',0}; 46 static const WCHAR elseW[] = {'e','l','s','e',0}; 47 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0}; 48 static const WCHAR emptyW[] = {'e','m','p','t','y',0}; 49 static const WCHAR endW[] = {'e','n','d',0}; 50 static const WCHAR eqvW[] = {'e','q','v',0}; 51 static const WCHAR errorW[] = {'e','r','r','o','r',0}; 52 static const WCHAR exitW[] = {'e','x','i','t',0}; 53 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0}; 54 static const WCHAR falseW[] = {'f','a','l','s','e',0}; 55 static const WCHAR forW[] = {'f','o','r',0}; 56 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0}; 57 static const WCHAR getW[] = {'g','e','t',0}; 58 static const WCHAR gotoW[] = {'g','o','t','o',0}; 59 static const WCHAR ifW[] = {'i','f',0}; 60 static const WCHAR impW[] = {'i','m','p',0}; 61 static const WCHAR inW[] = {'i','n',0}; 62 static const WCHAR isW[] = {'i','s',0}; 63 static const WCHAR letW[] = {'l','e','t',0}; 64 static const WCHAR loopW[] = {'l','o','o','p',0}; 65 static const WCHAR meW[] = {'m','e',0}; 66 static const WCHAR modW[] = {'m','o','d',0}; 67 static const WCHAR newW[] = {'n','e','w',0}; 68 static const WCHAR nextW[] = {'n','e','x','t',0}; 69 static const WCHAR notW[] = {'n','o','t',0}; 70 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0}; 71 static const WCHAR nullW[] = {'n','u','l','l',0}; 72 static const WCHAR onW[] = {'o','n',0}; 73 static const WCHAR optionW[] = {'o','p','t','i','o','n',0}; 74 static const WCHAR orW[] = {'o','r',0}; 75 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0}; 76 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0}; 77 static const WCHAR publicW[] = {'p','u','b','l','i','c',0}; 78 static const WCHAR remW[] = {'r','e','m',0}; 79 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0}; 80 static const WCHAR selectW[] = {'s','e','l','e','c','t',0}; 81 static const WCHAR setW[] = {'s','e','t',0}; 82 static const WCHAR stepW[] = {'s','t','e','p',0}; 83 static const WCHAR stopW[] = {'s','t','o','p',0}; 84 static const WCHAR subW[] = {'s','u','b',0}; 85 static const WCHAR thenW[] = {'t','h','e','n',0}; 86 static const WCHAR toW[] = {'t','o',0}; 87 static const WCHAR trueW[] = {'t','r','u','e',0}; 88 static const WCHAR untilW[] = {'u','n','t','i','l',0}; 89 static const WCHAR wendW[] = {'w','e','n','d',0}; 90 static const WCHAR whileW[] = {'w','h','i','l','e',0}; 91 static const WCHAR xorW[] = {'x','o','r',0}; 92 93 static const struct { 94 const WCHAR *word; 95 int token; 96 } keywords[] = { 97 {andW, tAND}, 98 {byrefW, tBYREF}, 99 {byvalW, tBYVAL}, 100 {callW, tCALL}, 101 {caseW, tCASE}, 102 {classW, tCLASS}, 103 {constW, tCONST}, 104 {defaultW, tDEFAULT}, 105 {dimW, tDIM}, 106 {doW, tDO}, 107 {eachW, tEACH}, 108 {elseW, tELSE}, 109 {elseifW, tELSEIF}, 110 {emptyW, tEMPTY}, 111 {endW, tEND}, 112 {eqvW, tEQV}, 113 {errorW, tERROR}, 114 {exitW, tEXIT}, 115 {explicitW, tEXPLICIT}, 116 {falseW, tFALSE}, 117 {forW, tFOR}, 118 {functionW, tFUNCTION}, 119 {getW, tGET}, 120 {gotoW, tGOTO}, 121 {ifW, tIF}, 122 {impW, tIMP}, 123 {inW, tIN}, 124 {isW, tIS}, 125 {letW, tLET}, 126 {loopW, tLOOP}, 127 {meW, tME}, 128 {modW, tMOD}, 129 {newW, tNEW}, 130 {nextW, tNEXT}, 131 {notW, tNOT}, 132 {nothingW, tNOTHING}, 133 {nullW, tNULL}, 134 {onW, tON}, 135 {optionW, tOPTION}, 136 {orW, tOR}, 137 {privateW, tPRIVATE}, 138 {propertyW, tPROPERTY}, 139 {publicW, tPUBLIC}, 140 {remW, tREM}, 141 {resumeW, tRESUME}, 142 {selectW, tSELECT}, 143 {setW, tSET}, 144 {stepW, tSTEP}, 145 {stopW, tSTOP}, 146 {subW, tSUB}, 147 {thenW, tTHEN}, 148 {toW, tTO}, 149 {trueW, tTRUE}, 150 {untilW, tUNTIL}, 151 {wendW, tWEND}, 152 {whileW, tWHILE}, 153 {xorW, tXOR} 154 }; 155 156 static inline BOOL is_identifier_char(WCHAR c) 157 { 158 return iswalnum(c) || c == '_'; 159 } 160 161 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval) 162 { 163 const WCHAR *p1 = ctx->ptr; 164 const WCHAR *p2 = word; 165 WCHAR c; 166 167 while(p1 < ctx->end && *p2) { 168 c = towlower(*p1); 169 if(c != *p2) 170 return c - *p2; 171 p1++; 172 p2++; 173 } 174 175 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1))) 176 return 1; 177 178 ctx->ptr = p1; 179 *lval = word; 180 return 0; 181 } 182 183 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval) 184 { 185 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i; 186 187 while(min <= max) { 188 i = (min+max)/2; 189 190 r = check_keyword(ctx, keywords[i].word, lval); 191 if(!r) 192 return keywords[i].token; 193 194 if(r > 0) 195 min = i+1; 196 else 197 max = i-1; 198 } 199 200 return 0; 201 } 202 203 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret) 204 { 205 const WCHAR *ptr = ctx->ptr++; 206 WCHAR *str; 207 int len; 208 209 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) 210 ctx->ptr++; 211 len = ctx->ptr-ptr; 212 213 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 214 if(!str) 215 return 0; 216 217 memcpy(str, ptr, (len+1)*sizeof(WCHAR)); 218 str[len] = 0; 219 *ret = str; 220 return tIdentifier; 221 } 222 223 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret) 224 { 225 const WCHAR *ptr = ++ctx->ptr; 226 WCHAR *rptr; 227 int len = 0; 228 229 while(ctx->ptr < ctx->end) { 230 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') { 231 FIXME("newline inside string literal\n"); 232 return 0; 233 } 234 235 if(*ctx->ptr == '"') { 236 if(ctx->ptr[1] != '"') 237 break; 238 len--; 239 ctx->ptr++; 240 } 241 ctx->ptr++; 242 } 243 244 if(ctx->ptr == ctx->end) { 245 FIXME("unterminated string literal\n"); 246 return 0; 247 } 248 249 len += ctx->ptr-ptr; 250 251 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 252 if(!rptr) 253 return 0; 254 255 while(ptr < ctx->ptr) { 256 if(*ptr == '"') 257 ptr++; 258 *rptr++ = *ptr++; 259 } 260 261 *rptr = 0; 262 ctx->ptr++; 263 return tString; 264 } 265 266 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret) 267 { 268 BOOL use_int = TRUE; 269 LONGLONG d = 0, hlp; 270 int exp = 0; 271 double r; 272 273 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.') 274 return *ctx->ptr++; 275 276 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) { 277 hlp = d*10 + *(ctx->ptr++) - '0'; 278 if(d>MAXLONGLONG/10 || hlp<0) { 279 exp++; 280 break; 281 } 282 else 283 d = hlp; 284 } 285 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) { 286 exp++; 287 ctx->ptr++; 288 } 289 290 if(*ctx->ptr == '.') { 291 use_int = FALSE; 292 ctx->ptr++; 293 294 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) { 295 hlp = d*10 + *(ctx->ptr++) - '0'; 296 if(d>MAXLONGLONG/10 || hlp<0) 297 break; 298 299 d = hlp; 300 exp--; 301 } 302 while(ctx->ptr < ctx->end && iswdigit(*ctx->ptr)) 303 ctx->ptr++; 304 } 305 306 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') { 307 int e = 0, sign = 1; 308 309 ctx->ptr++; 310 if(*ctx->ptr == '-') { 311 ctx->ptr++; 312 sign = -1; 313 }else if(*ctx->ptr == '+') { 314 ctx->ptr++; 315 } 316 317 if(!iswdigit(*ctx->ptr)) { 318 FIXME("Invalid numeric literal\n"); 319 return 0; 320 } 321 322 use_int = FALSE; 323 324 do { 325 e = e*10 + *(ctx->ptr++) - '0'; 326 if(sign == -1 && -e+exp < -(INT_MAX/100)) { 327 /* The literal will be rounded to 0 anyway. */ 328 while(iswdigit(*ctx->ptr)) 329 ctx->ptr++; 330 *(double*)ret = 0; 331 return tDouble; 332 } 333 334 if(sign*e + exp > INT_MAX/100) { 335 FIXME("Invalid numeric literal\n"); 336 return 0; 337 } 338 } while(iswdigit(*ctx->ptr)); 339 340 exp += sign*e; 341 } 342 343 if(use_int && (LONG)d == d) { 344 *(LONG*)ret = d; 345 return tInt; 346 } 347 348 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp); 349 if(isinf(r)) { 350 FIXME("Invalid numeric literal\n"); 351 return 0; 352 } 353 354 *(double*)ret = r; 355 return tDouble; 356 } 357 358 static int hex_to_int(WCHAR c) 359 { 360 if('0' <= c && c <= '9') 361 return c-'0'; 362 if('a' <= c && c <= 'f') 363 return c+10-'a'; 364 if('A' <= c && c <= 'F') 365 return c+10-'A'; 366 return -1; 367 } 368 369 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret) 370 { 371 const WCHAR *begin = ctx->ptr; 372 LONG l = 0, d; 373 374 while((d = hex_to_int(*++ctx->ptr)) != -1) 375 l = l*16 + d; 376 377 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) { 378 FIXME("invalid literal\n"); 379 return 0; 380 } 381 382 if(*ctx->ptr == '&') 383 ctx->ptr++; 384 385 *ret = l; 386 return tInt; 387 } 388 389 static void skip_spaces(parser_ctx_t *ctx) 390 { 391 while(*ctx->ptr == ' ' || *ctx->ptr == '\t') 392 ctx->ptr++; 393 } 394 395 static int comment_line(parser_ctx_t *ctx) 396 { 397 static const WCHAR newlineW[] = {'\n','\r',0}; 398 ctx->ptr = wcspbrk(ctx->ptr, newlineW); 399 if(ctx->ptr) 400 ctx->ptr++; 401 else 402 ctx->ptr = ctx->end; 403 return tNL; 404 } 405 406 static int parse_next_token(void *lval, parser_ctx_t *ctx) 407 { 408 WCHAR c; 409 410 skip_spaces(ctx); 411 if(ctx->ptr == ctx->end) 412 return ctx->last_token == tNL ? tEOF : tNL; 413 414 c = *ctx->ptr; 415 416 if('0' <= c && c <= '9') 417 return parse_numeric_literal(ctx, lval); 418 419 if(iswalpha(c)) { 420 int ret = check_keywords(ctx, lval); 421 if(!ret) 422 return parse_identifier(ctx, lval); 423 if(ret != tREM) 424 return ret; 425 c = '\''; 426 } 427 428 switch(c) { 429 case '\n': 430 case '\r': 431 ctx->ptr++; 432 return tNL; 433 case '\'': 434 return comment_line(ctx); 435 case ':': 436 case ')': 437 case ',': 438 case '=': 439 case '+': 440 case '*': 441 case '/': 442 case '^': 443 case '\\': 444 case '.': 445 case '_': 446 return *ctx->ptr++; 447 case '-': 448 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>') 449 return comment_line(ctx); 450 ctx->ptr++; 451 return '-'; 452 case '(': 453 /* NOTE: 454 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts 455 * in call statement special case |f()| without 'call' keyword 456 */ 457 ctx->ptr++; 458 skip_spaces(ctx); 459 if(*ctx->ptr == ')') { 460 ctx->ptr++; 461 return tEMPTYBRACKETS; 462 } 463 return '('; 464 case '"': 465 return parse_string_literal(ctx, lval); 466 case '&': 467 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H') 468 return parse_hex_literal(ctx, lval); 469 return '&'; 470 case '<': 471 switch(*++ctx->ptr) { 472 case '>': 473 ctx->ptr++; 474 return tNEQ; 475 case '=': 476 ctx->ptr++; 477 return tLTEQ; 478 case '!': 479 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-') 480 return comment_line(ctx); 481 } 482 return '<'; 483 case '>': 484 if(*++ctx->ptr == '=') { 485 ctx->ptr++; 486 return tGTEQ; 487 } 488 return '>'; 489 default: 490 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr)); 491 } 492 493 return 0; 494 } 495 496 int parser_lex(void *lval, parser_ctx_t *ctx) 497 { 498 int ret; 499 500 if (ctx->last_token == tEXPRESSION) 501 { 502 ctx->last_token = tNL; 503 return tEXPRESSION; 504 } 505 506 while(1) { 507 ret = parse_next_token(lval, ctx); 508 if(ret == '_') { 509 skip_spaces(ctx); 510 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') { 511 FIXME("'_' not followed by newline\n"); 512 return 0; 513 } 514 if(*ctx->ptr == '\r') 515 ctx->ptr++; 516 if(*ctx->ptr == '\n') 517 ctx->ptr++; 518 continue; 519 } 520 if(ret != tNL || ctx->last_token != tNL) 521 break; 522 523 ctx->last_nl = ctx->ptr-ctx->code; 524 } 525 526 return (ctx->last_token = ret); 527 } 528