1 /* 2 * Copyright 2011 Jacek Caban for CodeWeavers 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with this library; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 17 */ 18 19 #include "vbscript.h" 20 #include "parser.tab.h" 21 22 #include <wine/config.h> 23 #include <wine/port.h> 24 25 static const WCHAR andW[] = {'a','n','d',0}; 26 static const WCHAR byrefW[] = {'b','y','r','e','f',0}; 27 static const WCHAR byvalW[] = {'b','y','v','a','l',0}; 28 static const WCHAR callW[] = {'c','a','l','l',0}; 29 static const WCHAR caseW[] = {'c','a','s','e',0}; 30 static const WCHAR classW[] = {'c','l','a','s','s',0}; 31 static const WCHAR constW[] = {'c','o','n','s','t',0}; 32 static const WCHAR defaultW[] = {'d','e','f','a','u','l','t',0}; 33 static const WCHAR dimW[] = {'d','i','m',0}; 34 static const WCHAR doW[] = {'d','o',0}; 35 static const WCHAR eachW[] = {'e','a','c','h',0}; 36 static const WCHAR elseW[] = {'e','l','s','e',0}; 37 static const WCHAR elseifW[] = {'e','l','s','e','i','f',0}; 38 static const WCHAR emptyW[] = {'e','m','p','t','y',0}; 39 static const WCHAR endW[] = {'e','n','d',0}; 40 static const WCHAR eqvW[] = {'e','q','v',0}; 41 static const WCHAR errorW[] = {'e','r','r','o','r',0}; 42 static const WCHAR exitW[] = {'e','x','i','t',0}; 43 static const WCHAR explicitW[] = {'e','x','p','l','i','c','i','t',0}; 44 static const WCHAR falseW[] = {'f','a','l','s','e',0}; 45 static const WCHAR forW[] = {'f','o','r',0}; 46 static const WCHAR functionW[] = {'f','u','n','c','t','i','o','n',0}; 47 static const WCHAR getW[] = {'g','e','t',0}; 48 static const WCHAR gotoW[] = {'g','o','t','o',0}; 49 static const WCHAR ifW[] = {'i','f',0}; 50 static const WCHAR impW[] = {'i','m','p',0}; 51 static const WCHAR inW[] = {'i','n',0}; 52 static const WCHAR isW[] = {'i','s',0}; 53 static const WCHAR letW[] = {'l','e','t',0}; 54 static const WCHAR loopW[] = {'l','o','o','p',0}; 55 static const WCHAR meW[] = {'m','e',0}; 56 static const WCHAR modW[] = {'m','o','d',0}; 57 static const WCHAR newW[] = {'n','e','w',0}; 58 static const WCHAR nextW[] = {'n','e','x','t',0}; 59 static const WCHAR notW[] = {'n','o','t',0}; 60 static const WCHAR nothingW[] = {'n','o','t','h','i','n','g',0}; 61 static const WCHAR nullW[] = {'n','u','l','l',0}; 62 static const WCHAR onW[] = {'o','n',0}; 63 static const WCHAR optionW[] = {'o','p','t','i','o','n',0}; 64 static const WCHAR orW[] = {'o','r',0}; 65 static const WCHAR privateW[] = {'p','r','i','v','a','t','e',0}; 66 static const WCHAR propertyW[] = {'p','r','o','p','e','r','t','y',0}; 67 static const WCHAR publicW[] = {'p','u','b','l','i','c',0}; 68 static const WCHAR remW[] = {'r','e','m',0}; 69 static const WCHAR resumeW[] = {'r','e','s','u','m','e',0}; 70 static const WCHAR selectW[] = {'s','e','l','e','c','t',0}; 71 static const WCHAR setW[] = {'s','e','t',0}; 72 static const WCHAR stepW[] = {'s','t','e','p',0}; 73 static const WCHAR stopW[] = {'s','t','o','p',0}; 74 static const WCHAR subW[] = {'s','u','b',0}; 75 static const WCHAR thenW[] = {'t','h','e','n',0}; 76 static const WCHAR toW[] = {'t','o',0}; 77 static const WCHAR trueW[] = {'t','r','u','e',0}; 78 static const WCHAR untilW[] = {'u','n','t','i','l',0}; 79 static const WCHAR wendW[] = {'w','e','n','d',0}; 80 static const WCHAR whileW[] = {'w','h','i','l','e',0}; 81 static const WCHAR xorW[] = {'x','o','r',0}; 82 83 static const struct { 84 const WCHAR *word; 85 int token; 86 } keywords[] = { 87 {andW, tAND}, 88 {byrefW, tBYREF}, 89 {byvalW, tBYVAL}, 90 {callW, tCALL}, 91 {caseW, tCASE}, 92 {classW, tCLASS}, 93 {constW, tCONST}, 94 {defaultW, tDEFAULT}, 95 {dimW, tDIM}, 96 {doW, tDO}, 97 {eachW, tEACH}, 98 {elseW, tELSE}, 99 {elseifW, tELSEIF}, 100 {emptyW, tEMPTY}, 101 {endW, tEND}, 102 {eqvW, tEQV}, 103 {errorW, tERROR}, 104 {exitW, tEXIT}, 105 {explicitW, tEXPLICIT}, 106 {falseW, tFALSE}, 107 {forW, tFOR}, 108 {functionW, tFUNCTION}, 109 {getW, tGET}, 110 {gotoW, tGOTO}, 111 {ifW, tIF}, 112 {impW, tIMP}, 113 {inW, tIN}, 114 {isW, tIS}, 115 {letW, tLET}, 116 {loopW, tLOOP}, 117 {meW, tME}, 118 {modW, tMOD}, 119 {newW, tNEW}, 120 {nextW, tNEXT}, 121 {notW, tNOT}, 122 {nothingW, tNOTHING}, 123 {nullW, tNULL}, 124 {onW, tON}, 125 {optionW, tOPTION}, 126 {orW, tOR}, 127 {privateW, tPRIVATE}, 128 {propertyW, tPROPERTY}, 129 {publicW, tPUBLIC}, 130 {remW, tREM}, 131 {resumeW, tRESUME}, 132 {selectW, tSELECT}, 133 {setW, tSET}, 134 {stepW, tSTEP}, 135 {stopW, tSTOP}, 136 {subW, tSUB}, 137 {thenW, tTHEN}, 138 {toW, tTO}, 139 {trueW, tTRUE}, 140 {untilW, tUNTIL}, 141 {wendW, tWEND}, 142 {whileW, tWHILE}, 143 {xorW, tXOR} 144 }; 145 146 static inline BOOL is_identifier_char(WCHAR c) 147 { 148 return isalnumW(c) || c == '_'; 149 } 150 151 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word) 152 { 153 const WCHAR *p1 = ctx->ptr; 154 const WCHAR *p2 = word; 155 WCHAR c; 156 157 while(p1 < ctx->end && *p2) { 158 c = tolowerW(*p1); 159 if(c != *p2) 160 return c - *p2; 161 p1++; 162 p2++; 163 } 164 165 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1))) 166 return 1; 167 168 ctx->ptr = p1; 169 return 0; 170 } 171 172 static int check_keywords(parser_ctx_t *ctx) 173 { 174 int min = 0, max = sizeof(keywords)/sizeof(keywords[0])-1, r, i; 175 176 while(min <= max) { 177 i = (min+max)/2; 178 179 r = check_keyword(ctx, keywords[i].word); 180 if(!r) 181 return keywords[i].token; 182 183 if(r > 0) 184 min = i+1; 185 else 186 max = i-1; 187 } 188 189 return 0; 190 } 191 192 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret) 193 { 194 const WCHAR *ptr = ctx->ptr++; 195 WCHAR *str; 196 int len; 197 198 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr)) 199 ctx->ptr++; 200 len = ctx->ptr-ptr; 201 202 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 203 if(!str) 204 return 0; 205 206 memcpy(str, ptr, (len+1)*sizeof(WCHAR)); 207 str[len] = 0; 208 *ret = str; 209 return tIdentifier; 210 } 211 212 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret) 213 { 214 const WCHAR *ptr = ++ctx->ptr; 215 WCHAR *rptr; 216 int len = 0; 217 218 while(ctx->ptr < ctx->end) { 219 if(*ctx->ptr == '\n') { 220 FIXME("newline inside string literal\n"); 221 return 0; 222 } 223 224 if(*ctx->ptr == '"') { 225 if(ctx->ptr[1] != '"') 226 break; 227 len--; 228 ctx->ptr++; 229 } 230 ctx->ptr++; 231 } 232 233 if(ctx->ptr == ctx->end) { 234 FIXME("unterminated string literal\n"); 235 return 0; 236 } 237 238 len += ctx->ptr-ptr; 239 240 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR)); 241 if(!rptr) 242 return 0; 243 244 while(ptr < ctx->ptr) { 245 if(*ptr == '"') 246 ptr++; 247 *rptr++ = *ptr++; 248 } 249 250 *rptr = 0; 251 ctx->ptr++; 252 return tString; 253 } 254 255 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret) 256 { 257 BOOL use_int = TRUE; 258 LONGLONG d = 0, hlp; 259 int exp = 0; 260 double r; 261 262 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.') 263 return *ctx->ptr++; 264 265 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 266 hlp = d*10 + *(ctx->ptr++) - '0'; 267 if(d>MAXLONGLONG/10 || hlp<0) { 268 exp++; 269 break; 270 } 271 else 272 d = hlp; 273 } 274 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 275 exp++; 276 ctx->ptr++; 277 } 278 279 if(*ctx->ptr == '.') { 280 use_int = FALSE; 281 ctx->ptr++; 282 283 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) { 284 hlp = d*10 + *(ctx->ptr++) - '0'; 285 if(d>MAXLONGLONG/10 || hlp<0) 286 break; 287 288 d = hlp; 289 exp--; 290 } 291 while(ctx->ptr < ctx->end && isdigitW(*ctx->ptr)) 292 ctx->ptr++; 293 } 294 295 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') { 296 int e = 0, sign = 1; 297 298 if(*++ctx->ptr == '-') { 299 ctx->ptr++; 300 sign = -1; 301 } 302 303 if(!isdigitW(*ctx->ptr)) { 304 FIXME("Invalid numeric literal\n"); 305 return 0; 306 } 307 308 use_int = FALSE; 309 310 do { 311 e = e*10 + *(ctx->ptr++) - '0'; 312 if(sign == -1 && -e+exp < -(INT_MAX/100)) { 313 /* The literal will be rounded to 0 anyway. */ 314 while(isdigitW(*ctx->ptr)) 315 ctx->ptr++; 316 *(double*)ret = 0; 317 return tDouble; 318 } 319 320 if(sign*e + exp > INT_MAX/100) { 321 FIXME("Invalid numeric literal\n"); 322 return 0; 323 } 324 } while(isdigitW(*ctx->ptr)); 325 326 exp += sign*e; 327 } 328 329 if(use_int && (LONG)d == d) { 330 LONG l = d; 331 *(LONG*)ret = l; 332 return (short)l == l ? tShort : tLong; 333 } 334 335 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp); 336 if(isinf(r)) { 337 FIXME("Invalid numeric literal\n"); 338 return 0; 339 } 340 341 *(double*)ret = r; 342 return tDouble; 343 } 344 345 static int hex_to_int(WCHAR c) 346 { 347 if('0' <= c && c <= '9') 348 return c-'0'; 349 if('a' <= c && c <= 'f') 350 return c+10-'a'; 351 if('A' <= c && c <= 'F') 352 return c+10-'A'; 353 return -1; 354 } 355 356 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret) 357 { 358 const WCHAR *begin = ctx->ptr; 359 LONG l = 0, d; 360 361 while((d = hex_to_int(*++ctx->ptr)) != -1) 362 l = l*16 + d; 363 364 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) { 365 FIXME("invalid literal\n"); 366 return 0; 367 } 368 369 if(*ctx->ptr == '&') 370 ctx->ptr++; 371 372 *ret = l; 373 return (short)l == l ? tShort : tLong; 374 } 375 376 static void skip_spaces(parser_ctx_t *ctx) 377 { 378 while(*ctx->ptr == ' ' || *ctx->ptr == '\t' || *ctx->ptr == '\r') 379 ctx->ptr++; 380 } 381 382 static int comment_line(parser_ctx_t *ctx) 383 { 384 ctx->ptr = strchrW(ctx->ptr, '\n'); 385 if(ctx->ptr) 386 ctx->ptr++; 387 else 388 ctx->ptr = ctx->end; 389 return tNL; 390 } 391 392 static int parse_next_token(void *lval, parser_ctx_t *ctx) 393 { 394 WCHAR c; 395 396 skip_spaces(ctx); 397 if(ctx->ptr == ctx->end) 398 return ctx->last_token == tNL ? tEOF : tNL; 399 400 c = *ctx->ptr; 401 402 if('0' <= c && c <= '9') 403 return parse_numeric_literal(ctx, lval); 404 405 if(isalphaW(c)) { 406 int ret = check_keywords(ctx); 407 if(!ret) 408 return parse_identifier(ctx, lval); 409 if(ret != tREM) 410 return ret; 411 c = '\''; 412 } 413 414 switch(c) { 415 case '\n': 416 ctx->ptr++; 417 return tNL; 418 case '\'': 419 return comment_line(ctx); 420 case ':': 421 case ')': 422 case ',': 423 case '=': 424 case '+': 425 case '*': 426 case '/': 427 case '^': 428 case '\\': 429 case '.': 430 case '_': 431 return *ctx->ptr++; 432 case '-': 433 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>') 434 return comment_line(ctx); 435 ctx->ptr++; 436 return '-'; 437 case '(': 438 /* NOTE: 439 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts 440 * in call statement special case |f()| without 'call' keyword 441 */ 442 ctx->ptr++; 443 skip_spaces(ctx); 444 if(*ctx->ptr == ')') { 445 ctx->ptr++; 446 return tEMPTYBRACKETS; 447 } 448 return '('; 449 case '"': 450 return parse_string_literal(ctx, lval); 451 case '&': 452 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H') 453 return parse_hex_literal(ctx, lval); 454 return '&'; 455 case '<': 456 switch(*++ctx->ptr) { 457 case '>': 458 ctx->ptr++; 459 return tNEQ; 460 case '=': 461 ctx->ptr++; 462 return tLTEQ; 463 case '!': 464 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-') 465 return comment_line(ctx); 466 } 467 return '<'; 468 case '>': 469 if(*++ctx->ptr == '=') { 470 ctx->ptr++; 471 return tGTEQ; 472 } 473 return '>'; 474 default: 475 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr)); 476 } 477 478 return 0; 479 } 480 481 int parser_lex(void *lval, parser_ctx_t *ctx) 482 { 483 int ret; 484 485 while(1) { 486 ret = parse_next_token(lval, ctx); 487 if(ret == '_') { 488 skip_spaces(ctx); 489 if(*ctx->ptr != '\n') { 490 FIXME("'_' not followed by newline\n"); 491 return 0; 492 } 493 ctx->ptr++; 494 continue; 495 } 496 if(ret != tNL || ctx->last_token != tNL) 497 break; 498 499 ctx->last_nl = ctx->ptr-ctx->code; 500 } 501 502 return (ctx->last_token = ret); 503 } 504