1 #include "scanner.hpp" 2 3 #include <cassert> 4 5 #include "exception.hpp" 6 #include "errorhandler.hpp" 7 #include "parser.hpp" 8 #include "extensions.hpp" 9 10 #include <components/misc/stringops.hpp> 11 12 namespace Compiler 13 { get(MultiChar & c)14 bool Scanner::get (MultiChar& c) 15 { 16 if (!c.getFrom(mStream)) 17 return false; 18 19 mPrevLoc = mLoc; 20 21 if (c=='\n') 22 { 23 mStrictKeywords = false; 24 mTolerantNames = false; 25 mLoc.mColumn = 0; 26 ++mLoc.mLine; 27 mLoc.mLiteral.clear(); 28 } 29 else 30 { 31 ++mLoc.mColumn; 32 c.appendTo(mLoc.mLiteral); 33 } 34 35 return true; 36 } 37 putback(MultiChar & c)38 void Scanner::putback (MultiChar& c) 39 { 40 c.putback(mStream); 41 mLoc = mPrevLoc; 42 } 43 scanToken(Parser & parser)44 bool Scanner::scanToken (Parser& parser) 45 { 46 switch (mPutback) 47 { 48 case Putback_Special: 49 50 mPutback = Putback_None; 51 return parser.parseSpecial (mPutbackCode, mPutbackLoc, *this); 52 53 case Putback_Integer: 54 55 mPutback = Putback_None; 56 return parser.parseInt (mPutbackInteger, mPutbackLoc, *this); 57 58 case Putback_Float: 59 60 mPutback = Putback_None; 61 return parser.parseFloat (mPutbackFloat, mPutbackLoc, *this); 62 63 case Putback_Name: 64 65 mPutback = Putback_None; 66 return parser.parseName (mPutbackName, mPutbackLoc, *this); 67 68 case Putback_Keyword: 69 70 mPutback = Putback_None; 71 return parser.parseKeyword (mPutbackCode, mPutbackLoc, *this); 72 73 case Putback_None: 74 75 break; 76 } 77 78 MultiChar c; 79 80 if (!get (c)) 81 { 82 parser.parseEOF (*this); 83 return false; 84 } 85 else if (c==';') 86 { 87 std::string comment; 88 89 c.appendTo(comment); 90 91 while (get (c)) 92 { 93 if (c=='\n') 94 { 95 putback (c); 96 break; 97 } 98 else 99 c.appendTo(comment); 100 } 101 102 TokenLoc loc (mLoc); 103 mLoc.mLiteral.clear(); 104 105 return parser.parseComment (comment, loc, *this); 106 } 107 else if (c.isWhitespace()) 108 { 109 mLoc.mLiteral.clear(); 110 return true; 111 } 112 else if (c==':') 113 { 114 // treat : as a whitespace :( 115 mLoc.mLiteral.clear(); 116 return true; 117 } 118 else if (c.isAlpha() || c=='_' || c=='"') 119 { 120 bool cont = false; 121 122 if (scanName (c, parser, cont)) 123 { 124 mLoc.mLiteral.clear(); 125 return cont; 126 } 127 } 128 else if (c.isDigit()) 129 { 130 bool cont = false; 131 132 if (scanInt (c, parser, cont)) 133 { 134 mLoc.mLiteral.clear(); 135 return cont; 136 } 137 } 138 else if (c==13) // linux compatibility hack 139 { 140 return true; 141 } 142 else 143 { 144 bool cont = false; 145 146 if (scanSpecial (c, parser, cont)) 147 { 148 mLoc.mLiteral.clear(); 149 return cont; 150 } 151 } 152 153 TokenLoc loc (mLoc); 154 mLoc.mLiteral.clear(); 155 156 mErrorHandler.error ("Syntax error", loc); 157 throw SourceException(); 158 } 159 scanInt(MultiChar & c,Parser & parser,bool & cont)160 bool Scanner::scanInt (MultiChar& c, Parser& parser, bool& cont) 161 { 162 assert(c != '\0'); 163 std::string value; 164 c.appendTo(value); 165 166 bool error = false; 167 168 while (get (c)) 169 { 170 if (c.isDigit()) 171 { 172 c.appendTo(value); 173 } 174 else if (!c.isMinusSign() && isStringCharacter (c)) 175 { 176 error = true; 177 c.appendTo(value); 178 } 179 else if (c=='.') 180 { 181 if (error) 182 { 183 putback (c); 184 break; 185 } 186 return scanFloat (value, parser, cont); 187 } 188 else 189 { 190 putback (c); 191 break; 192 } 193 } 194 195 if (error) 196 { 197 /// workaround that allows names to begin with digits 198 /// \todo disable 199 TokenLoc loc (mLoc); 200 mLoc.mLiteral.clear(); 201 cont = parser.parseName (value, loc, *this); 202 return true; 203 // return false; 204 } 205 206 TokenLoc loc (mLoc); 207 mLoc.mLiteral.clear(); 208 209 std::istringstream stream (value); 210 211 int intValue = 0; 212 stream >> intValue; 213 214 cont = parser.parseInt (intValue, loc, *this); 215 return true; 216 } 217 scanFloat(const std::string & intValue,Parser & parser,bool & cont)218 bool Scanner::scanFloat (const std::string& intValue, Parser& parser, bool& cont) 219 { 220 std::string value = intValue + "."; 221 222 MultiChar c; 223 224 bool empty = intValue.empty() || intValue=="-"; 225 bool error = false; 226 227 while (get (c)) 228 { 229 if (c.isDigit()) 230 { 231 c.appendTo(value); 232 empty = false; 233 } 234 else if (c.isAlpha() || c=='_') 235 error = true; 236 else 237 { 238 putback (c); 239 break; 240 } 241 } 242 243 if (empty || error) 244 return false; 245 246 TokenLoc loc (mLoc); 247 mLoc.mLiteral.clear(); 248 249 std::istringstream stream (value); 250 251 float floatValue = 0; 252 stream >> floatValue; 253 254 cont = parser.parseFloat (floatValue, loc, *this); 255 return true; 256 } 257 258 static const char *sKeywords[] = 259 { 260 "begin", "end", 261 "short", "long", "float", 262 "if", "endif", "else", "elseif", 263 "while", "endwhile", 264 "return", 265 "messagebox", 266 "set", "to", 267 "getsquareroot", 268 nullptr 269 }; 270 scanName(MultiChar & c,Parser & parser,bool & cont)271 bool Scanner::scanName (MultiChar& c, Parser& parser, bool& cont) 272 { 273 std::string name; 274 c.appendTo(name); 275 276 if (!scanName (name)) 277 return false; 278 else if(name.empty()) 279 return true; 280 281 TokenLoc loc (mLoc); 282 mLoc.mLiteral.clear(); 283 284 if (name.size()>=2 && name[0]=='"' && name[name.size()-1]=='"') 285 { 286 name = name.substr (1, name.size()-2); 287 // allow keywords enclosed in "" 288 /// \todo optionally disable 289 if (mStrictKeywords) 290 { 291 cont = parser.parseName (name, loc, *this); 292 return true; 293 } 294 } 295 296 int i = 0; 297 298 std::string lowerCase = Misc::StringUtils::lowerCase(name); 299 bool isKeyword = false; 300 for (; sKeywords[i]; ++i) 301 if (lowerCase==sKeywords[i]) 302 { 303 isKeyword = true; 304 break; 305 } 306 307 // Russian localization and some mods use a quirk - add newline character directly 308 // to compiled bytecode via HEX-editor to implement multiline messageboxes. 309 // Of course, original editor can not compile such script. 310 // Allow messageboxes to bypass the "incomplete string or name" error. 311 if (lowerCase == "messagebox") 312 enableIgnoreNewlines(); 313 else if (isKeyword) 314 mIgnoreNewline = false; 315 316 if (sKeywords[i]) 317 { 318 cont = parser.parseKeyword (i, loc, *this); 319 return true; 320 } 321 322 if (mExtensions) 323 { 324 if (int keyword = mExtensions->searchKeyword (lowerCase)) 325 { 326 cont = parser.parseKeyword (keyword, loc, *this); 327 return true; 328 } 329 } 330 331 cont = parser.parseName (name, loc, *this); 332 333 return true; 334 } 335 scanName(std::string & name)336 bool Scanner::scanName (std::string& name) 337 { 338 MultiChar c; 339 bool error = false; 340 341 while (get (c)) 342 { 343 if (!name.empty() && name[0]=='"') 344 { 345 if (c=='"') 346 { 347 c.appendTo(name); 348 break; 349 } 350 // ignoring escape sequences for now, because they are messing up stupid Windows path names. 351 // else if (c=='\\') 352 // { 353 // if (!get (c)) 354 // { 355 // error = true; 356 // mErrorHandler.error ("incomplete escape sequence", mLoc); 357 // break; 358 // } 359 // } 360 else if (c=='\n') 361 { 362 if (mIgnoreNewline) 363 mErrorHandler.warning ("string contains newline character, make sure that it is intended", mLoc); 364 else 365 { 366 bool allWhitespace = true; 367 for (size_t i = 1; i < name.size(); i++) 368 { 369 //ignore comments 370 if (name[i] == ';') 371 break; 372 else if (name[i] != '\t' && name[i] != ' ' && name[i] != '\r') 373 { 374 allWhitespace = false; 375 break; 376 } 377 } 378 if (allWhitespace) 379 { 380 name.clear(); 381 mLoc.mLiteral.clear(); 382 mErrorHandler.warning ("unterminated empty string", mLoc); 383 return true; 384 } 385 386 error = true; 387 mErrorHandler.error ("incomplete string or name", mLoc); 388 break; 389 } 390 } 391 } 392 else if (!(c=='"' && name.empty())) 393 { 394 if (!isStringCharacter (c) && !(mTolerantNames && (c=='.' || c == '-'))) 395 { 396 putback (c); 397 break; 398 } 399 } 400 401 c.appendTo(name); 402 } 403 404 return !error; 405 } 406 scanSpecial(MultiChar & c,Parser & parser,bool & cont)407 bool Scanner::scanSpecial (MultiChar& c, Parser& parser, bool& cont) 408 { 409 int special = -1; 410 411 if (c=='\n') 412 special = S_newline; 413 else if (c=='(' || c=='[') /// \todo option to disable the use of [ as alias for ( 414 special = S_open; 415 else if (c==')' || c==']') /// \todo option to disable the use of ] as alias for ) 416 special = S_close; 417 else if (c=='.') 418 { 419 // check, if this starts a float literal 420 if (get (c)) 421 { 422 putback (c); 423 424 if (c.isDigit()) 425 return scanFloat ("", parser, cont); 426 } 427 428 special = S_member; 429 } 430 else if (c=='=') 431 { 432 if (get (c)) 433 { 434 /// \todo hack to allow a space in comparison operators (add option to disable) 435 if (c==' ' && !get (c)) 436 special = S_cmpEQ; 437 else if (c=='=') 438 special = S_cmpEQ; 439 else if (c == '>' || c == '<') // Treat => and =< as == 440 { 441 special = S_cmpEQ; 442 mErrorHandler.warning (std::string("invalid operator =") + c.data() + ", treating it as ==", mLoc); 443 } 444 else 445 { 446 special = S_cmpEQ; 447 putback (c); 448 // return false; 449 /// Allow = as synonym for ==. \todo optionally disable for post-1.0 scripting improvements. 450 } 451 } 452 else 453 { 454 putback (c); 455 return false; 456 } 457 } 458 else if (c=='!') 459 { 460 if (get (c)) 461 { 462 /// \todo hack to allow a space in comparison operators (add option to disable) 463 if (c==' ' && !get (c)) 464 return false; 465 466 if (c=='=') 467 special = S_cmpNE; 468 else 469 { 470 putback (c); 471 return false; 472 } 473 } 474 else 475 return false; 476 } 477 else if (c.isMinusSign()) 478 { 479 if (get (c)) 480 { 481 if (c=='>') 482 special = S_ref; 483 else 484 { 485 putback (c); 486 special = S_minus; 487 } 488 } 489 else 490 special = S_minus; 491 } 492 else if (c=='<') 493 { 494 if (get (c)) 495 { 496 /// \todo hack to allow a space in comparison operators (add option to disable) 497 if (c==' ' && !get (c)) 498 special = S_cmpLT; 499 else if (c=='=') 500 { 501 special = S_cmpLE; 502 503 if (get (c) && c!='=') // <== is a allowed as an alternative to <= :( 504 putback (c); 505 } 506 else if (c == '<' || c == '>') // Treat <> and << as < 507 { 508 special = S_cmpLT; 509 mErrorHandler.warning ("Invalid operator, treating it as <", mLoc); 510 } 511 else 512 { 513 putback (c); 514 special = S_cmpLT; 515 } 516 } 517 else 518 special = S_cmpLT; 519 } 520 else if (c=='>') 521 { 522 if (get (c)) 523 { 524 /// \todo hack to allow a space in comparison operators (add option to disable) 525 if (c==' ' && !get (c)) 526 special = S_cmpGT; 527 else if (c=='=') 528 { 529 special = S_cmpGE; 530 531 if (get (c) && c!='=') // >== is a allowed as an alternative to >= :( 532 putback (c); 533 } 534 else if (c == '<' || c == '>') // Treat >< and >> as > 535 { 536 special = S_cmpGT; 537 mErrorHandler.warning ("Invalid operator, treating it as >", mLoc); 538 } 539 else 540 { 541 putback (c); 542 special = S_cmpGT; 543 } 544 } 545 else 546 special = S_cmpGT; 547 } 548 else if (c==',') 549 special = S_comma; 550 else if (c=='+') 551 special = S_plus; 552 else if (c=='*') 553 special = S_mult; 554 else if (c=='/') 555 special = S_div; 556 else 557 return false; 558 559 if (special==S_newline) 560 mLoc.mLiteral = "<newline>"; 561 562 TokenLoc loc (mLoc); 563 mLoc.mLiteral.clear(); 564 565 cont = parser.parseSpecial (special, loc, *this); 566 567 return true; 568 } 569 isStringCharacter(MultiChar & c,bool lookAhead)570 bool Scanner::isStringCharacter (MultiChar& c, bool lookAhead) 571 { 572 if (lookAhead && c.isMinusSign()) 573 { 574 /// \todo disable this when doing more stricter compiling. Also, find out who is 575 /// responsible for allowing it in the first place and meet up with that person in 576 /// a dark alley. 577 MultiChar next; 578 if (next.peek(mStream) && isStringCharacter (next, false)) 579 return true; 580 } 581 582 return c.isAlpha() || c.isDigit() || c=='_' || 583 /// \todo disable this when doing more stricter compiling 584 c=='`' || c=='\''; 585 } 586 587 // constructor 588 Scanner(ErrorHandler & errorHandler,std::istream & inputStream,const Extensions * extensions)589 Scanner::Scanner (ErrorHandler& errorHandler, std::istream& inputStream, 590 const Extensions *extensions) 591 : mErrorHandler (errorHandler), mStream (inputStream), mExtensions (extensions), 592 mPutback (Putback_None), mPutbackCode(0), mPutbackInteger(0), mPutbackFloat(0), 593 mStrictKeywords (false), mTolerantNames (false), mIgnoreNewline(false) 594 { 595 } 596 scan(Parser & parser)597 void Scanner::scan (Parser& parser) 598 { 599 while (scanToken (parser)); 600 } 601 putbackSpecial(int code,const TokenLoc & loc)602 void Scanner::putbackSpecial (int code, const TokenLoc& loc) 603 { 604 mPutback = Putback_Special; 605 mPutbackCode = code; 606 mPutbackLoc = loc; 607 } 608 putbackInt(int value,const TokenLoc & loc)609 void Scanner::putbackInt (int value, const TokenLoc& loc) 610 { 611 mPutback = Putback_Integer; 612 mPutbackInteger = value; 613 mPutbackLoc = loc; 614 } 615 putbackFloat(float value,const TokenLoc & loc)616 void Scanner::putbackFloat (float value, const TokenLoc& loc) 617 { 618 mPutback = Putback_Float; 619 mPutbackFloat = value; 620 mPutbackLoc = loc; 621 } 622 putbackName(const std::string & name,const TokenLoc & loc)623 void Scanner::putbackName (const std::string& name, const TokenLoc& loc) 624 { 625 mPutback = Putback_Name; 626 mPutbackName = name; 627 mPutbackLoc = loc; 628 } 629 putbackKeyword(int keyword,const TokenLoc & loc)630 void Scanner::putbackKeyword (int keyword, const TokenLoc& loc) 631 { 632 mPutback = Putback_Keyword; 633 mPutbackCode = keyword; 634 mPutbackLoc = loc; 635 } 636 listKeywords(std::vector<std::string> & keywords)637 void Scanner::listKeywords (std::vector<std::string>& keywords) 638 { 639 for (int i=0; Compiler::sKeywords[i]; ++i) 640 keywords.emplace_back(Compiler::sKeywords[i]); 641 642 if (mExtensions) 643 mExtensions->listKeywords (keywords); 644 } 645 enableIgnoreNewlines()646 void Scanner::enableIgnoreNewlines() 647 { 648 mIgnoreNewline = true; 649 } 650 enableStrictKeywords()651 void Scanner::enableStrictKeywords() 652 { 653 mStrictKeywords = true; 654 } 655 enableTolerantNames()656 void Scanner::enableTolerantNames() 657 { 658 mTolerantNames = true; 659 } 660 } 661