1 /* lexer.c++ -- written by Alexis WILKE for Made to Order Software Corp. (c) 2005-2009 */
2
3 /*
4
5 Copyright (c) 2005-2009 Made to Order Software Corp.
6
7 Permission is hereby granted, free of charge, to any
8 person obtaining a copy of this software and
9 associated documentation files (the "Software"), to
10 deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify,
12 merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom
14 the Software is furnished to do so, subject to the
15 following conditions:
16
17 The above copyright notice and this permission notice
18 shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
22 ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
23 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
24 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
25 EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
28 ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 SOFTWARE.
31
32 */
33
34 #include "parser.h"
35
36
37 namespace sswf
38 {
39 namespace as
40 {
41
42
43 /**********************************************************************/
44 /**********************************************************************/
45 /*** PARSER CREATOR *************************************************/
46 /**********************************************************************/
47 /**********************************************************************/
48
49
Lexer(void)50 Lexer::Lexer(void)
51 {
52 f_last = 0;
53 f_type = 0;
54 //f_data -- auto-init
55 f_unget_pos = 0;
56 //f_unget[...] -- don't require initialization
57 f_input = 0;
58 f_options = 0;
59 f_for_in = false;
60 }
61
62
63
SetInput(Input & input)64 void Lexer::SetInput(Input& input)
65 {
66 f_input = &input;
67 }
68
69
SetOptions(Options & options)70 void Lexer::SetOptions(Options& options)
71 {
72 f_options = &options;
73 }
74
75
InputGetC(void)76 long Lexer::InputGetC(void)
77 {
78 AS_ASSERT(f_input != 0);
79
80 return f_input->GetC();
81 }
82
83
GetC(void)84 long Lexer::GetC(void)
85 {
86 long c;
87
88 // we don't want to re-process these!
89 if(f_unget_pos > 0) {
90 --f_unget_pos;
91 f_last = f_unget[f_unget_pos];
92 f_type = CharType(f_last);
93 //fprintf(stderr, "(reget) ");
94 return f_last;
95 }
96
97 c = InputGetC();
98
99 f_type = CharType(c);
100 if((f_type & (CHAR_LINE_TERMINATOR | CHAR_WHITE_SPACE)) != 0) {
101 switch(c) {
102 case '\n':
103 // skip '\n\r' as one newline
104 do {
105 f_input->NewLine();
106 c = InputGetC();
107 } while(c == '\n');
108 if(c != '\r') {
109 UngetC(c);
110 }
111 c = '\n';
112 break;
113
114 case '\r':
115 // skip '\r\n' as one newline (?!)
116 do {
117 f_input->NewLine();
118 c = InputGetC();
119 } while(c == '\r');
120 if(c != '\n') {
121 UngetC(c);
122 }
123 c = '\n';
124 break;
125
126 case '\f':
127 // view the form feed as a new page for now...
128 f_input->NewPage();
129 break;
130
131 case 0x0085:
132 // ?
133 break;
134
135 case 0x2028:
136 f_input->NewLine();
137 break;
138
139 case 0x2029:
140 f_input->NewParagraph();
141 break;
142
143 }
144 }
145
146 return f_last = c;
147 }
148
149
UngetC(long c)150 void Lexer::UngetC(long c)
151 {
152 AS_ASSERT(f_unget_pos < MAX_UNGET);
153
154 f_unget[f_unget_pos] = c;
155 ++f_unget_pos;
156 }
157
158
CharType(long c)159 long Lexer::CharType(long c)
160 {
161 // TODO: this needs a HUGE improvement to be conformant...
162 switch(c) {
163 case '\0':
164 return CHAR_INVALID;
165
166 case '\n':
167 case '\r':
168 case 0x0085:
169 case 0x2028:
170 case 0x2029:
171 return CHAR_LINE_TERMINATOR;
172
173 case '\t':
174 case '\v':
175 case '\f':
176 case ' ':
177 case 0x00A0:
178 //case 0x2000 ... 0x200B: -- cl doesn't like those
179 case 0x2000:
180 case 0x2001:
181 case 0x2002:
182 case 0x2003:
183 case 0x2004:
184 case 0x2005:
185 case 0x2006:
186 case 0x2007:
187 case 0x2008:
188 case 0x2009:
189 case 0x200A:
190 case 0x200B:
191 case 0x3000:
192 return CHAR_WHITE_SPACE;
193
194 //case '0' ... '9': -- cl doesn't like those
195 case '0':
196 case '1':
197 case '2':
198 case '3':
199 case '4':
200 case '5':
201 case '6':
202 case '7':
203 case '8':
204 case '9':
205 return CHAR_DIGIT | CHAR_HEXDIGIT;
206
207 //case 'a' ... 'f': -- cl doesn't like those
208 //case 'A' ... 'F':
209 case 'a':
210 case 'b':
211 case 'c':
212 case 'd':
213 case 'e':
214 case 'f':
215 case 'A':
216 case 'B':
217 case 'C':
218 case 'D':
219 case 'E':
220 case 'F':
221 return CHAR_LETTER | CHAR_HEXDIGIT;
222
223 case '_':
224 case '$':
225 //case 'g' ... 'z': -- cl doesn't like those (moved to default:)
226 //case 'G' ... 'Z':
227 return CHAR_LETTER;
228
229 default:
230 if((c >= 'g' && c <= 'z')
231 || (c >= 'G' && c <= 'Z')) {
232 return CHAR_LETTER;
233 }
234 if((c & 0x0FFFF) >= 0xFFFE) {
235 return CHAR_INVALID;
236 }
237 if(c < 0x7F) {
238 return CHAR_PUNCTUATION;
239 }
240 // TODO: this will be true in most cases, but not always!
241 return CHAR_LETTER;
242
243 }
244 /*NOTREACHED*/
245 }
246
247
248
249
ReadHex(long max)250 int64_t Lexer::ReadHex(long max)
251 {
252 long c, p, result;
253
254 result = 0;
255 p = 0;
256 c = GetC();
257 while((f_type & CHAR_HEXDIGIT) != 0 && p < max) {
258 p++;
259 if(c <= '9') {
260 result = result * 16 + c - '0';
261 }
262 else {
263 result = result * 16 + c - ('A' - 10);
264 }
265 c = GetC();
266 }
267 UngetC(c);
268
269 if(p == 0) {
270 f_input->ErrMsg(AS_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE, "invalid unicode (\\[xXuU]##) escape sequence)");
271 return -1;
272 }
273
274 // TODO: In strict mode, should we check whether we got p == max?
275 // WARNING: this is also used by the ReadNumber() function
276
277 return result;
278 }
279
280
ReadOctal(long c,long max)281 int64_t Lexer::ReadOctal(long c, long max)
282 {
283 long p, result;
284
285 result = c - '0';
286 p = 1;
287 c = GetC();
288 while(c >= '0' && c <= '7' && p < max) {
289 p++;
290 result = result * 8 + c - '0';
291 c = GetC();
292 }
293 UngetC(c);
294
295 return result;
296 }
297
298
EscapeSequence(void)299 long Lexer::EscapeSequence(void)
300 {
301 long c = f_input->GetC();
302 switch(c) {
303 case 'u':
304 // 4 hex digits
305 return ReadHex(4);
306
307 case 'U':
308 // 8 hex digits
309 return ReadHex(8);
310
311 case 'x':
312 case 'X':
313 // 2 hex digits
314 return ReadHex(2);
315
316 case '\'':
317 case '\"':
318 case '\\':
319 return c;
320
321 case 'b':
322 return '\b';
323
324 case 'e':
325 if(f_options != 0
326 && f_options->GetOption(AS_OPTION_EXTENDED_ESCAPE_SEQUENCES) != 0) {
327 return '\033';
328 }
329 break;
330
331 case 'f':
332 return '\f';
333
334 case 'n':
335 return '\n';
336
337 case 'r':
338 return '\r';
339
340 case 't':
341 return '\t';
342
343 case 'v':
344 return '\v';
345
346 //case '0' ... '7': -- cl doesn't like those
347 default:
348 if(c >= '0' && c <= '7') {
349 return ReadOctal(c, 3);
350 }
351 break;
352
353 }
354
355 if(c > ' ' && c < 0x7F) {
356 f_input->ErrMsg(AS_ERR_UNKNOWN_ESCAPE_SEQUENCE, "unknown escape letter '%c'", (char) c);
357 }
358 else {
359 f_input->ErrMsg(AS_ERR_UNKNOWN_ESCAPE_SEQUENCE, "unknown escape letter '\\U%08lX'", c);
360 }
361
362 return '?';
363 }
364
365
366
367
368
369
Read(long c,long flags,String & str)370 long Lexer::Read(long c, long flags, String& str)
371 {
372 bool escape;
373
374 do {
375 escape = c == '\\';
376 if(escape) {
377 c = EscapeSequence();
378 }
379 if((f_type & CHAR_INVALID) == 0) {
380 str.AppendChar(c);
381 }
382 c = GetC();
383 } while((f_type & flags) != 0 && c >= 0);
384
385 if(escape) {
386 long l, i;
387 l = c;
388 i = 8;
389 while(i > 0) {
390 --i;
391 long x = l & 15;
392 if(x >= 10) {
393 x += 'A' - 10;
394 }
395 else {
396 x += '0';
397 }
398 UngetC(x);
399 l >>= 4;
400 }
401 UngetC('U');
402 UngetC('\\');
403 }
404 else {
405 UngetC(c);
406 }
407
408 return c;
409 }
410
411
412
ReadIdentifier(long c)413 void Lexer::ReadIdentifier(long c)
414 {
415 f_data.f_type = NODE_IDENTIFIER;
416 c = Read(c, CHAR_LETTER | CHAR_DIGIT, f_data.f_str);
417
418 // An identifier can be a keyword, we check that right here!
419 long l = f_data.f_str.GetLength();
420 if(l > 1) {
421 const long *s = f_data.f_str.Get();
422 switch(s[0]) {
423 case 'a':
424 if(l == 2 && s[1] == 's') {
425 f_data.f_type = NODE_AS;
426 break;
427 }
428 break;
429
430 case 'b':
431 if(l == 5 && f_data.f_str == "break") {
432 f_data.f_type = NODE_BREAK;
433 break;
434 }
435 break;
436
437 case 'c':
438 if(l == 4 && f_data.f_str == "case") {
439 f_data.f_type = NODE_CASE;
440 break;
441 }
442 if(l == 5 && f_data.f_str == "catch") {
443 f_data.f_type = NODE_CATCH;
444 break;
445 }
446 if(l == 5 && f_data.f_str == "class") {
447 f_data.f_type = NODE_CLASS;
448 break;
449 }
450 if(l == 5 && f_data.f_str == "const") {
451 f_data.f_type = NODE_CONST;
452 break;
453 }
454 if(l == 8 && f_data.f_str == "continue") {
455 f_data.f_type = NODE_CONTINUE;
456 break;
457 }
458 break;
459
460 case 'd':
461 if(l == 8 && f_data.f_str == "debugger") {
462 f_data.f_type = NODE_DEBUGGER;
463 break;
464 }
465 if(l == 7 && f_data.f_str == "default") {
466 f_data.f_type = NODE_DEFAULT;
467 break;
468 }
469 if(l == 6 && f_data.f_str == "delete") {
470 f_data.f_type = NODE_DELETE;
471 break;
472 }
473 if(l == 2 && s[1] == 'o') {
474 f_data.f_type = NODE_DO;
475 break;
476 }
477 break;
478
479 case 'e':
480 if(l == 4 && f_data.f_str == "else") {
481 f_data.f_type = NODE_ELSE;
482 break;
483 }
484 if(l == 4 && f_data.f_str == "enum") {
485 f_data.f_type = NODE_ENUM;
486 break;
487 }
488 if(l == 7 && f_data.f_str == "extends") {
489 f_data.f_type = NODE_EXTENDS;
490 break;
491 }
492 break;
493
494 case 'f':
495 if(l == 5 && f_data.f_str == "false") {
496 f_data.f_type = NODE_FALSE;
497 break;
498 }
499 if(l == 7 && f_data.f_str == "finally") {
500 f_data.f_type = NODE_FINALLY;
501 break;
502 }
503 if(l == 3 && s[1] == 'o' && s[2] == 'r') {
504 f_data.f_type = NODE_FOR;
505 break;
506 }
507 if(l == 8 && f_data.f_str == "function") {
508 f_data.f_type = NODE_FUNCTION;
509 break;
510 }
511 break;
512
513 case 'g':
514 if(f_options != 0
515 && f_options->GetOption(AS_OPTION_EXTENDED_STATEMENTS) != 0) {
516 if(l == 4 && f_data.f_str == "goto") {
517 f_data.f_type = NODE_GOTO;
518 break;
519 }
520 }
521 break;
522
523 case 'i':
524 if(l == 2 && s[1] == 'f') {
525 f_data.f_type = NODE_IF;
526 break;
527 }
528 if(l == 10 && f_data.f_str == "implements") {
529 f_data.f_type = NODE_IMPLEMENTS;
530 break;
531 }
532 if(l == 6 && f_data.f_str == "import") {
533 f_data.f_type = NODE_IMPORT;
534 break;
535 }
536 if(l == 2 && s[1] == 'n') {
537 f_data.f_type = f_for_in ? NODE_FOR_IN : NODE_IN;
538 break;
539 }
540 if(l == 10 && f_data.f_str == "instanceof") {
541 f_data.f_type = NODE_INSTANCEOF;
542 break;
543 }
544 if(l == 9 && f_data.f_str == "interface") {
545 f_data.f_type = NODE_INTERFACE;
546 break;
547 }
548 if(l == 2 && s[1] == 's') {
549 f_data.f_type = NODE_IS;
550 break;
551 }
552 break;
553
554 case 'n':
555 if(l == 9 && f_data.f_str == "namespace") {
556 f_data.f_type = NODE_NAMESPACE;
557 break;
558 }
559 if(l == 3 && s[1] == 'e' && s[2] == 'w') {
560 f_data.f_type = NODE_NEW;
561 break;
562 }
563 if(l == 4 && f_data.f_str == "null") {
564 f_data.f_type = NODE_NULL;
565 break;
566 }
567 break;
568
569 case 'p':
570 if(l == 7 && f_data.f_str == "package") {
571 f_data.f_type = NODE_PACKAGE;
572 break;
573 }
574 if(l == 7 && f_data.f_str == "private") {
575 f_data.f_type = NODE_PRIVATE;
576 break;
577 }
578 if(l == 6 && f_data.f_str == "public") {
579 f_data.f_type = NODE_PUBLIC;
580 break;
581 }
582 break;
583
584 case 'r':
585 if(l == 6 && f_data.f_str == "return") {
586 f_data.f_type = NODE_RETURN;
587 break;
588 }
589 break;
590
591 case 's':
592 if(l == 5 && f_data.f_str == "super") {
593 f_data.f_type = NODE_SUPER;
594 break;
595 }
596 if(l == 6 && f_data.f_str == "switch") {
597 f_data.f_type = NODE_SWITCH;
598 break;
599 }
600 break;
601
602 case 't':
603 if(l == 4 && f_data.f_str == "this") {
604 f_data.f_type = NODE_THIS;
605 break;
606 }
607 if(l == 5 && f_data.f_str == "throw") {
608 f_data.f_type = NODE_THROW;
609 break;
610 }
611 if(l == 4 && f_data.f_str == "true") {
612 f_data.f_type = NODE_TRUE;
613 break;
614 }
615 if(l == 3 && s[1] == 'r' && s[2] == 'y') {
616 f_data.f_type = NODE_TRY;
617 break;
618 }
619 if(l == 6 && f_data.f_str == "typeof") {
620 f_data.f_type = NODE_TYPEOF;
621 break;
622 }
623 break;
624
625 case 'u':
626 if(l == 9 && f_data.f_str == "undefined") {
627 f_data.f_type = NODE_UNDEFINED;
628 break;
629 }
630 if(l == 3 && s[1] == 's' && s[2] == 'e') {
631 f_data.f_type = NODE_USE;
632 break;
633 }
634 break;
635
636 case 'v':
637 if(l == 3 && s[1] == 'a' && s[2] == 'r') {
638 f_data.f_type = NODE_VAR;
639 break;
640 }
641 if(l == 4 && f_data.f_str == "void") {
642 f_data.f_type = NODE_VOID;
643 break;
644 }
645 break;
646
647 case 'w':
648 if(l == 4 && f_data.f_str == "with") {
649 f_data.f_type = NODE_WITH;
650 break;
651 }
652 if(l == 5 && f_data.f_str == "while") {
653 f_data.f_type = NODE_WHILE;
654 break;
655 }
656 break;
657
658 case '_':
659 if(l == 8 && f_data.f_str == "__FILE__") {
660 f_data.f_type = NODE_STRING;
661 f_data.f_str = f_input->GetFilename();
662 break;
663 }
664 if(l == 8 && f_data.f_str == "__LINE__") {
665 f_data.f_type = NODE_INT64;
666 f_data.f_int.Set(f_input->Line());
667 break;
668 }
669 break;
670
671 }
672 }
673 }
674
675
ReadNumber(long c)676 void Lexer::ReadNumber(long c)
677 {
678 String number;
679 char buf[256];
680 size_t sz;
681
682 buf[sizeof(buf) - 1] = '\0';
683
684 if(c == '.') {
685 // in case the strtod() doesn't support a missing 0
686 // at the start of the string
687 number.AppendChar('0');
688 number.AppendChar('.');
689 }
690 else if(c == '0') {
691 c = GetC();
692 if(c == 'x' || c == 'X') {
693 // hexadecimal number
694 f_data.f_type = NODE_INT64;
695 f_data.f_int.Set(ReadHex(16));
696 return;
697 }
698 // octal is not permitted in ECMAScript version 3+
699 if(f_options != 0
700 && f_options->GetOption(AS_OPTION_OCTAL) != 0
701 && c >= '0' && c <= '7') {
702 // octal
703 f_data.f_type = NODE_INT64;
704 f_data.f_int.Set(ReadOctal(c, 22));
705 return;
706 }
707 number.AppendChar('0');
708 UngetC(c);
709 }
710 else {
711 c = Read(c, CHAR_DIGIT, number);
712 }
713
714 if(c == '.') {
715 // TODO: we may want to support 32 bits floats as well
716 f_data.f_type = NODE_FLOAT64;
717 c = GetC();
718
719 // TODO:
720 // Here we could check to know whether this really
721 // represents a decimal number or whether the decimal
722 // point is a member operator. This can be very tricky.
723
724 c = Read(c, CHAR_DIGIT, number);
725 if(c == 'e' || c == 'E') {
726 number.AppendChar('e');
727 GetC(); // skip the 'e'
728 c = GetC(); // get the character after!
729 if(c == '-' || c == '+' || (c >= '0' && c <= '9')) {
730 c = Read(c, CHAR_DIGIT, number);
731 }
732 }
733 sz = sizeof(buf);
734 number.ToUTF8(buf, sz);
735 f_data.f_float.Set(strtod(buf, 0));
736 }
737 else {
738 // TODO: Support 8, 16, 32 bits, unsigned thereof
739 f_data.f_type = NODE_INT64;
740 sz = sizeof(buf);
741 number.ToUTF8(buf, sz);
742 f_data.f_int.Set(strtoll(buf, 0, 10));
743 }
744
745 // TODO: Note, we could also support numbers followed by a unit.
746 // (but not too useful in Flash ActionScript at this time
747 // without us doing all the work...)
748 }
749
750
ReadString(long quote)751 void Lexer::ReadString(long quote)
752 {
753 long c;
754
755 f_data.f_type = NODE_STRING;
756
757 c = GetC();
758 while(c != quote) {
759 if(c < 0) {
760 f_input->ErrMsg(AS_ERR_UNTERMINTED_STRING, "the last string wasn't closed before the end of the input was reached");
761 return;
762 }
763 if((f_type & CHAR_LINE_TERMINATOR) != 0) {
764 f_input->ErrMsg(AS_ERR_UNTERMINTED_STRING, "a string can't include a line terminator");
765 return;
766 }
767 if(c == '\\') {
768 c = EscapeSequence();
769 // here c can be equal to quote (c == quote)
770 }
771 f_data.f_str.AppendChar(c);
772 c = GetC();
773 }
774 }
775
776
777
GetNextToken(void)778 const Data& Lexer::GetNextToken(void)
779 {
780 long c;
781
782 f_data.Clear();
783
784 for(;;) {
785 c = GetC();
786 if(c < 0) {
787 // we're done
788 f_data.f_type = NODE_EOF;
789 return f_data;
790 }
791
792 if((f_type & (CHAR_WHITE_SPACE | CHAR_LINE_TERMINATOR | CHAR_INVALID)) != 0) {
793 continue;
794 }
795
796 if((f_type & CHAR_LETTER) != 0) {
797 ReadIdentifier(c);
798 return f_data;
799 }
800
801 if((f_type & CHAR_DIGIT) != 0) {
802 ReadNumber(c);
803 return f_data;
804 }
805
806 switch(c) {
807 case '"':
808 case '\'':
809 case '`': // TODO: do we want to support correct regex?
810 ReadString(c);
811 if(c == '`') {
812 f_data.f_type = NODE_REGULAR_EXPRESSION;
813 }
814 return f_data;
815
816 case '<':
817 c = GetC();
818 if(c == '<') {
819 c = GetC();
820 if(c == '=') {
821 f_data.f_type = NODE_ASSIGNMENT_SHIFT_LEFT;
822 return f_data;
823 }
824 UngetC(c);
825 f_data.f_type = NODE_SHIFT_LEFT;
826 return f_data;
827 }
828 if(c == '=') {
829 f_data.f_type = NODE_LESS_EQUAL;
830 return f_data;
831 }
832 if(f_options != 0
833 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0) {
834 if(c == '>') {
835 f_data.f_type = NODE_NOT_EQUAL;
836 return f_data;
837 }
838 }
839 UngetC(c);
840 f_data.f_type = NODE_LESS;
841 return f_data;
842
843 case '>':
844 c = GetC();
845 if(c == '>') {
846 c = GetC();
847 if(c == '>') {
848 c = GetC();
849 if(c == '=') {
850 f_data.f_type = NODE_ASSIGNMENT_SHIFT_RIGHT_UNSIGNED;
851 return f_data;
852 }
853 UngetC(c);
854 f_data.f_type = NODE_SHIFT_RIGHT_UNSIGNED;
855 return f_data;
856 }
857 if(c == '=') {
858 f_data.f_type = NODE_ASSIGNMENT_SHIFT_RIGHT;
859 return f_data;
860 }
861 UngetC(c);
862 f_data.f_type = NODE_SHIFT_RIGHT;
863 return f_data;
864 }
865 if(c == '=') {
866 f_data.f_type = NODE_GREATER_EQUAL;
867 return f_data;
868 }
869 UngetC(c);
870 f_data.f_type = NODE_GREATER;
871 return f_data;
872
873 case '!':
874 c = GetC();
875 if(f_options != 0
876 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0) {
877 if(c == '<') {
878 c = GetC();
879 if(c == '=') {
880 f_data.f_type = NODE_ASSIGNMENT_ROTATE_LEFT;
881 return f_data;
882 }
883 UngetC(c);
884 f_data.f_type = NODE_ROTATE_LEFT;
885 return f_data;
886 }
887 if(c == '>') {
888 c = GetC();
889 if(c == '=') {
890 f_data.f_type = NODE_ASSIGNMENT_ROTATE_RIGHT;
891 return f_data;
892 }
893 UngetC(c);
894 f_data.f_type = NODE_ROTATE_RIGHT;
895 return f_data;
896 }
897 }
898 if(c == '=') {
899 c = GetC();
900 if(c == '=') {
901 f_data.f_type = NODE_STRICTLY_NOT_EQUAL;
902 return f_data;
903 }
904 UngetC(c);
905 f_data.f_type = NODE_NOT_EQUAL;
906 return f_data;
907 }
908 UngetC(c);
909 f_data.f_type = NODE_LOGICAL_NOT;
910 return f_data;
911
912 case '=':
913 c = GetC();
914 if(c == '=') {
915 c = GetC();
916 if(c == '=') {
917 f_data.f_type = NODE_STRICTLY_EQUAL;
918 return f_data;
919 }
920 UngetC(c);
921 f_data.f_type = NODE_EQUAL;
922 return f_data;
923 }
924 UngetC(c);
925 f_data.f_type = NODE_ASSIGNMENT;
926 return f_data;
927
928 case ':':
929 c = GetC();
930 if(f_options != 0
931 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0
932 && c == '=') {
933 f_data.f_type = NODE_ASSIGNMENT;
934 return f_data;
935 }
936 if(c == ':') {
937 f_data.f_type = NODE_SCOPE;
938 return f_data;
939 }
940 UngetC(c);
941 f_data.f_type = NODE_COLON;
942 return f_data;
943
944 case '~':
945 c = GetC();
946 if(f_options != 0
947 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0
948 && c == '=') {
949 f_data.f_type = NODE_MATCH;
950 return f_data;
951 }
952 UngetC(c);
953 f_data.f_type = NODE_BITWISE_NOT;
954 return f_data;
955
956 case '+':
957 c = GetC();
958 if(c == '=') {
959 f_data.f_type = NODE_ASSIGNMENT_ADD;
960 return f_data;
961 }
962 if(c == '+') {
963 f_data.f_type = NODE_INCREMENT;
964 return f_data;
965 }
966 UngetC(c);
967 f_data.f_type = NODE_ADD;
968 return f_data;
969
970 case '-':
971 c = GetC();
972 if(c == '=') {
973 f_data.f_type = NODE_ASSIGNMENT_SUBTRACT;
974 return f_data;
975 }
976 if(c == '-') {
977 f_data.f_type = NODE_DECREMENT;
978 return f_data;
979 }
980 UngetC(c);
981 f_data.f_type = NODE_SUBTRACT;
982 return f_data;
983
984 case '*':
985 c = GetC();
986 if(c == '=') {
987 f_data.f_type = NODE_ASSIGNMENT_MULTIPLY;
988 return f_data;
989 }
990 if(f_options != 0
991 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0
992 && c == '*') {
993 c = GetC();
994 if(c == '=') {
995 f_data.f_type = NODE_ASSIGNMENT_POWER;
996 return f_data;
997 }
998 UngetC(c);
999 f_data.f_type = NODE_POWER;
1000 return f_data;
1001 }
1002 UngetC(c);
1003 f_data.f_type = NODE_MULTIPLY;
1004 return f_data;
1005
1006 case '/':
1007 c = GetC();
1008 if(c == '=') {
1009 f_data.f_type = NODE_ASSIGNMENT_DIVIDE;
1010 return f_data;
1011 }
1012 if(c == '/') {
1013 // skip comments (to end of line)
1014 do {
1015 c = GetC();
1016 } while((f_type & CHAR_LINE_TERMINATOR) == 0 && c > 0);
1017 break;
1018 }
1019 if(c == '*') {
1020 // skip comments (multiline)
1021 do {
1022 c = GetC();
1023 while(c == '*') {
1024 c = GetC();
1025 if(c == '/') {
1026 c = -1;
1027 break;
1028 }
1029 }
1030 } while(c > 0);
1031 break;
1032 }
1033 UngetC(c);
1034 f_data.f_type = NODE_DIVIDE;
1035 return f_data;
1036
1037 case '%':
1038 c = GetC();
1039 if(c == '=') {
1040 f_data.f_type = NODE_ASSIGNMENT_MODULO;
1041 return f_data;
1042 }
1043 UngetC(c);
1044 f_data.f_type = NODE_MODULO;
1045 return f_data;
1046
1047 case '?':
1048 c = GetC();
1049 if(f_options != 0
1050 && f_options->GetOption(AS_OPTION_EXTENDED_OPERATORS) != 0) {
1051 if(c == '<') {
1052 c = GetC();
1053 if(c == '=') {
1054 f_data.f_type = NODE_ASSIGNMENT_MINIMUM;
1055 return f_data;
1056 }
1057 UngetC(c);
1058 f_data.f_type = NODE_MINIMUM;
1059 return f_data;
1060 }
1061 if(c == '>') {
1062 c = GetC();
1063 if(c == '=') {
1064 f_data.f_type = NODE_ASSIGNMENT_MAXIMUM;
1065 return f_data;
1066 }
1067 UngetC(c);
1068 f_data.f_type = NODE_MAXIMUM;
1069 return f_data;
1070 }
1071 }
1072 UngetC(c);
1073 f_data.f_type = NODE_CONDITIONAL;
1074 return f_data;
1075
1076 case '&':
1077 c = GetC();
1078 if(c == '=') {
1079 f_data.f_type = NODE_ASSIGNMENT_BITWISE_AND;
1080 return f_data;
1081 }
1082 if(c == '&') {
1083 c = GetC();
1084 if(c == '=') {
1085 f_data.f_type = NODE_ASSIGNMENT_LOGICAL_AND;
1086 return f_data;
1087 }
1088 UngetC(c);
1089 f_data.f_type = NODE_LOGICAL_AND;
1090 return f_data;
1091 }
1092 UngetC(c);
1093 f_data.f_type = NODE_BITWISE_AND;
1094 return f_data;
1095
1096 case '^':
1097 c = GetC();
1098 if(c == '=') {
1099 f_data.f_type = NODE_ASSIGNMENT_BITWISE_XOR;
1100 return f_data;
1101 }
1102 if(c == '^') {
1103 c = GetC();
1104 if(c == '=') {
1105 f_data.f_type = NODE_ASSIGNMENT_LOGICAL_XOR;
1106 return f_data;
1107 }
1108 UngetC(c);
1109 f_data.f_type = NODE_LOGICAL_XOR;
1110 return f_data;
1111 }
1112 UngetC(c);
1113 f_data.f_type = NODE_BITWISE_XOR;
1114 return f_data;
1115
1116 case '|':
1117 c = GetC();
1118 if(c == '=') {
1119 f_data.f_type = NODE_ASSIGNMENT_BITWISE_OR;
1120 return f_data;
1121 }
1122 if(c == '|') {
1123 c = GetC();
1124 if(c == '=') {
1125 f_data.f_type = NODE_ASSIGNMENT_LOGICAL_OR;
1126 return f_data;
1127 }
1128 UngetC(c);
1129 f_data.f_type = NODE_LOGICAL_OR;
1130 return f_data;
1131 }
1132 UngetC(c);
1133 f_data.f_type = NODE_BITWISE_OR;
1134 return f_data;
1135
1136 case '.':
1137 c = GetC();
1138 if(c >= '0' && c <= '9') {
1139 // this is a valid fraction
1140 UngetC(c);
1141 ReadNumber('.');
1142 return f_data;
1143 }
1144 if(c == '.') {
1145 c = GetC();
1146 if(c == '.') {
1147 // Elipsis!
1148 f_data.f_type = NODE_REST;
1149 return f_data;
1150 }
1151 UngetC(c);
1152 // Range (not too sure if this is really used yet
1153 // and whether it will be called RANGE)
1154 f_data.f_type = NODE_RANGE;
1155 return f_data;
1156 }
1157 UngetC(c);
1158 f_data.f_type = NODE_MEMBER;
1159 return f_data;
1160
1161 case '[':
1162 f_data.f_type = NODE_OPEN_SQUARE_BRACKET;
1163 return f_data;
1164
1165 case ']':
1166 f_data.f_type = NODE_CLOSE_SQUARE_BRACKET;
1167 return f_data;
1168
1169 case '{':
1170 f_data.f_type = NODE_OPEN_CURVLY_BRACKET;
1171 return f_data;
1172
1173 case '}':
1174 f_data.f_type = NODE_CLOSE_CURVLY_BRACKET;
1175 return f_data;
1176
1177 case '(':
1178 f_data.f_type = NODE_OPEN_PARENTHESIS;
1179 return f_data;
1180
1181 case ')':
1182 f_data.f_type = NODE_CLOSE_PARENTHESIS;
1183 return f_data;
1184
1185 case ';':
1186 f_data.f_type = NODE_SEMICOLON;
1187 return f_data;
1188
1189 case ',':
1190 f_data.f_type = NODE_COMMA;
1191 return f_data;
1192
1193 default:
1194 if(c > ' ' && c < 0x7F) {
1195 f_input->ErrMsg(AS_ERR_UNEXPECTED_PUNCTUATION, "unexpected punctuation '%c'", (char) c);
1196 }
1197 else {
1198 f_input->ErrMsg(AS_ERR_UNEXPECTED_PUNCTUATION, "unexpected punctuation '\\U%08lX'", c);
1199 }
1200 break;
1201
1202 }
1203 }
1204 }
1205
1206
ErrMsg(err_code_t err_code,const char * format,...)1207 void Lexer::ErrMsg(err_code_t err_code, const char *format, ...)
1208 {
1209 va_list ap;
1210
1211 va_start(ap, format);
1212 f_input->ErrMsg(err_code, format, ap);
1213 va_end(ap);
1214 }
1215
1216
1217
1218
1219
1220
1221 }; // namespace as
1222 }; // namespace sswf
1223