1 // Copyright (c) 2019, Fernando Carmona Varo <ferkiwi@gmail.com>
2 // Copyright (c) 2010, Braden "Blzut3" Obrzut <admin@maniacsvault.net>
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright
10 // notice, this list of conditions and the following disclaimer in the
11 // documentation and/or other materials provided with the distribution.
12 // * Neither the name of the <organization> nor the
13 // names of its contributors may be used to endorse or promote products
14 // derived from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 // ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include "config.h"
28
29 #include <stdarg.h>
30 #include <ctype.h>
31
32 #include "doomtype.h"
33 #include "z_zone.h"
34 #include "lprintf.h"
35 #include "u_scanner.h"
36
37 const char* U_TokenNames[TK_NumSpecialTokens] =
38 {
39 "Identifier", // case insensitive identifier, beginning with a letter and may contain [a-z0-9_]
40 "String Constant",
41 "Integer Constant",
42 "Float Constant",
43 "Boolean Constant",
44 "Logical And",
45 "Logical Or",
46 "Equals",
47 "Not Equals",
48 "Greater Than or Equals"
49 "Less Than or Equals",
50 "Left Shift",
51 "Right Shift"
52 };
53
54 void U_CheckForWhitespace(u_scanner_t* scanner);
55 void U_ExpandState(u_scanner_t* scanner);
56 void U_Unescape(char *str);
57 void U_SetString(char **ptr, const char *start, int length);
58
U_ScanOpen(const char * data,int length,const char * name)59 u_scanner_t U_ScanOpen(const char* data, int length, const char* name)
60 {
61 u_scanner_t scanner;
62 scanner.lineStart = scanner.logicalPosition = scanner.scanPos = scanner.tokenLinePosition = 0;
63 scanner.line = scanner.tokenLine = 1;
64 scanner.needNext = TRUE;
65 scanner.string = NULL;
66 scanner.nextState.string = NULL;
67 scanner.name = name;
68
69 if(length == -1)
70 length = strlen(data);
71 scanner.length = length;
72 scanner.data = (char*) malloc(sizeof(char)*length);
73 memcpy(scanner.data, data, length);
74
75 U_CheckForWhitespace(&scanner);
76
77 return scanner;
78 }
79
/**
 * Releases every buffer owned by the scanner: the text of the current token,
 * the text of the look-ahead token and the private copy of the input.
 * The pointers are reset to NULL afterwards, so closing the same scanner
 * twice is harmless.
 */
void U_ScanClose(u_scanner_t* scanner)
{
  // scanner->string is allocated by U_SetString when a token is expanded and
  // was previously never released.
  if (scanner->string != NULL)
    free(scanner->string);
  scanner->string = NULL;

  if (scanner->nextState.string != NULL)
    free(scanner->nextState.string);
  scanner->nextState.string = NULL;

  if (scanner->data != NULL)
    free(scanner->data);
  scanner->data = NULL;
}
87
// Registers a line break: the current scan position becomes the start of the
// new line and the line counter moves on by one.
void U_IncrementLine(u_scanner_t* scanner)
{
  scanner->lineStart = scanner->scanPos;
  scanner->line += 1;
}
93
/**
 * Advances the scan position past blanks, NUL bytes, line breaks, line
 * comments and block comments, keeping the line bookkeeping up to date via
 * U_IncrementLine. Stops at the first character that starts a token (or at
 * the end of the input).
 */
void U_CheckForWhitespace(u_scanner_t* scanner)
{
  enum { COMMENT_NONE, COMMENT_LINE, COMMENT_BLOCK };
  int state = COMMENT_NONE;

  while(scanner->scanPos < scanner->length)
  {
    const char here = scanner->data[scanner->scanPos];
    char ahead = 0; // character after 'here', 0 when 'here' is the last one
    int atLineBreak;

    if(scanner->scanPos+1 < scanner->length)
      ahead = scanner->data[scanner->scanPos+1];
    atLineBreak = (here == '\n' || here == '\r');

    if(state == COMMENT_BLOCK && here == '*' && ahead == '/')
    {
      // End of a block comment.
      state = COMMENT_NONE;
      scanner->scanPos += 2;
    }
    else if(atLineBreak)
    {
      // A Windows style "\r\n" pair counts as a single line break.
      scanner->scanPos += (here == '\r' && ahead == '\n') ? 2 : 1;
      if(state == COMMENT_LINE)
        state = COMMENT_NONE;
      U_IncrementLine(scanner);
    }
    else if(state == COMMENT_BLOCK)
    {
      // Anything else inside a block comment is ignored.
      scanner->scanPos++;
    }
    else if(here == ' ' || here == '\t' || here == 0)
    {
      scanner->scanPos++;
    }
    else if(here == '/' && state == COMMENT_NONE)
    {
      if(ahead == '/')
        state = COMMENT_LINE;
      else if(ahead == '*')
        state = COMMENT_BLOCK;
      else
        return; // a lone '/' is a token of its own
      scanner->scanPos += 2;
    }
    else if(state == COMMENT_NONE)
    {
      return; // first character of a token
    }
    else
    {
      // Inside a line comment: skip until the line break.
      scanner->scanPos++;
    }
  }
}
162
U_CheckToken(u_scanner_t * s,char token)163 dbool U_CheckToken(u_scanner_t* s, char token)
164 {
165 if(s->needNext)
166 {
167 if(!U_GetNextToken(s, FALSE))
168 return FALSE;
169 }
170
171 // An int can also be a float.
172 if((s->nextState).token == token || ((s->nextState).token == TK_IntConst && s->token == TK_FloatConst))
173 {
174 s->needNext = TRUE;
175 U_ExpandState(s);
176 return TRUE;
177 }
178 s->needNext = FALSE;
179 return FALSE;
180 }
181
/**
 * Makes the look-ahead token (s->nextState) the scanner's current token.
 *
 * Records the position right after the token as logicalPosition, skips the
 * whitespace/comments that follow, and copies text, numeric values, token
 * type and source location from nextState into the scanner itself.
 */
void U_ExpandState(u_scanner_t* s)
{
  // nextState.string stays NULL until the first token with text has been
  // scanned (e.g. empty input, or end of input hit on the very first read).
  // Use an empty string in that case instead of handing NULL to U_SetString.
  const char *text = s->nextState.string != NULL ? s->nextState.string : "";

  s->logicalPosition = s->scanPos;
  U_CheckForWhitespace(s);

  U_SetString(&(s->string), text, -1);
  s->number = s->nextState.number;
  s->decimal = s->nextState.decimal;
  s->boolean = s->nextState.boolean;
  s->token = s->nextState.token;
  s->tokenLine = s->nextState.tokenLine;
  s->tokenLinePosition = s->nextState.tokenLinePosition;
}
195
/**
 * Meant to snapshot the parser state of s (everything except the data
 * pointer) into savedstate.
 *
 * NOTE(review): savedstate is received BY VALUE, so anything stored in it is
 * discarded when the function returns and can never reach the caller. The
 * previous body nevertheless
 *   - free()d savedstate.string / savedstate.nextState.string, which are
 *     copies of pointers the caller's struct still holds (leaving them
 *     dangling in the caller),
 *   - leaked two strdup() results on every call, and
 *   - passed NULL to strdup() whenever no token text existed yet.
 * Those side effects are removed. With this signature the function cannot
 * have any observable effect; a working snapshot needs a u_scanner_t*
 * parameter, which would be an interface change.
 */
void U_SaveState(u_scanner_t* s, u_scanner_t savedstate)
{
  (void) s;
  (void) savedstate;
}
207
// Returns a heap copy of text, or NULL when text is NULL or the allocation
// fails. Private helper of U_RestoreState.
static char *U_RestoreCopyText(const char *text)
{
  size_t size;
  char *copy;

  if (text == NULL)
    return NULL;
  size = strlen(text) + 1;
  copy = (char*) malloc(size);
  if (copy != NULL)
    memcpy(copy, text, size);
  return copy;
}

/**
 * Puts the scanner s back into the parser state described by savedstate.
 *
 * A snapshot is recognised by a NULL data pointer (snapshots never own the
 * input buffer) and a line number of at least 1 (U_ScanOpen starts counting
 * at 1). Anything else -- in particular a zero-initialised struct -- is not a
 * snapshot and leaves s untouched.
 *
 * s keeps its own data buffer and receives private copies of the snapshot's
 * token strings; the strings referenced by savedstate remain owned by the
 * caller.
 *
 * The previous implementation delegated to U_SaveState with the arguments
 * swapped; because that function takes its destination by value, this freed
 * the live scanner's strings and restored nothing.
 */
void U_RestoreState(u_scanner_t* s, u_scanner_t savedstate)
{
  char *data;
  char *string;
  char *nextString;

  if (savedstate.data != NULL || savedstate.line == 0)
    return;

  // Duplicate before freeing so this stays correct even if the snapshot
  // happens to share string pointers with s.
  string = U_RestoreCopyText(savedstate.string);
  nextString = U_RestoreCopyText(savedstate.nextState.string);

  if (s->string != NULL)
    free(s->string);
  if (s->nextState.string != NULL)
    free(s->nextState.string);

  data = s->data;
  memcpy(s, &savedstate, sizeof(*s));
  s->data = data;
  s->string = string;
  s->nextState.string = nextString;
}
217
/**
 * Scans the next token of the input into scanner->nextState.
 *
 * If a look-ahead token is still pending (needNext is FALSE) no scanning
 * takes place: the pending token is simply accepted. Otherwise one token is
 * read starting at scanPos: an identifier, an integer/float constant, a
 * quoted string, one of the two-character operators, or a single character
 * (whose character code becomes the token id).
 *
 * expandState: when TRUE the result is also copied into the scanner's
 *              current state through U_ExpandState (on failure as well).
 *
 * Returns TRUE when a token is available, FALSE at the end of the input or
 * when no token text could be formed (nextState->token is then TK_NoToken).
 */
dbool U_GetNextToken(u_scanner_t* scanner, dbool expandState)
{
  unsigned int start;
  unsigned int end;
  char cur;
  int integerBase = 10;
  dbool floatHasDecimal = FALSE;
  dbool floatHasExponent = FALSE;
  dbool stringFinished = FALSE; // Strings are the only things that can have 0 length tokens.
  u_parserstate_t* nextState = &scanner->nextState;

  if(!scanner->needNext)
  {
    // A look-ahead token was scanned earlier but not consumed; use it.
    scanner->needNext = TRUE;
    if(expandState)
      U_ExpandState(scanner);
    return TRUE;
  }

  nextState->tokenLine = scanner->line;
  nextState->tokenLinePosition = scanner->scanPos - scanner->lineStart;
  nextState->token = TK_NoToken;
  if(scanner->scanPos >= scanner->length)
  {
    if(expandState)
      U_ExpandState(scanner);
    return FALSE;
  }

  // start == end means "end of token not found yet".
  start = scanner->scanPos;
  end = scanner->scanPos;
  cur = scanner->data[scanner->scanPos++];

  // Determine by first character
  if(cur == '_' || (cur >= 'A' && cur <= 'Z') || (cur >= 'a' && cur <= 'z'))
    nextState->token = TK_Identifier;
  else if(cur >= '0' && cur <= '9')
  {
    if(cur == '0')
      integerBase = 8;
    nextState->token = TK_IntConst;
  }
  else if(cur == '.')
  {
    floatHasDecimal = TRUE;
    nextState->token = TK_FloatConst;
  }
  else if(cur == '"')
  {
    end = ++start; // Move the start up one character so we don't have to trim it later.
    nextState->token = TK_StringConst;
  }
  else
  {
    end = scanner->scanPos;
    nextState->token = cur;

    // Now check for operator tokens
    if(scanner->scanPos < scanner->length)
    {
      char next = scanner->data[scanner->scanPos];
      if(cur == '&' && next == '&')
        nextState->token = TK_AndAnd;
      else if(cur == '|' && next == '|')
        nextState->token = TK_OrOr;
      else if(cur == '<' && next == '<')
        nextState->token = TK_ShiftLeft;
      else if(cur == '>' && next == '>')
        nextState->token = TK_ShiftRight;
      //else if(cur == '#' && next == '#')
      //  nextState.token = TK_MacroConcat;
      else if(next == '=')
      {
        switch(cur)
        {
          case '=':
            nextState->token = TK_EqEq;
            break;
          case '!':
            nextState->token = TK_NotEq;
            break;
          case '>':
            nextState->token = TK_GtrEq;
            break;
          case '<':
            nextState->token = TK_LessEq;
            break;
          default:
            break;
        }
      }
      // A two-character operator was recognised: consume its second character.
      if(nextState->token != cur)
      {
        scanner->scanPos++;
        end = scanner->scanPos;
      }
    }
  }

  if(start == end)
  {
    // Multi-character token: extend it one character at a time until a
    // character that cannot belong to it is found.
    while(scanner->scanPos < scanner->length)
    {
      cur = scanner->data[scanner->scanPos];
      switch(nextState->token)
      {
        default:
          break;
        case TK_Identifier:
          if(cur != '_' && (cur < 'A' || cur > 'Z') && (cur < 'a' || cur > 'z') && (cur < '0' || cur > '9'))
            end = scanner->scanPos;
          break;
        case TK_IntConst:
          // A '.' or an exponent marker turns the integer into a float. In a
          // hexadecimal literal 'e' is an ordinary digit, so it must not be
          // taken as an exponent there (otherwise "0xfe" was cut into the
          // float "0xf" followed by the identifier "e").
          if(cur == '.' || (integerBase != 16 && scanner->scanPos-1 != start && cur == 'e'))
            nextState->token = TK_FloatConst;
          else if((cur == 'x' || cur == 'X') && scanner->scanPos-1 == start)
          {
            integerBase = 16;
            break;
          }
          else
          {
            switch(integerBase)
            {
              default:
                if(cur < '0' || cur > '9')
                  end = scanner->scanPos;
                break;
              case 8:
                if(cur < '0' || cur > '7')
                  end = scanner->scanPos;
                break;
              case 16:
                if((cur < '0' || cur > '9') && (cur < 'A' || cur > 'F') && (cur < 'a' || cur > 'f'))
                  end = scanner->scanPos;
                break;
            }
            break;
          }
          // fall through: the token has just been reclassified as a float
        case TK_FloatConst:
          if(cur < '0' || cur > '9')
          {
            if(!floatHasDecimal && cur == '.')
            {
              floatHasDecimal = TRUE;
              break;
            }
            else if(!floatHasExponent && cur == 'e')
            {
              floatHasDecimal = TRUE;
              floatHasExponent = TRUE;
              if(scanner->scanPos+1 < scanner->length)
              {
                // The exponent marker must be followed by a digit or a sign;
                // a sign is consumed together with the 'e'.
                char next = scanner->data[scanner->scanPos+1];
                if((next < '0' || next > '9') && next != '+' && next != '-')
                  end = scanner->scanPos;
                else
                  scanner->scanPos++;
              }
              break;
            }
            end = scanner->scanPos;
          }
          break;
        case TK_StringConst:
          if(cur == '"')
          {
            stringFinished = TRUE;
            end = scanner->scanPos;
            scanner->scanPos++;
          }
          else if(cur == '\\')
            scanner->scanPos++; // Will add two since the loop automatically adds one
          break;
      }
      if(start == end && !stringFinished)
        scanner->scanPos++;
      else
        break;
    }
    // If we reached end of input while reading, set it as the end of token
    if(scanner->scanPos == scanner->length && start == end)
      end = scanner->length;
  }

  if(end-start > 0 || stringFinished)
  {
    U_SetString(&(nextState->string), scanner->data+start, end-start);
    if(nextState->token == TK_FloatConst)
    {
      nextState->decimal = atof(nextState->string);
      nextState->number = (int) (nextState->decimal);
      nextState->boolean = (nextState->number != 0);
    }
    else if(nextState->token == TK_IntConst)
    {
      nextState->number = strtol(nextState->string, NULL, integerBase);
      nextState->decimal = nextState->number;
      nextState->boolean = (nextState->number != 0);
    }
    else if(nextState->token == TK_Identifier)
    {
      // Identifiers should be case insensitive.
      char *p = nextState->string;
      while (*p)
      {
        // <ctype.h> functions require a value representable as unsigned char.
        *p = (char) tolower((unsigned char) *p);
        p++;
      }
      // Check for a boolean constant.
      if(strcmp(nextState->string, "true") == 0)
      {
        nextState->token = TK_BoolConst;
        nextState->boolean = TRUE;
      }
      else if (strcmp(nextState->string, "false") == 0)
      {
        nextState->token = TK_BoolConst;
        nextState->boolean = FALSE;
      }
    }
    else if(nextState->token == TK_StringConst)
    {
      U_Unescape(nextState->string);
    }
    if(expandState)
      U_ExpandState(scanner);
    return TRUE;
  }
  nextState->token = TK_NoToken;
  if(expandState)
    U_ExpandState(scanner);
  return FALSE;
}
452
453 /**
454 * Skips all Tokens in current line and parses the first token on
455 * the next line.
456 */
U_GetNextLineToken(u_scanner_t * scanner)457 dbool U_GetNextLineToken(u_scanner_t* scanner)
458 {
459 unsigned int line = scanner->line;
460 dbool retval = FALSE;
461
462 do retval = U_GetNextToken(scanner, TRUE);
463 while (retval && scanner->line == line);
464
465 return retval;
466 }
467
468
U_ErrorToken(u_scanner_t * s,int token)469 void U_ErrorToken(u_scanner_t* s, int token)
470 {
471 if (token < TK_NumSpecialTokens && s->token < TK_NumSpecialTokens)
472 U_Error(s, "Expected %s but got %s '%s' instead.", U_TokenNames[token], U_TokenNames[(int)s->token], s->string);
473 else if (token < TK_NumSpecialTokens && s->token >= TK_NumSpecialTokens)
474 U_Error(s, "Expected %s but got '%c' instead.", U_TokenNames[token], s->token);
475 else if (token >= TK_NumSpecialTokens && s->token < TK_NumSpecialTokens)
476 U_Error(s, "Expected '%c' but got %s '%s' instead.", token, U_TokenNames[(int)s->token], s->string);
477 else
478 U_Error(s, "Expected '%c' but got '%c' instead.", token, s->token);
479 }
480
U_ErrorString(u_scanner_t * s,const char * mustget)481 void U_ErrorString(u_scanner_t* s, const char *mustget)
482 {
483 if (s->token < TK_NumSpecialTokens)
484 U_Error(s, "Expected '%s' but got %s '%s' instead.", mustget, U_TokenNames[(int)s->token], s->string);
485 else
486 U_Error(s, "Expected '%s' but got '%c' instead.", mustget, s->token);
487 }
488
/**
 * Formats a printf-style message and passes it to I_Error, prefixed with the
 * scanner's name and the line/column of its current token.
 * Messages longer than the local buffer are truncated.
 */
void U_Error(u_scanner_t* s, const char *msg, ...)
{
  char text[1024];
  va_list args;

  va_start(args, msg);
  vsnprintf(text, sizeof(text), msg, args);
  va_end(args);

  I_Error("%s:%d:%d:%s.", s->name, s->tokenLine, s->tokenLinePosition, text);
}
498
U_MustGetToken(u_scanner_t * s,char token)499 dbool U_MustGetToken(u_scanner_t* s, char token)
500 {
501 if(!U_CheckToken(s, token))
502 {
503 U_ExpandState(s);
504 U_ErrorToken(s, token);
505 return FALSE;
506 }
507 return TRUE;
508 }
509
U_MustGetIdentifier(u_scanner_t * s,const char * ident)510 dbool U_MustGetIdentifier(u_scanner_t* s, const char *ident)
511 {
512 if (!U_CheckToken(s, TK_Identifier) || strcasecmp(s->string, ident))
513 {
514 U_ErrorString(s, ident);
515 return FALSE;
516 }
517 return TRUE;
518 }
519
520 // Convenience helpers that parse an entire number including a leading minus or plus sign
U_ScanInteger(u_scanner_t * s)521 dbool U_ScanInteger(u_scanner_t* s)
522 {
523 dbool neg = FALSE;
524 if (!U_GetNextToken(s, TRUE))
525 {
526 return FALSE;
527 }
528 if (s->token == '-')
529 {
530 if (!U_GetNextToken(s, TRUE))
531 {
532 return FALSE;
533 }
534 neg = TRUE;
535 }
536 else if (s->token == '+')
537 {
538 if (!U_GetNextToken(s, TRUE))
539 {
540 return FALSE;
541 }
542 }
543 if (s->token != TK_IntConst)
544 {
545 return FALSE;
546 }
547 if (neg)
548 {
549 s->number = -(s->number);
550 s->decimal = -(s->decimal);
551 }
552 return TRUE;
553 }
554
U_ScanFloat(u_scanner_t * s)555 dbool U_ScanFloat(u_scanner_t* s)
556 {
557 dbool neg = FALSE;
558 if (!U_GetNextToken(s, TRUE))
559 {
560 return FALSE;
561 }
562 if (s->token == '-')
563 {
564 if (!U_GetNextToken(s, TRUE))
565 {
566 return FALSE;
567 }
568 neg = TRUE;
569 }
570 else if (s->token == '+')
571 {
572 if (!U_GetNextToken(s, TRUE))
573 {
574 return FALSE;
575 }
576 }
577 if (s->token != TK_IntConst && s->token != TK_FloatConst)
578 {
579 return FALSE;
580 }
581 if (neg)
582 {
583 s->number = -(s->number);
584 s->decimal = -(s->decimal);
585 }
586 return TRUE;
587 }
588
U_CheckInteger(u_scanner_t * s)589 dbool U_CheckInteger(u_scanner_t* s)
590 {
591 dbool res;
592 u_scanner_t savedstate = {0};
593 U_SaveState(s, savedstate);
594 res = U_ScanInteger(s);
595 if (!res)
596 U_RestoreState(s, savedstate);
597 return res;
598 }
599
U_CheckFloat(u_scanner_t * s)600 dbool U_CheckFloat(u_scanner_t* s)
601 {
602 dbool res;
603 u_scanner_t savedstate = {0};
604 U_SaveState(s, savedstate);
605 res = U_ScanFloat(s);
606 if (!res)
607 U_RestoreState(s, savedstate);
608 return res;
609 }
610
U_MustGetInteger(u_scanner_t * s)611 dbool U_MustGetInteger(u_scanner_t* s)
612 {
613 if (!U_ScanInteger(s))
614 {
615 U_ErrorToken(s, TK_IntConst);
616 return FALSE;
617 }
618 return TRUE;
619 }
620
U_MustGetFloat(u_scanner_t * s)621 dbool U_MustGetFloat(u_scanner_t* s)
622 {
623 if (!U_ScanFloat(s))
624 {
625 U_ErrorToken(s, TK_FloatConst);
626 return FALSE;
627 }
628 return TRUE;
629 }
630
631
U_HasTokensLeft(u_scanner_t * s)632 dbool U_HasTokensLeft(u_scanner_t* s)
633 {
634 return (s->scanPos < s->length);
635 }
636
// This is taken from ZDoom's strbin function which can do a lot more than just unescaping backslashes and quotation marks.
//
// Replaces the backslash escape sequences in str by the characters they
// stand for, in place (the result is never longer than the input):
//   \a \b \f \n \t \r \v \?   the usual C control characters
//   \xH, \xHH                 character given by one or two hex digits
//   \O, \OO, \OOO             character given by up to three octal digits
//   backslash + newline       removed (line continuation)
//   backslash + anything else that character itself (e.g. \" and \\)
// A lone backslash at the very end of the string is dropped.
void U_Unescape(char *str)
{
  char *p = str, c;
  int i;

  while ((c = *p++)) {
    if (c != '\\') {
      *str++ = c;
    }
    else if (*p != 0) {
      // The check above matters: without it a trailing backslash would make
      // p step over the terminator and keep reading past the end of the
      // string.
      switch (*p) {
        case 'a':
          *str++ = '\a';
          break;
        case 'b':
          *str++ = '\b';
          break;
        case 'f':
          *str++ = '\f';
          break;
        case 'n':
          *str++ = '\n';
          break;
        case 't':
          *str++ = '\t';
          break;
        case 'r':
          *str++ = '\r';
          break;
        case 'v':
          *str++ = '\v';
          break;
        case '?':
          *str++ = '\?';
          break;
        case '\n':
          break;
        case 'x':
        case 'X':
          c = 0;
          for (i = 0; i < 2; i++)
          {
            p++;
            if (*p >= '0' && *p <= '9')
              c = (c << 4) + *p - '0';
            else if (*p >= 'a' && *p <= 'f')
              c = (c << 4) + 10 + *p - 'a';
            else if (*p >= 'A' && *p <= 'F')
              c = (c << 4) + 10 + *p - 'A';
            else
            {
              // Not a hex digit: give it back to the main loop.
              p--;
              break;
            }
          }
          *str++ = c;
          break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
          c = *p - '0';
          for (i = 0; i < 2; i++)
          {
            p++;
            if (*p >= '0' && *p <= '7')
              c = (c << 3) + *p - '0';
            else
            {
              // Not an octal digit: give it back to the main loop.
              p--;
              break;
            }
          }
          *str++ = c;
          break;
        default:
          *str++ = *p;
          break;
      }
      p++; // step over the last character of the escape sequence
    }
  }
  *str = 0;
}
726
/**
 * Replaces the heap string *ptr by a fresh NUL-terminated copy of `start`.
 *
 * ptr:    address of the string to replace; *ptr must be NULL or a pointer
 *         obtained from malloc. The previous string is freed.
 * start:  first character to copy. NULL is treated as an empty string.
 * length: number of characters to copy; a negative value (callers pass -1)
 *         means start is NUL-terminated and is copied completely.
 *
 * The copy is made before the old string is released, so `start` may point
 * into the string currently stored in *ptr. If the allocation fails *ptr is
 * left unchanged.
 */
void U_SetString(char **ptr, const char *start, int length)
{
  char *copy;

  if (start == NULL)
  {
    start = "";
    length = 0;
  }
  else if (length < 0)
    length = (int) strlen(start);

  copy = (char*)malloc((size_t) length + 1);
  if (copy == NULL)
    return;
  memcpy(copy, start, length);
  copy[length] = '\0';

  if (*ptr != NULL) free(*ptr);
  *ptr = copy;
}
736