1 // Copyright (c) 2019, Fernando Carmona Varo  <ferkiwi@gmail.com>
2 // Copyright (c) 2010, Braden "Blzut3" Obrzut <admin@maniacsvault.net>
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //    * Redistributions of source code must retain the above copyright
8 //      notice, this list of conditions and the following disclaimer.
9 //    * Redistributions in binary form must reproduce the above copyright
10 //      notice, this list of conditions and the following disclaimer in the
11 //      documentation and/or other materials provided with the distribution.
12 //    * Neither the name of the <organization> nor the
13 //      names of its contributors may be used to endorse or promote products
14 //      derived from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 // ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "config.h"
28 
29 #include <stdarg.h>
30 #include <ctype.h>
31 
32 #include "doomtype.h"
33 #include "z_zone.h"
34 #include "lprintf.h"
35 #include "u_scanner.h"
36 
37 const char* U_TokenNames[TK_NumSpecialTokens] =
38 {
39   "Identifier", // case insensitive identifier, beginning with a letter and may contain [a-z0-9_]
40   "String Constant",
41   "Integer Constant",
42   "Float Constant",
43   "Boolean Constant",
44   "Logical And",
45   "Logical Or",
46   "Equals",
47   "Not Equals",
48   "Greater Than or Equals"
49   "Less Than or Equals",
50   "Left Shift",
51   "Right Shift"
52 };
53 
54 void U_CheckForWhitespace(u_scanner_t* scanner);
55 void U_ExpandState(u_scanner_t* scanner);
56 void U_Unescape(char *str);
57 void U_SetString(char **ptr, const char *start, int length);
58 
U_ScanOpen(const char * data,int length,const char * name)59 u_scanner_t U_ScanOpen(const char* data, int length, const char* name)
60 {
61   u_scanner_t scanner;
62   scanner.lineStart = scanner.logicalPosition = scanner.scanPos = scanner.tokenLinePosition = 0;
63   scanner.line = scanner.tokenLine = 1;
64   scanner.needNext = TRUE;
65   scanner.string = NULL;
66   scanner.nextState.string = NULL;
67   scanner.name = name;
68 
69   if(length == -1)
70     length = strlen(data);
71   scanner.length = length;
72   scanner.data = (char*) malloc(sizeof(char)*length);
73   memcpy(scanner.data, data, length);
74 
75   U_CheckForWhitespace(&scanner);
76 
77   return scanner;
78 }
79 
// Releases every buffer owned by the scanner: the copied input and the text
// of both the current and the pending token. The pointers are cleared so that
// closing the same scanner twice is harmless.
//
// The current token text (scanner->string) was previously never released.
// Callers that still need a token's text after closing must copy it first.
void U_ScanClose(u_scanner_t* scanner)
{
  // NULL guards are kept on purpose: free() may be remapped by the zone
  // allocator header -- TODO confirm whether it tolerates NULL.
  if(scanner->string != NULL)
  {
    free(scanner->string);
    scanner->string = NULL;
  }
  if(scanner->nextState.string != NULL)
  {
    free(scanner->nextState.string);
    scanner->nextState.string = NULL;
  }
  if(scanner->data != NULL)
  {
    free(scanner->data);
    scanner->data = NULL;
  }
}
87 
// Records that a line break has just been consumed: the current scan
// position becomes the start of the new line and the line counter advances.
void U_IncrementLine(u_scanner_t* scanner)
{
  scanner->lineStart = scanner->scanPos;
  scanner->line += 1;
}
93 
// Advances scanPos past everything that is not part of a token: spaces, tabs,
// NUL bytes, line breaks (LF, CR or CRLF), "//" comments and "/* */" comments.
// Line bookkeeping is updated for every line break that is skipped. Stops at
// the first character that can start a token, or at the end of the input.
void U_CheckForWhitespace(u_scanner_t* scanner)
{
  enum { SCAN_CODE, SCAN_LINE_COMMENT, SCAN_BLOCK_COMMENT } mode = SCAN_CODE;

  while(scanner->scanPos < scanner->length)
  {
    const char cur = scanner->data[scanner->scanPos];
    const char next = (scanner->scanPos + 1 < scanner->length)
                        ? scanner->data[scanner->scanPos + 1] : 0;
    const int isNewline = (cur == '\n' || cur == '\r');
    // A Windows style CRLF pair counts as a single line break.
    const int newlineWidth = (cur == '\r' && next == '\n') ? 2 : 1;

    if(mode == SCAN_BLOCK_COMMENT)
    {
      if(cur == '*' && next == '/')
      {
        mode = SCAN_CODE;
        scanner->scanPos += 2;
      }
      else if(isNewline)
      {
        scanner->scanPos += newlineWidth;
        U_IncrementLine(scanner);
      }
      else
        scanner->scanPos++;
      continue;
    }

    if(isNewline)
    {
      // A line break also terminates a "//" comment.
      mode = SCAN_CODE;
      scanner->scanPos += newlineWidth;
      U_IncrementLine(scanner);
    }
    else if(mode == SCAN_LINE_COMMENT || cur == ' ' || cur == '\t' || cur == 0)
    {
      // Inside a line comment every character is skipped; outside of one
      // only blanks are.
      scanner->scanPos++;
    }
    else if(cur == '/' && next == '/')
    {
      mode = SCAN_LINE_COMMENT;
      scanner->scanPos += 2;
    }
    else if(cur == '/' && next == '*')
    {
      mode = SCAN_BLOCK_COMMENT;
      scanner->scanPos += 2;
    }
    else
      return; // start of a real token (a lone '/' is one as well)
  }
}
162 
U_CheckToken(u_scanner_t * s,char token)163 dbool   U_CheckToken(u_scanner_t* s, char token)
164 {
165   if(s->needNext)
166   {
167     if(!U_GetNextToken(s, FALSE))
168     return FALSE;
169   }
170 
171   // An int can also be a float.
172   if((s->nextState).token == token || ((s->nextState).token == TK_IntConst && s->token == TK_FloatConst))
173   {
174     s->needNext = TRUE;
175     U_ExpandState(s);
176     return TRUE;
177   }
178   s->needNext = FALSE;
179   return FALSE;
180 }
181 
// Promotes the pending token (s->nextState) to the current token.
// Before copying, the position just behind the token is remembered in
// logicalPosition and any following whitespace or comments are skipped.
void U_ExpandState(u_scanner_t* s)
{
  const u_parserstate_t *pending = &s->nextState;

  s->logicalPosition = s->scanPos;
  U_CheckForWhitespace(s);

  s->token             = pending->token;
  s->tokenLine         = pending->tokenLine;
  s->tokenLinePosition = pending->tokenLinePosition;
  s->number            = pending->number;
  s->decimal           = pending->decimal;
  s->boolean           = pending->boolean;
  // The current token receives its own copy of the text.
  U_SetString(&(s->string), pending->string, -1);
}
195 
// NOTE(review): this function cannot do what its name promises while it keeps
// this signature. `savedstate` is received BY VALUE, so anything written into
// it is thrown away on return and the caller's variable is never updated.
//
// The previous body was worse than useless:
//   - it leaked the two strdup() copies it made on every call,
//   - it called strdup(NULL) whenever no token text existed yet,
//   - it free()d savedstate.string / savedstate.nextState.string, which are
//     copies of pointers owned by the caller; when called with a copy of a
//     live scanner this released that scanner's token text and left dangling
//     pointers behind.
//
// It is therefore kept only so that existing callers still link, and it
// deliberately does nothing. A working snapshot needs a pointer parameter
// (u_scanner_t *savedstate), which means changing the public prototype.
void U_SaveState(u_scanner_t* s, u_scanner_t savedstate)
{
  (void) s;
  (void) savedstate;
}
207 
// NOTE(review): counterpart of U_SaveState and broken for the same reason.
// U_SaveState receives its snapshot by value, so the `savedstate` handed in
// here never contains a real snapshot (the callers in this file pass a
// zero-initialised struct).
//
// The previous body called U_SaveState(&savedstate, *s). With by-value
// parameters that restored nothing, but it did free() the string pointers
// inside the temporary copy of *s -- the live scanner's token text -- leaving
// s->string and s->nextState.string dangling for a later double free.
//
// Until the prototypes can take pointers this function deliberately leaves
// the scanner untouched, which is the only safe behaviour available.
void U_RestoreState(u_scanner_t* s, u_scanner_t savedstate)
{
  (void) s;
  (void) savedstate;
}
217 
// Reads the next token from the input into scanner->nextState.
//
//   expandState - when TRUE the token is also made the current token
//                 (see U_ExpandState) before returning.
//
// Returns TRUE when a token is available. If an earlier U_CheckToken left a
// token pending (needNext == FALSE), that token is handed out and nothing new
// is scanned. Returns FALSE, with nextState.token == TK_NoToken, at the end of
// the input or when no token text could be read.
//
// Token kinds are decided by the first character:
//   letter or '_'  identifier (lower-cased; "true"/"false" become booleans)
//   digit          integer: leading 0 selects octal, 0x/0X hexadecimal;
//                  turns into a float when a '.' or exponent follows
//   '.'            float
//   '"'            string, unescaped with U_Unescape
//   anything else  single character token, or one of the two-character
//                  operators && || << >> == != >= <=
dbool   U_GetNextToken(u_scanner_t* scanner, dbool   expandState)
{
  unsigned int start;
  unsigned int end;
  char cur;
  int integerBase            = 10;
  dbool   floatHasDecimal    = FALSE;
  dbool   floatHasExponent   = FALSE;
  dbool   stringFinished     = FALSE; // Strings are the only things that can have 0 length tokens.
  u_parserstate_t* nextState = &scanner->nextState;

  // A token is still pending from a failed U_CheckToken: reuse it.
  if(!scanner->needNext)
  {
    scanner->needNext = TRUE;
    if(expandState)
      U_ExpandState(scanner);
    return TRUE;
  }

  nextState->tokenLine = scanner->line;
  nextState->tokenLinePosition = scanner->scanPos - scanner->lineStart;
  nextState->token = TK_NoToken;
  if(scanner->scanPos >= scanner->length)
  {
    if(expandState)
      U_ExpandState(scanner);
    return FALSE;
  }

  // While start == end the token's extent is still unknown and the loop
  // further down keeps consuming characters.
  start = scanner->scanPos;
  end   = scanner->scanPos;
  cur   = scanner->data[scanner->scanPos++];

  // Determine by first character
  if(cur == '_' || (cur >= 'A' && cur <= 'Z') || (cur >= 'a' && cur <= 'z'))
    nextState->token = TK_Identifier;
  else if(cur >= '0' && cur <= '9')
  {
    if(cur == '0')
      integerBase = 8;
    nextState->token = TK_IntConst;
  }
  else if(cur == '.')
  {
    floatHasDecimal = TRUE;
    nextState->token = TK_FloatConst;
  }
  else if(cur == '"')
  {
    end = ++start; // Move the start up one character so we don't have to trim it later.
    nextState->token = TK_StringConst;
  }
  else
  {
    // Single character token; its extent is known right away.
    end = scanner->scanPos;
    nextState->token = cur;

    // Now check for operator tokens
    if(scanner->scanPos < scanner->length)
    {
      char next = scanner->data[scanner->scanPos];
      if(cur == '&' && next == '&')
        nextState->token = TK_AndAnd;
      else if(cur == '|' && next == '|')
        nextState->token = TK_OrOr;
      else if(cur == '<' && next == '<')
        nextState->token = TK_ShiftLeft;
      else if(cur == '>' && next == '>')
        nextState->token = TK_ShiftRight;
      else if(next == '=')
      {
        switch(cur)
        {
          case '=':
            nextState->token = TK_EqEq;
            break;
          case '!':
            nextState->token = TK_NotEq;
            break;
          case '>':
            nextState->token = TK_GtrEq;
            break;
          case '<':
            nextState->token = TK_LessEq;
            break;
          default:
            break;
        }
      }
      // A two-character operator was recognised: consume its second character.
      if(nextState->token != cur)
      {
        scanner->scanPos++;
        end = scanner->scanPos;
      }
    }
  }

  if(start == end)
  {
    while(scanner->scanPos < scanner->length)
    {
      cur = scanner->data[scanner->scanPos];
      switch(nextState->token)
      {
        default:
          break;
        case TK_Identifier:
          if(cur != '_' && (cur < 'A' || cur > 'Z') && (cur < 'a' || cur > 'z') && (cur < '0' || cur > '9'))
            end = scanner->scanPos;
          break;
        case TK_IntConst:
          // A '.' or an exponent marker turns the integer into a float. In a
          // hexadecimal literal 'e' is an ordinary digit, so it must not
          // trigger the promotion (previously "0x1e" became a float token).
          if(cur == '.' || (integerBase != 16 && scanner->scanPos-1 != start && cur == 'e'))
            nextState->token = TK_FloatConst;
          else if((cur == 'x' || cur == 'X') && scanner->scanPos-1 == start)
          {
            integerBase = 16;
            break;
          }
          else
          {
            switch(integerBase)
            {
              default:
                if(cur < '0' || cur > '9')
                  end = scanner->scanPos;
                break;
              case 8:
                if(cur < '0' || cur > '7')
                  end = scanner->scanPos;
                break;
              case 16:
                if((cur < '0' || cur > '9') && (cur < 'A' || cur > 'F') && (cur < 'a' || cur > 'f'))
                  end = scanner->scanPos;
                break;
            }
            break;
          }
          // fall through: the character that caused the promotion is handled
          // by the float rules below.
        case TK_FloatConst:
          if(cur < '0' || cur > '9')
          {
            if(!floatHasDecimal && cur == '.')
            {
              floatHasDecimal = TRUE;
              break;
            }
            else if(!floatHasExponent && cur == 'e')
            {
              floatHasDecimal = TRUE;
              floatHasExponent = TRUE;
              if(scanner->scanPos+1 < scanner->length)
              {
                // The exponent may start with a sign; accept it (or the first
                // digit) here so the generic digit check does not reject it.
                char next = scanner->data[scanner->scanPos+1];
                if((next < '0' || next > '9') && next != '+' && next != '-')
                  end = scanner->scanPos;
                else
                  scanner->scanPos++;
              }
              break;
            }
            end = scanner->scanPos;
          }
          break;
        case TK_StringConst:
          if(cur == '"')
          {
            stringFinished = TRUE;
            end = scanner->scanPos;
            scanner->scanPos++;
          }
          else if(cur == '\\')
            scanner->scanPos++; // Will add two since the loop automatically adds one
          break;
      }
      if(start == end && !stringFinished)
        scanner->scanPos++;
      else
        break;
    }
    // Skipping an escaped character can step one past the end when the input
    // stops right after a backslash; pull the position back into range.
    if(scanner->scanPos > scanner->length)
      scanner->scanPos = scanner->length;
    // If we reached end of input while reading, set it as the end of token
    if(scanner->scanPos == scanner->length && start == end)
      end = scanner->length;
  }

  if(end-start > 0 || stringFinished)
  {
    U_SetString(&(nextState->string), scanner->data+start, end-start);
    if(nextState->token == TK_FloatConst)
    {
      nextState->decimal = atof(nextState->string);
      nextState->number = (int) (nextState->decimal);
      nextState->boolean = (nextState->number != 0);
    }
    else if(nextState->token == TK_IntConst)
    {
      nextState->number = strtol(nextState->string, NULL, integerBase);
      nextState->decimal = nextState->number;
      nextState->boolean = (nextState->number != 0);
    }
    else if(nextState->token == TK_Identifier)
    {
      // Identifiers should be case insensitive.
      char *p = nextState->string;
      while (*p)
      {
        // <ctype.h> functions require a value representable as unsigned char.
        *p = (char) tolower((unsigned char) *p);
        p++;
      }
      // Check for a boolean constant.
      if(strcmp(nextState->string, "true") == 0)
      {
        nextState->token = TK_BoolConst;
        nextState->boolean = TRUE;
      }
      else if (strcmp(nextState->string, "false") == 0)
      {
        nextState->token = TK_BoolConst;
        nextState->boolean = FALSE;
      }
    }
    else if(nextState->token == TK_StringConst)
    {
      U_Unescape(nextState->string);
    }
    if(expandState)
      U_ExpandState(scanner);
    return TRUE;
  }

  // Nothing could be read.
  nextState->token = TK_NoToken;
  if(expandState)
    U_ExpandState(scanner);
  return FALSE;
}
452 
453 /**
454  * Skips all Tokens in current line and parses the first token on
455  * the next line.
456  */
U_GetNextLineToken(u_scanner_t * scanner)457 dbool   U_GetNextLineToken(u_scanner_t* scanner)
458 {
459   unsigned int line = scanner->line;
460   dbool   retval = FALSE;
461 
462   do retval = U_GetNextToken(scanner, TRUE);
463   while (retval && scanner->line == line);
464 
465   return retval;
466 }
467 
468 
// Reports, through U_Error, that a token of kind `token` was expected but the
// current token is something else.
//
// Values in [0, TK_NumSpecialTokens) are printed by name (U_TokenNames); other
// non-negative values are printed as the character they stand for. A negative
// current token (presumably TK_NoToken, or a high-bit byte on platforms where
// char is signed) used to index U_TokenNames out of bounds and is now
// reported as "no valid token".
void U_ErrorToken(u_scanner_t* s, int token)
{
  const int got = (int) s->token;
  const int wantNamed = (token >= 0 && token < TK_NumSpecialTokens);
  const int gotNamed = (got >= 0 && got < TK_NumSpecialTokens);
  // No token text exists before the first token is expanded; never hand a
  // NULL pointer to "%s".
  const char *text = (s->string != NULL) ? s->string : "";

  if (got < 0)
  {
    if (wantNamed)
      U_Error(s, "Expected %s but got no valid token instead.", U_TokenNames[token]);
    else
      U_Error(s, "Expected '%c' but got no valid token instead.", token);
  }
  else if (wantNamed && gotNamed)
    U_Error(s, "Expected %s but got %s '%s' instead.", U_TokenNames[token], U_TokenNames[got], text);
  else if (wantNamed)
    U_Error(s, "Expected %s but got '%c' instead.", U_TokenNames[token], got);
  else if (gotNamed)
    U_Error(s, "Expected '%c' but got %s '%s' instead.", token, U_TokenNames[got], text);
  else
    U_Error(s, "Expected '%c' but got '%c' instead.", token, got);
}
480 
// Reports, through U_Error, that the literal text `mustget` was expected but
// the current token is something else.
//
// A current token in [0, TK_NumSpecialTokens) is printed by name together
// with its text; other non-negative values are printed as a character.
// A negative current token (presumably TK_NoToken, or a high-bit byte on
// platforms where char is signed) used to index U_TokenNames out of bounds
// and is now reported as "no valid token".
void U_ErrorString(u_scanner_t* s, const char *mustget)
{
  const int got = (int) s->token;
  // Never hand a NULL pointer to "%s" when no token text exists yet.
  const char *text = (s->string != NULL) ? s->string : "";

  if (got < 0)
    U_Error(s, "Expected '%s' but got no valid token instead.", mustget);
  else if (got < TK_NumSpecialTokens)
    U_Error(s, "Expected '%s' but got %s '%s' instead.", mustget, U_TokenNames[got], text);
  else
    U_Error(s, "Expected '%s' but got '%c' instead.", mustget, got);
}
488 
// Formats a printf-style message (truncated to the size of the local buffer)
// and passes it to I_Error, prefixed with the scanner's name and the line and
// column recorded for the current token.
void U_Error(u_scanner_t* s, const char *msg, ...)
{
  char text[1024];
  va_list args;

  va_start(args, msg);
  vsnprintf(text, sizeof(text), msg, args);
  va_end(args);

  I_Error("%s:%d:%d:%s.", s->name, s->tokenLine, s->tokenLinePosition, text);
}
498 
U_MustGetToken(u_scanner_t * s,char token)499 dbool   U_MustGetToken(u_scanner_t* s, char token)
500 {
501   if(!U_CheckToken(s, token))
502   {
503     U_ExpandState(s);
504     U_ErrorToken(s, token);
505     return FALSE;
506   }
507   return TRUE;
508 }
509 
U_MustGetIdentifier(u_scanner_t * s,const char * ident)510 dbool   U_MustGetIdentifier(u_scanner_t* s, const char *ident)
511 {
512   if (!U_CheckToken(s, TK_Identifier) || strcasecmp(s->string, ident))
513   {
514     U_ErrorString(s, ident);
515     return FALSE;
516   }
517   return TRUE;
518 }
519 
520 // Convenience helpers that parse an entire number including a leading minus or plus sign
U_ScanInteger(u_scanner_t * s)521 dbool   U_ScanInteger(u_scanner_t* s)
522 {
523   dbool   neg = FALSE;
524   if (!U_GetNextToken(s, TRUE))
525   {
526     return FALSE;
527   }
528   if (s->token == '-')
529   {
530    if (!U_GetNextToken(s, TRUE))
531     {
532       return FALSE;
533     }
534     neg = TRUE;
535   }
536   else if (s->token == '+')
537   {
538    if (!U_GetNextToken(s, TRUE))
539    {
540      return FALSE;
541    }
542   }
543   if (s->token != TK_IntConst)
544   {
545     return FALSE;
546   }
547   if (neg)
548   {
549     s->number = -(s->number);
550     s->decimal = -(s->decimal);
551   }
552   return TRUE;
553 }
554 
U_ScanFloat(u_scanner_t * s)555 dbool   U_ScanFloat(u_scanner_t* s)
556 {
557   dbool   neg = FALSE;
558   if (!U_GetNextToken(s, TRUE))
559   {
560     return FALSE;
561   }
562   if (s->token == '-')
563   {
564     if (!U_GetNextToken(s, TRUE))
565     {
566       return FALSE;
567     }
568     neg = TRUE;
569   }
570   else if (s->token == '+')
571   {
572     if (!U_GetNextToken(s, TRUE))
573     {
574       return FALSE;
575     }
576   }
577   if (s->token != TK_IntConst && s->token != TK_FloatConst)
578   {
579     return FALSE;
580   }
581   if (neg)
582   {
583     s->number = -(s->number);
584     s->decimal = -(s->decimal);
585   }
586   return TRUE;
587 }
588 
U_CheckInteger(u_scanner_t * s)589 dbool   U_CheckInteger(u_scanner_t* s)
590 {
591   dbool   res;
592   u_scanner_t savedstate = {0};
593   U_SaveState(s, savedstate);
594   res = U_ScanInteger(s);
595   if (!res)
596      U_RestoreState(s, savedstate);
597   return res;
598 }
599 
U_CheckFloat(u_scanner_t * s)600 dbool   U_CheckFloat(u_scanner_t* s)
601 {
602   dbool   res;
603   u_scanner_t savedstate = {0};
604   U_SaveState(s, savedstate);
605   res = U_ScanFloat(s);
606   if (!res)
607      U_RestoreState(s, savedstate);
608   return res;
609 }
610 
U_MustGetInteger(u_scanner_t * s)611 dbool   U_MustGetInteger(u_scanner_t* s)
612 {
613   if (!U_ScanInteger(s))
614   {
615     U_ErrorToken(s, TK_IntConst);
616     return FALSE;
617   }
618   return TRUE;
619 }
620 
U_MustGetFloat(u_scanner_t * s)621 dbool   U_MustGetFloat(u_scanner_t* s)
622 {
623   if (!U_ScanFloat(s))
624   {
625     U_ErrorToken(s, TK_FloatConst);
626     return FALSE;
627   }
628   return TRUE;
629 }
630 
631 
U_HasTokensLeft(u_scanner_t * s)632 dbool   U_HasTokensLeft(u_scanner_t* s)
633 {
634   return (s->scanPos < s->length);
635 }
636 
// Replaces backslash escape sequences in `str` in place. The result is never
// longer than the input and is always NUL-terminated.
//
// Supported: \a \b \f \n \t \r \v \?, \x or \X followed by up to two hex
// digits, a backslash followed by up to three octal digits, and a backslash
// followed by a line feed (both are dropped). Any other escaped character
// stands for itself, which covers \" and \\.
//
// A single backslash at the very end of the string is dropped. Previously it
// made the read pointer step over the terminator and continue reading past
// the end of the buffer.
//
// This is taken from ZDoom's strbin function which can do a lot more than just unescaping backslashes and quotation marks.
void U_Unescape(char *str)
{
  char *p = str, c;
  unsigned int value;
  int i;

  while ((c = *p++)) {
    if (c != '\\') {
      *str++ = c;
      continue;
    }

    // p now points at the character following the backslash.
    if (*p == '\0')
      break; // lone trailing backslash: nothing left to escape

    switch (*p) {
    case 'a':
      *str++ = '\a';
      break;
    case 'b':
      *str++ = '\b';
      break;
    case 'f':
      *str++ = '\f';
      break;
    case 'n':
      *str++ = '\n';
      break;
    case 't':
      *str++ = '\t';
      break;
    case 'r':
      *str++ = '\r';
      break;
    case 'v':
      *str++ = '\v';
      break;
    case '?':
      *str++ = '\?';
      break;
    case '\n':
      // Escaped line break: emit nothing.
      break;
    case 'x':
    case 'X':
      value = 0;
      for (i = 0; i < 2; i++)
      {
        p++;
        if (*p >= '0' && *p <= '9')
          value = (value << 4) + (unsigned int)(*p - '0');
        else if (*p >= 'a' && *p <= 'f')
          value = (value << 4) + 10u + (unsigned int)(*p - 'a');
        else if (*p >= 'A' && *p <= 'F')
          value = (value << 4) + 10u + (unsigned int)(*p - 'A');
        else
        {
          // Not a hex digit: step back so it is processed normally.
          p--;
          break;
        }
      }
      *str++ = (char) value;
      break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
      value = (unsigned int)(*p - '0');
      for (i = 0; i < 2; i++)
      {
        p++;
        if (*p >= '0' && *p <= '7')
          value = (value << 3) + (unsigned int)(*p - '0');
        else
        {
          // Not an octal digit: step back so it is processed normally.
          p--;
          break;
        }
      }
      *str++ = (char) value;
      break;
    default:
      *str++ = *p;
      break;
    }
    p++; // move past the last character of the escape sequence
  }
  *str = 0;
}
726 
// Replaces the heap string held in *ptr with a NUL-terminated copy of the
// first `length` bytes of `start`.
//
//   ptr    - address of an owned string pointer; the previous buffer (if
//            any) is released.
//   start  - source text; NULL is treated as an empty string.
//   length - number of bytes to copy; a negative value (the historical
//            sentinel is -1) means "start is NUL-terminated, measure it".
//
// If the copy cannot be allocated, *ptr is left NULL.
void U_SetString(char **ptr, const char *start, int length)
{
  size_t count;
  char *copy;

  if (start == NULL)
    count = 0;
  else if (length < 0)
    count = strlen(start);
  else
    count = (size_t) length;

  // Build the copy before releasing the old buffer so that `start` may point
  // into the string currently held by *ptr.
  copy = (char*) malloc(count + 1);
  if (copy != NULL)
  {
    if (count > 0)
      memcpy(copy, start, count);
    copy[count] = '\0';
  }

  if (*ptr != NULL) free(*ptr);
  *ptr = copy;
}
736