1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2014 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  *    if any, must include the following acknowledgment:
22  *       "This product includes software developed by the
23  *        Kannel Group (http://www.kannel.org/)."
24  *    Alternately, this acknowledgment may appear in the software itself,
25  *    if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  *    endorse or promote products derived from this software without
29  *    prior written permission. For written permission, please
30  *    contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  *    nor may "Kannel" appear in their name, without prior written
34  *    permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED.  IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group.  For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  *
59  * wslexer.c
60  *
61  * Author: Markku Rossi <mtr@iki.fi>
62  *
63  * Copyright (c) 1999-2000 WAPIT OY LTD.
64  *		 All rights reserved.
65  *
66  * Lexical analyzer.
67  *
68  */
69 
70 #include "wsint.h"
71 #include "wsstree.h"
72 #include "wsgram.h"
73 
74 /********************* Types and definitions ****************************/
75 
/* A predicate to check whether the character `ch' is a decimal digit
   ('0' .. '9').  The argument may be evaluated more than once, so it
   must not have side effects. */
#define WS_IS_DECIMAL_DIGIT(ch) ('0' <= (ch) && (ch) <= '9')
79 
/* Convert the decimal digit `ch' to its numeric value.  The macro does
   no checking of its own: `ch' must already be known to be a decimal
   digit. */
#define WS_DECIMAL_TO_INT(ch) ((ch) - '0')
82 
/* A predicate to check whether the character `ch' is a non-zero
   decimal digit ('1' .. '9').  The argument may be evaluated more than
   once, so it must not have side effects. */
#define WS_IS_NON_ZERO_DIGIT(ch) ('1' <= (ch) && (ch) <= '9')
86 
/* A predicate to check whether the character `ch' is an octal digit
   ('0' .. '7').  The argument may be evaluated more than once, so it
   must not have side effects. */
#define WS_IS_OCTAL_DIGIT(ch) ('0' <= (ch) && (ch) <= '7')
89 
/* Convert the octal digit `ch' to its numeric value.  The macro does
   no checking of its own: `ch' must already be known to be an octal
   digit. */
#define WS_OCTAL_TO_INT(ch) ((ch) - '0')
92 
/* A predicate to check whether the character `ch' is a hexadecimal
   digit: '0' .. '9', 'a' .. 'f' or 'A' .. 'F'.  The argument may be
   evaluated more than once, so it must not have side effects. */
#define WS_IS_HEX_DIGIT(ch)                     \
    (('0' <= (ch) && (ch) <= '9')               \
     || ('a' <= (ch) && (ch) <= 'f')            \
     || ('A' <= (ch) && (ch) <= 'F'))
97 
/* Convert the hex digit `ch' to its numeric value (0 .. 15).  Both
   lower-case and upper-case letters are accepted.  The macro does no
   checking of its own: any character that is neither a decimal digit
   nor in 'a' .. 'f' is treated as an upper-case hex letter, so callers
   must test with WS_IS_HEX_DIGIT() first.  The argument may be
   evaluated more than once, so it must not have side effects. */
#define WS_HEX_TO_INT(ch)                       \
    ('0' <= (ch) && (ch) <= '9'                 \
     ? ((ch) - '0')                             \
     : ('a' <= (ch) && (ch) <= 'f'              \
        ? ((ch) - 'a' + 10)                     \
        : ((ch) - 'A' + 10)))
105 
/* A predicate to check whether the character `ch' can start an
   identifier: an ASCII letter ('a' .. 'z', 'A' .. 'Z') or an
   underscore.  The argument may be evaluated more than once, so it
   must not have side effects. */
#define WS_IS_IDENTIFIER_LETTER(ch)             \
    (('a' <= (ch) && (ch) <= 'z')               \
     || ('A' <= (ch) && (ch) <= 'Z')            \
     || (ch) == '_')
112 
113 /********************* Prototypes for static functions ******************/
114 
115 /* Check whether the identifier `id', `len' is a keyword.  If the
116    identifier is a keyword, the function returns WS_TRUE and sets the
117    keywords token ID to `token_return'.  Otherwise the function
118    returns WS_FALSE. */
119 static WsBool lookup_keyword(char *id, size_t len, int *token_return);
120 
121 /* Convert literal integer number, stored to the buffer `buffer', into
122    a 32 bit integer number.  The function will report possible integer
123    overflows to the compiler `compiler'.  The function modifies the
124    contents of the buffer `buffer' but it does not free it. */
125 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer);
126 
127 /* Read a floating point number from the decimal point to the buffer
128    `buffer'.  The buffer `buffer' might already contain some leading
129    digits of the number and it always contains the decimal point.  If
130    the operation is successful, the function returns WS_TRUE and it
131    returns the resulting floating point number in `result'.  Otherwise
132    the function returns WS_FALSE.  The buffer `buffer' must be
133    initialized before this function is called and it must be
134    uninitialized by the caller. */
135 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
136                                     WsFloat *result);
137 
138 /* Read a floating point number from the exponent part to the buffer
139    `buffer'.  The buffer might already contain some leading digits and
140    fields of the floating poit number.  Otherwise, the function works
141    like read_float_from_point(). */
142 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
143                                   WsFloat *result);
144 
145 /********************* Static variables *********************************/
146 
/* A helper macro which expands to two comma-separated items: the
   string literal `n' itself and its length excluding the trailing
   '\0' character.  It is meant for initializer lists, and `n' must be
   a string literal (or a char array), since the length is computed
   with sizeof. */
#define N(n) n, (sizeof(n) - 1)
150 
151 /* They keywords of the WMLScript language.  This array must be sorted
152    by the keyword names. */
153 static struct
154 {
155     char *name;
156     size_t name_len;
157     int token;
158 } keywords[] = {
159         {N("access"), tACCESS},
160         {N("agent"), tAGENT},
161         {N("break"), tBREAK},
162         {N("case"), tCASE},
163         {N("catch"), tCATCH},
164         {N("class"), tCLASS},
165         {N("const"), tCONST},
166         {N("continue"), tCONTINUE},
167         {N("debugger"), tDEBUGGER},
168         {N("default"), tDEFAULT},
169         {N("delete"), tDELETE},
170         {N("div"), tIDIV},
171         {N("do"), tDO},
172         {N("domain"), tDOMAIN},
173         {N("else"), tELSE},
174         {N("enum"), tENUM},
175         {N("equiv"), tEQUIV},
176         {N("export"), tEXPORT},
177         {N("extends"), tEXTENDS},
178         {N("extern"), tEXTERN},
179         {N("false"), tFALSE},
180         {N("finally"), tFINALLY},
181         {N("for"), tFOR},
182         {N("function"), tFUNCTION},
183         {N("header"), tHEADER},
184         {N("http"), tHTTP},
185         {N("if"), tIF},
186         {N("import"), tIMPORT},
187         {N("in"), tIN},
188         {N("invalid"), tINVALID},
189         {N("isvalid"), tISVALID},
190         {N("lib"), tLIB},
191         {N("meta"), tMETA},
192         {N("name"), tNAME},
193         {N("new"), tNEW},
194         {N("null"), tNULL},
195         {N("path"), tPATH},
196         {N("private"), tPRIVATE},
197         {N("public"), tPUBLIC},
198         {N("return"), tRETURN},
199         {N("sizeof"), tSIZEOF},
200         {N("struct"), tSTRUCT},
201         {N("super"), tSUPER},
202         {N("switch"), tSWITCH},
203         {N("this"), tTHIS},
204         {N("throw"), tTHROW},
205         {N("true"), tTRUE},
206         {N("try"), tTRY},
207         {N("typeof"), tTYPEOF},
208         {N("url"), tURL},
209         {N("use"), tUSE},
210         {N("user"), tUSER},
211         {N("var"), tVAR},
212         {N("void"), tVOID},
213         {N("while"), tWHILE},
214         {N("with"), tWITH},
215 };
216 
217 static int num_keywords = sizeof(keywords) / sizeof(keywords[0]);
218 
219 /********************* Global functions *********************************/
220 
ws_yy_lex(YYSTYPE * yylval,YYLTYPE * yylloc,void * context)221 int ws_yy_lex(YYSTYPE *yylval, YYLTYPE *yylloc, void *context)
222 {
223     WsCompiler *compiler = (WsCompiler *) context;
224     WsUInt32 ch, ch2;
225     WsBuffer buffer;
226     unsigned char *p;
227     WsBool success;
228 
229     /* Just check that we get the correct amount of arguments. */
230     gw_assert(compiler->magic == COMPILER_MAGIC);
231 
232     while (ws_stream_getc(compiler->input, &ch)) {
233         /* Save the token's line number. */
234         yylloc->first_line = compiler->linenum;
235 
236         switch (ch) {
237         case '\t': 		/* Whitespace characters. */
238         case '\v':
239         case '\f':
240         case ' ':
241             continue;
242 
243         case '\n': 		/* Line terminators. */
244         case '\r':
245             if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) {
246                 if (ch2 != '\n')
247                     ws_stream_ungetc(compiler->input, ch2);
248             }
249             compiler->linenum++;
250             continue;
251 
252         case '!': 		/* !, != */
253             if (ws_stream_getc(compiler->input, &ch2)) {
254                 if (ch2 == '=')
255                     return tNE;
256 
257                 ws_stream_ungetc(compiler->input, ch2);
258             }
259             return '!';
260 
261         case '%': 		/* %, %= */
262             if (ws_stream_getc(compiler->input, &ch2)) {
263                 if (ch2 == '=')
264                     return tREMA;
265 
266                 ws_stream_ungetc(compiler->input, ch2);
267             }
268             return '%';
269 
270         case '&': 		/* &, &&, &= */
271             if (ws_stream_getc(compiler->input, &ch2)) {
272                 if (ch2 == '&')
273                     return tAND;
274                 if (ch2 == '=')
275                     return tANDA;
276 
277                 ws_stream_ungetc(compiler->input, ch2);
278             }
279             return '&';
280 
281         case '*': 		/* *, *= */
282             if (ws_stream_getc(compiler->input, &ch2)) {
283                 if (ch2 == '=')
284                     return tMULA;
285 
286                 ws_stream_ungetc(compiler->input, ch2);
287             }
288             return '*';
289 
290         case '+': 		/* +, ++, += */
291             if (ws_stream_getc(compiler->input, &ch2)) {
292                 if (ch2 == '+')
293                     return tPLUSPLUS;
294                 if (ch2 == '=')
295                     return tADDA;
296 
297                 ws_stream_ungetc(compiler->input, ch2);
298             }
299             return '+';
300 
301         case '-': 		/* -, --, -= */
302             if (ws_stream_getc(compiler->input, &ch2)) {
303                 if (ch2 == '-')
304                     return tMINUSMINUS;
305                 if (ch2 == '=')
306                     return tSUBA;
307 
308                 ws_stream_ungetc(compiler->input, ch2);
309             }
310             return '-';
311 
312         case '.':
313             if (ws_stream_getc(compiler->input, &ch2)) {
314                 if (WS_IS_DECIMAL_DIGIT(ch2)) {
315                     /* DecimalFloatLiteral. */
316                     ws_buffer_init(&buffer);
317 
318                     if (!ws_buffer_append_space(&buffer, &p, 2)) {
319                         ws_error_memory(compiler);
320                         ws_buffer_uninit(&buffer);
321                         return EOF;
322                     }
323 
324                     p[0] = '.';
325                     p[1] = (unsigned char) ch2;
326 
327                     success = read_float_from_point(compiler, &buffer,
328                                                     &yylval->vfloat);
329                     ws_buffer_uninit(&buffer);
330 
331                     if (!success)
332                         return EOF;
333 
334                     return tFLOAT;
335                 }
336 
337                 ws_stream_ungetc(compiler->input, ch2);
338             }
339             return '.';
340 
341         case '/': 		/* /, /=, block or a single line comment */
342             if (ws_stream_getc(compiler->input, &ch2)) {
343                 if (ch2 == '*') {
344                     /* Block comment. */
345                     while (1) {
346                         if (!ws_stream_getc(compiler->input, &ch)) {
347                             ws_src_error(compiler, 0, "EOF in comment");
348                             return EOF;
349                         }
350 
351                         if (ch == '\n' || ch == '\r') {
352                             /* Line terminators. */
353                             if (ch == '\r' && ws_stream_getc(compiler->input,
354                                                              &ch2)) {
355                                 if (ch2 != '\n')
356                                     ws_stream_ungetc(compiler->input, ch2);
357                             }
358                             compiler->linenum++;
359 
360                             /* Continue reading the block comment. */
361                             continue;
362                         }
363 
364                         if (ch == '*' && ws_stream_getc(compiler->input, &ch2)) {
365                             if (ch2 == '/')
366                                 /* The end of the comment found. */
367                                 break;
368                             ws_stream_ungetc(compiler->input, ch2);
369                         }
370                     }
371                     /* Continue after the comment. */
372                     continue;
373                 }
374                 if (ch2 == '/') {
375                     /* Single line comment. */
376                     while (1) {
377                         if (!ws_stream_getc(compiler->input, &ch))
378                             /* The end of input stream reached.  We accept
379                                this as a valid comment terminator. */
380                             break;
381 
382                         if (ch == '\n' || ch == '\r') {
383                             /* Line terminators. */
384                             if (ch == '\r' && ws_stream_getc(compiler->input,
385                                                              &ch2)) {
386                                 if (ch2 != '\n')
387                                     ws_stream_ungetc(compiler->input, ch2);
388                             }
389                             /* The end of the line (and the comment)
390                                                     reached. */
391                             compiler->linenum++;
392                             break;
393                         }
394                     }
395                     /* Continue after the comment. */
396                     continue;
397                 }
398                 if (ch2 == '=')
399                     return tDIVA;
400 
401                 ws_stream_ungetc(compiler->input, ch2);
402             }
403             return '/';
404 
405         case '<': 		/* <, <<, <<=, <= */
406             if (ws_stream_getc(compiler->input, &ch2)) {
407                 if (ch2 == '<') {
408                     if (ws_stream_getc(compiler->input, &ch2)) {
409                         if (ch2 == '=')
410                             return tLSHIFTA;
411 
412                         ws_stream_ungetc(compiler->input, ch2);
413                     }
414                     return tLSHIFT;
415                 }
416                 if (ch2 == '=')
417                     return tLE;
418 
419                 ws_stream_ungetc(compiler->input, ch2);
420             }
421             return '<';
422 
423         case '=': 		/* =, == */
424             if (ws_stream_getc(compiler->input, &ch2)) {
425                 if (ch2 == '=')
426                     return tEQ;
427 
428                 ws_stream_ungetc(compiler->input, ch2);
429             }
430             return '=';
431 
432         case '>': 		/* >, >=, >>, >>=, >>>, >>>= */
433             if (ws_stream_getc(compiler->input, &ch2)) {
434                 if (ch2 == '>') {
435                     if (ws_stream_getc(compiler->input, &ch2)) {
436                         if (ch2 == '>') {
437                             if (ws_stream_getc(compiler->input, &ch2)) {
438                                 if (ch2 == '=')
439                                     return tRSZSHIFTA;
440 
441                                 ws_stream_ungetc(compiler->input, ch2);
442                             }
443                             return tRSZSHIFT;
444                         }
445                         if (ch2 == '=')
446                             return tRSSHIFTA;
447 
448                         ws_stream_ungetc(compiler->input, ch2);
449                     }
450                     return tRSSHIFT;
451                 }
452                 if (ch2 == '=')
453                     return tGE;
454 
455                 ws_stream_ungetc(compiler->input, ch2);
456             }
457             return '>';
458 
459         case '^': 		/* ^, ^= */
460             if (ws_stream_getc(compiler->input, &ch2)) {
461                 if (ch2 == '=')
462                     return tXORA;
463 
464                 ws_stream_ungetc(compiler->input, ch2);
465             }
466             return '^';
467 
468         case '|': 		/* |, |=, || */
469             if (ws_stream_getc(compiler->input, &ch2)) {
470                 if (ch2 == '=')
471                     return tORA;
472                 if (ch2 == '|')
473                     return tOR;
474 
475                 ws_stream_ungetc(compiler->input, ch2);
476             }
477             return '|';
478 
479         case '#': 		/* The simple cases. */
480         case '(':
481         case ')':
482         case ',':
483         case ':':
484         case ';':
485         case '?':
486         case '{':
487         case '}':
488         case '~':
489             return (int) ch;
490 
491         case '\'': 		/* String literals. */
492         case '"':
493             {
494                 WsUInt32 string_end_ch = ch;
495                 WsUtf8String *str = ws_utf8_alloc();
496 
497                 if (str == NULL) {
498                     ws_error_memory(compiler);
499                     return EOF;
500                 }
501 
502                 while (1) {
503                     if (!ws_stream_getc(compiler->input, &ch)) {
504 eof_in_string_literal:
505                         ws_src_error(compiler, 0, "EOF in string literal");
506                         ws_utf8_free(str);
507                         return EOF;
508                     }
509                     if (ch == string_end_ch)
510                         /* The end of string reached. */
511                         break;
512 
513                     if (ch == '\\') {
514                         /* An escape sequence. */
515                         if (!ws_stream_getc(compiler->input, &ch))
516                             goto eof_in_string_literal;
517 
518                         switch (ch) {
519                         case '\'':
520                         case '"':
521                         case '\\':
522                         case '/':
523                             /* The character as-is. */
524                             break;
525 
526                         case 'b':
527                             ch = '\b';
528                             break;
529 
530                         case 'f':
531                             ch = '\f';
532                             break;
533 
534                         case 'n':
535                             ch = '\n';
536                             break;
537 
538                         case 'r':
539                             ch = '\r';
540                             break;
541 
542                         case 't':
543                             ch = '\t';
544                             break;
545 
546                         case 'x':
547                         case 'u':
548                             {
549                                 int i, len;
550                                 int type = ch;
551 
552                                 if (ch == 'x')
553                                     len = 2;
554                                 else
555                                     len = 4;
556 
557                                 ch = 0;
558                                 for (i = 0; i < len; i++) {
559                                     if (!ws_stream_getc(compiler->input, &ch2))
560                                         goto eof_in_string_literal;
561                                     if (!WS_IS_HEX_DIGIT(ch2)) {
562                                         ws_src_error(compiler, 0,
563                                                      "malformed `\\%c' escape in "
564                                                      "string literal", (char) type);
565                                         ch = 0;
566                                         break;
567                                     }
568                                     ch *= 16;
569                                     ch += WS_HEX_TO_INT(ch2);
570                                 }
571                             }
572                             break;
573 
574                         default:
575                             if (WS_IS_OCTAL_DIGIT(ch)) {
576                                 int i;
577                                 int limit = 3;
578 
579                                 ch = WS_OCTAL_TO_INT(ch);
580                                 if (ch > 3)
581                                     limit = 2;
582 
583                                 for (i = 1; i < limit; i++) {
584                                     if (!ws_stream_getc(compiler->input, &ch2))
585                                         goto eof_in_string_literal;
586                                     if (!WS_IS_OCTAL_DIGIT(ch2)) {
587                                         ws_stream_ungetc(compiler->input, ch2);
588                                         break;
589                                     }
590 
591                                     ch *= 8;
592                                     ch += WS_OCTAL_TO_INT(ch2);
593                                 }
594                             } else {
595                                 ws_src_error(compiler, 0,
596                                              "unknown escape sequence `\\%c' in "
597                                              "string literal", (char) ch);
598                                 ch = 0;
599                             }
600                             break;
601                         }
602                         /* FALLTHROUGH */
603                     }
604 
605                     if (!ws_utf8_append_char(str, ch)) {
606                         ws_error_memory(compiler);
607                         ws_utf8_free(str);
608                         return EOF;
609                     }
610                 }
611 
612                 if (!ws_lexer_register_utf8(compiler, str)) {
613                     ws_error_memory(compiler);
614                     ws_utf8_free(str);
615                     return EOF;
616                 }
617 
618                 gw_assert(str != NULL);
619                 yylval->string = str;
620 
621                 return tSTRING;
622             }
623             break;
624 
625         default:
626             /* Identifiers, keywords and number constants. */
627 
628             if (WS_IS_IDENTIFIER_LETTER(ch)) {
629                 WsBool got;
630                 int token;
631                 unsigned char *p;
632                 unsigned char *np;
633                 size_t len = 0;
634 
635                 /* An identifier or a keyword.  We start with a 256
636                  * bytes long buffer but it is expanded dynamically if
637                  * needed.  However, 256 should be enought for most
638                  * cases since the byte-code format limits the function
639                  * names to 255 characters. */
640                 p = ws_malloc(256);
641                 if (p == NULL) {
642                     ws_error_memory(compiler);
643                     return EOF;
644                 }
645 
646                 do {
647                     /* Add one extra for the possible terminator
648                        character. */
649                     np = ws_realloc(p, len + 2);
650                     if (np == NULL) {
651                         ws_error_memory(compiler);
652                         ws_free(p);
653                         return EOF;
654                     }
655 
656                     p = np;
657 
658                     /* This is ok since the only valid identifier names
659                      * can be written in 7 bit ASCII. */
660                     p[len++] = (unsigned char) ch;
661                 } while ((got = ws_stream_getc(compiler->input, &ch))
662                          && (WS_IS_IDENTIFIER_LETTER(ch)
663                              || WS_IS_DECIMAL_DIGIT(ch)));
664 
665                 if (got)
666                     /* Put back the terminator character. */
667                     ws_stream_ungetc(compiler->input, ch);
668 
669                 /* Is it a keyword? */
670                 if (lookup_keyword((char *) p, len, &token)) {
671                     /* Yes it is... */
672                     ws_free(p);
673 
674                     /* ...except one case: `div='. */
675                     if (token == tIDIV) {
676                         if (ws_stream_getc(compiler->input, &ch)) {
677                             if (ch == '=')
678                                 return tIDIVA;
679 
680                             ws_stream_ungetc(compiler->input, ch);
681                         }
682                     }
683 
684                     /* Return the token value. */
685                     return token;
686                 }
687 
688                 /* It is a normal identifier.  Let's pad the name with a
689                           null-character.  We have already allocated space for
690                           it. */
691                 p[len] = '\0';
692 
693                 if (!ws_lexer_register_block(compiler, p)) {
694                     ws_error_memory(compiler);
695                     ws_free(p);
696                     return EOF;
697                 }
698 
699                 gw_assert(p != NULL);
700                 yylval->identifier = (char *) p;
701 
702                 return tIDENTIFIER;
703             }
704 
705             if (WS_IS_NON_ZERO_DIGIT(ch)) {
706                 /* A decimal integer literal or a decimal float
707                           literal. */
708 
709                 ws_buffer_init(&buffer);
710                 if (!ws_buffer_append_space(&buffer, &p, 1)) {
711 number_error_memory:
712                     ws_error_memory(compiler);
713                     ws_buffer_uninit(&buffer);
714                     return EOF;
715                 }
716                 p[0] = ch;
717 
718                 while (ws_stream_getc(compiler->input, &ch)) {
719                     if (WS_IS_DECIMAL_DIGIT(ch)) {
720                         if (!ws_buffer_append_space(&buffer, &p, 1))
721                             goto number_error_memory;
722                         p[0] = ch;
723                     } else if (ch == '.' || ch == 'e' || ch == 'E') {
724                         /* DecimalFloatLiteral. */
725                         if (ch == '.') {
726                             if (!ws_buffer_append_space(&buffer, &p, 1))
727                                 goto number_error_memory;
728                             p[0] = '.';
729 
730                             success = read_float_from_point(compiler, &buffer,
731                                                             &yylval->vfloat);
732                         } else {
733                             ws_stream_ungetc(compiler->input, ch);
734 
735                             success = read_float_from_exp(compiler, &buffer,
736                                                           &yylval->vfloat);
737                         }
738                         ws_buffer_uninit(&buffer);
739 
740                         if (!success)
741                             return EOF;
742 
743                         return tFLOAT;
744                     } else {
745                         ws_stream_ungetc(compiler->input, ch);
746                         break;
747                     }
748                 }
749 
750                 /* Now the buffer contains an integer number as a
751                           string.  Let's convert it to an integer number. */
752                 yylval->integer = buffer_to_int(compiler, &buffer);
753                 ws_buffer_uninit(&buffer);
754 
755                 /* Read a DecimalIntegerLiteral. */
756                 return tINTEGER;
757             }
758 
759             if (ch == '0') {
760                 /* The integer constant 0, an octal number or a
761                    HexIntegerLiteral. */
762                 if (ws_stream_getc(compiler->input, &ch2)) {
763                     if (ch2 == 'x' || ch2 == 'X') {
764                         /* HexIntegerLiteral. */
765 
766                         ws_buffer_init(&buffer);
767                         if (!ws_buffer_append_space(&buffer, &p, 2))
768                             goto number_error_memory;
769 
770                         p[0] = '0';
771                         p[1] = 'x';
772 
773                         while (ws_stream_getc(compiler->input, &ch)) {
774                             if (WS_IS_HEX_DIGIT(ch)) {
775                                 if (!ws_buffer_append_space(&buffer, &p, 1))
776                                     goto number_error_memory;
777                                 p[0] = ch;
778                             } else {
779                                 ws_stream_ungetc(compiler->input, ch);
780                                 break;
781                             }
782                         }
783 
784                         if (ws_buffer_len(&buffer) == 2) {
785                             ws_buffer_uninit(&buffer);
786                             ws_src_error(compiler, 0,
787                                          "numeric constant with no digits");
788                             yylval->integer = 0;
789                             return tINTEGER;
790                         }
791 
792                         /* Now the buffer contains an integer number as
793                          * a string.  Let's convert it to an integer
794                          * number. */
795                         yylval->integer = buffer_to_int(compiler, &buffer);
796                         ws_buffer_uninit(&buffer);
797 
798                         /* Read a HexIntegerLiteral. */
799                         return tINTEGER;
800                     }
801                     if (WS_IS_OCTAL_DIGIT(ch2)) {
802                         /* OctalIntegerLiteral. */
803 
804                         ws_buffer_init(&buffer);
805                         if (!ws_buffer_append_space(&buffer, &p, 2))
806                             goto number_error_memory;
807 
808                         p[0] = '0';
809                         p[1] = ch2;
810 
811                         while (ws_stream_getc(compiler->input, &ch)) {
812                             if (WS_IS_OCTAL_DIGIT(ch)) {
813                                 if (!ws_buffer_append_space(&buffer, &p, 1))
814                                     goto number_error_memory;
815                                 p[0] = ch;
816                             } else {
817                                 ws_stream_ungetc(compiler->input, ch);
818                                 break;
819                             }
820                         }
821 
822                         /* Convert the buffer into an intger number. */
823                         yylval->integer = buffer_to_int(compiler, &buffer);
824                         ws_buffer_uninit(&buffer);
825 
826                         /* Read an OctalIntegerLiteral. */
827                         return tINTEGER;
828                     }
829                     if (ch2 == '.' || ch2 == 'e' || ch2 == 'E') {
830                         /* DecimalFloatLiteral. */
831                         ws_buffer_init(&buffer);
832 
833                         if (ch2 == '.') {
834                             if (!ws_buffer_append_space(&buffer, &p, 1))
835                                 goto number_error_memory;
836                             p[0] = '.';
837 
838                             success = read_float_from_point(compiler, &buffer,
839                                                             &yylval->vfloat);
840                         } else {
841                             ws_stream_ungetc(compiler->input, ch);
842 
843                             success = read_float_from_exp(compiler, &buffer,
844                                                           &yylval->vfloat);
845                         }
846                         ws_buffer_uninit(&buffer);
847 
848                         if (!success)
849                             return EOF;
850 
851                         return tFLOAT;
852                     }
853 
854                     ws_stream_ungetc(compiler->input, ch2);
855                 }
856 
857                 /* Integer literal 0. */
858                 yylval->integer = 0;
859                 return tINTEGER;
860             }
861 
862             /* Garbage found from the input stream. */
863             ws_src_error(compiler, 0,
864                          "garbage found from the input stream: character=0x%x",
865                          ch);
866             return EOF;
867             break;
868         }
869     }
870 
871     return EOF;
872 }
873 
874 /********************* Static functions *********************************/
875 
lookup_keyword(char * id,size_t len,int * token_return)876 static WsBool lookup_keyword(char *id, size_t len, int *token_return)
877 {
878     int left = 0, center, right = num_keywords;
879 
880     while (left < right) {
881         size_t l;
882         int result;
883 
884         center = left + (right - left) / 2;
885 
886         l = keywords[center].name_len;
887         if (len < l)
888             l = len;
889 
890         result = memcmp(id, keywords[center].name, l);
891         if (result < 0 || (result == 0 && len < keywords[center].name_len))
892             /* The possible match is smaller. */
893             right = center;
894         else if (result > 0 || (result == 0 && len > keywords[center].name_len))
895             /* The possible match is bigger. */
896             left = center + 1;
897         else {
898             /* Found a match. */
899             *token_return = keywords[center].token;
900             return WS_TRUE;
901         }
902     }
903 
904     /* No match. */
905     return WS_FALSE;
906 }
907 
908 
buffer_to_int(WsCompilerPtr compiler,WsBuffer * buffer)909 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer)
910 {
911     unsigned char *p;
912     unsigned long value;
913 
914     /* Terminate the string. */
915     if (!ws_buffer_append_space(buffer, &p, 1)) {
916         ws_error_memory(compiler);
917         return 0;
918     }
919     p[0] = '\0';
920 
921     /* Convert the buffer into an integer number.  The base is taken
922        from the bufer. */
923     errno = 0;
924     value = strtoul((char *) ws_buffer_ptr(buffer), NULL, 0);
925 
926     /* Check for overflow.  We accept WS_INT32_MAX + 1 because we might
927      * be parsing the numeric part of '-2147483648'. */
928     if (errno == ERANGE || value > (WsUInt32) WS_INT32_MAX + 1)
929         ws_src_error(compiler, 0, "integer literal too large");
930 
931     /* All done. */
932     return (WsUInt32) value;
933 }
934 
935 
read_float_from_point(WsCompiler * compiler,WsBuffer * buffer,WsFloat * result)936 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
937                                     WsFloat *result)
938 {
939     WsUInt32 ch;
940     unsigned char *p;
941 
942     while (ws_stream_getc(compiler->input, &ch)) {
943         if (WS_IS_DECIMAL_DIGIT(ch)) {
944             if (!ws_buffer_append_space(buffer, &p, 1)) {
945                 ws_error_memory(compiler);
946                 return WS_FALSE;
947             }
948             p[0] = (unsigned char) ch;
949         } else {
950             ws_stream_ungetc(compiler->input, ch);
951             break;
952         }
953     }
954 
955     return read_float_from_exp(compiler, buffer, result);
956 }
957 
958 
read_float_from_exp(WsCompiler * compiler,WsBuffer * buffer,WsFloat * result)959 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
960                                   WsFloat *result)
961 {
962     WsUInt32 ch;
963     unsigned char *p;
964     int sign = '+';
965     unsigned char buf[4];
966 
967     /* Do we have an exponent part. */
968     if (!ws_stream_getc(compiler->input, &ch))
969         goto done;
970     if (ch != 'e' && ch != 'E') {
971         /* No exponent part. */
972         ws_stream_ungetc(compiler->input, ch);
973         goto done;
974     }
975 
976     /* Sign. */
977     if (!ws_stream_getc(compiler->input, &ch)) {
978         /* This is an error. */
979         ws_src_error(compiler, 0, "truncated float literal");
980         return WS_FALSE;
981     }
982     if (ch == '-')
983         sign = '-';
984     else if (ch == '+')
985         sign = '+';
986     else
987         ws_stream_ungetc(compiler->input, ch);
988 
989     /* DecimalDigits. */
990     if (!ws_stream_getc(compiler->input, &ch)) {
991         ws_src_error(compiler, 0, "truncated float literal");
992         return WS_FALSE;
993     }
994     if (!WS_IS_DECIMAL_DIGIT(ch)) {
995         ws_src_error(compiler, 0, "no decimal digits in exponent part");
996         return WS_FALSE;
997     }
998 
999     /* Append exponent part read so far. */
1000     if (!ws_buffer_append_space(buffer, &p, 2)) {
1001         ws_error_memory(compiler);
1002         return WS_FALSE;
1003     }
1004     p[0] = 'e';
1005     p[1] = sign;
1006 
1007     /* Read decimal digits. */
1008     while (WS_IS_DECIMAL_DIGIT(ch)) {
1009         if (!ws_buffer_append_space(buffer, &p, 1)) {
1010             ws_error_memory(compiler);
1011             return WS_FALSE;
1012         }
1013         p[0] = (unsigned char) ch;
1014 
1015         if (!ws_stream_getc(compiler->input, &ch))
1016             /* EOF.  This is ok. */
1017             goto done;
1018     }
1019     /* Unget the extra character. */
1020     ws_stream_ungetc(compiler->input, ch);
1021 
1022     /* FALLTHROUGH */
1023 
1024 done:
1025 
1026     if (!ws_buffer_append_space(buffer, &p, 1)) {
1027         ws_error_memory(compiler);
1028         return WS_FALSE;
1029     }
1030     p[0] = 0;
1031 
1032     /* Now the buffer contains a valid floating point number. */
1033     *result = (WsFloat) strtod((char *) ws_buffer_ptr(buffer), NULL);
1034 
1035     /* Check that the generated floating point number fits to
1036        `float32'. */
1037     if (*result == HUGE_VAL || *result == -HUGE_VAL
1038         || ws_ieee754_encode_single(*result, buf) != WS_IEEE754_OK)
1039         ws_src_error(compiler, 0, "floating point literal too large");
1040 
1041     return WS_TRUE;
1042 }
1043