1 /* ====================================================================
2 * The Kannel Software License, Version 1.0
3 *
4 * Copyright (c) 2001-2014 Kannel Group
5 * Copyright (c) 1998-2001 WapIT Ltd.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 *
20 * 3. The end-user documentation included with the redistribution,
21 * if any, must include the following acknowledgment:
22 * "This product includes software developed by the
23 * Kannel Group (http://www.kannel.org/)."
24 * Alternately, this acknowledgment may appear in the software itself,
25 * if and wherever such third-party acknowledgments normally appear.
26 *
27 * 4. The names "Kannel" and "Kannel Group" must not be used to
28 * endorse or promote products derived from this software without
29 * prior written permission. For written permission, please
30 * contact org@kannel.org.
31 *
32 * 5. Products derived from this software may not be called "Kannel",
33 * nor may "Kannel" appear in their name, without prior written
34 * permission of the Kannel Group.
35 *
36 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Kannel Group. For more information on
51 * the Kannel Group, please see <http://www.kannel.org/>.
52 *
53 * Portions of this software are based upon software originally written at
54 * WapIT Ltd., Helsinki, Finland for the Kannel project.
55 */
56
57 /*
58 *
59 * wslexer.c
60 *
61 * Author: Markku Rossi <mtr@iki.fi>
62 *
63 * Copyright (c) 1999-2000 WAPIT OY LTD.
64 * All rights reserved.
65 *
66 * Lexical analyzer.
67 *
68 */
69
70 #include "wsint.h"
71 #include "wsstree.h"
72 #include "wsgram.h"
73
74 /********************* Types and definitions ****************************/
75
/* Character classification and conversion helpers for 7 bit ASCII
   input.  All of them are macros that evaluate their argument more than
   once, so the argument must not have side effects. */

/* True if `ch' is one of the decimal digits `0'...`9'. */
#define WS_IS_DECIMAL_DIGIT(ch) ((ch) >= '0' && (ch) <= '9')

/* The numeric value of the decimal digit `ch'. */
#define WS_DECIMAL_TO_INT(ch) ((ch) - '0')

/* True if `ch' is one of the decimal digits `1'...`9'. */
#define WS_IS_NON_ZERO_DIGIT(ch) ((ch) >= '1' && (ch) <= '9')

/* True if `ch' is one of the octal digits `0'...`7'. */
#define WS_IS_OCTAL_DIGIT(ch) ((ch) >= '0' && (ch) <= '7')

/* The numeric value of the octal digit `ch'. */
#define WS_OCTAL_TO_INT(ch) WS_DECIMAL_TO_INT(ch)

/* True if `ch' is a hexadecimal digit in either letter case. */
#define WS_IS_HEX_DIGIT(ch)                     \
    (WS_IS_DECIMAL_DIGIT(ch)                    \
     || ((ch) >= 'a' && (ch) <= 'f')            \
     || ((ch) >= 'A' && (ch) <= 'F'))

/* The numeric value of the hexadecimal digit `ch'.  The argument is
   expected to satisfy WS_IS_HEX_DIGIT(); anything that is neither a
   decimal digit nor a lower-case `a'...`f' is treated as an upper-case
   hex letter. */
#define WS_HEX_TO_INT(ch)                       \
    (((ch) >= 'a' && (ch) <= 'f')               \
     ? ((ch) - 'a' + 10)                        \
     : (WS_IS_DECIMAL_DIGIT(ch)                 \
        ? ((ch) - '0')                          \
        : (ch) - 'A' + 10))

/* True if `ch' may start an identifier: an ASCII letter or the
   underscore. */
#define WS_IS_IDENTIFIER_LETTER(ch)             \
    ((ch) == '_'                                \
     || ((ch) >= 'A' && (ch) <= 'Z')            \
     || ((ch) >= 'a' && (ch) <= 'z'))
112
113 /********************* Prototypes for static functions ******************/
114
115 /* Check whether the identifier `id', `len' is a keyword. If the
116 identifier is a keyword, the function returns WS_TRUE and sets the
117 keywords token ID to `token_return'. Otherwise the function
118 returns WS_FALSE. */
119 static WsBool lookup_keyword(char *id, size_t len, int *token_return);
120
121 /* Convert literal integer number, stored to the buffer `buffer', into
122 a 32 bit integer number. The function will report possible integer
123 overflows to the compiler `compiler'. The function modifies the
124 contents of the buffer `buffer' but it does not free it. */
125 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer);
126
127 /* Read a floating point number from the decimal point to the buffer
128 `buffer'. The buffer `buffer' might already contain some leading
129 digits of the number and it always contains the decimal point. If
130 the operation is successful, the function returns WS_TRUE and it
131 returns the resulting floating point number in `result'. Otherwise
132 the function returns WS_FALSE. The buffer `buffer' must be
133 initialized before this function is called and it must be
134 uninitialized by the caller. */
135 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
136 WsFloat *result);
137
138 /* Read a floating point number from the exponent part to the buffer
139 `buffer'. The buffer might already contain some leading digits and
140 fields of the floating poit number. Otherwise, the function works
141 like read_float_from_point(). */
142 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
143 WsFloat *result);
144
145 /********************* Static variables *********************************/
146
147 /* A helper macro which expands to a strings and its length excluding
148 the trailing '\0' character. */
149 #define N(n) n, sizeof(n) - 1
150
151 /* They keywords of the WMLScript language. This array must be sorted
152 by the keyword names. */
153 static struct
154 {
155 char *name;
156 size_t name_len;
157 int token;
158 } keywords[] = {
159 {N("access"), tACCESS},
160 {N("agent"), tAGENT},
161 {N("break"), tBREAK},
162 {N("case"), tCASE},
163 {N("catch"), tCATCH},
164 {N("class"), tCLASS},
165 {N("const"), tCONST},
166 {N("continue"), tCONTINUE},
167 {N("debugger"), tDEBUGGER},
168 {N("default"), tDEFAULT},
169 {N("delete"), tDELETE},
170 {N("div"), tIDIV},
171 {N("do"), tDO},
172 {N("domain"), tDOMAIN},
173 {N("else"), tELSE},
174 {N("enum"), tENUM},
175 {N("equiv"), tEQUIV},
176 {N("export"), tEXPORT},
177 {N("extends"), tEXTENDS},
178 {N("extern"), tEXTERN},
179 {N("false"), tFALSE},
180 {N("finally"), tFINALLY},
181 {N("for"), tFOR},
182 {N("function"), tFUNCTION},
183 {N("header"), tHEADER},
184 {N("http"), tHTTP},
185 {N("if"), tIF},
186 {N("import"), tIMPORT},
187 {N("in"), tIN},
188 {N("invalid"), tINVALID},
189 {N("isvalid"), tISVALID},
190 {N("lib"), tLIB},
191 {N("meta"), tMETA},
192 {N("name"), tNAME},
193 {N("new"), tNEW},
194 {N("null"), tNULL},
195 {N("path"), tPATH},
196 {N("private"), tPRIVATE},
197 {N("public"), tPUBLIC},
198 {N("return"), tRETURN},
199 {N("sizeof"), tSIZEOF},
200 {N("struct"), tSTRUCT},
201 {N("super"), tSUPER},
202 {N("switch"), tSWITCH},
203 {N("this"), tTHIS},
204 {N("throw"), tTHROW},
205 {N("true"), tTRUE},
206 {N("try"), tTRY},
207 {N("typeof"), tTYPEOF},
208 {N("url"), tURL},
209 {N("use"), tUSE},
210 {N("user"), tUSER},
211 {N("var"), tVAR},
212 {N("void"), tVOID},
213 {N("while"), tWHILE},
214 {N("with"), tWITH},
215 };
216
217 static int num_keywords = sizeof(keywords) / sizeof(keywords[0]);
218
219 /********************* Global functions *********************************/
220
ws_yy_lex(YYSTYPE * yylval,YYLTYPE * yylloc,void * context)221 int ws_yy_lex(YYSTYPE *yylval, YYLTYPE *yylloc, void *context)
222 {
223 WsCompiler *compiler = (WsCompiler *) context;
224 WsUInt32 ch, ch2;
225 WsBuffer buffer;
226 unsigned char *p;
227 WsBool success;
228
229 /* Just check that we get the correct amount of arguments. */
230 gw_assert(compiler->magic == COMPILER_MAGIC);
231
232 while (ws_stream_getc(compiler->input, &ch)) {
233 /* Save the token's line number. */
234 yylloc->first_line = compiler->linenum;
235
236 switch (ch) {
237 case '\t': /* Whitespace characters. */
238 case '\v':
239 case '\f':
240 case ' ':
241 continue;
242
243 case '\n': /* Line terminators. */
244 case '\r':
245 if (ch == '\r' && ws_stream_getc(compiler->input, &ch2)) {
246 if (ch2 != '\n')
247 ws_stream_ungetc(compiler->input, ch2);
248 }
249 compiler->linenum++;
250 continue;
251
252 case '!': /* !, != */
253 if (ws_stream_getc(compiler->input, &ch2)) {
254 if (ch2 == '=')
255 return tNE;
256
257 ws_stream_ungetc(compiler->input, ch2);
258 }
259 return '!';
260
261 case '%': /* %, %= */
262 if (ws_stream_getc(compiler->input, &ch2)) {
263 if (ch2 == '=')
264 return tREMA;
265
266 ws_stream_ungetc(compiler->input, ch2);
267 }
268 return '%';
269
270 case '&': /* &, &&, &= */
271 if (ws_stream_getc(compiler->input, &ch2)) {
272 if (ch2 == '&')
273 return tAND;
274 if (ch2 == '=')
275 return tANDA;
276
277 ws_stream_ungetc(compiler->input, ch2);
278 }
279 return '&';
280
281 case '*': /* *, *= */
282 if (ws_stream_getc(compiler->input, &ch2)) {
283 if (ch2 == '=')
284 return tMULA;
285
286 ws_stream_ungetc(compiler->input, ch2);
287 }
288 return '*';
289
290 case '+': /* +, ++, += */
291 if (ws_stream_getc(compiler->input, &ch2)) {
292 if (ch2 == '+')
293 return tPLUSPLUS;
294 if (ch2 == '=')
295 return tADDA;
296
297 ws_stream_ungetc(compiler->input, ch2);
298 }
299 return '+';
300
301 case '-': /* -, --, -= */
302 if (ws_stream_getc(compiler->input, &ch2)) {
303 if (ch2 == '-')
304 return tMINUSMINUS;
305 if (ch2 == '=')
306 return tSUBA;
307
308 ws_stream_ungetc(compiler->input, ch2);
309 }
310 return '-';
311
312 case '.':
313 if (ws_stream_getc(compiler->input, &ch2)) {
314 if (WS_IS_DECIMAL_DIGIT(ch2)) {
315 /* DecimalFloatLiteral. */
316 ws_buffer_init(&buffer);
317
318 if (!ws_buffer_append_space(&buffer, &p, 2)) {
319 ws_error_memory(compiler);
320 ws_buffer_uninit(&buffer);
321 return EOF;
322 }
323
324 p[0] = '.';
325 p[1] = (unsigned char) ch2;
326
327 success = read_float_from_point(compiler, &buffer,
328 &yylval->vfloat);
329 ws_buffer_uninit(&buffer);
330
331 if (!success)
332 return EOF;
333
334 return tFLOAT;
335 }
336
337 ws_stream_ungetc(compiler->input, ch2);
338 }
339 return '.';
340
341 case '/': /* /, /=, block or a single line comment */
342 if (ws_stream_getc(compiler->input, &ch2)) {
343 if (ch2 == '*') {
344 /* Block comment. */
345 while (1) {
346 if (!ws_stream_getc(compiler->input, &ch)) {
347 ws_src_error(compiler, 0, "EOF in comment");
348 return EOF;
349 }
350
351 if (ch == '\n' || ch == '\r') {
352 /* Line terminators. */
353 if (ch == '\r' && ws_stream_getc(compiler->input,
354 &ch2)) {
355 if (ch2 != '\n')
356 ws_stream_ungetc(compiler->input, ch2);
357 }
358 compiler->linenum++;
359
360 /* Continue reading the block comment. */
361 continue;
362 }
363
364 if (ch == '*' && ws_stream_getc(compiler->input, &ch2)) {
365 if (ch2 == '/')
366 /* The end of the comment found. */
367 break;
368 ws_stream_ungetc(compiler->input, ch2);
369 }
370 }
371 /* Continue after the comment. */
372 continue;
373 }
374 if (ch2 == '/') {
375 /* Single line comment. */
376 while (1) {
377 if (!ws_stream_getc(compiler->input, &ch))
378 /* The end of input stream reached. We accept
379 this as a valid comment terminator. */
380 break;
381
382 if (ch == '\n' || ch == '\r') {
383 /* Line terminators. */
384 if (ch == '\r' && ws_stream_getc(compiler->input,
385 &ch2)) {
386 if (ch2 != '\n')
387 ws_stream_ungetc(compiler->input, ch2);
388 }
389 /* The end of the line (and the comment)
390 reached. */
391 compiler->linenum++;
392 break;
393 }
394 }
395 /* Continue after the comment. */
396 continue;
397 }
398 if (ch2 == '=')
399 return tDIVA;
400
401 ws_stream_ungetc(compiler->input, ch2);
402 }
403 return '/';
404
405 case '<': /* <, <<, <<=, <= */
406 if (ws_stream_getc(compiler->input, &ch2)) {
407 if (ch2 == '<') {
408 if (ws_stream_getc(compiler->input, &ch2)) {
409 if (ch2 == '=')
410 return tLSHIFTA;
411
412 ws_stream_ungetc(compiler->input, ch2);
413 }
414 return tLSHIFT;
415 }
416 if (ch2 == '=')
417 return tLE;
418
419 ws_stream_ungetc(compiler->input, ch2);
420 }
421 return '<';
422
423 case '=': /* =, == */
424 if (ws_stream_getc(compiler->input, &ch2)) {
425 if (ch2 == '=')
426 return tEQ;
427
428 ws_stream_ungetc(compiler->input, ch2);
429 }
430 return '=';
431
432 case '>': /* >, >=, >>, >>=, >>>, >>>= */
433 if (ws_stream_getc(compiler->input, &ch2)) {
434 if (ch2 == '>') {
435 if (ws_stream_getc(compiler->input, &ch2)) {
436 if (ch2 == '>') {
437 if (ws_stream_getc(compiler->input, &ch2)) {
438 if (ch2 == '=')
439 return tRSZSHIFTA;
440
441 ws_stream_ungetc(compiler->input, ch2);
442 }
443 return tRSZSHIFT;
444 }
445 if (ch2 == '=')
446 return tRSSHIFTA;
447
448 ws_stream_ungetc(compiler->input, ch2);
449 }
450 return tRSSHIFT;
451 }
452 if (ch2 == '=')
453 return tGE;
454
455 ws_stream_ungetc(compiler->input, ch2);
456 }
457 return '>';
458
459 case '^': /* ^, ^= */
460 if (ws_stream_getc(compiler->input, &ch2)) {
461 if (ch2 == '=')
462 return tXORA;
463
464 ws_stream_ungetc(compiler->input, ch2);
465 }
466 return '^';
467
468 case '|': /* |, |=, || */
469 if (ws_stream_getc(compiler->input, &ch2)) {
470 if (ch2 == '=')
471 return tORA;
472 if (ch2 == '|')
473 return tOR;
474
475 ws_stream_ungetc(compiler->input, ch2);
476 }
477 return '|';
478
479 case '#': /* The simple cases. */
480 case '(':
481 case ')':
482 case ',':
483 case ':':
484 case ';':
485 case '?':
486 case '{':
487 case '}':
488 case '~':
489 return (int) ch;
490
491 case '\'': /* String literals. */
492 case '"':
493 {
494 WsUInt32 string_end_ch = ch;
495 WsUtf8String *str = ws_utf8_alloc();
496
497 if (str == NULL) {
498 ws_error_memory(compiler);
499 return EOF;
500 }
501
502 while (1) {
503 if (!ws_stream_getc(compiler->input, &ch)) {
504 eof_in_string_literal:
505 ws_src_error(compiler, 0, "EOF in string literal");
506 ws_utf8_free(str);
507 return EOF;
508 }
509 if (ch == string_end_ch)
510 /* The end of string reached. */
511 break;
512
513 if (ch == '\\') {
514 /* An escape sequence. */
515 if (!ws_stream_getc(compiler->input, &ch))
516 goto eof_in_string_literal;
517
518 switch (ch) {
519 case '\'':
520 case '"':
521 case '\\':
522 case '/':
523 /* The character as-is. */
524 break;
525
526 case 'b':
527 ch = '\b';
528 break;
529
530 case 'f':
531 ch = '\f';
532 break;
533
534 case 'n':
535 ch = '\n';
536 break;
537
538 case 'r':
539 ch = '\r';
540 break;
541
542 case 't':
543 ch = '\t';
544 break;
545
546 case 'x':
547 case 'u':
548 {
549 int i, len;
550 int type = ch;
551
552 if (ch == 'x')
553 len = 2;
554 else
555 len = 4;
556
557 ch = 0;
558 for (i = 0; i < len; i++) {
559 if (!ws_stream_getc(compiler->input, &ch2))
560 goto eof_in_string_literal;
561 if (!WS_IS_HEX_DIGIT(ch2)) {
562 ws_src_error(compiler, 0,
563 "malformed `\\%c' escape in "
564 "string literal", (char) type);
565 ch = 0;
566 break;
567 }
568 ch *= 16;
569 ch += WS_HEX_TO_INT(ch2);
570 }
571 }
572 break;
573
574 default:
575 if (WS_IS_OCTAL_DIGIT(ch)) {
576 int i;
577 int limit = 3;
578
579 ch = WS_OCTAL_TO_INT(ch);
580 if (ch > 3)
581 limit = 2;
582
583 for (i = 1; i < limit; i++) {
584 if (!ws_stream_getc(compiler->input, &ch2))
585 goto eof_in_string_literal;
586 if (!WS_IS_OCTAL_DIGIT(ch2)) {
587 ws_stream_ungetc(compiler->input, ch2);
588 break;
589 }
590
591 ch *= 8;
592 ch += WS_OCTAL_TO_INT(ch2);
593 }
594 } else {
595 ws_src_error(compiler, 0,
596 "unknown escape sequence `\\%c' in "
597 "string literal", (char) ch);
598 ch = 0;
599 }
600 break;
601 }
602 /* FALLTHROUGH */
603 }
604
605 if (!ws_utf8_append_char(str, ch)) {
606 ws_error_memory(compiler);
607 ws_utf8_free(str);
608 return EOF;
609 }
610 }
611
612 if (!ws_lexer_register_utf8(compiler, str)) {
613 ws_error_memory(compiler);
614 ws_utf8_free(str);
615 return EOF;
616 }
617
618 gw_assert(str != NULL);
619 yylval->string = str;
620
621 return tSTRING;
622 }
623 break;
624
625 default:
626 /* Identifiers, keywords and number constants. */
627
628 if (WS_IS_IDENTIFIER_LETTER(ch)) {
629 WsBool got;
630 int token;
631 unsigned char *p;
632 unsigned char *np;
633 size_t len = 0;
634
635 /* An identifier or a keyword. We start with a 256
636 * bytes long buffer but it is expanded dynamically if
637 * needed. However, 256 should be enought for most
638 * cases since the byte-code format limits the function
639 * names to 255 characters. */
640 p = ws_malloc(256);
641 if (p == NULL) {
642 ws_error_memory(compiler);
643 return EOF;
644 }
645
646 do {
647 /* Add one extra for the possible terminator
648 character. */
649 np = ws_realloc(p, len + 2);
650 if (np == NULL) {
651 ws_error_memory(compiler);
652 ws_free(p);
653 return EOF;
654 }
655
656 p = np;
657
658 /* This is ok since the only valid identifier names
659 * can be written in 7 bit ASCII. */
660 p[len++] = (unsigned char) ch;
661 } while ((got = ws_stream_getc(compiler->input, &ch))
662 && (WS_IS_IDENTIFIER_LETTER(ch)
663 || WS_IS_DECIMAL_DIGIT(ch)));
664
665 if (got)
666 /* Put back the terminator character. */
667 ws_stream_ungetc(compiler->input, ch);
668
669 /* Is it a keyword? */
670 if (lookup_keyword((char *) p, len, &token)) {
671 /* Yes it is... */
672 ws_free(p);
673
674 /* ...except one case: `div='. */
675 if (token == tIDIV) {
676 if (ws_stream_getc(compiler->input, &ch)) {
677 if (ch == '=')
678 return tIDIVA;
679
680 ws_stream_ungetc(compiler->input, ch);
681 }
682 }
683
684 /* Return the token value. */
685 return token;
686 }
687
688 /* It is a normal identifier. Let's pad the name with a
689 null-character. We have already allocated space for
690 it. */
691 p[len] = '\0';
692
693 if (!ws_lexer_register_block(compiler, p)) {
694 ws_error_memory(compiler);
695 ws_free(p);
696 return EOF;
697 }
698
699 gw_assert(p != NULL);
700 yylval->identifier = (char *) p;
701
702 return tIDENTIFIER;
703 }
704
705 if (WS_IS_NON_ZERO_DIGIT(ch)) {
706 /* A decimal integer literal or a decimal float
707 literal. */
708
709 ws_buffer_init(&buffer);
710 if (!ws_buffer_append_space(&buffer, &p, 1)) {
711 number_error_memory:
712 ws_error_memory(compiler);
713 ws_buffer_uninit(&buffer);
714 return EOF;
715 }
716 p[0] = ch;
717
718 while (ws_stream_getc(compiler->input, &ch)) {
719 if (WS_IS_DECIMAL_DIGIT(ch)) {
720 if (!ws_buffer_append_space(&buffer, &p, 1))
721 goto number_error_memory;
722 p[0] = ch;
723 } else if (ch == '.' || ch == 'e' || ch == 'E') {
724 /* DecimalFloatLiteral. */
725 if (ch == '.') {
726 if (!ws_buffer_append_space(&buffer, &p, 1))
727 goto number_error_memory;
728 p[0] = '.';
729
730 success = read_float_from_point(compiler, &buffer,
731 &yylval->vfloat);
732 } else {
733 ws_stream_ungetc(compiler->input, ch);
734
735 success = read_float_from_exp(compiler, &buffer,
736 &yylval->vfloat);
737 }
738 ws_buffer_uninit(&buffer);
739
740 if (!success)
741 return EOF;
742
743 return tFLOAT;
744 } else {
745 ws_stream_ungetc(compiler->input, ch);
746 break;
747 }
748 }
749
750 /* Now the buffer contains an integer number as a
751 string. Let's convert it to an integer number. */
752 yylval->integer = buffer_to_int(compiler, &buffer);
753 ws_buffer_uninit(&buffer);
754
755 /* Read a DecimalIntegerLiteral. */
756 return tINTEGER;
757 }
758
759 if (ch == '0') {
760 /* The integer constant 0, an octal number or a
761 HexIntegerLiteral. */
762 if (ws_stream_getc(compiler->input, &ch2)) {
763 if (ch2 == 'x' || ch2 == 'X') {
764 /* HexIntegerLiteral. */
765
766 ws_buffer_init(&buffer);
767 if (!ws_buffer_append_space(&buffer, &p, 2))
768 goto number_error_memory;
769
770 p[0] = '0';
771 p[1] = 'x';
772
773 while (ws_stream_getc(compiler->input, &ch)) {
774 if (WS_IS_HEX_DIGIT(ch)) {
775 if (!ws_buffer_append_space(&buffer, &p, 1))
776 goto number_error_memory;
777 p[0] = ch;
778 } else {
779 ws_stream_ungetc(compiler->input, ch);
780 break;
781 }
782 }
783
784 if (ws_buffer_len(&buffer) == 2) {
785 ws_buffer_uninit(&buffer);
786 ws_src_error(compiler, 0,
787 "numeric constant with no digits");
788 yylval->integer = 0;
789 return tINTEGER;
790 }
791
792 /* Now the buffer contains an integer number as
793 * a string. Let's convert it to an integer
794 * number. */
795 yylval->integer = buffer_to_int(compiler, &buffer);
796 ws_buffer_uninit(&buffer);
797
798 /* Read a HexIntegerLiteral. */
799 return tINTEGER;
800 }
801 if (WS_IS_OCTAL_DIGIT(ch2)) {
802 /* OctalIntegerLiteral. */
803
804 ws_buffer_init(&buffer);
805 if (!ws_buffer_append_space(&buffer, &p, 2))
806 goto number_error_memory;
807
808 p[0] = '0';
809 p[1] = ch2;
810
811 while (ws_stream_getc(compiler->input, &ch)) {
812 if (WS_IS_OCTAL_DIGIT(ch)) {
813 if (!ws_buffer_append_space(&buffer, &p, 1))
814 goto number_error_memory;
815 p[0] = ch;
816 } else {
817 ws_stream_ungetc(compiler->input, ch);
818 break;
819 }
820 }
821
822 /* Convert the buffer into an intger number. */
823 yylval->integer = buffer_to_int(compiler, &buffer);
824 ws_buffer_uninit(&buffer);
825
826 /* Read an OctalIntegerLiteral. */
827 return tINTEGER;
828 }
829 if (ch2 == '.' || ch2 == 'e' || ch2 == 'E') {
830 /* DecimalFloatLiteral. */
831 ws_buffer_init(&buffer);
832
833 if (ch2 == '.') {
834 if (!ws_buffer_append_space(&buffer, &p, 1))
835 goto number_error_memory;
836 p[0] = '.';
837
838 success = read_float_from_point(compiler, &buffer,
839 &yylval->vfloat);
840 } else {
841 ws_stream_ungetc(compiler->input, ch);
842
843 success = read_float_from_exp(compiler, &buffer,
844 &yylval->vfloat);
845 }
846 ws_buffer_uninit(&buffer);
847
848 if (!success)
849 return EOF;
850
851 return tFLOAT;
852 }
853
854 ws_stream_ungetc(compiler->input, ch2);
855 }
856
857 /* Integer literal 0. */
858 yylval->integer = 0;
859 return tINTEGER;
860 }
861
862 /* Garbage found from the input stream. */
863 ws_src_error(compiler, 0,
864 "garbage found from the input stream: character=0x%x",
865 ch);
866 return EOF;
867 break;
868 }
869 }
870
871 return EOF;
872 }
873
874 /********************* Static functions *********************************/
875
lookup_keyword(char * id,size_t len,int * token_return)876 static WsBool lookup_keyword(char *id, size_t len, int *token_return)
877 {
878 int left = 0, center, right = num_keywords;
879
880 while (left < right) {
881 size_t l;
882 int result;
883
884 center = left + (right - left) / 2;
885
886 l = keywords[center].name_len;
887 if (len < l)
888 l = len;
889
890 result = memcmp(id, keywords[center].name, l);
891 if (result < 0 || (result == 0 && len < keywords[center].name_len))
892 /* The possible match is smaller. */
893 right = center;
894 else if (result > 0 || (result == 0 && len > keywords[center].name_len))
895 /* The possible match is bigger. */
896 left = center + 1;
897 else {
898 /* Found a match. */
899 *token_return = keywords[center].token;
900 return WS_TRUE;
901 }
902 }
903
904 /* No match. */
905 return WS_FALSE;
906 }
907
908
buffer_to_int(WsCompilerPtr compiler,WsBuffer * buffer)909 static WsUInt32 buffer_to_int(WsCompilerPtr compiler, WsBuffer *buffer)
910 {
911 unsigned char *p;
912 unsigned long value;
913
914 /* Terminate the string. */
915 if (!ws_buffer_append_space(buffer, &p, 1)) {
916 ws_error_memory(compiler);
917 return 0;
918 }
919 p[0] = '\0';
920
921 /* Convert the buffer into an integer number. The base is taken
922 from the bufer. */
923 errno = 0;
924 value = strtoul((char *) ws_buffer_ptr(buffer), NULL, 0);
925
926 /* Check for overflow. We accept WS_INT32_MAX + 1 because we might
927 * be parsing the numeric part of '-2147483648'. */
928 if (errno == ERANGE || value > (WsUInt32) WS_INT32_MAX + 1)
929 ws_src_error(compiler, 0, "integer literal too large");
930
931 /* All done. */
932 return (WsUInt32) value;
933 }
934
935
read_float_from_point(WsCompiler * compiler,WsBuffer * buffer,WsFloat * result)936 static WsBool read_float_from_point(WsCompiler *compiler, WsBuffer *buffer,
937 WsFloat *result)
938 {
939 WsUInt32 ch;
940 unsigned char *p;
941
942 while (ws_stream_getc(compiler->input, &ch)) {
943 if (WS_IS_DECIMAL_DIGIT(ch)) {
944 if (!ws_buffer_append_space(buffer, &p, 1)) {
945 ws_error_memory(compiler);
946 return WS_FALSE;
947 }
948 p[0] = (unsigned char) ch;
949 } else {
950 ws_stream_ungetc(compiler->input, ch);
951 break;
952 }
953 }
954
955 return read_float_from_exp(compiler, buffer, result);
956 }
957
958
read_float_from_exp(WsCompiler * compiler,WsBuffer * buffer,WsFloat * result)959 static WsBool read_float_from_exp(WsCompiler *compiler, WsBuffer *buffer,
960 WsFloat *result)
961 {
962 WsUInt32 ch;
963 unsigned char *p;
964 int sign = '+';
965 unsigned char buf[4];
966
967 /* Do we have an exponent part. */
968 if (!ws_stream_getc(compiler->input, &ch))
969 goto done;
970 if (ch != 'e' && ch != 'E') {
971 /* No exponent part. */
972 ws_stream_ungetc(compiler->input, ch);
973 goto done;
974 }
975
976 /* Sign. */
977 if (!ws_stream_getc(compiler->input, &ch)) {
978 /* This is an error. */
979 ws_src_error(compiler, 0, "truncated float literal");
980 return WS_FALSE;
981 }
982 if (ch == '-')
983 sign = '-';
984 else if (ch == '+')
985 sign = '+';
986 else
987 ws_stream_ungetc(compiler->input, ch);
988
989 /* DecimalDigits. */
990 if (!ws_stream_getc(compiler->input, &ch)) {
991 ws_src_error(compiler, 0, "truncated float literal");
992 return WS_FALSE;
993 }
994 if (!WS_IS_DECIMAL_DIGIT(ch)) {
995 ws_src_error(compiler, 0, "no decimal digits in exponent part");
996 return WS_FALSE;
997 }
998
999 /* Append exponent part read so far. */
1000 if (!ws_buffer_append_space(buffer, &p, 2)) {
1001 ws_error_memory(compiler);
1002 return WS_FALSE;
1003 }
1004 p[0] = 'e';
1005 p[1] = sign;
1006
1007 /* Read decimal digits. */
1008 while (WS_IS_DECIMAL_DIGIT(ch)) {
1009 if (!ws_buffer_append_space(buffer, &p, 1)) {
1010 ws_error_memory(compiler);
1011 return WS_FALSE;
1012 }
1013 p[0] = (unsigned char) ch;
1014
1015 if (!ws_stream_getc(compiler->input, &ch))
1016 /* EOF. This is ok. */
1017 goto done;
1018 }
1019 /* Unget the extra character. */
1020 ws_stream_ungetc(compiler->input, ch);
1021
1022 /* FALLTHROUGH */
1023
1024 done:
1025
1026 if (!ws_buffer_append_space(buffer, &p, 1)) {
1027 ws_error_memory(compiler);
1028 return WS_FALSE;
1029 }
1030 p[0] = 0;
1031
1032 /* Now the buffer contains a valid floating point number. */
1033 *result = (WsFloat) strtod((char *) ws_buffer_ptr(buffer), NULL);
1034
1035 /* Check that the generated floating point number fits to
1036 `float32'. */
1037 if (*result == HUGE_VAL || *result == -HUGE_VAL
1038 || ws_ieee754_encode_single(*result, buf) != WS_IEEE754_OK)
1039 ws_src_error(compiler, 0, "floating point literal too large");
1040
1041 return WS_TRUE;
1042 }
1043