1 /*
2 ** $Id: llex.c,v 2.20.1.2 2009/11/23 14:58:22 roberto Exp $
3 ** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6 
7 
8 #include <ctype.h>
9 #include <locale.h>
10 #include <string.h>
11 
12 #define llex_c
13 #define LUA_CORE
14 
15 #include "lua.h"
16 
17 #include "ldo.h"
18 #include "llex.h"
19 #include "lobject.h"
20 #include "lparser.h"
21 #include "lstate.h"
22 #include "lstring.h"
23 #include "ltable.h"
24 #include "lzio.h"
25 
26 
27 
/*
** Advance the lexer by one character: fetch the next character from the
** input stream `ls->z' into `ls->current'. The macro argument is
** parenthesized so that any pointer expression can be passed safely.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))


/* true when the current character is a line terminator (`\n' or `\r') */
#define currIsNewline(ls)	((ls)->current == '\n' || (ls)->current == '\r')
34 
35 
/*
** Printable spellings of the tokens (ORDER RESERVED).
** `luaX_token2str' indexes this table with `token - FIRST_RESERVED', and
** `luaX_init' registers the leading NUM_RESERVED entries as reserved
** words, so the order of the entries must not change.
*/
const char *const luaX_tokens [] = {
    /* words */
    "and",
    "break",
    "do",
    "else",
    "elseif",
    "end",
    "false",
    "for",
    "function",
    "if",
    "in",
    "local",
    "nil",
    "not",
    "or",
    "repeat",
    "return",
    "then",
    "true",
    "until",
    "while",
    /* multi-character operators */
    "..",
    "...",
    "==",
    ">=",
    "<=",
    "~=",
    /* token classes */
    "<number>",
    "<name>",
    "<string>",
    "<eof>",
    NULL
};
46 
47 
/*
** Append the current character to the token buffer, then read the next
** one. The macro argument is parenthesized where it is dereferenced.
*/
#define save_and_next(ls) (save(ls, (ls)->current), next(ls))
49 
50 
/*
** Append character `c' to the lexer's token buffer. When the buffer is
** full its size is doubled first; if doubling is no longer possible a
** "lexical element too long" error is raised.
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (buf->n + 1 > buf->buffsize) {  /* no room for one more char? */
    size_t doubled;
    if (buf->buffsize >= MAX_SIZET/2)  /* cannot double any further */
      luaX_lexerror(ls, "lexical element too long", 0);
    doubled = buf->buffsize * 2;
    luaZ_resizebuffer(ls->L, buf, doubled);
  }
  buf->buffer[buf->n] = cast(char, c);
  buf->n++;
}
62 
63 
/*
** Create the strings for the first NUM_RESERVED entries of `luaX_tokens',
** fix them so they are never collected, and tag each one with its
** 1-based position in the table (`tsv.reserved').
*/
void luaX_init (lua_State *L) {
  int idx = 0;
  while (idx < NUM_RESERVED) {
    const char *word = luaX_tokens[idx];
    TString *ts = luaS_new(L, word);
    luaS_fix(ts);  /* reserved words are never collected */
    lua_assert(strlen(word)+1 <= TOKEN_LEN);
    idx++;
    ts->tsv.reserved = cast_byte(idx);  /* position + 1 marks a reserved word */
  }
}
73 
74 
/* size of the buffer that holds the chunk id shown in error messages */
#define MAXSRC          80
76 
77 
/*
** Return a printable representation of `token'. Tokens at or above
** FIRST_RESERVED are looked up in `luaX_tokens'; anything below is a
** single character, formatted as "char(N)" when it is a control
** character and as the character itself otherwise.
*/
const char *luaX_token2str (LexState *ls, int token) {
  if (token >= FIRST_RESERVED)
    return luaX_tokens[token-FIRST_RESERVED];
  /* single-char token */
  lua_assert(token == cast(unsigned char, token));
  if (iscntrl(token))
    return luaO_pushfstring(ls->L, "char(%d)", token);
  return luaO_pushfstring(ls->L, "%c", token);
}
87 
88 
/*
** Text used to describe `token' in an error message. For names, strings
** and numbers this is the raw text currently held in the token buffer
** (which gets zero-terminated here); for every other token it is the
** result of `luaX_token2str'.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING || token == TK_NUMBER) {
    save(ls, '\0');  /* terminate the buffered text */
    return luaZ_buffer(ls->buff);
  }
  return luaX_token2str(ls, token);
}
100 
101 
/*
** Build the message "<chunkid>:<line>: <msg>", extended with
** " near <token text>" when `token' is non-zero, and throw it as a
** LUA_ERRSYNTAX error.
*/
void luaX_lexerror (LexState *ls, const char *msg, int token) {
  char chunkid[MAXSRC];
  luaO_chunkid(chunkid, getstr(ls->source), MAXSRC);
  msg = luaO_pushfstring(ls->L, "%s:%d: %s", chunkid, ls->linenumber, msg);
  if (token != 0)  /* is there a token to report? */
    luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
  luaD_throw(ls->L, LUA_ERRSYNTAX);
}
110 
111 
/*
** Raise a lexical error with message `msg', reported near the current
** token (`ls->t.token').
*/
void luaX_syntaxerror (LexState *ls, const char *msg) {
  int current_token = ls->t.token;
  luaX_lexerror(ls, msg, current_token);
}
115 
116 
luaX_newstring(LexState * ls,const char * str,size_t l)117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
118   lua_State *L = ls->L;
119   TString *ts = luaS_newlstr(L, str, l);
120   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
121   if (ttisnil(o)) {
122     setbvalue(o, 1);  /* make sure `str' will not be collected */
123     luaC_checkGC(L);
124   }
125   return ts;
126 }
127 
128 
/*
** Consume one line break and bump the line counter. A two-character
** break made of two different newline characters (`\r\n' or `\n\r') is
** consumed as a single line break. Raises a syntax error when the line
** counter reaches MAX_INT.
*/
static void inclinenumber (LexState *ls) {
  int first = ls->current;
  lua_assert(currIsNewline(ls));
  next(ls);  /* skip `\n' or `\r' */
  if (currIsNewline(ls) && ls->current != first)
    next(ls);  /* skip the second half of `\n\r' or `\r\n' */
  ls->linenumber++;
  if (ls->linenumber >= MAX_INT)
    luaX_syntaxerror(ls, "chunk has too many lines");
}
138 
139 
/*
** Prepare `ls' to scan the stream `z', whose chunk name is `source':
** reset the lexer state, size the token buffer to LUA_MINBUFFER and
** read the first character of the input.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
  /* where the input comes from */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->fs = NULL;
  /* scanning state */
  ls->decpoint = '.';
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  next(ls);  /* read first char */
}
152 
153 
154 
155 /*
156 ** =======================================================
157 ** LEXICAL ANALYZER
158 ** =======================================================
159 */
160 
161 
162 
/*
** If the current character belongs to `set', save it, advance to the
** next character and return 1; otherwise leave the input untouched and
** return 0.
**
** The NUL character is rejected explicitly: `strchr' treats the
** terminating '\0' of `set' as part of the string, so without this guard
** a NUL byte embedded in the source would match every set.
*/
static int check_next (LexState *ls, const char *set) {
  if (ls->current == '\0' || !strchr(set, ls->current))
    return 0;
  save_and_next(ls);
  return 1;
}
169 
170 
/*
** Replace every occurrence of character `from' in the token buffer
** with character `to'.
*/
static void buffreplace (LexState *ls, char from, char to) {
  char *text = luaZ_buffer(ls->buff);
  size_t len = luaZ_bufflen(ls->buff);
  size_t i;
  for (i = 0; i < len; i++) {
    if (text[i] == from)
      text[i] = to;
  }
}
177 
178 
/*
** Called after a numeral failed to convert: refresh the decimal point
** from the current locale (falling back to '.'), rewrite the buffered
** numeral with it and try the conversion again. If it still fails, the
** buffer is restored to use '.' and a "malformed number" error is raised.
*/
static void trydecpoint (LexState *ls, SemInfo *seminfo) {
  struct lconv *loc = localeconv();
  char previous = ls->decpoint;
  if (loc != NULL)
    ls->decpoint = loc->decimal_point[0];
  else
    ls->decpoint = '.';
  buffreplace(ls, previous, ls->decpoint);  /* try updated decimal separator */
  if (luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
    return;  /* conversion succeeded */
  /* format error with correct decimal point: no more options */
  buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
  luaX_lexerror(ls, "malformed number", TK_NUMBER);
}
191 
192 
193 /* LUA_NUMBER */
read_numeral(LexState * ls,SemInfo * seminfo)194 static void read_numeral (LexState *ls, SemInfo *seminfo) {
195   lua_assert(isdigit(ls->current));
196   do {
197     save_and_next(ls);
198   } while (isdigit(ls->current) || ls->current == '.');
199   if (check_next(ls, "Ee"))  /* `E'? */
200     check_next(ls, "+-");  /* optional exponent sign */
201   while (isalnum(ls->current) || ls->current == '_')
202     save_and_next(ls);
203   save(ls, '\0');
204   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
205   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
206     trydecpoint(ls, seminfo); /* try to update decimal point separator */
207 }
208 
209 
/*
** Scan a long-bracket prefix: the current bracket (`[' or `]') followed
** by any number of `='. All scanned characters are saved in the buffer.
** Returns the number of `=' when the run is followed by a second bracket
** of the same kind (which is left unread); otherwise returns
** -(number of `=') - 1, a negative value.
*/
static int skip_sep (LexState *ls) {
  int level = 0;
  int bracket = ls->current;
  lua_assert(bracket == '[' || bracket == ']');
  save_and_next(ls);
  for (; ls->current == '='; level++)
    save_and_next(ls);
  if (ls->current == bracket)
    return level;
  return -level - 1;
}
221 
222 
/*
** Read the body of a long string or long comment up to its closing long
** bracket. The opening prefix has already been scanned with `skip_sep',
** so on entry the current character is the second `['; `sep' is the
** number of `=' in the delimiter.
** When `seminfo' is NULL the text is a comment: its content is not kept.
** Otherwise the string between the delimiters is stored in `seminfo->ts'.
** Line breaks are stored as a single '\n', and a line break right after
** the opening bracket is skipped. Reaching the end of the input raises
** an "unfinished long string/comment" error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;  /* nesting counter, only used under LUA_COMPAT_LSTR */
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      /* compatibility mode: track nested opening brackets of the same level */
      case '[': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        if (skip_sep(ls) == sep) {  /* closing bracket of the same level? */
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          if (sep == 0 && cont >= 0) break;  /* closed a nested [[...]] only */
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');  /* any line break is stored as `\n' */
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        /* ordinary character: keep it for strings, drop it for comments */
        if (seminfo) save_and_next(ls);
        else next(ls);
      }
    }
  } endloop:
  /* strip the delimiters: each one is 2 brackets plus `sep' equal signs */
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
276 
277 
/*
** Read a quoted string whose delimiter is `del'; on entry the current
** character is the opening delimiter. Escape sequences are translated
** while the text is saved in the buffer, and the resulting string
** (without the two delimiters) is stored in `seminfo->ts'.
** An unescaped line break or the end of the input before the closing
** delimiter raises an "unfinished string" error.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter (stripped at the end) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;  /* character produced by a single-letter escape */
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          /* backslash followed by a line break: store one `\n' */
          case '\n':  /* go through */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!isdigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              /* decimal escape: read up to three digits */
              int i = 0;
              c = 0;
              do {
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && isdigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;  /* these paths have already consumed their input */
          }
        }
        /* single-letter escape: store its value and skip the letter */
        save(ls, c);
        next(ls);
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* drop the opening and closing delimiters from the buffered text */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
332 
333 
/*
** Scan and return the next token from the input. White space, line
** breaks and comments are skipped. Single-character tokens are returned
** as the character itself; other tokens are returned as TK_* values.
** For strings and names the string is stored in `seminfo->ts'; for
** numbers the value is stored in `seminfo->r' (by `read_numeral').
** Returns TK_EOS at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);  /* start the token with an empty buffer */
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        else if (sep == -1) return '[';  /* a lone `[', no `=' after it */
        /* NOTE(review): relies on luaX_lexerror not returning (it ends in
           luaD_throw); otherwise control would fall into the next case */
        else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!isdigit(ls->current)) return '.';
        else {
          /* a dot followed by a digit starts a numeral */
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (isspace(ls->current)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          /* `reserved' holds the word's 1-based index (set by luaX_init) */
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
446 
447 
/*
** Advance to the next token, leaving it in `ls->t'. A pending look-ahead
** token is used (and cleared) when there is one; otherwise a new token
** is scanned. `ls->lastline' records the line number in effect before
** advancing.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
457 
458 
/*
** Scan one token ahead and keep it in `ls->lookahead' without touching
** the current token. There must be no look-ahead token pending.
*/
void luaX_lookahead (LexState *ls) {
  int ahead;
  lua_assert(ls->lookahead.token == TK_EOS);
  ahead = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = ahead;
}
463 
464