1 /*
2 ** $Id$
3 ** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6 
7 #include "common/util.h"
8 
9 #define llex_c
10 #define LUA_CORE
11 
12 #include "lua.h"
13 
14 #include "ldo.h"
15 #include "llex.h"
16 #include "lobject.h"
17 #include "lparser.h"
18 #include "lstate.h"
19 #include "lstring.h"
20 #include "ltable.h"
21 #include "lzio.h"
22 
23 
24 
/* advance the lexer: read the next character from the input stream `ls->z'
** into `ls->current' (the expression also yields that character) */
#define next(ls) (ls->current = zgetc(ls->z))
26 
27 
28 
29 
/* true when the current character is a line break (`\n' or `\r') */
#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')
31 
32 
/*
** Printable text of every multi-character token, terminated by NULL.
** luaX_token2str indexes this table with `token - FIRST_RESERVED', and
** luaX_init registers the first NUM_RESERVED entries as reserved words,
** so the order of the entries matters.
*/
/* ORDER RESERVED */
const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "..", "...", "==", ">=", "<=", "~=",
    "<number>", "<name>", "<string>", "<eof>",
    NULL
};
43 
44 
/* append the current character to the token buffer, then advance the input */
#define save_and_next(ls) (save(ls, ls->current), next(ls))
46 
47 
save(LexState * ls,int c)48 static void save (LexState *ls, int c) {
49   Mbuffer *b = ls->buff;
50   if (b->n + 1 > b->buffsize) {
51     size_t newsize;
52     if (b->buffsize >= MAX_SIZET/2)
53       luaX_lexerror(ls, "lexical element too long", 0);
54     newsize = b->buffsize * 2;
55     luaZ_resizebuffer(ls->L, b, newsize);
56   }
57   b->buffer[b->n++] = cast(char, c);
58 }
59 
60 
luaX_init(lua_State * L)61 void luaX_init (lua_State *L) {
62   int i;
63   for (i=0; i<NUM_RESERVED; i++) {
64     TString *ts = luaS_new(L, luaX_tokens[i]);
65     luaS_fix(ts);  /* reserved words are never collected */
66     lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
67     ts->tsv.reserved = cast_byte(i+1);  /* reserved word */
68   }
69 }
70 
71 
/* size of the buffer that luaX_lexerror uses for the chunk identifier */
#define MAXSRC          80
73 
74 
luaX_token2str(LexState * ls,int token)75 const char *luaX_token2str (LexState *ls, int token) {
76   if (token < FIRST_RESERVED) {
77     lua_assert(token == cast(unsigned char, token));
78     return (Common::isCntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
79                               luaO_pushfstring(ls->L, "%c", token);
80   }
81   else
82     return luaX_tokens[token-FIRST_RESERVED];
83 }
84 
85 
txtToken(LexState * ls,int token)86 static const char *txtToken (LexState *ls, int token) {
87   switch (token) {
88     case TK_NAME:
89     case TK_STRING:
90     case TK_NUMBER:
91       save(ls, '\0');
92       return luaZ_buffer(ls->buff);
93     default:
94       return luaX_token2str(ls, token);
95   }
96 }
97 
98 
luaX_lexerror(LexState * ls,const char * msg,int token)99 void luaX_lexerror (LexState *ls, const char *msg, int token) {
100   char buff[MAXSRC];
101   luaO_chunkid(buff, getstr(ls->source), MAXSRC);
102   msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
103   if (token)
104     luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
105   luaD_throw(ls->L, LUA_ERRSYNTAX);
106 }
107 
108 
luaX_syntaxerror(LexState * ls,const char * msg)109 void luaX_syntaxerror (LexState *ls, const char *msg) {
110   luaX_lexerror(ls, msg, ls->t.token);
111 }
112 
113 
luaX_newstring(LexState * ls,const char * str,size_t l)114 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
115   lua_State *L = ls->L;
116   TString *ts = luaS_newlstr(L, str, l);
117   TValue *o = luaH_setstr(L, ls->fs->h, ts);  /* entry for `str' */
118   if (ttisnil(o))
119     setbvalue(o, 1);  /* make sure `str' will not be collected */
120   return ts;
121 }
122 
123 
inclinenumber(LexState * ls)124 static void inclinenumber (LexState *ls) {
125   int old = ls->current;
126   lua_assert(currIsNewline(ls));
127   next(ls);  /* skip `\n' or `\r' */
128   if (currIsNewline(ls) && ls->current != old)
129     next(ls);  /* skip `\n\r' or `\r\n' */
130   if (++ls->linenumber >= MAX_INT)
131     luaX_syntaxerror(ls, "chunk has too many lines");
132 }
133 
134 
luaX_setinput(lua_State * L,LexState * ls,ZIO * z,TString * source)135 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
136   ls->decpoint = '.';
137   ls->L = L;
138   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
139   ls->z = z;
140   ls->fs = NULL;
141   ls->linenumber = 1;
142   ls->lastline = 1;
143   ls->source = source;
144   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
145   next(ls);  /* read first char */
146 }
147 
148 
149 
150 /*
151 ** =======================================================
152 ** LEXICAL ANALYZER
153 ** =======================================================
154 */
155 
156 
157 
check_next(LexState * ls,const char * set)158 static int check_next (LexState *ls, const char *set) {
159   if (!strchr(set, ls->current))
160     return 0;
161   save_and_next(ls);
162   return 1;
163 }
164 
165 
buffreplace(LexState * ls,char from,char to)166 static void buffreplace (LexState *ls, char from, char to) {
167   size_t n = luaZ_bufflen(ls->buff);
168   char *p = luaZ_buffer(ls->buff);
169   while (n--)
170     if (p[n] == from) p[n] = to;
171 }
172 
173 
trydecpoint(LexState * ls,SemInfo * seminfo)174 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
175   /* format error: try to update decimal point separator */
176   // Normally we'd use localeconv() to get the decimal point separator, but
177   // annoyingly that is not available on some platforms, e.g. Android. Figure
178   // it out by formatting a known value and extract the separator from that
179   // instead. The result could be cached, but considering the game I doubt
180   // this will ever be a bottleneck. Note that the separator is assumed to fit
181   // in a char, but that was a limitation in the original code as well.
182   char old = ls->decpoint;
183   char buf[5];
184   int i;
185   sprintf(buf, "%.1f", 1.0);
186   ls->decpoint = '.';
187   for (i = 0; buf[i]; i++) {
188     if (!Common::isSpace(buf[i]) && !Common::isDigit(buf[i])) {
189       ls->decpoint = buf[i];
190       break;
191     }
192   }
193   buffreplace(ls, old, ls->decpoint);  /* try updated decimal separator */
194   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
195     /* format error with correct decimal point: no more options */
196     buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
197     luaX_lexerror(ls, "malformed number", TK_NUMBER);
198   }
199 }
200 
201 
202 /* LUA_NUMBER */
read_numeral(LexState * ls,SemInfo * seminfo)203 static void read_numeral (LexState *ls, SemInfo *seminfo) {
204   lua_assert(Common::isDigit(ls->current));
205   do {
206     save_and_next(ls);
207   } while (Common::isDigit(ls->current) || ls->current == '.');
208   if (check_next(ls, "Ee"))  /* `E'? */
209     check_next(ls, "+-");  /* optional exponent sign */
210   while (Common::isAlnum(ls->current) || ls->current == '_')
211     save_and_next(ls);
212   save(ls, '\0');
213   buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
214   if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))  /* format error? */
215     trydecpoint(ls, seminfo); /* try to update decimal point separator */
216 }
217 
218 
skip_sep(LexState * ls)219 static int skip_sep (LexState *ls) {
220   int count = 0;
221   int s = ls->current;
222   lua_assert(s == '[' || s == ']');
223   save_and_next(ls);
224   while (ls->current == '=') {
225     save_and_next(ls);
226     count++;
227   }
228   return (ls->current == s) ? count : (-count) - 1;
229 }
230 
231 
/*
** Reads the body of a long string or long comment whose opening
** delimiter has `sep' equal signs. On entry the current character is
** the second `[' of the opening delimiter. When `seminfo' is non-NULL
** the text between the delimiters is stored in `seminfo->ts'; when it
** is NULL (llex passes NULL for long comments) the text is discarded.
** Reaching the end of input first raises an "unfinished ..." error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;  /* nesting counter, only used with LUA_COMPAT_LSTR */
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        if (skip_sep(ls) == sep) {  /* nested opening delimiter? */
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        if (skip_sep(ls) == sep) {  /* closing delimiter of the same level? */
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          if (sep == 0 && cont >= 0) break;  /* closed a nested [[...]] only */
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');  /* any line break is stored as a single `\n' */
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        if (seminfo) save_and_next(ls);
        else next(ls);  /* comment: text is not kept */
      }
    }
  } endloop:
  if (seminfo)
    /* drop the delimiters: 2 brackets plus `sep' equal signs on each side */
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
285 
286 
/*
** Reads a quoted string whose delimiter is `del' (the current character
** on entry), translating backslash escapes, and stores the contents
** without the delimiters in `seminfo->ts'. An unescaped line break or
** the end of input before the closing delimiter raises an "unfinished
** string" error.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter (stripped at the end) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;  /* character the escape sequence stands for */
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          case '\n':  /* go through */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;  /* escaped line break */
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!Common::isDigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              int i = 0;
              c = 0;
              do {  /* read up to 3 decimal digits */
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && Common::isDigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;  /* character already saved and input already advanced */
          }
        }
        save(ls, c);  /* single-letter escape: save its translation */
        next(ls);  /* and skip the letter */
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* buffer holds both delimiters: skip the first, drop the last */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
341 
342 
/*
** Main scanner: skips white space, line breaks and comments and reads
** the next token from the input. Returns the token code, which is a
** TK_* constant or, for single-character tokens, the character itself.
** For names and strings the text is stored in `seminfo->ts'; numerals
** are converted by read_numeral, which receives `seminfo'. TK_EOS is
** returned at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);  /* start each token with an empty buffer */
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        else if (sep == -1)  /* a `[' not followed by `=' or another `[' */
          return '[';
        /* `[' followed by `=' signs but no second `[' */
        luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
        break;
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);  /* saved in case this starts a numeral such as `.5' */
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!Common::isDigit(ls->current)) return '.';
        else {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (Common::isSpace(ls->current)) {
          lua_assert(!currIsNewline(ls));  /* line breaks are handled above */
          next(ls);
          continue;
        }
        else if (Common::isDigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (Common::isAlpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (Common::isAlnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          /* `tsv.reserved' is the 1-based index set by luaX_init, 0 otherwise */
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
457 
458 
luaX_next(LexState * ls)459 void luaX_next (LexState *ls) {
460   ls->lastline = ls->linenumber;
461   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
462     ls->t = ls->lookahead;  /* use this one */
463     ls->lookahead.token = TK_EOS;  /* and discharge it */
464   }
465   else
466     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
467 }
468 
469 
luaX_lookahead(LexState * ls)470 void luaX_lookahead (LexState *ls) {
471   lua_assert(ls->lookahead.token == TK_EOS);
472   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
473 }
474