1 /*
2 ** $Id$
3 ** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6
7 #include "common/util.h"
8
9 #define llex_c
10 #define LUA_CORE
11
12 #include "lua.h"
13
14 #include "ldo.h"
15 #include "llex.h"
16 #include "lobject.h"
17 #include "lparser.h"
18 #include "lstate.h"
19 #include "lstring.h"
20 #include "ltable.h"
21 #include "lzio.h"
22
23
24
/*
** Reads the next character from the input stream `ls->z' into
** `ls->current'; the expression evaluates to the newly read character.
** The parameter is parenthesized so that any pointer expression can be
** passed safely.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
26
27
28
29
/*
** True when the current character is a line terminator (`\n' or `\r').
** The parameter is parenthesized so that any pointer expression can be
** passed safely.
*/
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
31
32
/*
** Printable spelling of every multi-character token, NULL-terminated.
** luaX_token2str indexes this table with (token - FIRST_RESERVED) and
** luaX_init registers the first NUM_RESERVED entries as reserved words,
** so the order of the entries is significant.
*/
/* ORDER RESERVED */
const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do",
  "else", "elseif", "end",
  "false", "for", "function",
  "if", "in", "local",
  "nil", "not", "or",
  "repeat", "return", "then",
  "true", "until", "while",
  /* multi-character operators */
  "..", "...",
  "==", ">=", "<=", "~=",
  /* other terminals */
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
43
44
/*
** Appends the current character to the token buffer and then advances the
** input by one character. The parameter is parenthesized so that any
** pointer expression can be passed safely.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next(ls))
46
47
save(LexState * ls,int c)48 static void save (LexState *ls, int c) {
49 Mbuffer *b = ls->buff;
50 if (b->n + 1 > b->buffsize) {
51 size_t newsize;
52 if (b->buffsize >= MAX_SIZET/2)
53 luaX_lexerror(ls, "lexical element too long", 0);
54 newsize = b->buffsize * 2;
55 luaZ_resizebuffer(ls->L, b, newsize);
56 }
57 b->buffer[b->n++] = cast(char, c);
58 }
59
60
luaX_init(lua_State * L)61 void luaX_init (lua_State *L) {
62 int i;
63 for (i=0; i<NUM_RESERVED; i++) {
64 TString *ts = luaS_new(L, luaX_tokens[i]);
65 luaS_fix(ts); /* reserved words are never collected */
66 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
67 ts->tsv.reserved = cast_byte(i+1); /* reserved word */
68 }
69 }
70
71
/* size of the buffer luaX_lexerror uses for the chunk id in error messages */
#define MAXSRC 80
73
74
luaX_token2str(LexState * ls,int token)75 const char *luaX_token2str (LexState *ls, int token) {
76 if (token < FIRST_RESERVED) {
77 lua_assert(token == cast(unsigned char, token));
78 return (Common::isCntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
79 luaO_pushfstring(ls->L, "%c", token);
80 }
81 else
82 return luaX_tokens[token-FIRST_RESERVED];
83 }
84
85
txtToken(LexState * ls,int token)86 static const char *txtToken (LexState *ls, int token) {
87 switch (token) {
88 case TK_NAME:
89 case TK_STRING:
90 case TK_NUMBER:
91 save(ls, '\0');
92 return luaZ_buffer(ls->buff);
93 default:
94 return luaX_token2str(ls, token);
95 }
96 }
97
98
luaX_lexerror(LexState * ls,const char * msg,int token)99 void luaX_lexerror (LexState *ls, const char *msg, int token) {
100 char buff[MAXSRC];
101 luaO_chunkid(buff, getstr(ls->source), MAXSRC);
102 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
103 if (token)
104 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
105 luaD_throw(ls->L, LUA_ERRSYNTAX);
106 }
107
108
luaX_syntaxerror(LexState * ls,const char * msg)109 void luaX_syntaxerror (LexState *ls, const char *msg) {
110 luaX_lexerror(ls, msg, ls->t.token);
111 }
112
113
luaX_newstring(LexState * ls,const char * str,size_t l)114 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
115 lua_State *L = ls->L;
116 TString *ts = luaS_newlstr(L, str, l);
117 TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */
118 if (ttisnil(o))
119 setbvalue(o, 1); /* make sure `str' will not be collected */
120 return ts;
121 }
122
123
inclinenumber(LexState * ls)124 static void inclinenumber (LexState *ls) {
125 int old = ls->current;
126 lua_assert(currIsNewline(ls));
127 next(ls); /* skip `\n' or `\r' */
128 if (currIsNewline(ls) && ls->current != old)
129 next(ls); /* skip `\n\r' or `\r\n' */
130 if (++ls->linenumber >= MAX_INT)
131 luaX_syntaxerror(ls, "chunk has too many lines");
132 }
133
134
luaX_setinput(lua_State * L,LexState * ls,ZIO * z,TString * source)135 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
136 ls->decpoint = '.';
137 ls->L = L;
138 ls->lookahead.token = TK_EOS; /* no look-ahead token */
139 ls->z = z;
140 ls->fs = NULL;
141 ls->linenumber = 1;
142 ls->lastline = 1;
143 ls->source = source;
144 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
145 next(ls); /* read first char */
146 }
147
148
149
150 /*
151 ** =======================================================
152 ** LEXICAL ANALYZER
153 ** =======================================================
154 */
155
156
157
check_next(LexState * ls,const char * set)158 static int check_next (LexState *ls, const char *set) {
159 if (!strchr(set, ls->current))
160 return 0;
161 save_and_next(ls);
162 return 1;
163 }
164
165
buffreplace(LexState * ls,char from,char to)166 static void buffreplace (LexState *ls, char from, char to) {
167 size_t n = luaZ_bufflen(ls->buff);
168 char *p = luaZ_buffer(ls->buff);
169 while (n--)
170 if (p[n] == from) p[n] = to;
171 }
172
173
trydecpoint(LexState * ls,SemInfo * seminfo)174 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
175 /* format error: try to update decimal point separator */
176 // Normally we'd use localeconv() to get the decimal point separator, but
177 // annoyingly that is not available on some platforms, e.g. Android. Figure
178 // it out by formatting a known value and extract the separator from that
179 // instead. The result could be cached, but considering the game I doubt
180 // this will ever be a bottleneck. Note that the separator is assumed to fit
181 // in a char, but that was a limitation in the original code as well.
182 char old = ls->decpoint;
183 char buf[5];
184 int i;
185 sprintf(buf, "%.1f", 1.0);
186 ls->decpoint = '.';
187 for (i = 0; buf[i]; i++) {
188 if (!Common::isSpace(buf[i]) && !Common::isDigit(buf[i])) {
189 ls->decpoint = buf[i];
190 break;
191 }
192 }
193 buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */
194 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
195 /* format error with correct decimal point: no more options */
196 buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */
197 luaX_lexerror(ls, "malformed number", TK_NUMBER);
198 }
199 }
200
201
202 /* LUA_NUMBER */
read_numeral(LexState * ls,SemInfo * seminfo)203 static void read_numeral (LexState *ls, SemInfo *seminfo) {
204 lua_assert(Common::isDigit(ls->current));
205 do {
206 save_and_next(ls);
207 } while (Common::isDigit(ls->current) || ls->current == '.');
208 if (check_next(ls, "Ee")) /* `E'? */
209 check_next(ls, "+-"); /* optional exponent sign */
210 while (Common::isAlnum(ls->current) || ls->current == '_')
211 save_and_next(ls);
212 save(ls, '\0');
213 buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */
214 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */
215 trydecpoint(ls, seminfo); /* try to update decimal point separator */
216 }
217
218
skip_sep(LexState * ls)219 static int skip_sep (LexState *ls) {
220 int count = 0;
221 int s = ls->current;
222 lua_assert(s == '[' || s == ']');
223 save_and_next(ls);
224 while (ls->current == '=') {
225 save_and_next(ls);
226 count++;
227 }
228 return (ls->current == s) ? count : (-count) - 1;
229 }
230
231
/*
** Reads the body of a long-bracket string or comment up to the closing
** delimiter whose level equals `sep' (the value skip_sep returned for the
** opening delimiter).
**
** When `seminfo' is NULL the text is a comment: ordinary characters are
** skipped without being saved and the buffer is reset at every newline.
** Otherwise every character is saved and, at the end, the text between
** the delimiters is stored in `seminfo->ts'.
**
** Raises a lexical error if the input ends before the closing delimiter.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  for (;;) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
#if defined(LUA_COMPAT_LSTR)
      /* compatibility mode: track nested opening brackets of the same level */
      case '[': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        /* only a closing delimiter with the same level ends the string */
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          if (sep == 0 && cont >= 0) break;
#endif
          goto endloop;
        }
        break;
      }
      case '\n':
      case '\r': {
        /* any line terminator sequence is stored as a single `\n' */
        save(ls, '\n');
        inclinenumber(ls);
        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
        break;
      }
      default: {
        if (seminfo) save_and_next(ls);
        else next(ls);
      }
    }
  } endloop:
  /* the buffer holds both delimiters, each (2 + sep) characters long;
     strip them from the resulting string */
  if (seminfo)
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
}
285
286
/*
** Reads a quoted string delimited by character `del' (the current
** character on entry), processing backslash escapes, and stores the
** resulting string, without its delimiters, in `seminfo->ts'.
**
** Raises "unfinished string" when the input ends or an unescaped line
** terminator is found before the closing delimiter, and "escape sequence
** too large" when a decimal escape exceeds UCHAR_MAX.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter in the buffer */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        luaX_lexerror(ls, "unfinished string", TK_EOS);
        continue;  /* to avoid warnings */
      case '\n':
      case '\r':
        luaX_lexerror(ls, "unfinished string", TK_STRING);
        continue;  /* to avoid warnings */
      case '\\': {
        int c;
        next(ls);  /* do not save the `\' */
        switch (ls->current) {
          case 'a': c = '\a'; break;
          case 'b': c = '\b'; break;
          case 'f': c = '\f'; break;
          case 'n': c = '\n'; break;
          case 'r': c = '\r'; break;
          case 't': c = '\t'; break;
          case 'v': c = '\v'; break;
          case '\n':  /* go through */
          /* an escaped line terminator is stored as a single `\n' */
          case '\r': save(ls, '\n'); inclinenumber(ls); continue;
          case EOZ: continue;  /* will raise an error next loop */
          default: {
            if (!Common::isDigit(ls->current))
              save_and_next(ls);  /* handles \\, \", \', and \? */
            else {  /* \xxx */
              /* decimal escape: up to three digits */
              int i = 0;
              c = 0;
              do {
                c = 10*c + (ls->current-'0');
                next(ls);
              } while (++i<3 && Common::isDigit(ls->current));
              if (c > UCHAR_MAX)
                luaX_lexerror(ls, "escape sequence too large", TK_STRING);
              save(ls, c);
            }
            continue;
          }
        }
        /* single-letter escape: store the translated character and skip
           the letter */
        save(ls, c);
        next(ls);
        continue;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* drop the opening and closing delimiters from the buffered text */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
341
342
/*
** Scans and returns the next token from the input, skipping line
** terminators, white space and comments.
**
** The return value is either a single character (for one-character
** tokens) or one of the TK_* codes. For TK_NAME and TK_STRING the string
** is stored in `seminfo->ts'; for TK_NUMBER the value is stored in
** `seminfo->r'. TK_EOS is returned at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-') return '-';
        /* else is a comment */
        next(ls);
        if (ls->current == '[') {
          int sep = skip_sep(ls);
          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
          if (sep >= 0) {
            read_long_string(ls, NULL, sep);  /* long comment */
            luaZ_resetbuffer(ls->buff);
            continue;
          }
        }
        /* else short comment */
        while (!currIsNewline(ls) && ls->current != EOZ)
          next(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        /* -1 means a lone `[': no `=' and no second bracket followed */
        else if (sep == -1)
          return '[';
        luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
        break;
      }
      case '=': {
        next(ls);
        if (ls->current != '=') return '=';
        else { next(ls); return TK_EQ; }
      }
      case '<': {
        next(ls);
        if (ls->current != '=') return '<';
        else { next(ls); return TK_LE; }
      }
      case '>': {
        next(ls);
        if (ls->current != '=') return '>';
        else { next(ls); return TK_GE; }
      }
      case '~': {
        next(ls);
        if (ls->current != '=') return '~';
        else { next(ls); return TK_NE; }
      }
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        /* the `.' is saved so that a numeral such as `.5' keeps it */
        save_and_next(ls);
        if (check_next(ls, ".")) {
          if (check_next(ls, "."))
            return TK_DOTS;   /* ... */
          else return TK_CONCAT;   /* .. */
        }
        else if (!Common::isDigit(ls->current)) return '.';
        else {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
      }
      case EOZ: {
        return TK_EOS;
      }
      default: {
        if (Common::isSpace(ls->current)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (Common::isDigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (Common::isAlpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (Common::isAlnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          /* `reserved' holds the 1-based reserved-word index set by
             luaX_init; map it back to the token code */
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }
}
457
458
luaX_next(LexState * ls)459 void luaX_next (LexState *ls) {
460 ls->lastline = ls->linenumber;
461 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
462 ls->t = ls->lookahead; /* use this one */
463 ls->lookahead.token = TK_EOS; /* and discharge it */
464 }
465 else
466 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
467 }
468
469
luaX_lookahead(LexState * ls)470 void luaX_lookahead (LexState *ls) {
471 lua_assert(ls->lookahead.token == TK_EOS);
472 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
473 }
474