1 /*
2 ** $Id: llex.cpp 940 2008-07-26 19:30:44Z aquadran $
** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6
7
8 #include "lauxlib.h"
9 #include "llex.h"
10 #include "lmem.h"
11 #include "lobject.h"
12 #include "lparser.h"
13 #include "lstate.h"
14 #include "lstring.h"
15 #include "luadebug.h"
16 #include "lzio.h"
17
18
19
20 int32 lua_debug=0;
21
22
23 #define next(LS) (LS->current = zgetc(LS->lex_z))
24
25
26 #define save(c) luaL_addchar(c)
27 #define save_and_next(LS) (save(LS->current), next(LS))
28
29
/*
** Spellings of the reserved words. The position of a word in this table
** is significant: luaX_init marks word `i' with FIRST_RESERVED+i and
** luaX_token2str maps a token back through the same index.
*/
const char *reserved [] = {
  "and", "do", "else", "elseif",
  "end", "function", "if", "local",
  "nil", "not", "or", "repeat",
  "return", "then", "until", "while"
};
33
34
luaX_init(void)35 void luaX_init (void)
36 {
37 uint32 i;
38 for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) {
39 TaggedString *ts = luaS_new(reserved[i]);
40 ts->head.marked = FIRST_RESERVED+i; /* reserved word (always > 255) */
41 }
42 }
43
44
luaX_syntaxerror(LexState * ls,const char * s,const char * token)45 void luaX_syntaxerror (LexState *ls, const char *s, const char *token) {
46 if (token[0] == 0)
47 token = "<eof>";
48 luaL_verror("%.100s;\n last token read: `%.50s' at line %d in chunk `%.50s'",
49 s, token, ls->linenumber, zname(ls->lex_z));
50 }
51
52
luaX_error(LexState * ls,const char * s)53 void luaX_error (LexState *ls, const char *s) {
54 save(0);
55 luaX_syntaxerror(ls, s, luaL_buffer());
56 }
57
58
luaX_token2str(LexState *,int32 token,char * s)59 void luaX_token2str (LexState * /*ls*/, int32 token, char *s) {
60 if (token < 255) {
61 s[0] = token;
62 s[1] = 0;
63 }
64 else
65 strcpy(s, reserved[token-FIRST_RESERVED]);
66 }
67
68
luaX_invalidchar(LexState * ls,int32 c)69 static void luaX_invalidchar (LexState *ls, int32 c) {
70 char buff[10];
71 sprintf(buff, "0x%X", (int)c);
72 luaX_syntaxerror(ls, "invalid control char", buff);
73 }
74
75
firstline(LexState * LS)76 static void firstline (LexState *LS)
77 {
78 int32 c = zgetc(LS->lex_z);
79 if (c == '#')
80 while ((c=zgetc(LS->lex_z)) != '\n' && c != EOZ) /* skip first line */;
81 zungetc(LS->lex_z);
82 }
83
84
luaX_setinput(LexState * LS,ZIO * z)85 void luaX_setinput (LexState *LS, ZIO *z)
86 {
87 LS->current = '\n';
88 LS->linenumber = 0;
89 LS->iflevel = 0;
90 LS->ifstate[0].skip = 0;
91 LS->ifstate[0].elsepart = 1; /* to avoid a free $else */
92 LS->lex_z = z;
93 LS->fs = NULL;
94 firstline(LS);
95 luaL_resetbuffer();
96 }
97
98
99
100 /*
101 ** =======================================================
102 ** PRAGMAS
103 ** =======================================================
104 */
105
106 #define PRAGMASIZE 20
107
skipspace(LexState * LS)108 static void skipspace (LexState *LS)
109 {
110 while (LS->current == ' ' || LS->current == '\t' || LS->current == '\r')
111 next(LS);
112 }
113
114
checkcond(LexState * LS,char * buff)115 static int32 checkcond (LexState *LS, char *buff)
116 {
117 static const char *opts[] = {"nil", "1", NULL};
118 int32 i = luaL_findstring(buff, opts);
119 if (i >= 0) return i;
120 else if (isalpha((byte)buff[0]) || buff[0] == '_')
121 return luaS_globaldefined(buff);
122 else {
123 luaX_syntaxerror(LS, "invalid $if condition", buff);
124 return 0; /* to avoid warnings */
125 }
126 }
127
128
readname(LexState * LS,char * buff)129 static void readname (LexState *LS, char *buff)
130 {
131 int32 i = 0;
132 skipspace(LS);
133 while (isalnum(LS->current) || LS->current == '_') {
134 if (i >= PRAGMASIZE) {
135 buff[PRAGMASIZE] = 0;
136 luaX_syntaxerror(LS, "pragma too long", buff);
137 }
138 buff[i++] = LS->current;
139 next(LS);
140 }
141 buff[i] = 0;
142 }
143
144
145 static void inclinenumber (LexState *LS);
146
147
ifskip(LexState * LS)148 static void ifskip (LexState *LS)
149 {
150 while (LS->ifstate[LS->iflevel].skip) {
151 if (LS->current == '\n')
152 inclinenumber(LS);
153 else if (LS->current == EOZ)
154 luaX_error(LS, "input ends inside a $if");
155 else next(LS);
156 }
157 }
158
159
inclinenumber(LexState * LS)160 static void inclinenumber (LexState *LS)
161 {
162 static const char *pragmas [] =
163 {"debug", "nodebug", "endinput", "end", "ifnot", "if", "else", NULL};
164 next(LS); /* skip '\n' */
165 ++LS->linenumber;
166 if (LS->current == '$') { /* is a pragma? */
167 char buff[PRAGMASIZE+1];
168 int32 ifnot = 0;
169 int32 skip = LS->ifstate[LS->iflevel].skip;
170 next(LS); /* skip $ */
171 readname(LS, buff);
172 switch (luaL_findstring(buff, pragmas)) {
173 case 0: /* debug */
174 if (!skip) lua_debug = 1;
175 break;
176 case 1: /* nodebug */
177 if (!skip) lua_debug = 0;
178 break;
179 case 2: /* endinput */
180 if (!skip) {
181 LS->current = EOZ;
182 LS->iflevel = 0; /* to allow $endinput inside a $if */
183 }
184 break;
185 case 3: /* end */
186 if (LS->iflevel-- == 0)
187 luaX_syntaxerror(LS, "unmatched $end", "$end");
188 break;
189 case 4: /* ifnot */
190 ifnot = 1;
191 /* go through */
192 case 5: /* if */
193 if (LS->iflevel == MAX_IFS-1)
194 luaX_syntaxerror(LS, "too many nested $ifs", "$if");
195 readname(LS, buff);
196 LS->iflevel++;
197 LS->ifstate[LS->iflevel].elsepart = 0;
198 LS->ifstate[LS->iflevel].condition = checkcond(LS, buff) ? !ifnot : ifnot;
199 LS->ifstate[LS->iflevel].skip = skip || !LS->ifstate[LS->iflevel].condition;
200 break;
201 case 6: /* else */
202 if (LS->ifstate[LS->iflevel].elsepart)
203 luaX_syntaxerror(LS, "unmatched $else", "$else");
204 LS->ifstate[LS->iflevel].elsepart = 1;
205 LS->ifstate[LS->iflevel].skip = LS->ifstate[LS->iflevel-1].skip ||
206 LS->ifstate[LS->iflevel].condition;
207 break;
208 default:
209 luaX_syntaxerror(LS, "unknown pragma", buff);
210 }
211 skipspace(LS);
212 if (LS->current == '\n') /* pragma must end with a '\n' ... */
213 inclinenumber(LS);
214 else if (LS->current != EOZ) /* or eof */
215 luaX_syntaxerror(LS, "invalid pragma format", buff);
216 ifskip(LS);
217 }
218 }
219
220
221 /*
222 ** =======================================================
** LEXICAL ANALYZER
224 ** =======================================================
225 */
226
227
228
229
230
read_long_string(LexState * LS)231 static int32 read_long_string (LexState *LS)
232 {
233 int32 cont = 0;
234 while (1) {
235 switch (LS->current) {
236 case EOZ:
237 luaX_error(LS, "unfinished long string");
238 return EOS; /* to avoid warnings */
239 case '[':
240 save_and_next(LS);
241 if (LS->current == '[') {
242 cont++;
243 save_and_next(LS);
244 }
245 continue;
246 case ']':
247 save_and_next(LS);
248 if (LS->current == ']') {
249 if (cont == 0) goto endloop;
250 cont--;
251 save_and_next(LS);
252 }
253 continue;
254 case '\n':
255 save('\n');
256 inclinenumber(LS);
257 continue;
258 default:
259 save_and_next(LS);
260 }
261 } endloop:
262 save_and_next(LS); /* pass the second ']' */
263 LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+2,
264 L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-4);
265 return STRING;
266 }
267
268
luaX_lex(LexState * LS)269 int32 luaX_lex (LexState *LS) {
270 double a;
271 luaL_resetbuffer();
272 while (1) {
273 switch (LS->current) {
274
275 case ' ': case '\t': case '\r': /* CR: to avoid problems with DOS */
276 next(LS);
277 continue;
278
279 case '\n':
280 inclinenumber(LS);
281 continue;
282
283 case '-':
284 save_and_next(LS);
285 if (LS->current != '-') return '-';
286 do { next(LS); } while (LS->current != '\n' && LS->current != EOZ);
287 luaL_resetbuffer();
288 continue;
289
290 case '[':
291 save_and_next(LS);
292 if (LS->current != '[') return '[';
293 else {
294 save_and_next(LS); /* pass the second '[' */
295 return read_long_string(LS);
296 }
297
298 case '=':
299 save_and_next(LS);
300 if (LS->current != '=') return '=';
301 else { save_and_next(LS); return EQ; }
302
303 case '<':
304 save_and_next(LS);
305 if (LS->current != '=') return '<';
306 else { save_and_next(LS); return LE; }
307
308 case '>':
309 save_and_next(LS);
310 if (LS->current != '=') return '>';
311 else { save_and_next(LS); return GE; }
312
313 case '~':
314 save_and_next(LS);
315 if (LS->current != '=') return '~';
316 else { save_and_next(LS); return NE; }
317
318 case '"':
319 case '\'': {
320 int32 del = LS->current;
321 save_and_next(LS);
322 while (LS->current != del) {
323 switch (LS->current) {
324 case EOZ:
325 case '\n':
326 luaX_error(LS, "unfinished string");
327 return EOS; /* to avoid warnings */
328 case '\\':
329 next(LS); /* do not save the '\' */
330 switch (LS->current) {
331 case 'a': save('\a'); next(LS); break;
332 case 'b': save('\b'); next(LS); break;
333 case 'f': save('\f'); next(LS); break;
334 case 'n': save('\n'); next(LS); break;
335 case 'r': save('\r'); next(LS); break;
336 case 't': save('\t'); next(LS); break;
337 case 'v': save('\v'); next(LS); break;
338 case '\n': save('\n'); inclinenumber(LS); break;
339 default : {
340 if (isdigit(LS->current)) {
341 int32 c = 0;
342 int32 i = 0;
343 do {
344 c = 10*c + (LS->current-'0');
345 next(LS);
346 } while (++i<3 && isdigit(LS->current));
347 if (c >= 256)
348 luaX_error(LS, "escape sequence too large");
349 save(c);
350 }
351 else { /* handles \, ", ', and ? */
352 save(LS->current);
353 next(LS);
354 }
355 break;
356 }
357 }
358 break;
359 default:
360 save_and_next(LS);
361 }
362 }
363 save_and_next(LS); /* skip delimiter */
364 LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+1,
365 L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-2);
366 return STRING;
367 }
368
369 case '.':
370 save_and_next(LS);
371 if (LS->current == '.')
372 {
373 save_and_next(LS);
374 if (LS->current == '.')
375 {
376 save_and_next(LS);
377 return DOTS; /* ... */
378 }
379 else return CONC; /* .. */
380 }
381 else if (!isdigit(LS->current)) return '.';
382 /* LS->current is a digit: goes through to number */
383 a=0.0;
384 goto fraction;
385
386 case '0': case '1': case '2': case '3': case '4':
387 case '5': case '6': case '7': case '8': case '9':
388 a=0.0;
389 do {
390 a = 10.0*a + (LS->current-'0');
391 save_and_next(LS);
392 } while (isdigit(LS->current));
393 if (LS->current == '.') {
394 save_and_next(LS);
395 if (LS->current == '.') {
396 save('.');
397 luaX_error(LS,
398 "ambiguous syntax (decimal point x string concatenation)");
399 }
400 }
401 fraction:
402 { double da=0.1;
403 while (isdigit(LS->current))
404 {
405 a += (LS->current-'0')*da;
406 da /= 10.0;
407 save_and_next(LS);
408 }
409 if (toupper(LS->current) == 'E') {
410 int32 e = 0;
411 int32 neg;
412 double ea;
413 save_and_next(LS);
414 neg = (LS->current=='-');
415 if (LS->current == '+' || LS->current == '-') save_and_next(LS);
416 if (!isdigit(LS->current))
417 luaX_error(LS, "invalid numeral format");
418 do {
419 e = 10*e + (LS->current-'0');
420 save_and_next(LS);
421 } while (isdigit(LS->current));
422 for (ea=neg?0.1:10.0; e>0; e>>=1)
423 {
424 if (e & 1) a *= ea;
425 ea *= ea;
426 }
427 }
428 LS->seminfo.r = (real)a;
429 return NUMBER;
430 }
431
432 case EOZ:
433 if (LS->iflevel > 0)
434 luaX_error(LS, "input ends inside a $if");
435 return EOS;
436
437 default:
438 if (LS->current != '_' && !isalpha(LS->current)) {
439 int32 c = LS->current;
440 if (iscntrl(c))
441 luaX_invalidchar(LS, c);
442 save_and_next(LS);
443 return c;
444 }
445 else { /* identifier or reserved word */
446 TaggedString *ts;
447 do {
448 save_and_next(LS);
449 } while (isalnum(LS->current) || LS->current == '_');
450 save(0);
451 ts = luaS_new(L->Mbuffbase);
452 if (ts->head.marked >= 'A')
453 return ts->head.marked; /* reserved word */
454 LS->seminfo.ts = ts;
455 return NAME;
456 }
457 }
458 }
459 }
460
461