1 /*
2 ** $Id: llex.cpp 940 2008-07-26 19:30:44Z aquadran $
** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
6 
7 
8 #include "lauxlib.h"
9 #include "llex.h"
10 #include "lmem.h"
11 #include "lobject.h"
12 #include "lparser.h"
13 #include "lstate.h"
14 #include "lstring.h"
15 #include "luadebug.h"
16 #include "lzio.h"
17 
18 
19 
20 int32 lua_debug=0;
21 
22 
/*
** Reads the next character of the input stream into LS->current; the
** expression yields that character.  The argument is parenthesized so
** that any pointer expression (e.g. `&state') can be passed safely.
*/
#define next(LS) ((LS)->current = zgetc((LS)->lex_z))


/* Appends character `c' to the token buffer through luaL_addchar. */
#define save(c)	luaL_addchar(c)
/* Saves the current character and then advances to the following one. */
#define save_and_next(LS)  (save((LS)->current), next(LS))
28 
29 
/*
** Reserved words.  The position of a word in this table matters:
** luaX_init tags entry `i' with FIRST_RESERVED+i, and luaX_token2str
** maps such a value back to its spelling.
*/
const char *reserved [] = {
  "and", "do", "else", "elseif",
  "end", "function", "if", "local",
  "nil", "not", "or", "repeat",
  "return", "then", "until", "while"
};
33 
34 
luaX_init(void)35 void luaX_init (void)
36 {
37   uint32 i;
38   for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) {
39     TaggedString *ts = luaS_new(reserved[i]);
40     ts->head.marked = FIRST_RESERVED+i;  /* reserved word  (always > 255) */
41   }
42 }
43 
44 
luaX_syntaxerror(LexState * ls,const char * s,const char * token)45 void luaX_syntaxerror (LexState *ls, const char *s, const char *token) {
46   if (token[0] == 0)
47     token = "<eof>";
48   luaL_verror("%.100s;\n  last token read: `%.50s' at line %d in chunk `%.50s'",
49               s, token, ls->linenumber, zname(ls->lex_z));
50 }
51 
52 
luaX_error(LexState * ls,const char * s)53 void luaX_error (LexState *ls, const char *s) {
54   save(0);
55   luaX_syntaxerror(ls, s, luaL_buffer());
56 }
57 
58 
luaX_token2str(LexState *,int32 token,char * s)59 void luaX_token2str (LexState * /*ls*/, int32 token, char *s) {
60   if (token < 255) {
61     s[0] = token;
62     s[1] = 0;
63   }
64   else
65     strcpy(s, reserved[token-FIRST_RESERVED]);
66 }
67 
68 
luaX_invalidchar(LexState * ls,int32 c)69 static void luaX_invalidchar (LexState *ls, int32 c) {
70   char buff[10];
71   sprintf(buff, "0x%X", (int)c);
72   luaX_syntaxerror(ls, "invalid control char", buff);
73 }
74 
75 
firstline(LexState * LS)76 static void firstline (LexState *LS)
77 {
78   int32 c = zgetc(LS->lex_z);
79   if (c == '#')
80     while ((c=zgetc(LS->lex_z)) != '\n' && c != EOZ) /* skip first line */;
81   zungetc(LS->lex_z);
82 }
83 
84 
luaX_setinput(LexState * LS,ZIO * z)85 void luaX_setinput (LexState *LS, ZIO *z)
86 {
87   LS->current = '\n';
88   LS->linenumber = 0;
89   LS->iflevel = 0;
90   LS->ifstate[0].skip = 0;
91   LS->ifstate[0].elsepart = 1;  /* to avoid a free $else */
92   LS->lex_z = z;
93   LS->fs = NULL;
94   firstline(LS);
95   luaL_resetbuffer();
96 }
97 
98 
99 
100 /*
101 ** =======================================================
102 ** PRAGMAS
103 ** =======================================================
104 */
105 
/* Longest word accepted by readname; its buffers hold PRAGMASIZE+1 chars. */
#define PRAGMASIZE 20
107 
skipspace(LexState * LS)108 static void skipspace (LexState *LS)
109 {
110   while (LS->current == ' ' || LS->current == '\t' || LS->current == '\r')
111     next(LS);
112 }
113 
114 
checkcond(LexState * LS,char * buff)115 static int32 checkcond (LexState *LS, char *buff)
116 {
117   static const char *opts[] = {"nil", "1", NULL};
118   int32 i = luaL_findstring(buff, opts);
119   if (i >= 0) return i;
120   else if (isalpha((byte)buff[0]) || buff[0] == '_')
121     return luaS_globaldefined(buff);
122   else {
123     luaX_syntaxerror(LS, "invalid $if condition", buff);
124     return 0;  /* to avoid warnings */
125   }
126 }
127 
128 
readname(LexState * LS,char * buff)129 static void readname (LexState *LS, char *buff)
130 {
131   int32 i = 0;
132   skipspace(LS);
133   while (isalnum(LS->current) || LS->current == '_') {
134     if (i >= PRAGMASIZE) {
135       buff[PRAGMASIZE] = 0;
136       luaX_syntaxerror(LS, "pragma too long", buff);
137     }
138     buff[i++] = LS->current;
139     next(LS);
140   }
141   buff[i] = 0;
142 }
143 
144 
145 static void inclinenumber (LexState *LS);
146 
147 
ifskip(LexState * LS)148 static void ifskip (LexState *LS)
149 {
150   while (LS->ifstate[LS->iflevel].skip) {
151     if (LS->current == '\n')
152       inclinenumber(LS);
153     else if (LS->current == EOZ)
154       luaX_error(LS, "input ends inside a $if");
155     else next(LS);
156   }
157 }
158 
159 
inclinenumber(LexState * LS)160 static void inclinenumber (LexState *LS)
161 {
162   static const char *pragmas [] =
163     {"debug", "nodebug", "endinput", "end", "ifnot", "if", "else", NULL};
164   next(LS);  /* skip '\n' */
165   ++LS->linenumber;
166   if (LS->current == '$') {  /* is a pragma? */
167     char buff[PRAGMASIZE+1];
168     int32 ifnot = 0;
169     int32 skip = LS->ifstate[LS->iflevel].skip;
170     next(LS);  /* skip $ */
171     readname(LS, buff);
172     switch (luaL_findstring(buff, pragmas)) {
173       case 0:  /* debug */
174         if (!skip) lua_debug = 1;
175         break;
176       case 1:  /* nodebug */
177         if (!skip) lua_debug = 0;
178         break;
179       case 2:  /* endinput */
180         if (!skip) {
181           LS->current = EOZ;
182           LS->iflevel = 0;  /* to allow $endinput inside a $if */
183         }
184         break;
185       case 3:  /* end */
186         if (LS->iflevel-- == 0)
187           luaX_syntaxerror(LS, "unmatched $end", "$end");
188         break;
189       case 4:  /* ifnot */
190         ifnot = 1;
191         /* go through */
192       case 5:  /* if */
193         if (LS->iflevel == MAX_IFS-1)
194           luaX_syntaxerror(LS, "too many nested $ifs", "$if");
195         readname(LS, buff);
196         LS->iflevel++;
197         LS->ifstate[LS->iflevel].elsepart = 0;
198         LS->ifstate[LS->iflevel].condition = checkcond(LS, buff) ? !ifnot : ifnot;
199         LS->ifstate[LS->iflevel].skip = skip || !LS->ifstate[LS->iflevel].condition;
200         break;
201       case 6:  /* else */
202         if (LS->ifstate[LS->iflevel].elsepart)
203           luaX_syntaxerror(LS, "unmatched $else", "$else");
204         LS->ifstate[LS->iflevel].elsepart = 1;
205         LS->ifstate[LS->iflevel].skip = LS->ifstate[LS->iflevel-1].skip ||
206                                       LS->ifstate[LS->iflevel].condition;
207         break;
208       default:
209         luaX_syntaxerror(LS, "unknown pragma", buff);
210     }
211     skipspace(LS);
212     if (LS->current == '\n')  /* pragma must end with a '\n' ... */
213       inclinenumber(LS);
214     else if (LS->current != EOZ)  /* or eof */
215       luaX_syntaxerror(LS, "invalid pragma format", buff);
216     ifskip(LS);
217   }
218 }
219 
220 
221 /*
222 ** =======================================================
** LEXICAL ANALYZER
224 ** =======================================================
225 */
226 
227 
228 
229 
230 
read_long_string(LexState * LS)231 static int32 read_long_string (LexState *LS)
232 {
233   int32 cont = 0;
234   while (1) {
235     switch (LS->current) {
236       case EOZ:
237         luaX_error(LS, "unfinished long string");
238         return EOS;  /* to avoid warnings */
239       case '[':
240         save_and_next(LS);
241         if (LS->current == '[') {
242           cont++;
243           save_and_next(LS);
244         }
245         continue;
246       case ']':
247         save_and_next(LS);
248         if (LS->current == ']') {
249           if (cont == 0) goto endloop;
250           cont--;
251           save_and_next(LS);
252         }
253         continue;
254       case '\n':
255         save('\n');
256         inclinenumber(LS);
257         continue;
258       default:
259         save_and_next(LS);
260     }
261   } endloop:
262   save_and_next(LS);  /* pass the second ']' */
263   LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+2,
264                           L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-4);
265   return STRING;
266 }
267 
268 
luaX_lex(LexState * LS)269 int32 luaX_lex (LexState *LS) {
270   double a;
271   luaL_resetbuffer();
272   while (1) {
273     switch (LS->current) {
274 
275       case ' ': case '\t': case '\r':  /* CR: to avoid problems with DOS */
276         next(LS);
277         continue;
278 
279       case '\n':
280         inclinenumber(LS);
281         continue;
282 
283       case '-':
284         save_and_next(LS);
285         if (LS->current != '-') return '-';
286         do { next(LS); } while (LS->current != '\n' && LS->current != EOZ);
287         luaL_resetbuffer();
288         continue;
289 
290       case '[':
291         save_and_next(LS);
292         if (LS->current != '[') return '[';
293         else {
294           save_and_next(LS);  /* pass the second '[' */
295           return read_long_string(LS);
296         }
297 
298       case '=':
299         save_and_next(LS);
300         if (LS->current != '=') return '=';
301         else { save_and_next(LS); return EQ; }
302 
303       case '<':
304         save_and_next(LS);
305         if (LS->current != '=') return '<';
306         else { save_and_next(LS); return LE; }
307 
308       case '>':
309         save_and_next(LS);
310         if (LS->current != '=') return '>';
311         else { save_and_next(LS); return GE; }
312 
313       case '~':
314         save_and_next(LS);
315         if (LS->current != '=') return '~';
316         else { save_and_next(LS); return NE; }
317 
318       case '"':
319       case '\'': {
320         int32 del = LS->current;
321         save_and_next(LS);
322         while (LS->current != del) {
323           switch (LS->current) {
324             case EOZ:
325             case '\n':
326               luaX_error(LS, "unfinished string");
327               return EOS;  /* to avoid warnings */
328             case '\\':
329               next(LS);  /* do not save the '\' */
330               switch (LS->current) {
331                 case 'a': save('\a'); next(LS); break;
332                 case 'b': save('\b'); next(LS); break;
333                 case 'f': save('\f'); next(LS); break;
334                 case 'n': save('\n'); next(LS); break;
335                 case 'r': save('\r'); next(LS); break;
336                 case 't': save('\t'); next(LS); break;
337                 case 'v': save('\v'); next(LS); break;
338                 case '\n': save('\n'); inclinenumber(LS); break;
339                 default : {
340                   if (isdigit(LS->current)) {
341                     int32 c = 0;
342                     int32 i = 0;
343                     do {
344                       c = 10*c + (LS->current-'0');
345                       next(LS);
346                     } while (++i<3 && isdigit(LS->current));
347                     if (c >= 256)
348                       luaX_error(LS, "escape sequence too large");
349                     save(c);
350                   }
351                   else {  /* handles \, ", ', and ? */
352                     save(LS->current);
353                     next(LS);
354                   }
355                   break;
356                 }
357               }
358               break;
359             default:
360               save_and_next(LS);
361           }
362         }
363         save_and_next(LS);  /* skip delimiter */
364         LS->seminfo.ts = luaS_newlstr(L->Mbuffbase+1,
365                                 L->Mbuffnext-(L->Mbuffbase-L->Mbuffer)-2);
366         return STRING;
367       }
368 
369       case '.':
370         save_and_next(LS);
371         if (LS->current == '.')
372         {
373           save_and_next(LS);
374           if (LS->current == '.')
375           {
376             save_and_next(LS);
377             return DOTS;   /* ... */
378           }
379           else return CONC;   /* .. */
380         }
381         else if (!isdigit(LS->current)) return '.';
382         /* LS->current is a digit: goes through to number */
383 	a=0.0;
384         goto fraction;
385 
386       case '0': case '1': case '2': case '3': case '4':
387       case '5': case '6': case '7': case '8': case '9':
388 	a=0.0;
389         do {
390           a = 10.0*a + (LS->current-'0');
391           save_and_next(LS);
392         } while (isdigit(LS->current));
393         if (LS->current == '.') {
394           save_and_next(LS);
395           if (LS->current == '.') {
396             save('.');
397             luaX_error(LS,
398               "ambiguous syntax (decimal point x string concatenation)");
399           }
400         }
401       fraction:
402 	{ double da=0.1;
403 	  while (isdigit(LS->current))
404 	  {
405             a += (LS->current-'0')*da;
406             da /= 10.0;
407             save_and_next(LS);
408           }
409           if (toupper(LS->current) == 'E') {
410 	    int32 e = 0;
411 	    int32 neg;
412 	    double ea;
413             save_and_next(LS);
414 	    neg = (LS->current=='-');
415             if (LS->current == '+' || LS->current == '-') save_and_next(LS);
416             if (!isdigit(LS->current))
417               luaX_error(LS, "invalid numeral format");
418             do {
419               e = 10*e + (LS->current-'0');
420               save_and_next(LS);
421             } while (isdigit(LS->current));
422 	    for (ea=neg?0.1:10.0; e>0; e>>=1)
423 	    {
424 	      if (e & 1) a *= ea;
425 	      ea *= ea;
426 	    }
427           }
428           LS->seminfo.r = (real)a;
429           return NUMBER;
430         }
431 
432       case EOZ:
433         if (LS->iflevel > 0)
434           luaX_error(LS, "input ends inside a $if");
435         return EOS;
436 
437       default:
438         if (LS->current != '_' && !isalpha(LS->current)) {
439           int32 c = LS->current;
440           if (iscntrl(c))
441             luaX_invalidchar(LS, c);
442           save_and_next(LS);
443           return c;
444         }
445         else {  /* identifier or reserved word */
446           TaggedString *ts;
447           do {
448             save_and_next(LS);
449           } while (isalnum(LS->current) || LS->current == '_');
450           save(0);
451           ts = luaS_new(L->Mbuffbase);
452           if (ts->head.marked >= 'A')
453             return ts->head.marked;  /* reserved word */
454           LS->seminfo.ts = ts;
455           return NAME;
456         }
457     }
458   }
459 }
460 
461