1 /*
2  * Copyright (c) 2001-2002 Secure Software, Inc
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17  *
18  */
19 
20 %{
/*
 * Scanner prologue: headers, position-tracking globals, forward declarations
 * of the helper routines, and a custom YY_INPUT reader.
 */
21 #include <string.h>
22 #include "tokens.h"
23 #include "engine.h"
24 
/* Running column position; advanced by count() and the gobble_* helpers and
 * set back to 0 whenever they consume a '\n'. */
25 int perllexreal_column = 0;
/* Column recorded by count() for the current token: perllexreal_column + 1
 * when that counter is non-zero at the time count() is entered. */
26 int perllex_column = 0;
/* Current line number, starting at 1; incremented wherever a line break is
 * consumed. */
27 int perllex_lineno = 1;
28 
29 char *yyperlcomment = NULL; /* for consistency, not used */
30 
/* Helper routines defined after the rules section. */
31 static void no_match(void);
32 static void gobble_pod(void);
33 static void count(void);
34 static void gobble_string(char c);
35 
/*
 * YY_INPUT: fill buf with up to max_size bytes read from yyin via fread()
 * and store the byte count in result.  A zero-byte read with the stream
 * error flag set is reported through YY_FATAL_ERROR.  Otherwise the bytes
 * just read are filtered in place:
 *   - every '\r' before the last byte becomes a space;
 *   - a backslash directly followed by '\n' is collapsed into a single '\r'
 *     (the tail of the buffer is shifted down by one and result shrinks);
 *   - a '\r' in the last position becomes a space;
 *   - a backslash in the last position is left out of this read and the file
 *     position is moved back one byte, so the next read sees it again
 *     together with whatever follows it.
 * NOTE(review): memmove() copies end - c bytes starting at c + 2, which
 * reaches one byte beyond end; confirm the buffer always has that slack.
 * NOTE(review): when the collapsed pair was the last two bytes of the read,
 * the '\r' just written ends up at end and is then turned into a space by
 * the trailing check; confirm that is intended.
 */
36 #define YY_INPUT(buf, result, max_size)                                     \
37     if (((result = fread(buf, 1, max_size, yyin)) == 0) && ferror(yyin)) { \
38         YY_FATAL_ERROR("input in flex scanner failed");                     \
39     } else {                                                                  \
40         if (result) {                                                           \
41             char *c, *end = (buf) + result - 1;                                 \
42             for (c = (buf);  c < end;  c++) {                                   \
43                 if (*c == '\r') *c = ' ';                                       \
44                 if (*c == '\\' && *(c + 1) == '\n') {                           \
45                     memmove(c + 1, c + 2, end - c);                             \
46                     result--;                                                   \
47                     end--;                                                      \
48                     *c = '\r';                                                  \
49                 }                                                               \
50             }                                                                   \
51             if (*end == '\r') *end = ' ';                                       \
52             if (*end == '\\') {                                                 \
53                 result--;                                                       \
54                 fseek(yyin, -1, SEEK_CUR);                                      \
55             }                                                                   \
56         }                                                                       \
57     }
58 %}
59 
60 %%
61 
    /* Line break ('\n' or '\r'): update the column counters, bump the line
     * number and report TOKEN_NEWLINE. */
62 [\n\r]			{ count();perllex_lineno++; return TOKEN_NEWLINE; }
    /* Blank, tab, vertical tab or form feed: counted for column tracking,
     * no token is returned. */
63 [ \t\v\f]		{ count(); }
    /* A line holding only a '#' comment (optionally preceded by blanks) is
     * consumed together with its newline; no token is returned. */
64 ^[ \r\t]*"#".*\n	{ count();perllex_lineno++; }
    /* A '#' comment elsewhere: consumed up to, but not including, the
     * newline. */
65 "#".*			{ count(); }
66 
67 
68 
69 %{
70 
71 
72 /*
73 xor			{count(); return XOR; }
74 write			{count(); return WRITE; }
75 while			{count(); return WHILE; }
76 warn			{count(); return WARN; }
77 wantarray		{count(); return WANTARRAY; }
78 waitpid			{count(); return WAITPID; }
79 wait			{count(); return WAIT; }
80 vec			{count(); return VEC; }
81 values			{count(); return VALUES; }
82 utime			{count(); return UTIME; }
83 use			{count(); return USE; }
84 until			{count(); return UNTIL; }
85 untie			{count(); return UNTIE; }
86 unshift			{count(); return UNSHIFT; }
87 unpack			{count(); return UNPACK; }
88 unlink			{count(); return UNLINK; }
89 unless			{count(); return UNLESS; }
90 undef			{count(); return UNDEF; }
91 umask			{count(); return UMASK; }
92 ucfirst			{count(); return UCFIRST; }
93 uc			{count(); return UC; }
94 truncate		{count(); return TRUNCATE; }
95 tr			{count(); return TR; }
96 times			{count(); return TIMES; }
97 time			{count(); return TIME; }
98 tied			{count(); return TIED; }
99 tie			{count(); return TIE; }
100 telldir			{count(); return TELLDIR; }
101 tell			{count(); return TELL; }
102 syswrite		{count(); return SYSWRITE; }
103 system			{count(); return SYSTEM; }
104 sysseek			{count(); return SYSSEEK; }
105 sysread			{count(); return SYSREAD; }
106 sysopen			{count(); return SYSOPEN; }
107 syscall			{count(); return SYSCALL; }
108 symlink			{count(); return SYMLINK; }
109 substr			{count(); return SUBSTR; }
110 sub			{count(); return SUB; }
111 study			{count(); return STUDY; }
112 stat			{count(); return STAT; }
113 srand			{count(); return SRAND; }
114 sqrt			{count(); return SQRT; }
115 sprintf			{count(); return SPRINTF; }
116 split			{count(); return SPLIT; }
117 splice			{count(); return SPLICE; }
118 sort			{count(); return SORT; }
119 socketpair		{count(); return SOCKETPAIR; }
120 socket			{count(); return SOCKET; }
121 sleep			{count(); return SLEEP; }
122 sin			{count(); return SIN; }
123 shutdown		{count(); return SHUTDOWN; }
124 shmwrite		{count(); return SHMWRITE; }
125 shmread			{count(); return SHMREAD; }
126 shmget			{count(); return SHMGET; }
127 shmctl			{count(); return SHMCTL; }
128 shift			{count(); return SHIFT; }
129 setsockopt		{count(); return SETSOCKOPT; }
130 setservent		{count(); return SETSERVENT; }
131 setpwent		{count(); return SETPWENT; }
132 setprotoent		{count(); return SETPROTOENT; }
133 setpriority		{count(); return SETPRIORITY; }
134 setpgrp			{count(); return SETPGRP; }
135 setnetent		{count(); return SETNETENT; }
136 sethostent		{count(); return SETHOSTENT; }
137 setgrent		{count(); return SETGRENT; }
138 send			{count(); return SEND; }
139 semop			{count(); return SEMOP; }
140 semget			{count(); return SEMGET; }
141 semctl			{count(); return SEMCTL; }
142 select			{count(); return SELECT; }
143 seekdir			{count(); return SEEKDIR; }
144 seek			{count(); return SEEK; }
145 scalar			{count(); return SCALAR; }
146 rmdir			{count(); return RMDIR; }
147 rindex			{count(); return RINDEX; }
148 rewinddir		{count(); return REWINDDIR; }
149 reverse			{count(); return REVERSE; }
150 return			{count(); return RETURN; }
151 reset			{count(); return RESET; }
152 require			{count(); return REQUIRE; }
153 rename			{count(); return RENAME; }
154 ref			{count(); return REF; }
155 redo			{count(); return REDO; }
156 recv			{count(); return RECV; }
157 readpipe		{count(); return READPIPE; }
158 readlink		{count(); return READLINK; }
159 readline		{count(); return READLINE; }
160 readdir			{count(); return READDIR; }
161 read			{count(); return READ; }
162 rand			{count(); return RAND; }
163 qx			{count(); return QX; }
164 qw			{count(); return QW; }
165 quotemeta		{count(); return QUOTEMETA; }
166 qr			{count(); return QR; }
167 qq			{count(); return QQ; }
168 push			{count(); return PUSH; }
169 prototype		{count(); return PROTOTYPE; }
170 printf			{count(); return PRINTF; }
171 print			{count(); return PRINT; }
172 pos			{count(); return POS; }
173 pop			{count(); return POP; }
174 pipe			{count(); return PIPE; }
175 package			{count(); return PACKAGE; }
176 pack			{count(); return PACK; }
177 our			{count(); return OUR; }
178 ord			{count(); return ORD; }
179 or			{count(); return OR; }
180 opendir			{count(); return OPENDIR; }
181 open			{count(); return OPEN; }
182 oct			{count(); return OCT; }
183 not			{count(); return NOT; }
184 no			{count(); return NO; }
185 next			{count(); return NEXT; }
186 ne			{count(); return NE; }
187 my			{count(); return MY; }
188 msgsnd			{count(); return MSGSND; }
189 msgrcv			{count(); return MSGRCV; }
190 msgget			{count(); return MSGGET; }
191 msgctl			{count(); return MSGCTL; }
192 mkdir			{count(); return MKDIR; }
193 map			{count(); return MAP; }
194 lt			{count(); return LT; }
195 lstat			{count(); return LSTAT; }
196 log			{count(); return LOG; }
197 lock			{count(); return LOCK; }
198 localtime		{count(); return LOCALTIME; }
199 local			{count(); return LOCAL; }
200 listen			{count(); return LISTEN; }
201 link			{count(); return LINK; }
202 length			{count(); return LENGTH; }
203 le			{count(); return LE; }
204 lcfirst			{count(); return LCFIRST; }
205 lc			{count(); return LC; }
206 last			{count(); return LAST; }
207 kill			{count(); return KILL; }
208 keys			{count(); return KEYS; }
209 join			{count(); return JOIN; }
210 ioctl			{count(); return IOCTL; }
211 int			{count(); return INT; }
212 index			{count(); return INDEX; }
213 if			{count(); return IF; }
214 hex			{count(); return HEX; }
215 gt			{count(); return GT; }
216 grep			{count(); return GREP; }
217 goto			{count(); return GOTO; }
218 gmtime			{count(); return GMTIME; }
219 glob			{count(); return GLOB; }
220 getsockopt		{count(); return GETSOCKOPT; }
221 getsockname		{count(); return GETSOCKNAME; }
222 getservent		{count(); return GETSERVENT; }
223 getservbyport		{count(); return GETSERVBYPORT; }
224 getservbyname		{count(); return GETSERVBYNAME; }
225 getpwuid		{count(); return GETPWUID; }
226 getpwnam		{count(); return GETPWNAM; }
227 getpwent		{count(); return GETPWENT; }
228 getprotoent		{count(); return GETPROTOENT; }
229 getprotobynumber	{count(); return GETPROTOBYNUMBER; }
230 getprotobyname		{count(); return GETPROTOBYNAME; }
231 getpriority		{count(); return GETPRIORITY; }
232 getppid			{count(); return GETPPID; }
233 getpgrp			{count(); return GETPGRP; }
234 getpeername		{count(); return GETPEERNAME; }
235 getnetent		{count(); return GETNETENT; }
236 getnetbyname		{count(); return GETNETBYNAME; }
237 getnetbyaddr		{count(); return GETNETBYADDR; }
238 getlogin		{count(); return GETLOGIN; }
239 gethostent		{count(); return GETHOSTENT; }
240 gethostbyname		{count(); return GETHOSTBYNAME; }
241 gethostbyaddr		{count(); return GETHOSTBYADDR; }
242 getgrnam		{count(); return GETGRNAM; }
243 getgrgid		{count(); return GETGRGID; }
244 getgrent		{count(); return GETGRENT; }
245 getc			{count(); return GETC; }
246 ge			{count(); return GE; }
247 formline		{count(); return FORMLINE; }
248 format			{count(); return FORMAT; }
249 fork			{count(); return FORK; }
250 foreach			{count(); return FOREACH; }
251 for			{count(); return FOR; }
252 flock			{count(); return FLOCK; }
253 fileno			{count(); return FILENO; }
254 fcntl			{count(); return FCNTL; }
255 exp			{count(); return EXP; }
256 exit			{count(); return EXIT; }
257 exists			{count(); return EXISTS; }
258 exec			{count(); return EXEC; }
259 eval			{count(); return EVAL; }
260 eq			{count(); return EQ; }
261 eof			{count(); return EOF; }
262 endservent		{count(); return ENDSERVENT; }
263 endpwent		{count(); return ENDPWENT; }
264 endprotoent		{count(); return ENDPROTOENT; }
265 endnetent		{count(); return ENDNETENT; }
266 endhostent		{count(); return ENDHOSTENT; }
267 endgrent		{count(); return ENDGRENT; }
268 elsif			{count(); return ELSIF; }
269 else			{count(); return ELSE; }
270 each			{count(); return EACH; }
271 dump			{count(); return DUMP; }
272 do			{count(); return DO; }
273 die			{count(); return DIE; }
274 delete			{count(); return DELETE; }
275 defined			{count(); return DEFINED; }
276 dbmopen			{count(); return DBMOPEN; }
277 dbmclose		{count(); return DBMCLOSE; }
278 crypt			{count(); return CRYPT; }
279 cos			{count(); return COS; }
280 continue		{count(); return CONTINUE; }
281 connect			{count(); return CONNECT; }
282 cmp			{count(); return CMP; }
283 closedir		{count(); return CLOSEDIR; }
284 close			{count(); return CLOSE; }
285 chroot			{count(); return CHROOT; }
286 chr			{count(); return CHR; }
287 chown			{count(); return CHOWN; }
288 chop			{count(); return CHOP; }
289 chomp			{count(); return CHOMP; }
290 chmod			{count(); return CHMOD; }
291 chdir			{count(); return CHDIR; }
292 caller			{count(); return CALLER; }
293 bless			{count(); return BLESS; }
294 binmode			{count(); return BINMODE; }
295 bind			{count(); return BIND; }
296 atan2			{count(); return ATAN2; }
297 and			{count(); return AND; }
298 alarm			{count(); return ALARM; }
299 accept			{count(); return ACCEPT; }
300 abs			{count(); return ABS; }
301 */
302 %}
303 
    /* Double-underscore special words and the bare word NULL: each one is
     * counted and reported with its own dedicated token. */
304 __PACKAGE__ 		{count(); return TOKEN_PACKAGE; }
305 __LINE__                {count(); return TOKEN_LINE; }
306 __FILE__                {count(); return TOKEN_FILE; }
307 __END__                 {count(); return TOKEN_END; }
308 __DATA__                {count(); return TOKEN_DATA; }
309 NULL                    {count(); return TOKEN_NULL; }
310 
311 
    /* POD directives: after the directive itself is counted, gobble_pod()
     * consumes the documentation text that follows and the whole span is
     * reported as a single TOKEN_PERLPOD.
     * NOTE(review): these patterns are not anchored to the start of a line;
     * confirm that is intended. */
312 "=head1"		{ count();gobble_pod(); return TOKEN_PERLPOD; }
313 "=head2"		 { count();gobble_pod(); return TOKEN_PERLPOD; }
314 "=back"			 { count();gobble_pod(); return TOKEN_PERLPOD; }
315 "=pod"			{ count();gobble_pod(); return TOKEN_PERLPOD; }
316 "=item"			{ count();gobble_pod();  return TOKEN_PERLPOD; }
    /* An opening quote character: gobble_string() consumes input through the
     * matching closing quote that is not escaped by a backslash, and the
     * literal is reported with a token that depends on the quote style. */
317 ("'") 			{ count();gobble_string('\''); return TOKEN_QSTRING_LITERAL; }
318 ("\"")                  { count();gobble_string('"'); return TOKEN_QQSTRING_LITERAL; }
319 ("`")                  { count();gobble_string('`'); return TOKEN_BACKTICK_LITERAL; }
320 
321 
    /* Regexp literal: a '/' through another '/' on the same line.
     * NOTE(review): '.*' is greedy, so the match extends to the last '/' on
     * the line; an expression with two divisions on one line would be
     * reported as a regexp - confirm this is acceptable. */
322 "/".*"/"		{count(); return TOKEN_REGEXP; }
    /* Integer constants: hexadecimal, leading-zero (octal) and decimal.
     * NOTE(review): the octal pattern also accepts the digits 8 and 9. */
323 0[xX][a-fA-F0-9]+     {count(); return TOKEN_HEX_CONST; }
324 0[0-9]+               {count(); return TOKEN_OCT_CONST; }
325 [0-9]+                {count(); return TOKEN_DEC_CONST; }
    /* Floating-point constants: digits with an exponent, digits after a
     * point (integer part optional), and digits before a point (fraction
     * optional); the last two allow an optional exponent. */
326 [0-9]+[Ee][+-]?[0-9]+              {count(); return TOKEN_FLOAT_CONST; }
327 [0-9]*"."[0-9]+([Ee][+-]?[0-9]+)?  {count(); return TOKEN_FLOAT_CONST; }
328 [0-9]+"."[0-9]*([Ee][+-]?[0-9]+)?  {count(); return TOKEN_FLOAT_CONST; }
329 
330 
    /* Three-character shift-assignment operators. */
331 ">>="			{count(); return TOKEN_RIGHT_ASSIGN; }
332 "<<="		        {count(); return TOKEN_LEFT_ASSIGN; }
333 
    /* Compound assignment operators (arithmetic, concatenation, repetition). */
334 "**="			{count(); return TOKEN_EXP_ASSIGN; }
335 "+="			{count(); return TOKEN_ADD_ASSIGN; }
336 "-="			{count(); return TOKEN_SUB_ASSIGN; }
337 "*="			{count(); return TOKEN_MUL_ASSIGN; }
338 "/="			{count(); return TOKEN_DIV_ASSIGN; }
339 "%="			{count(); return TOKEN_MOD_ASSIGN; }
340 ".="                    {count(); return TOKEN_CONCAT_ASSIGN; }
341 "x="                    {count(); return TOKEN_REPEAT_ASSIGN; }
342 
    /* Bitwise compound assignments, then two-character operators.
     * NOTE(review): "<>" is reported as TOKEN_NE_OP, the same token as "!=";
     * in Perl "<>" is the input (diamond) operator - confirm intent. */
343 "&="			{count(); return TOKEN_AND_ASSIGN; }
344 "|="			{count(); return TOKEN_OR_ASSIGN; }
345 "^="			{count(); return TOKEN_XOR_ASSIGN; }
346 ">>"                    {count(); return TOKEN_RIGHT_OP; }
347 "<<"                    {count(); return TOKEN_LEFT_OP; }
348 "**"                    {count(); return TOKEN_EXP_OP; }
349 "<="                    {count(); return TOKEN_LE_OP; }
350 ">="                    {count(); return TOKEN_GE_OP; }
351 "=="                    {count(); return TOKEN_EQ_OP; }
352 "!="                    {count(); return TOKEN_NE_OP; }
353 "<>"                    {count(); return TOKEN_NE_OP; }
    /* Single-character operators are returned as their own character code. */
354 "!"			{count(); return '!'; }
355 "?"			{count(); return '?'; }
356 "&"                     {count(); return '&'; }
357 "~"                     {count(); return '~'; }
358 "-"                     {count(); return '-'; }
359 "+"                     {count(); return '+'; }
360 "*"                     {count(); return '*'; }
361 "/"                     {count(); return '/'; }
362 "%"                     {count(); return '%'; }
363 "<"                     {count(); return '<'; }
364 ">"                     {count(); return '>'; }
365 "^"                     {count(); return '^'; }
366 "|"                     {count(); return '|'; }
367 
    /* Brackets and punctuation are returned as their own character code. */
368 "("			{count(); return '('; }
369 ")"			{count(); return ')'; }
370 "["			{count(); return '['; }
371 "]"			{count(); return ']'; }
372 "{"			{count(); return '{'; }
373 "}"			{count(); return '}'; }
374 ","			{count(); return ','; }
375 ":"			{count(); return ':'; }
376 "."			{count(); return '.'; }
377 "="			{count(); return '='; }
378 ";"			{count(); return ';'; }
    /* The single letters x, y, s, q and m are returned as character codes.
     * They only win when the identifier rule further down cannot match a
     * longer word; on a one-character tie the rule listed first is chosen. */
379 "x"                     {count(); return 'x'; }
380 "y"                     {count(); return 'y'; }
381 "s"                     {count(); return 's'; }
382 "q"                     {count(); return 'q'; }
383 "m"                     {count(); return 'm'; }
    /* A lone backslash. */
384 "\\"			{count(); return '\\';}
385 
386 
    /* '$' followed by a single punctuation character: reported as a scalar.
     * NOTE(review): inside the class, "=-~" forms a range covering every
     * ASCII character from '=' to '~' (letters included) - confirm intent. */
387 "$"[_&'`+*./|,\;#%=-~^:?!@"$<>)([\]]  {count();return TOKEN_ID_SCALAR;}
    /* NOTE(review): as written, "[^][a-zA-Z]" reads as a negated class
     * (any character except ']', '[' and letters) rather than '^' followed
     * by a letter - confirm which was meant. */
388 "$"[^][a-zA-Z]			{count(); return TOKEN_ID_SCALAR; }
    /* '$' followed by zero or more digits (this also matches a bare '$'). */
389 "$"[0-9]*			{count(); return TOKEN_ID_SCALAR; }
    /* '$' plus a name starting with a letter, '_' or '$'; later characters
     * may also be digits or a single quote. */
390 "$"[a-zA-Z_$][a-zA-Z0-9_']*	{count(); return TOKEN_ID_SCALAR; }
    /* Array: optional leading backslashes, '@', optional blanks, optional
     * opening braces, name characters, optional closing braces. */
391 "\\"*"@"[ \t]*"{"*["$a-zA-Z_]*[a-zA-Z0-9_'$]*"}"*	{count(); return TOKEN_ID_ARRAY; }
    /* Hash: '%' plus a name. */
392 "%"[a-zA-Z_$][a-zA-Z0-9_']*	{count(); return TOKEN_ID_HASHT; }
    /* Any other bare word (may contain digits, '_', single quotes, ':'). */
393 [a-zA-Z_][a-zA-Z0-9_':]*	        {count(); return TOKEN_ID_HANDLE; }
394 
395 
    /* Catch-all: any character not matched above is counted and reported on
     * stderr by no_match(); no token is returned for it. */
396 .                       { count();no_match(); }
397 
398 %%
399 
/*
 * End-of-input hook called by the generated scanner.
 *
 * Always returns 1, which tells the scanner that there is no further input
 * to switch to once the current stream is exhausted.
 */
int
yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}
404 
405 
406 static void
407 count()
408 {
409         int i;
410 
411         if (perllexreal_column != 0)
412         {
413           perllex_column = perllexreal_column+1;
414         }
415         for (i = 0; yytext[i] != '\0'; i++)
416         {
417                 if (yytext[i] == '\n')
418                 {
419                         perllexreal_column = 0;
420                         perllex_column = 0;
421                 } else if (yytext[i] == '\t') {
422                         perllexreal_column += 8 - (perllexreal_column % 8);
423                 }else {
424                         perllexreal_column++;
425                 }
426         }
427 }
428 
429 static
430 void gobble_string(char which)
431 {
432 
433   int bslash = 0;
434   char c;
435   while ((c = input()) && c != -1)
436   {
437 
438     perllexreal_column++;
439     switch(c)  {
440 
441       case '\\':
442                  if (!bslash)
443                    bslash = 1;
444                  else
445                    bslash = 0;
446                  break;
447       case '\n':
448                  perllexreal_column = 0;
449                  perllex_column = 0;
450                  perllex_lineno++;
451                  bslash = 0;
452                  break;
453       default:
454                  if (c == which && !bslash)  {
455                    return;
456                  }
457                  bslash = 0;
458                  break;
459     }
460   }
461 }
462 
463 
464 
465 static
466 void gobble_pod(void)
467 {
468 
469   int bline = 0;
470   int cstate = 0;
471   char c;
472   while ((c = input()) && c != -1)
473   {
474     perllexreal_column++;
475     switch(c)  {
476 
477       case '=':
478                 if (!bline)
479                   cstate = 1;
480                 break;
481       case '\n':
482                  perllexreal_column = 0;
483                  perllex_column = 0;
484                  perllex_lineno++;
485 
486                  bline = 0;
487                  if (cstate == 4)
488                    return;
489                  break;
490       case 'c':
491                  if (cstate == 1)
492                    cstate = 2;
493                  break;
494       case 'u':  if (cstate == 2)
495                    cstate = 3;
496                  break;
497       case 't':  if (cstate == 3)
498                    cstate = 4;
499                  break;
500       case ' ':
501       case '\t':
502                  if (cstate == 4)
503                    return;
504                  break;
505       default:
506                  bline++;
507                  cstate = 0;
508                  break;
509     }
510   }
511 }
512 
513 
514 
515 
516 static
517 void no_match(void)
518 {
519     fprintf(stderr, "%s:%d: warning: bad token `%s'\n", current_file, perllex_lineno, yytext);
520 }
521 
522 
523 
524 
525 
526 
527 
528 
529