1 /* -*- Mode: C -*-
2  * GObject introspection: C lexer
3  *
4  * Copyright (c) 1997 Sandro Sigala  <ssigala@globalnet.it>
5  * Copyright (c) 2007-2008 Jürg Billeter  <j@bitron.ch>
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 %{
31 #include <ctype.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 
35 #include "scanner.h"
36 #include "scannerparser.h"
37 #include "grealpath.h"
38 
39 int lineno;
40 char linebuf[2000];
41 
42 #undef YY_BUF_SIZE
43 #define YY_BUF_SIZE 1048576
44 
45 extern int yylex (GIGenerator *igenerator);
46 #define YY_DECL int yylex (GIGenerator *igenerator)
47 #define yywrap() (1)
48 #define YY_SKIP_YYWRAP
49 static void parse_comment (GIGenerator *igenerator);
50 static void process_linemarks (GIGenerator *igenerator, gboolean has_line);
51 static int check_identifier (GIGenerator *igenerator, const char *);
52 static int parse_ignored_macro (void);
53 static void print_error (GIGenerator *igenerator);
54 %}
55 
%option nounput

/* Integer suffix: 'u' optionally followed by up to two 'l', or up to two
 * 'l' optionally followed by 'u' (any letter case). */
intsuffix	([uU][lL]?[lL]?)|([lL][lL]?[uU]?)
/* Decimal fraction: digits after the point, or digits followed by a
 * bare point. */
fracconst	([0-9]*\.[0-9]+)|([0-9]+\.)
/* Exponent with an optional sign. */
exppart		[eE][-+]?[0-9]+
/* Floating-constant suffix. */
floatsuffix	[fFlL]
/* One element of a character constant: any character other than a
 * backslash or single quote, or a backslash followed by one character. */
chartext	([^\\\'])|(\\.)
/* Same as chartext, but for string literals (double quote excluded). */
stringtext	([^\\\"])|(\\.)
64 
65 %%
66 
\n.*	{
		/* Remember the text of the line that starts after this
		 * newline.  strncpy() does not terminate the buffer when
		 * the line is too long, so terminate it by hand. */
		strncpy (linebuf, yytext + 1, sizeof (linebuf));
		linebuf[sizeof (linebuf) - 1] = '\0';
		/* Keep only the newline; the rest of the line is handed
		 * back to be scanned normally. */
		yyless (1);
		lineno++;
	}
"\\\n"	{
		/* A backslash-newline still advances the line counter. */
		lineno++;
	}
74 
[\t\f\v\r ]+	{ /* Whitespace other than newlines is dropped. */ }

"/*"	{
		/* The helper consumes the rest of the block comment. */
		parse_comment (igenerator);
	}
"//".*	{ /* C++ style comments are dropped up to the end of the line. */ }

"#define "[a-zA-Z_][a-zA-Z_0-9]*"("	{
		/* Hand the '(' back so that it is scanned as its own token. */
		yyless (yyleng - 1);
		return FUNCTION_MACRO;
	}
"#define "[a-zA-Z_][a-zA-Z_0-9]*	{
		return OBJECT_MACRO;
	}

"# "[0-9]+" ".*"\n"	{
		/* Line marker in the short "# N ..." form. */
		process_linemarks (igenerator, FALSE);
	}
"#line "[0-9]+" ".*"\n"	{
		/* Line marker in the "#line N ..." form. */
		process_linemarks (igenerator, TRUE);
	}
"#"	{ /* Any other '#' is skipped. */ }
"{"	|
"<%"	{ /* "<%" is accepted as another spelling of '{'. */ return '{'; }
"}"	|
"%>"	{ /* "%>" is accepted as another spelling of '}'. */ return '}'; }
"["	|
"<:"	{ /* "<:" is accepted as another spelling of '['. */ return '['; }
"]"	|
":>"	{ /* ":>" is accepted as another spelling of ']'. */ return ']'; }
"("	{ return '('; }
")"	{ return ')'; }
";"	{ return ';'; }
":"	{ return ':'; }
"..."	{ return ELLIPSIS; }
"?"	{ return '?'; }
"."	{ return '.'; }
"+"	{ return '+'; }
"-"	{ return '-'; }
"*"	{ return '*'; }
"/"	{ return '/'; }
"%"	{ return '%'; }
"^"	{ return '^'; }
"&"	{ return '&'; }
"|"	{ return '|'; }
"~"	{ return '~'; }
"!"	{ return '!'; }
"="	{ return '='; }
"<"	{ return '<'; }
">"	{ return '>'; }
"+="	{ return ADDEQ; }
"-="	{ return SUBEQ; }
"*="	{ return MULEQ; }
"/="	{ return DIVEQ; }
"%="	{ return MODEQ; }
"^="	{ return XOREQ; }
"&="	{ return ANDEQ; }
"|="	{ return OREQ; }
"<<"	{ return SL; }
">>"	{ return SR; }
"<<="	{ return SLEQ; }
">>="	{ return SREQ; }
"=="	{ return EQ; }
"!="	{ return NOTEQ; }
"<="	{ return LTEQ; }
">="	{ return GTEQ; }
"&&"	{ return ANDAND; }
"||"	{ return OROR; }
"++"	{ return PLUSPLUS; }
"--"	{ return MINUSMINUS; }
","	{ return ','; }
"->"	{ return ARROW; }
136 
"__asm"[\t\f\v\r ]+"volatile"	|
"__asm__"[\t\f\v\r ]+"volatile"	|
"__asm__"[\t\f\v\r ]+"__volatile__"	|
"__asm"	|
"__asm__"	|
"__attribute__"	|
"__attribute"	{
		/* Swallow the parenthesised arguments.  When the helper
		 * reports failure, let the next matching rule have the text. */
		if (!parse_ignored_macro ())
			REJECT;
	}
"__const"	{ return CONST; }
"__extension__"	{ /* Ignore */ }
"__inline__"	|
"__inline"	{ return INLINE; }
"__nonnull"	{
		if (!parse_ignored_macro ())
			REJECT;
	}
"_Nonnull"	|
"_Nullable"	|
"_Null_unspecified"	|
"_Noreturn"	{ /* Ignore */ }
"__signed"	|
"__signed__"	{ return SIGNED; }
"__restrict"	|
"__restrict__"	{ return RESTRICT; }
"__typeof"	{
		if (!parse_ignored_macro ())
			REJECT;
	}
"__volatile"	|
"__volatile__"	{ return VOLATILE; }
"_Bool"	{ return BOOL; }
"typedef char __static_assert_t".*"\n"	{
		/* Ignore the whole line.  The pattern consumes the trailing
		 * newline itself, so the "\n.*" rule never sees it and the
		 * line counter has to be advanced here. */
		++lineno;
	}
"__cdecl"	|
"__declspec(deprecated(".*"))"	|
"__declspec"[\t ]*"("[a-z\t ]+")"	|
"__stdcall"	|
"__w64"	{ /* Ignore */ }
"__int64"	{ return INT; }
"_Float128"	{ return FLOAT; }
169 
170 
[a-zA-Z_][a-zA-Z_0-9]*	{
		/* In macro-scan mode the keyword rules below are bypassed
		 * and the word is classified by check_identifier().
		 * Otherwise the text is handed to the next matching rule. */
		if (!igenerator->macro_scan)
			REJECT;
		return check_identifier (igenerator, yytext);
	}

"asm"	{
		if (!parse_ignored_macro ())
			REJECT;
	}

"auto"	{ return AUTO; }
"bool"	{ return BOOL; }
"break"	{ return BREAK; }
"case"	{ return CASE; }
"char"	{ return CHAR; }
"const"	{ return CONST; }
"continue"	{ return CONTINUE; }
"default"	{ return DEFAULT; }
"do"	{ return DO; }
"double"	{ return DOUBLE; }
"else"	{ return ELSE; }
"enum"	{ return ENUM; }
"extern"	{ return EXTERN; }
"float"	{ return FLOAT; }
"for"	{ return FOR; }
"goto"	{ return GOTO; }
"if"	{ return IF; }
"inline"	{ return INLINE; }
"int"	|
"__uint128_t"	|
"__int128_t"	|
"__uint128"	|
"__int128"	{ return INT; }
"long"	{ return LONG; }
"register"	{ return REGISTER; }
"restrict"	{ return RESTRICT; }
"return"	{ return RETURN; }
"short"	{ return SHORT; }
"signed"	{ return SIGNED; }
"sizeof"	{ return SIZEOF; }
"static"	{ return STATIC; }
"struct"	{ return STRUCT; }
"switch"	{ return SWITCH; }
"typedef"	{ return TYPEDEF; }
"union"	{ return UNION; }
"unsigned"	{ return UNSIGNED; }
"void"	{ return VOID; }
"volatile"	{ return VOLATILE; }
"while"	{ return WHILE; }

[a-zA-Z_][a-zA-Z_0-9]*	{
		/* Any other word is a typedef name or a plain identifier. */
		return check_identifier (igenerator, yytext);
	}
216 
"0"[xX][0-9a-fA-F]+{intsuffix}?	|
"0"[0-7]+{intsuffix}?	|
[0-9]+{intsuffix}?	{
		/* Hexadecimal, octal and decimal constants share one token. */
		return INTEGER;
	}

{fracconst}{exppart}?{floatsuffix}?	|
[0-9]+{exppart}{floatsuffix}?	{
		return FLOATING;
	}

"'"{chartext}*"'"	|
"L'"{chartext}*"'"	{
		/* Plain and L-prefixed character constants. */
		return CHARACTER;
	}

"\""{stringtext}*"\""	|
"L\""{stringtext}*"\""	{
		/* Plain and L-prefixed string literals. */
		return STRING;
	}

.	{
		/* Nothing above matched this character. */
		print_error (igenerator);
	}
231 
232 %%
233 
234 static void parse_gtkdoc (GIGenerator *igenerator, int *c1, int *c2)
235 {
236   gboolean isline = FALSE;
237   gchar line[256];
238   int i;
239   gchar **parts;
240   CDirective *directive;
241   char *name, *value;
242 
243   i = 0;
244   do {
245       *c1 = *c2;
246       if (*c1 == '\n')
247 	{
248 	  isline = TRUE;
249 	  break;
250 	}
251       if (i >= 256)
252 	break;
253       line[i++] = *c1;
254       *c2 = input();
255 
256   } while (*c2 != EOF && !(*c1 == '*' && *c2 == '/'));
257 
258   if (!isline)
259     return;
260 
261   line[i] = '\0';
262 
263   parts = g_strsplit (line, ": ", 2);
264 
265   if (g_strv_length (parts) == 2)
266     {
267       name = parts[0];
268       value = parts[1];
269     }
270   else /* parts == 1 */
271     {
272       name = parts[0];
273       value = NULL;
274     }
275 
276   directive = cdirective_new (name, value);
277   igenerator->directives = g_slist_prepend (igenerator->directives,
278 					    directive);
279 
280   g_strfreev (parts);
281 }
282 
283 static void parse_comment (GIGenerator *igenerator)
284 {
285   int c1, c2;
286 
287   c1 = input();
288   c2 = input();
289 
290   while (c2 != EOF && !(c1 == '*' && c2 == '/'))
291     {
292       if (c1 == '\n')
293 	++lineno;
294       c1 = c2;
295       c2 = input();
296 
297       if (c1 == ' ' && c2 == '@')
298 	{
299 	  c1 = c2;
300 	  c2 = input();
301 	  parse_gtkdoc (igenerator, &c1, &c2);
302 	}
303     }
304 }
305 
306 static int
307 check_identifier (GIGenerator *igenerator,
308 		  const char *s)
309 {
310 	/*
311 	 * This function checks if `s' is a type name or an
312 	 * identifier.
313 	 */
314 
315 	if (g_igenerator_is_typedef (igenerator, s)) {
316 		return TYPEDEF_NAME;
317 	} else if (strcmp (s, "__builtin_va_list") == 0) {
318 		return TYPEDEF_NAME;
319 	}
320 
321 	return IDENTIFIER;
322 }
323 
324 /*
325  * # linenum "filename" flags
326  *  See http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
327  **/
328 
329 static void
330 process_linemarks (GIGenerator *igenerator, gboolean has_line)
331 {
332 	char escaped_filename[1025];
333 	char *filename;
334 	char *real;
335 
336 	if (has_line)
337 		sscanf(yytext, "#line %d \"%1024[^\"]\"", &lineno, escaped_filename);
338 	else
339 		sscanf(yytext, "# %d \"%1024[^\"]\"", &lineno, escaped_filename);
340 
341 	filename = g_strcompress (escaped_filename);
342 
343         real = g_realpath (filename);
344         if (real)
345           {
346             g_free (filename);
347             filename = real;
348           }
349 
350         if (igenerator->current_filename)
351           g_free (igenerator->current_filename);
352 	igenerator->current_filename = filename;
353 }
354 
355 /*
356  * This parses a macro which is ignored, such as
357  * __attribute__((x)) or __asm__ (x)
358  */
359 static int
360 parse_ignored_macro (void)
361 {
362 	int c;
363 	int nest;
364 
365 	while ((c = input ()) != EOF && isspace (c))
366 		;
367 	if (c != '(')
368 		return FALSE;
369 
370 	nest = 0;
371 	while ((c = input ()) != EOF && (nest > 0 || c != ')')) {
372 		if (c == '(')
373 			nest++;
374 		else if (c == ')')
375 			nest--;
376 		else if (c == '"') {
377 			while ((c = input ()) != EOF && c != '"') {
378 				if (c == '\\')
379 					c = input ();
380 			}
381 		} else if (c == '\'') {
382 			c = input ();
383 			if (c == '\\')
384 				c = input ();
385 			else if (c == '\'')
386 				return FALSE;
387 			c = input ();
388 			if (c != '\'')
389 				return FALSE;
390 		} else if (c == '\n')
391 			lineno++;
392 	}
393 
394 	return TRUE;
395 }
396 
397 static void
398 print_error (GIGenerator *igenerator)
399 {
400   if (yytext[0]) {
401     fprintf(stderr, "%s:%d: unexpected character `%c'\n", igenerator->current_filename, lineno, yytext[0]);
402   }
403 }
404