1 /* grecs - Gray's Extensible Configuration System       -*- c -*- */
2 %option nounput
3 %option noinput
4 %top {
5 #ifdef HAVE_CONFIG_H
6 # include <config.h>
7 #endif
8 }
9 %{
10 /* grecs - Gray's Extensible Configuration System
11    Copyright (C) 2007-2016 Sergey Poznyakoff
12 
13    Grecs is free software; you can redistribute it and/or modify it
14    under the terms of the GNU General Public License as published by the
15    Free Software Foundation; either version 3 of the License, or (at your
16    option) any later version.
17 
18    Grecs is distributed in the hope that it will be useful,
19    but WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21    GNU General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License along
24    with Grecs. If not, see <http://www.gnu.org/licenses/>. */
25 
26 #include <grecs.h>
27 #include <grecs-gram.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <ctype.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 
34 #include <wordsplit.h>
35 
36 static char *multiline_delimiter;
37 static size_t multiline_delimiter_len;
38 static int multiline_unescape;         /* Unescape here-document contents */
39 static int (*char_to_strip)(char);    /* Strip matching characters of each
40 					  here-document line */
41 
42 struct grecs_locus_point grecs_current_locus_point;   /* Input file location */
/* Line correction.  Equal to the number of #line directives inserted into
   the input by the preprocessor instance.  The external preprocessor, if
   any, counts these as input lines, and therefore the line numbers in *its*
   #line directives are offset by the value of XLINES.

   Uff, running two preprocessors is confusing...
*/
50 static size_t xlines;
51 
52 static void multiline_begin(char *);
53 static void multiline_add(char *);
54 static char *multiline_strip_tabs(char *text);
55 static int ident(void);
56 static int isemptystr(int off);
57 static void qstring_locus_fixup(void);
58 
59 #define qstring() \
60 	((grecs_parser_options & GRECS_OPTION_QUOTED_STRING_CONCAT)	\
61 	 ? QSTRING : STRING)
62 
63 #undef YY_INPUT
64 #define YY_INPUT(buf,result,max_size)					\
65 	do {								\
66 		if (grecs_preprocessor)					\
67 			result = fread(buf, 1, max_size, yyin);		\
68 		else							\
69 			result = grecs_preproc_fill_buffer(buf, max_size); \
70 	} while(0)
71 
72 #define YY_USER_ACTION do {						\
73 		if (YYSTATE == 0) {					\
74 			yylloc.beg = grecs_current_locus_point;		\
75 			yylloc.beg.col++;				\
76 		}							\
77   		grecs_current_locus_point.col += yyleng;		\
78  		yylloc.end = grecs_current_locus_point;			\
79    	} while (0);
80 
81 %}
82 
83 %x COMMENT ML STR
84 
85 WS [ \t\f][ \t\f]*
86 ID [a-zA-Z_][a-zA-Z_0-9-]*
87 P [1-9][0-9]*
88 
89 %%
90          /* C-style comments */
91 "/*"         BEGIN(COMMENT);
92 <COMMENT>[^*\n]*        /* eat anything that's not a '*' */
93 <COMMENT>"*"+[^*/\n]*   /* eat up '*'s not followed by '/'s */
94 <COMMENT>\n          grecs_locus_point_advance_line(grecs_current_locus_point);
95 <COMMENT>"*"+"/"        BEGIN(INITIAL);
96          /* Line directive */
97 ^[ \t]*#[ \t]*{P}[ \t]+\".*\".*\n { grecs_parse_line_directive_cpp(yytext,
98 						    &yylloc,
99 						    &grecs_current_locus_point,
100 						    &xlines); }
101 ^[ \t]*#[ \t]*line[ \t].*\n       { grecs_parse_line_directive(yytext,
102 							       &yylloc,
103 							       &grecs_current_locus_point,
104 							       &xlines); }
105          /* End-of-line comments */
106 #.*\n     { grecs_locus_point_advance_line(grecs_current_locus_point); }
107 #.*     /* end-of-file comment */;
108 "//".*\n  { grecs_locus_point_advance_line(grecs_current_locus_point); }
109 "//".*    /* end-of-file comment */;
110         /* Identifiers */
111 <INITIAL>{ID}           return ident();
112          /* Strings */
113 [a-zA-Z0-9_\.\*/:@\[\]-]([a-zA-Z0-9_\./:@\[\]-][a-zA-Z0-9_\.\*/:@\[\]-]*)? {
114 	                   grecs_line_begin();
115 	                   grecs_line_add(yytext, yyleng);
116                            yylval.string = grecs_line_finish();
117                            return STRING; }
118          /* Quoted strings */
119 \"[^\\"\n]*\"         { grecs_line_begin();
120                         grecs_line_add(yytext + 1, yyleng - 2);
121                         yylval.string = grecs_line_finish();
122                         qstring_locus_fixup();
123                         return qstring(); }
124 \"[^\\"\n]*\\\n        { BEGIN(STR);
125                          grecs_line_begin();
126 		         grecs_line_acc_grow_unescape_last(yytext + 1,
127                                                            yyleng - 1,
128                                                            &yylloc);
129                          grecs_locus_point_advance_line(grecs_current_locus_point); }
130 \"[^\\"\n]*\\.         { BEGIN(STR);
131                          grecs_line_begin();
132 		         grecs_line_acc_grow_unescape_last(yytext + 1,
133                                                            yyleng - 1,
134                                                            &yylloc); }
135 <STR>\"[^\\"\n]*\\\n  { grecs_line_acc_grow_unescape_last(yytext, yyleng,
136                                                           &yylloc);
137                         grecs_locus_point_advance_line(grecs_current_locus_point); }
138 <STR>[^\\"\n]*\\.     { grecs_line_acc_grow_unescape_last(yytext, yyleng,
139                                                           &yylloc); }
140 <STR>[^\\"\n]*\"      { BEGIN(INITIAL);
141                         if (yyleng > 1)
142 				grecs_line_add(yytext, yyleng - 1);
143                         yylval.string = grecs_line_finish();
144 			qstring_locus_fixup();
145 		        return qstring(); }
146          /* Multiline strings */
147 "<<"(-" "?)?\\?{ID}[ \t]*#.*\n |
148 "<<"(-" "?)?\\?{ID}[ \t]*"//".*\n |
149 "<<"(-" "?)?\\?{ID}[ \t]*\n |
150 "<<"(-" "?)?\"{ID}\"[ \t]*#.*\n |
151 "<<"(-" "?)?\"{ID}\"[ \t]*"//".*\n |
152 "<<"(-" "?)?\"{ID}\"[ \t]*\n {
153                 BEGIN(ML);
154 		multiline_begin(yytext+2); }
155          /* Ignore m4 line statements */
156 <ML>^"#line ".*\n {
157 	grecs_locus_point_advance_line(grecs_current_locus_point);
158 }
159 <ML>.*\n { char *p = multiline_strip_tabs(yytext);
160 
161            if (!strncmp(p, multiline_delimiter, multiline_delimiter_len)
162 	       && isemptystr(p + multiline_delimiter_len - yytext)) {
163 		   grecs_free(multiline_delimiter);
164 		   multiline_delimiter = NULL;
165 		   BEGIN(INITIAL);
166 		   yylval.string = grecs_line_finish();
167 
168 		   /* Update end pos */
169 		   yylloc.end.line--;
170 		   for (yylloc.end.col = 0,
171 				p = yylval.string + strlen(yylval.string) - 1;
172 			p > yylval.string && p[-1] != '\n';
173 			yylloc.end.col++, p--);
174 		   if (yylloc.end.col == 0)
175 			   yylloc.end.col = 1;
176 		   return MSTRING;
177 	   }
178            grecs_locus_point_advance_line(grecs_current_locus_point);
179 	   multiline_add(p); }
180 {WS}     ;
181          /* Other tokens */
182 \n       { grecs_locus_point_advance_line(grecs_current_locus_point); }
183 [,;{}()] return yytext[0];
184 .        { if (isascii(yytext[0]) && isprint(yytext[0]))
185 		grecs_error(&yylloc, 0,
186 			     _("stray character %c"), yytext[0]);
187            else
188 		grecs_error(&yylloc, 0,
189 			     _("stray character \\%03o"),
190 			       (unsigned char) yytext[0]); }
191 %%
192 
193 pid_t grecs_preproc_pid;
194 
195 int
196 yywrap()
197 {
198 	if (grecs_preprocessor) {
199 		grecs_preproc_extrn_shutdown(grecs_preproc_pid);
200 		fclose(yyin);
201 	} else
202 		grecs_preproc_done();
203 	grecs_current_locus_point.file = NULL;
204 	return 1;
205 }
206 
207 int
208 grecs_lex_begin(const char *name, int trace)
209 {
210 	yy_flex_debug = trace;
211 
212 	grecs_line_acc_create();
213 
214 	if (grecs_preprocessor) {
215 		int fd;
216 
217 		fd = open(name, O_RDONLY);
218 		if (fd == -1) {
219 			grecs_error(NULL, errno, _("Cannot open `%s'"), name);
220 			return 1;
221 		}
222 		close(fd);
223 
224 		yyin = grecs_preproc_extrn_start(name, &grecs_preproc_pid);
225 		if (!yyin) {
226 			grecs_error(NULL, errno,
227 				     _("Unable to start external preprocessor `%s'"),
228 				     grecs_preprocessor);
229 			return 1;
230 		}
231 	} else
232 		return grecs_preproc_init(name);
233 
234 	return 0;
235 }
236 
/* Counterpart of grecs_lex_begin: releases the line accumulator.
   ERR is accepted but not used. */
void
grecs_lex_end(int err)
{
	(void) err;
	grecs_line_acc_free();
}
242 
243 static int
244 isemptystr(int off)
245 {
246 	for (; yytext[off] && isspace(yytext[off]); off++)
247 		;
248 	if (yytext[off] == ';') {
249 		int i;
250 		for (i = off + 1; yytext[i]; i++)
251 			if (!isspace(yytext[i]))
252 				return 0;
253 		yyless(off);
254 		return 1;
255 	}
256 	return yytext[off] == 0;
257 }
258 
259 char *
260 multiline_strip_tabs(char *text)
261 {
262 	if (char_to_strip)
263 		for (; *text && char_to_strip(*text); text++)
264 			;
265 	return text;
266 }
267 
268 static void
269 multiline_add(char *s)
270 {
271 	if (multiline_unescape) {
272 		for (; *s; s++)	{
273 			if (*s == '\\') {
274 				grecs_line_acc_grow_char_unescape(s[1]);
275 				++s;
276 			} else
277 				grecs_line_acc_grow_char(*s);
278 		}
279 	} else
280 		grecs_line_add(s, strlen(s));
281 }
282 
/* Predicate: return 1 if C is a horizontal tab, 0 otherwise. */
static int
is_tab(char c)
{
	return ('\t' == c) ? 1 : 0;
}
288 
/* Predicate: return 1 if C is a horizontal tab or a space, 0 otherwise. */
static int
is_ws(char c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
294 
295 void
296 multiline_begin(char *p)
297 {
298 	if (*p == '-') {
299 		if (*++p == ' ') {
300 			char_to_strip = is_ws;
301 			p++;
302 		} else
303 			char_to_strip = is_tab;
304 	} else
305 		char_to_strip = NULL;
306 	if (*p == '\\') {
307 		p++;
308 		multiline_unescape = 0;
309 		multiline_delimiter_len = strcspn(p, " \t");
310 	} else if (*p == '"') {
311 		char *q;
312 
313 		p++;
314 		multiline_unescape = 0;
315 		q = strchr(p, '"');
316 		multiline_delimiter_len = q - p;
317 	} else {
318 		multiline_delimiter_len = strcspn(p, " \t");
319 		multiline_unescape = 1;
320 	}
321 
322 	/* Remove trailing newline */
323 	multiline_delimiter_len--;
324 	multiline_delimiter = grecs_malloc(multiline_delimiter_len + 1);
325 	memcpy(multiline_delimiter, p, multiline_delimiter_len);
326 	multiline_delimiter[multiline_delimiter_len] = 0;
327 	grecs_line_begin();
328 
329 	/* Update locus */
330 	grecs_locus_point_advance_line(grecs_current_locus_point);
331 	yylloc.beg = grecs_current_locus_point;
332 	yylloc.beg.col++;
333 }
334 
335 static int
336 ident()
337 {
338 	char *p;
339 	char *str;
340 	size_t len;
341 
342 	for (p = yytext; *p && isspace(*p); p++)
343 		;
344 
345 	len = strlen(p);
346 	str = grecs_malloc(len + 1);
347 	strcpy(str, p);
348 	yylval.string = str;
349 	return IDENT;
350 }
351 
352 static void
353 qstring_locus_fixup()
354 {
355 	if (grecs_parser_options & GRECS_OPTION_ADJUST_STRING_LOCATIONS) {
356 		yylloc.beg.col++;
357 		yylloc.end.col--;
358 	}
359 }
360 
361 grecs_value_t *
362 grecs_value_ptr_from_static(grecs_value_t *input)
363 {
364 	grecs_value_t *ptr = grecs_malloc(sizeof(*ptr));
365 	*ptr = *input;
366 	return ptr;
367 }
368 
369 
370 static int
371 assign_locus(struct grecs_locus_point *ploc,
372 	     char *name, char *line, size_t *pxlines)
373 {
374 	char *p;
375 
376 	if (name) {
377 		if (pxlines && (!ploc->file || strcmp(name, ploc->file)))
378 			*pxlines = 0;
379 		ploc->file = grecs_install_text(name);
380 	}
381 	ploc->line = strtoul(line, &p, 10) - (pxlines ? *pxlines : 0);
382 	ploc->col = 0;
383 	return *p != 0;
384 }
385 
386 void
387 grecs_parse_line_directive(char *text, grecs_locus_t *ploc,
388 			   struct grecs_locus_point *ppoint, size_t *pxlines)
389 {
390 	int rc = 1;
391 	struct wordsplit ws;
392 
393 	if (wordsplit(text, &ws, WRDSF_DEFFLAGS))
394 		grecs_error(ploc, 0, _("cannot parse #line line: %s"),
395 			    wordsplit_strerror(&ws));
396 	else {
397 		if (ws.ws_wordc == 2)
398 			rc = assign_locus(ppoint, NULL,
399 					   ws.ws_wordv[1], pxlines);
400 		else if (ws.ws_wordc == 3)
401 			rc = assign_locus(ppoint, ws.ws_wordv[2],
402 					   ws.ws_wordv[1], pxlines);
403 		else if (ws.ws_wordc == 4) {
404 			rc = assign_locus(ppoint, ws.ws_wordv[2],
405 					   ws.ws_wordv[1], 0);
406 			if (pxlines && rc == 0) {
407 				char *p;
408 				unsigned long x = strtoul(ws.ws_wordv[3],
409 							   &p, 10);
410 				rc = *p != 0;
411 				if (rc == 0)
412 					*pxlines = x;
413 			}
414 		} else
415 			grecs_error(ploc, 0, _("invalid #line statement"));
416 
417 		if (rc)
418 			grecs_error(ploc, 0, _("malformed #line statement"));
419 		wordsplit_free(&ws);
420 	}
421 }
422 
423 void
424 grecs_parse_line_directive_cpp(char *text, grecs_locus_t *ploc,
425 			       struct grecs_locus_point *ppoint,
426 			       size_t *pxlines)
427 {
428 	struct wordsplit ws;
429 
430 	if (wordsplit(text, &ws, WRDSF_DEFFLAGS)) {
431 		grecs_error(ploc, 0, _("cannot parse #line line: %s"),
432 			    wordsplit_strerror(&ws));
433 		return;
434 	} else if (ws.ws_wordc < 3)
435 		grecs_error(ploc, 0, _("invalid #line statement"));
436 	else {
437 		if (assign_locus(ppoint, ws.ws_wordv[2],
438 				 ws.ws_wordv[1], pxlines))
439 			grecs_error(ploc, 0, _("malformed #line statement"));
440 	}
441 	wordsplit_free(&ws);
442 }
443 
444