xref: /openbsd/usr.bin/make/lowparse.c (revision 8529ddd3)
1 /*	$OpenBSD: lowparse.c,v 1.33 2014/11/03 12:48:37 espie Exp $ */
2 
3 /* low-level parsing functions. */
4 
5 /*
6  * Copyright (c) 1999,2000 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "config.h"
39 #include "defines.h"
40 #include "buf.h"
41 #include "lowparse.h"
42 #include "error.h"
43 #include "lst.h"
44 #include "memory.h"
45 #include "pathnames.h"
46 #ifndef LOCATION_TYPE
47 #include "location.h"
48 #endif
49 #include "var.h"
50 
51 
/* Name of the variable to which new_input_file() appends the name of
 * every makefile it is handed. */
#define READ_MAKEFILES "MAKEFILE_LIST"
53 
/* Input stream structure: file or string.
 * Files have str == NULL, F != NULL.
 * Strings have F == NULL, str != NULL.
 *
 * In both cases [ptr, end) delimits the characters that can still be
 * handed out by read_char() without refilling.
 */
struct input_stream {
	Location origin;	/* Name of file and line number */
	FILE *F;		/* Open stream, or NULL if pure string. */
	char *str;		/* Input string, if F == NULL. */

	/* Line buffer. */
	char *ptr;		/* Where we are. */
	char *end;		/* Don't overdo it: one past the last
				 * available character. */
};
67 
static struct input_stream *current;	/* the input_stream being parsed. */

static LIST input_stack;	/* Stack of input_stream waiting to be parsed
				 * (includes and loop reparses) */

/* record gnode location for proper reporting at runtime.
 * When non-NULL, Parse_FillLocation() copies this location instead of
 * the position of the stream currently being parsed. */
static Location *post_parse = NULL;
75 
/* input_stream ctors.
 *
 * obj = new_input_file(filename, filehandle);
 *	Create input stream from filename, filehandle.
 *	Also appends filename to the READ_MAKEFILES variable. */
static struct input_stream *new_input_file(const char *, FILE *);
/* obj = new_input_string(str, origin);
 *	Create input stream from str, origin.
 *	The stream keeps str itself (no copy); free_input_stream()
 *	later frees it. */
static struct input_stream *new_input_string(char *, const Location *);
/* free_input_stream(obj);
 *	Discard consumed input stream, closing files, freeing memory.  */
static void free_input_stream(struct input_stream *);
87 
88 
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	The expansion is fully parenthesized so that it can be used safely
 *	inside larger expressions (e.g. read_char() == 'x').
 *	The character is converted through unsigned char, like getc(3),
 *	so that a 0xff byte in the input is never mistaken for EOF on
 *	platforms where plain char is signed. */
#define read_char()	\
    (current->ptr < current->end ? \
	(int)(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
/* char = grab_new_line_and_readchar();
 *	Guts for read_char. Grabs a new line off fgetln when we have
 *	consumed the current line and returns the first char, or EOF at end of
 *	stream.  String streams (F == NULL) have nothing to refill from and
 *	always yield EOF here.  */
static int grab_new_line_and_readchar(void);
/* c = skip_to_end_of_line();
 *	Skips to the end of the current line, returns either '\n' or EOF.  */
static int skip_to_end_of_line(void);
102 
103 
/* Helper functions to handle basic parsing. */
/* read_logical_line(buffer, firstchar);
 *	Grabs logical line into buffer, the first character has already been
 *	read into firstchar.  */
static void read_logical_line(Buffer, int);

/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space). */
static int skip_empty_lines_and_read_char(Buffer);
115 
/* Directory prefix that simplify() strips from file names, and its
 * cached length.  Both are set together by Parse_setcurdir(). */
const char *curdir;
size_t curdir_len;

/* Parse_setcurdir(dir);
 *	Record dir (not copied, so it must outlive its use) together with
 *	its length. */
void
Parse_setcurdir(const char *dir)
{
	curdir_len = strlen(dir);
	curdir = dir;
}
125 
/* startswith(f, s, len);
 *	True when the first len characters of f match those of s and the
 *	character that follows in f is a '/', i.e., s names a leading
 *	directory component of f. */
static bool
startswith(const char *f, const char *s, size_t len)
{
	const bool same_prefix = strncmp(f, s, len) == 0;

	/* f[len] is only looked at once the prefix is known to match. */
	return same_prefix && f[len] == '/';
}
131 
132 static const char *
133 simplify(const char *filename)
134 {
135 	if (startswith(filename, curdir, curdir_len))
136 		return filename + curdir_len + 1;
137 	else if (startswith(filename, _PATH_DEFSYSPATH,
138 	    sizeof(_PATH_DEFSYSPATH)-1)) {
139 	    	size_t sz;
140 		char *buf;
141 		sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
142 		buf = emalloc(sz);
143 		snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
144 		return buf;
145 	} else
146 		return filename;
147 }
148 
149 static struct input_stream *
150 new_input_file(const char *name, FILE *stream)
151 {
152 	struct input_stream *istream;
153 
154 	istream = emalloc(sizeof(*istream));
155 	istream->origin.fname = simplify(name);
156 	Var_Append(READ_MAKEFILES, name);
157 	istream->str = NULL;
158 	/* Naturally enough, we start reading at line 0. */
159 	istream->origin.lineno = 0;
160 	istream->F = stream;
161 	istream->ptr = istream->end = NULL;
162 	return istream;
163 }
164 
165 static void
166 free_input_stream(struct input_stream *istream)
167 {
168 	if (istream->F && fileno(istream->F) != STDIN_FILENO)
169 		(void)fclose(istream->F);
170 	free(istream->str);
171 	/* Note we can't free the file names, as they are embedded in GN
172 	 * for error reports. */
173 	free(istream);
174 }
175 
176 static struct input_stream *
177 new_input_string(char *str, const Location *origin)
178 {
179 	struct input_stream *istream;
180 
181 	istream = emalloc(sizeof(*istream));
182 	/* No malloc, name is always taken from an already existing istream
183 	 * and strings are used in for loops, so we need to reset the line
184 	 * counter to an appropriate value. */
185 	istream->origin = *origin;
186 	istream->F = NULL;
187 	istream->ptr = istream->str = str;
188 	istream->end = str + strlen(str);
189 	return istream;
190 }
191 
192 
193 void
194 Parse_FromString(char *str, unsigned long lineno)
195 {
196 	Location origin;
197 
198 	origin.fname = current->origin.fname;
199 	origin.lineno = lineno;
200 	if (DEBUG(FOR))
201 		(void)fprintf(stderr, "%s\n----\n", str);
202 
203 	Lst_Push(&input_stack, current);
204 	assert(current != NULL);
205 	current = new_input_string(str, &origin);
206 }
207 
208 
209 void
210 Parse_FromFile(const char *name, FILE *stream)
211 {
212 	if (current != NULL)
213 		Lst_Push(&input_stack, current);
214 	current = new_input_file(name, stream);
215 }
216 
217 bool
218 Parse_NextFile(void)
219 {
220 	if (current != NULL)
221 		free_input_stream(current);
222 	current = (struct input_stream *)Lst_Pop(&input_stack);
223 	return current != NULL;
224 }
225 
226 static int
227 grab_new_line_and_readchar(void)
228 {
229 	size_t len;
230 
231 	if (current->F) {
232 		current->ptr = fgetln(current->F, &len);
233 		if (current->ptr) {
234 			current->end = current->ptr + len;
235 			return *current->ptr++;
236 		} else {
237 			current->end = NULL;
238 		}
239 	}
240 	return EOF;
241 }
242 
243 static int
244 skip_to_end_of_line(void)
245 {
246 	if (current->F) {
247 		if (current->end - current->ptr > 1)
248 			current->ptr = current->end - 1;
249 		if (*current->ptr == '\n')
250 			return *current->ptr++;
251 		return EOF;
252 	} else {
253 		int c;
254 
255 		do {
256 			c = read_char();
257 		} while (c != '\n' && c != EOF);
258 		return c;
259 	}
260 }
261 
262 
263 char *
264 Parse_ReadNextConditionalLine(Buffer linebuf)
265 {
266 	int c;
267 
268 	/* If first char isn't dot, skip to end of line, handling \ */
269 	while ((c = read_char()) != '.') {
270 		for (;c != '\n'; c = read_char()) {
271 			if (c == '\\') {
272 				c = read_char();
273 				if (c == '\n')
274 					current->origin.lineno++;
275 			}
276 			if (c == EOF)
277 				/* Unclosed conditional, reported by cond.c */
278 				return NULL;
279 		}
280 		current->origin.lineno++;
281 	}
282 
283 	/* This is the line we need to copy */
284 	return Parse_ReadUnparsedLine(linebuf, "conditional");
285 }
286 
287 static void
288 read_logical_line(Buffer linebuf, int c)
289 {
290 	for (;;) {
291 		if (c == '\n') {
292 			current->origin.lineno++;
293 			break;
294 		}
295 		if (c == EOF)
296 			break;
297 		Buf_AddChar(linebuf, c);
298 		c = read_char();
299 		while (c == '\\') {
300 			c = read_char();
301 			if (c == '\n') {
302 				Buf_AddSpace(linebuf);
303 				current->origin.lineno++;
304 				do {
305 					c = read_char();
306 				} while (c == ' ' || c == '\t');
307 			} else {
308 				Buf_AddChar(linebuf, '\\');
309 				if (c == '\\') {
310 					Buf_AddChar(linebuf, '\\');
311 					c = read_char();
312 				}
313 				break;
314 			}
315 		}
316 	}
317 }
318 
319 char *
320 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
321 {
322 	int c;
323 
324 	Buf_Reset(linebuf);
325 	c = read_char();
326 	if (c == EOF) {
327 		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
328 		return NULL;
329 	}
330 
331 	/* Handle '\' at beginning of line, since \\n needs special treatment */
332 	while (c == '\\') {
333 		c = read_char();
334 		if (c == '\n') {
335 			current->origin.lineno++;
336 			do {
337 				c = read_char();
338 			} while (c == ' ' || c == '\t');
339 		} else {
340 			Buf_AddChar(linebuf, '\\');
341 			if (c == '\\') {
342 				Buf_AddChar(linebuf, '\\');
343 				c = read_char();
344 			}
345 			break;
346 		}
347 	}
348 	read_logical_line(linebuf, c);
349 
350 	return Buf_Retrieve(linebuf);
351 }
352 
/* This is a fairly complex function, but without it, we could not skip
 * blocks of comments without reading them.
 *
 * firstchar = skip_empty_lines_and_read_char(linebuf);
 *	Skip empty lines and lines whose first character (possibly after
 *	leading spaces) is '#', counting them, and return the first
 *	character of the next line with actual contents, or EOF.
 *	linebuf is reset for each line examined; on return it may hold a
 *	leading tab and/or backslashes that belong to the line.
 *	When a backslash that is not followed by a newline is met right
 *	after the leading blanks, the character following it is returned,
 *	with '\n' standing in for EOF. */
static int
skip_empty_lines_and_read_char(Buffer linebuf)
{
	int c;		/* the current character */

	for (;;) {
		Buf_Reset(linebuf);
		c = read_char();
		/* Strip leading spaces, fold on '\n' */
		if (c == ' ') {
			do {
				c = read_char();
			} while (c == ' ' || c == '\t');
			while (c == '\\') {
				c = read_char();
				if (c == '\n') {
					/* escaped newline: go on with the
					 * next physical line */
					current->origin.lineno++;
					do {
						c = read_char();
					} while (c == ' ' || c == '\t');
				} else {
					/* plain backslash: the line has
					 * contents, keep it and stop */
					Buf_AddChar(linebuf, '\\');
					if (c == '\\') {
						Buf_AddChar(linebuf, '\\');
						c = read_char();
					}
					if (c == EOF)
						return '\n';
					else
						return c;
				}
			}
			/* both blank-skipping loops above swallow tabs */
			assert(c != '\t');
		}
		/* Comment: drop the rest of the line, c ends up as '\n'
		 * or EOF. */
		if (c == '#')
			c = skip_to_end_of_line();
		/* Almost identical to spaces, except this occurs after
		 * comments have been taken care of, and we keep the tab
		 * itself.  */
		if (c == '\t') {
			Buf_AddChar(linebuf, '\t');
			do {
				c = read_char();
			} while (c == ' ' || c == '\t');
			while (c == '\\') {
				c = read_char();
				if (c == '\n') {
					current->origin.lineno++;
					do {
						c = read_char();
					} while (c == ' ' || c == '\t');
				} else {
					Buf_AddChar(linebuf, '\\');
					if (c == '\\') {
						Buf_AddChar(linebuf, '\\');
						c = read_char();
					}
					if (c == EOF)
						return '\n';
					else
						return c;
				}
			}
		}
		/* Empty line (or comment): count it and try the next one;
		 * anything else, EOF included, goes back to the caller. */
		if (c == '\n')
			current->origin.lineno++;
		else
			return c;
	}
}
425 
426 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
427  * the first tab), handles escaped newlines, and skips over uninteresting
428  * lines.
429  *
430  * The line number is incremented, which implies that continuation
431  * lines are numbered with the last line number (we could do better, at a
432  * price).
433  *
434  * Trivial comments are also removed, but we can't do more, as
435  * we don't know which lines are shell commands or not.  */
436 char *
437 Parse_ReadNormalLine(Buffer linebuf)
438 {
439 	int c;		/* the current character */
440 
441 	c = skip_empty_lines_and_read_char(linebuf);
442 
443 	if (c == EOF)
444 		return NULL;
445 	else {
446 		read_logical_line(linebuf, c);
447 		return Buf_Retrieve(linebuf);
448 	}
449 }
450 
451 unsigned long
452 Parse_Getlineno(void)
453 {
454 	return current ? current->origin.lineno : 0;
455 }
456 
457 const char *
458 Parse_Getfilename(void)
459 {
460 	return current ? current->origin.fname : NULL;
461 }
462 
/* Parse_SetLocation(origin);
 *	Record origin as the location that Parse_FillLocation() hands
 *	out from now on.  The pointer is stored, not the Location it
 *	points to.  Passing NULL makes Parse_FillLocation() use the
 *	position of the stream being parsed again. */
void
Parse_SetLocation(Location *origin)
{
	post_parse = origin;
}
468 
469 void
470 Parse_FillLocation(Location *origin)
471 {
472 	if (post_parse) {
473 		*origin = *post_parse;
474 	} else {
475 		origin->lineno = Parse_Getlineno();
476 		origin->fname = Parse_Getfilename();
477 	}
478 }
479 
480 void
481 Parse_ReportErrors(void)
482 {
483 	if (fatal_errors)
484 		exit(1);
485 	else
486 		assert(current == NULL);
487 }
488