xref: /openbsd/usr.bin/make/lowparse.c (revision 4cfece93)
1 /*	$OpenBSD: lowparse.c,v 1.35 2016/10/21 16:12:38 espie Exp $ */
2 
3 /* low-level parsing functions. */
4 
5 /*
6  * Copyright (c) 1999,2000 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "config.h"
39 #include "defines.h"
40 #include "buf.h"
41 #include "lowparse.h"
42 #include "error.h"
43 #include "lst.h"
44 #include "memory.h"
45 #include "pathnames.h"
46 #ifndef LOCATION_TYPE
47 #include "location.h"
48 #endif
49 #include "var.h"
50 
51 
52 #define READ_MAKEFILES "MAKEFILE_LIST"
53 
/* Input stream structure: file or string.
 * Files have str == NULL, F != NULL.
 * Strings have F == NULL, str != NULL.
 *
 * ptr and end delimit what is left to read in the line buffer.  For
 * strings they cover the whole string as soon as the stream is created;
 * for files they start out NULL and are refreshed from fgetln(), one
 * physical line at a time.
 */
struct input_stream {
	Location origin;	/* Name of file and line number */
	FILE *F;		/* Open stream, or NULL if pure string. */
	char *str;		/* Input string, if F == NULL; passed to
				 * free() when the stream is discarded. */

	/* Line buffer. */
	char *ptr;		/* Next character to hand out. */
	char *end;		/* One past the last character available;
				 * reading stops when ptr reaches it. */
};
67 
/* The input_stream being parsed.  Replaced by Parse_FromFile and
 * Parse_FromString, and by Parse_NextFile with whatever it pops off
 * input_stack. */
static struct input_stream *current;

/* Stack of input_stream waiting to be parsed (includes and loop reparses):
 * the interrupted stream is pushed here when a new one is started. */
static LIST input_stack;

/* record gnode location for proper reporting at runtime: when non-NULL,
 * Parse_FillLocation copies this instead of the current parse position. */
static Location *post_parse = NULL;
75 
76 /* input_stream ctors.
77  *
78  * obj = new_input_file(filename, filehandle);
79  *	Create input stream from filename, filehandle. */
80 static struct input_stream *new_input_file(const char *, FILE *);
81 /* obj = new_input_string(str, origin);
82  *	Create input stream from str, origin. */
83 static struct input_stream *new_input_string(char *, const Location *);
84 /* free_input_stream(obj);
85  *	Discard consumed input stream, closing files, freeing memory.  */
86 static void free_input_stream(struct input_stream *);
87 
88 
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	Characters still available in the line buffer are handed out
 *	directly; once the buffer is exhausted grab_new_line_and_readchar()
 *	takes over.
 *
 *	The character goes through unsigned char, the way getc(3) does, so
 *	that a 0xff byte in the input never compares equal to EOF on
 *	platforms where plain char is signed.  The expansion is fully
 *	parenthesized so that it stays a single operand inside any larger
 *	expression. */
#define read_char()	\
    (current->ptr < current->end ? \
	(int)(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
94 /* char = grab_new_line_and_readchar();
95  *	Guts for read_char. Grabs a new line off fgetln when we have
96  *	consumed the current line and returns the first char, or EOF at end of
97  *	stream.  */
98 static int grab_new_line_and_readchar(void);
99 /* c = skip_to_end_of_line();
100  *	Skips to the end of the current line, returns either '\n' or EOF.  */
101 static int skip_to_end_of_line(void);
102 
103 
104 /* Helper functions to handle basic parsing. */
105 /* read_logical_line(buffer, firstchar);
106  *	Grabs logical line into buffer, the first character has already been
107  *	read into firstchar.  */
108 static void read_logical_line(Buffer, int);
109 
/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space). */
114 static int skip_empty_lines_and_read_char(Buffer);
115 
/* Directory prefix that simplify() strips from file names, and its
 * length in characters. */
const char *curdir;
size_t curdir_len;

/* Parse_setcurdir(dir);
 *	Record dir as the current directory prefix.  Only the pointer is
 *	stored, the string is not copied; its length is cached alongside. */
void
Parse_setcurdir(const char *dir)
{
	curdir_len = strlen(dir);
	curdir = dir;
}
125 
/* startswith(f, s, len);
 *	True when f and s compare equal over (at most) len characters and
 *	the character of f at offset len is a '/'. */
static bool
startswith(const char *f, const char *s, size_t len)
{
	if (strncmp(f, s, len) != 0)
		return false;
	return f[len] == '/';
}
131 
132 static const char *
133 simplify(const char *filename)
134 {
135 	if (startswith(filename, curdir, curdir_len))
136 		return filename + curdir_len + 1;
137 	else if (startswith(filename, _PATH_DEFSYSPATH,
138 	    sizeof(_PATH_DEFSYSPATH)-1)) {
139 	    	size_t sz;
140 		char *buf;
141 		sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
142 		buf = emalloc(sz);
143 		snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
144 		return buf;
145 	} else
146 		return filename;
147 }
148 
149 static struct input_stream *
150 new_input_file(const char *name, FILE *stream)
151 {
152 	struct input_stream *istream;
153 
154 	istream = emalloc(sizeof(*istream));
155 	istream->origin.fname = simplify(name);
156 	Var_Append(READ_MAKEFILES, name);
157 	istream->str = NULL;
158 	/* Naturally enough, we start reading at line 0. */
159 	istream->origin.lineno = 0;
160 	istream->F = stream;
161 	istream->ptr = istream->end = NULL;
162 	return istream;
163 }
164 
165 static void
166 free_input_stream(struct input_stream *istream)
167 {
168 	if (istream->F) {
169 		if (ferror(istream->F))
170 			Parse_Error(PARSE_FATAL, "Read error");
171 		if (fileno(istream->F) != STDIN_FILENO)
172 			(void)fclose(istream->F);
173 	}
174 	free(istream->str);
175 	/* Note we can't free the file names, as they are embedded in GN
176 	 * for error reports. */
177 	free(istream);
178 }
179 
180 static struct input_stream *
181 new_input_string(char *str, const Location *origin)
182 {
183 	struct input_stream *istream;
184 
185 	istream = emalloc(sizeof(*istream));
186 	/* No malloc, name is always taken from an already existing istream
187 	 * and strings are used in for loops, so we need to reset the line
188 	 * counter to an appropriate value. */
189 	istream->origin = *origin;
190 	istream->F = NULL;
191 	istream->ptr = istream->str = str;
192 	istream->end = str + strlen(str);
193 	return istream;
194 }
195 
196 
197 void
198 Parse_FromString(char *str, unsigned long lineno)
199 {
200 	Location origin;
201 
202 	origin.fname = current->origin.fname;
203 	origin.lineno = lineno;
204 	if (DEBUG(FOR))
205 		(void)fprintf(stderr, "%s\n----\n", str);
206 
207 	Lst_Push(&input_stack, current);
208 	assert(current != NULL);
209 	current = new_input_string(str, &origin);
210 }
211 
212 
213 void
214 Parse_FromFile(const char *name, FILE *stream)
215 {
216 	if (current != NULL)
217 		Lst_Push(&input_stack, current);
218 	current = new_input_file(name, stream);
219 }
220 
221 bool
222 Parse_NextFile(void)
223 {
224 	if (current != NULL)
225 		free_input_stream(current);
226 	current = Lst_Pop(&input_stack);
227 	return current != NULL;
228 }
229 
230 static int
231 grab_new_line_and_readchar(void)
232 {
233 	size_t len;
234 
235 	if (current->F) {
236 		current->ptr = fgetln(current->F, &len);
237 		if (current->ptr) {
238 			current->end = current->ptr + len;
239 			return *current->ptr++;
240 		} else {
241 			current->end = NULL;
242 		}
243 	}
244 	return EOF;
245 }
246 
247 static int
248 skip_to_end_of_line(void)
249 {
250 	if (current->F) {
251 		if (current->end - current->ptr > 1)
252 			current->ptr = current->end - 1;
253 		if (*current->ptr == '\n')
254 			return *current->ptr++;
255 		return EOF;
256 	} else {
257 		int c;
258 
259 		do {
260 			c = read_char();
261 		} while (c != '\n' && c != EOF);
262 		return c;
263 	}
264 }
265 
266 
267 char *
268 Parse_ReadNextConditionalLine(Buffer linebuf)
269 {
270 	int c;
271 
272 	/* If first char isn't dot, skip to end of line, handling \ */
273 	while ((c = read_char()) != '.') {
274 		for (;c != '\n'; c = read_char()) {
275 			if (c == '\\') {
276 				c = read_char();
277 				if (c == '\n')
278 					current->origin.lineno++;
279 			}
280 			if (c == EOF)
281 				/* Unclosed conditional, reported by cond.c */
282 				return NULL;
283 		}
284 		current->origin.lineno++;
285 	}
286 
287 	/* This is the line we need to copy */
288 	return Parse_ReadUnparsedLine(linebuf, "conditional");
289 }
290 
/* read_logical_line(linebuf, c);
 *	Append one logical line to linebuf, c being its first character
 *	(already read by the caller).  Reading stops at an unescaped newline,
 *	which is consumed and counted but not stored, or at EOF.
 *	A backslash-newline pair, together with the spaces and tabs that
 *	follow it, is replaced by a single space.  Any other backslash is
 *	stored as is; a doubled backslash is stored as two backslashes. */
static void
read_logical_line(Buffer linebuf, int c)
{
	for (;;) {
		if (c == '\n') {
			current->origin.lineno++;
			break;
		}
		if (c == EOF)
			break;
		Buf_AddChar(linebuf, c);
		c = read_char();
		/* Loop because a continuation may be directly followed by
		 * another backslash. */
		while (c == '\\') {
			c = read_char();
			if (c == '\n') {
				/* Escaped newline: fold into one space and
				 * skip the next line's leading blanks. */
				Buf_AddSpace(linebuf);
				current->origin.lineno++;
				do {
					c = read_char();
				} while (c == ' ' || c == '\t');
			} else {
				/* Plain backslash: keep it.  The character
				 * after it is left in c for the outer loop,
				 * unless it is a second backslash, which is
				 * stored right here so that it cannot start
				 * an escape of its own. */
				Buf_AddChar(linebuf, '\\');
				if (c == '\\') {
					Buf_AddChar(linebuf, '\\');
					c = read_char();
				}
				break;
			}
		}
	}
}
322 
323 char *
324 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
325 {
326 	int c;
327 
328 	Buf_Reset(linebuf);
329 	c = read_char();
330 	if (c == EOF) {
331 		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
332 		return NULL;
333 	}
334 
335 	/* Handle '\' at beginning of line, since \\n needs special treatment */
336 	while (c == '\\') {
337 		c = read_char();
338 		if (c == '\n') {
339 			current->origin.lineno++;
340 			do {
341 				c = read_char();
342 			} while (c == ' ' || c == '\t');
343 		} else {
344 			Buf_AddChar(linebuf, '\\');
345 			if (c == '\\') {
346 				Buf_AddChar(linebuf, '\\');
347 				c = read_char();
348 			}
349 			break;
350 		}
351 	}
352 	read_logical_line(linebuf, c);
353 
354 	return Buf_Retrieve(linebuf);
355 }
356 
/* This is a fairly complex function, but without it, we could not skip
 * blocks of comments without reading them.
 *
 * firstchar = skip_empty_lines_and_read_char(linebuf);
 *	Skips empty lines and lines starting with '#', counting them, and
 *	returns the first character of the next line worth parsing, or EOF.
 *	linebuf is reset for each line examined; on return it may already
 *	hold a leading tab and/or one or two backslashes belonging to the
 *	line.  If a backslash turns out to be the last character of the
 *	input, '\n' is returned instead of EOF. */
static int
skip_empty_lines_and_read_char(Buffer linebuf)
{
	int c;		/* the current character */

	for (;;) {
		Buf_Reset(linebuf);
		c = read_char();
		/* Strip leading spaces, fold on '\n' */
		if (c == ' ') {
			do {
				c = read_char();
			} while (c == ' ' || c == '\t');
			while (c == '\\') {
				c = read_char();
				if (c == '\n') {
					/* Escaped newline: drop it and the
					 * blanks that follow. */
					current->origin.lineno++;
					do {
						c = read_char();
					} while (c == ' ' || c == '\t');
				} else {
					/* Real backslash (possibly doubled):
					 * the line has content, hand it over
					 * to the caller. */
					Buf_AddChar(linebuf, '\\');
					if (c == '\\') {
						Buf_AddChar(linebuf, '\\');
						c = read_char();
					}
					if (c == EOF)
						return '\n';
					else
						return c;
				}
			}
			/* Both blank-skipping loops above stop only on a
			 * character that is neither space nor tab. */
			assert(c != '\t');
		}
		if (c == '#')
			c = skip_to_end_of_line();
		/* Almost identical to spaces, except this occurs after
		 * comments have been taken care of, and we keep the tab
		 * itself.  */
		if (c == '\t') {
			Buf_AddChar(linebuf, '\t');
			do {
				c = read_char();
			} while (c == ' ' || c == '\t');
			while (c == '\\') {
				c = read_char();
				if (c == '\n') {
					current->origin.lineno++;
					do {
						c = read_char();
					} while (c == ' ' || c == '\t');
				} else {
					Buf_AddChar(linebuf, '\\');
					if (c == '\\') {
						Buf_AddChar(linebuf, '\\');
						c = read_char();
					}
					if (c == EOF)
						return '\n';
					else
						return c;
				}
			}
		}
		/* A newline here means the line was empty (or a comment):
		 * count it and go look at the next one. */
		if (c == '\n')
			current->origin.lineno++;
		else
			return c;
	}
}
429 
430 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
431  * the first tab), handles escaped newlines, and skips over uninteresting
432  * lines.
433  *
434  * The line number is incremented, which implies that continuation
435  * lines are numbered with the last line number (we could do better, at a
436  * price).
437  *
438  * Trivial comments are also removed, but we can't do more, as
439  * we don't know which lines are shell commands or not.  */
440 char *
441 Parse_ReadNormalLine(Buffer linebuf)
442 {
443 	int c;		/* the current character */
444 
445 	c = skip_empty_lines_and_read_char(linebuf);
446 
447 	if (c == EOF)
448 		return NULL;
449 	else {
450 		read_logical_line(linebuf, c);
451 		return Buf_Retrieve(linebuf);
452 	}
453 }
454 
455 unsigned long
456 Parse_Getlineno(void)
457 {
458 	return current ? current->origin.lineno : 0;
459 }
460 
461 const char *
462 Parse_Getfilename(void)
463 {
464 	return current ? current->origin.fname : NULL;
465 }
466 
/* Parse_SetLocation(origin);
 *	Record origin as the location to report from now on: while it is
 *	non-NULL, Parse_FillLocation copies it instead of the position of
 *	the stream being parsed.  Only the pointer is stored.  Passing NULL
 *	restores reporting of the current parse position. */
void
Parse_SetLocation(Location *origin)
{
	post_parse = origin;
}
472 
473 void
474 Parse_FillLocation(Location *origin)
475 {
476 	if (post_parse) {
477 		*origin = *post_parse;
478 	} else {
479 		origin->lineno = Parse_Getlineno();
480 		origin->fname = Parse_Getfilename();
481 	}
482 }
483 
484 void
485 Parse_ReportErrors(void)
486 {
487 	if (fatal_errors)
488 		exit(1);
489 	else
490 		assert(current == NULL);
491 }
492