xref: /openbsd/usr.bin/make/lowparse.c (revision c9fc29cf)
1 /*	$OpenBSD: lowparse.c,v 1.36 2023/09/04 11:35:11 espie Exp $ */
2 
3 /* low-level parsing functions. */
4 
5 /*
6  * Copyright (c) 1999,2000 Marc Espie.
7  *
8  * Extensive code changes for the OpenBSD project.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "defines.h"
39 #include "buf.h"
40 #include "lowparse.h"
41 #include "error.h"
42 #include "lst.h"
43 #include "memory.h"
44 #include "pathnames.h"
45 #ifndef LOCATION_TYPE
46 #include "location.h"
47 #endif
48 #include "var.h"
49 
50 
/* Name of the variable to which new_input_file() appends the name of
 * every file it creates an input stream for. */
#define READ_MAKEFILES "MAKEFILE_LIST"

/* Input stream structure: file or string.
 * Files have str == NULL, F != NULL.
 * Strings have F == NULL, str != NULL.
 *
 * For both kinds, [ptr, end) delimits the characters that can still be
 * handed out without refilling: for a file this is the remainder of the
 * line last obtained from fgetln, for a string it is the remainder of
 * the whole string.
 */
struct input_stream {
	Location origin;	/* Name of file and line number */
	FILE *F;		/* Open stream, or NULL if pure string. */
	char *str;		/* Input string, if F == NULL; released by
				 * free_input_stream(). */

	/* Line buffer. */
	char *ptr;		/* Where we are: next character to return. */
	char *end;		/* Don't overdo it: one past the last
				 * available character. */
};
66 
/* the input_stream being parsed, NULL when there is none (the accessors
 * below and Parse_NextFile() test for that). */
static struct input_stream *current;

static LIST input_stack;	/* Stack of input_stream waiting to be parsed
				 * (includes and loop reparses) */

/* record gnode location for proper reporting at runtime.
 * While non-NULL, Parse_FillLocation() copies this location instead of
 * the position of the current input stream. */
static Location *post_parse = NULL;

/* input_stream ctors.
 *
 * obj = new_input_file(filename, filehandle);
 *	Create input stream from filename, filehandle. */
static struct input_stream *new_input_file(const char *, FILE *);
/* obj = new_input_string(str, origin);
 *	Create input stream from str, origin.  The stream keeps str itself
 *	(no copy) and free_input_stream() releases it. */
static struct input_stream *new_input_string(char *, const Location *);
/* free_input_stream(obj);
 *	Discard consumed input stream, closing files, freeing memory.  */
static void free_input_stream(struct input_stream *);
86 
87 
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	The expansion is fully parenthesized so that it behaves as a single
 *	operand wherever it is used, and the byte is converted through
 *	unsigned char so that a 0xff byte in the input cannot compare equal
 *	to EOF on platforms where plain char is signed. */
#define read_char()	\
    (current->ptr < current->end ? \
	(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
/* char = grab_new_line_and_readchar();
 *	Guts for read_char. Grabs a new line off fgetln when we have
 *	consumed the current line and returns the first char, or EOF at end of
 *	stream.  */
static int grab_new_line_and_readchar(void);
/* c = skip_to_end_of_line();
 *	Skips to the end of the current line, returns either '\n' or EOF.  */
static int skip_to_end_of_line(void);
101 
102 
/* Helper functions to handle basic parsing. */
/* read_logical_line(buffer, firstchar);
 *	Grabs logical line into buffer, the first character has already been
 *	read into firstchar.  */
static void read_logical_line(Buffer, int);

/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space). */
static int skip_empty_lines_and_read_char(Buffer);
114 
/* Directory prefix that simplify() strips from file names, and its
 * length, both recorded by Parse_setcurdir(). */
const char *curdir;
size_t curdir_len;

/* Parse_setcurdir(dir);
 *	Remember dir and its length.  The pointer is stored as is, the
 *	string is not copied. */
void
Parse_setcurdir(const char *dir)
{
	curdir_len = strlen(dir);
	curdir = dir;
}
124 
/* bool = startswith(f, s, len);
 *	True if the first len characters of f and s compare equal and f
 *	goes on with a '/', i.e., s (of length len) is a leading directory
 *	of f.  f[len] is only looked at after the prefix has matched. */
static bool
startswith(const char *f, const char *s, size_t len)
{
	const bool same_prefix = strncmp(f, s, len) == 0;

	return same_prefix && f[len] == '/';
}
130 
131 static const char *
simplify(const char * filename)132 simplify(const char *filename)
133 {
134 	if (startswith(filename, curdir, curdir_len))
135 		return filename + curdir_len + 1;
136 	else if (startswith(filename, _PATH_DEFSYSPATH,
137 	    sizeof(_PATH_DEFSYSPATH)-1)) {
138 	    	size_t sz;
139 		char *buf;
140 		sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
141 		buf = emalloc(sz);
142 		snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
143 		return buf;
144 	} else
145 		return filename;
146 }
147 
148 static struct input_stream *
new_input_file(const char * name,FILE * stream)149 new_input_file(const char *name, FILE *stream)
150 {
151 	struct input_stream *istream;
152 
153 	istream = emalloc(sizeof(*istream));
154 	istream->origin.fname = simplify(name);
155 	Var_Append(READ_MAKEFILES, name);
156 	istream->str = NULL;
157 	/* Naturally enough, we start reading at line 0. */
158 	istream->origin.lineno = 0;
159 	istream->F = stream;
160 	istream->ptr = istream->end = NULL;
161 	return istream;
162 }
163 
164 static void
free_input_stream(struct input_stream * istream)165 free_input_stream(struct input_stream *istream)
166 {
167 	if (istream->F) {
168 		if (ferror(istream->F))
169 			Parse_Error(PARSE_FATAL, "Read error");
170 		if (fileno(istream->F) != STDIN_FILENO)
171 			(void)fclose(istream->F);
172 	}
173 	free(istream->str);
174 	/* Note we can't free the file names, as they are embedded in GN
175 	 * for error reports. */
176 	free(istream);
177 }
178 
179 static struct input_stream *
new_input_string(char * str,const Location * origin)180 new_input_string(char *str, const Location *origin)
181 {
182 	struct input_stream *istream;
183 
184 	istream = emalloc(sizeof(*istream));
185 	/* No malloc, name is always taken from an already existing istream
186 	 * and strings are used in for loops, so we need to reset the line
187 	 * counter to an appropriate value. */
188 	istream->origin = *origin;
189 	istream->F = NULL;
190 	istream->ptr = istream->str = str;
191 	istream->end = str + strlen(str);
192 	return istream;
193 }
194 
195 
196 void
Parse_FromString(char * str,unsigned long lineno)197 Parse_FromString(char *str, unsigned long lineno)
198 {
199 	Location origin;
200 
201 	origin.fname = current->origin.fname;
202 	origin.lineno = lineno;
203 	if (DEBUG(FOR))
204 		(void)fprintf(stderr, "%s\n----\n", str);
205 
206 	Lst_Push(&input_stack, current);
207 	assert(current != NULL);
208 	current = new_input_string(str, &origin);
209 }
210 
211 
212 void
Parse_FromFile(const char * name,FILE * stream)213 Parse_FromFile(const char *name, FILE *stream)
214 {
215 	if (current != NULL)
216 		Lst_Push(&input_stack, current);
217 	current = new_input_file(name, stream);
218 }
219 
220 bool
Parse_NextFile(void)221 Parse_NextFile(void)
222 {
223 	if (current != NULL)
224 		free_input_stream(current);
225 	current = Lst_Pop(&input_stack);
226 	return current != NULL;
227 }
228 
229 static int
grab_new_line_and_readchar(void)230 grab_new_line_and_readchar(void)
231 {
232 	size_t len;
233 
234 	if (current->F) {
235 		current->ptr = fgetln(current->F, &len);
236 		if (current->ptr) {
237 			current->end = current->ptr + len;
238 			return *current->ptr++;
239 		} else {
240 			current->end = NULL;
241 		}
242 	}
243 	return EOF;
244 }
245 
246 static int
skip_to_end_of_line(void)247 skip_to_end_of_line(void)
248 {
249 	if (current->F) {
250 		if (current->end - current->ptr > 1)
251 			current->ptr = current->end - 1;
252 		if (*current->ptr == '\n')
253 			return *current->ptr++;
254 		return EOF;
255 	} else {
256 		int c;
257 
258 		do {
259 			c = read_char();
260 		} while (c != '\n' && c != EOF);
261 		return c;
262 	}
263 }
264 
265 
266 char *
Parse_ReadNextConditionalLine(Buffer linebuf)267 Parse_ReadNextConditionalLine(Buffer linebuf)
268 {
269 	int c;
270 
271 	/* If first char isn't dot, skip to end of line, handling \ */
272 	while ((c = read_char()) != '.') {
273 		for (;c != '\n'; c = read_char()) {
274 			if (c == '\\') {
275 				c = read_char();
276 				if (c == '\n')
277 					current->origin.lineno++;
278 			}
279 			if (c == EOF)
280 				/* Unclosed conditional, reported by cond.c */
281 				return NULL;
282 		}
283 		current->origin.lineno++;
284 	}
285 
286 	/* This is the line we need to copy */
287 	return Parse_ReadUnparsedLine(linebuf, "conditional");
288 }
289 
290 static void
read_logical_line(Buffer linebuf,int c)291 read_logical_line(Buffer linebuf, int c)
292 {
293 	for (;;) {
294 		if (c == '\n') {
295 			current->origin.lineno++;
296 			break;
297 		}
298 		if (c == EOF)
299 			break;
300 		Buf_AddChar(linebuf, c);
301 		c = read_char();
302 		while (c == '\\') {
303 			c = read_char();
304 			if (c == '\n') {
305 				Buf_AddSpace(linebuf);
306 				current->origin.lineno++;
307 				do {
308 					c = read_char();
309 				} while (c == ' ' || c == '\t');
310 			} else {
311 				Buf_AddChar(linebuf, '\\');
312 				if (c == '\\') {
313 					Buf_AddChar(linebuf, '\\');
314 					c = read_char();
315 				}
316 				break;
317 			}
318 		}
319 	}
320 }
321 
322 char *
Parse_ReadUnparsedLine(Buffer linebuf,const char * type)323 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
324 {
325 	int c;
326 
327 	Buf_Reset(linebuf);
328 	c = read_char();
329 	if (c == EOF) {
330 		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
331 		return NULL;
332 	}
333 
334 	/* Handle '\' at beginning of line, since \\n needs special treatment */
335 	while (c == '\\') {
336 		c = read_char();
337 		if (c == '\n') {
338 			current->origin.lineno++;
339 			do {
340 				c = read_char();
341 			} while (c == ' ' || c == '\t');
342 		} else {
343 			Buf_AddChar(linebuf, '\\');
344 			if (c == '\\') {
345 				Buf_AddChar(linebuf, '\\');
346 				c = read_char();
347 			}
348 			break;
349 		}
350 	}
351 	read_logical_line(linebuf, c);
352 
353 	return Buf_Retrieve(linebuf);
354 }
355 
356 /* This is a fairly complex function, but without it, we could not skip
357  * blocks of comments without reading them. */
358 static int
skip_empty_lines_and_read_char(Buffer linebuf)359 skip_empty_lines_and_read_char(Buffer linebuf)
360 {
361 	int c;		/* the current character */
362 
363 	for (;;) {
364 		Buf_Reset(linebuf);
365 		c = read_char();
366 		/* Strip leading spaces, fold on '\n' */
367 		if (c == ' ') {
368 			do {
369 				c = read_char();
370 			} while (c == ' ' || c == '\t');
371 			while (c == '\\') {
372 				c = read_char();
373 				if (c == '\n') {
374 					current->origin.lineno++;
375 					do {
376 						c = read_char();
377 					} while (c == ' ' || c == '\t');
378 				} else {
379 					Buf_AddChar(linebuf, '\\');
380 					if (c == '\\') {
381 						Buf_AddChar(linebuf, '\\');
382 						c = read_char();
383 					}
384 					if (c == EOF)
385 						return '\n';
386 					else
387 						return c;
388 				}
389 			}
390 			assert(c != '\t');
391 		}
392 		if (c == '#')
393 			c = skip_to_end_of_line();
394 		/* Almost identical to spaces, except this occurs after
395 		 * comments have been taken care of, and we keep the tab
396 		 * itself.  */
397 		if (c == '\t') {
398 			Buf_AddChar(linebuf, '\t');
399 			do {
400 				c = read_char();
401 			} while (c == ' ' || c == '\t');
402 			while (c == '\\') {
403 				c = read_char();
404 				if (c == '\n') {
405 					current->origin.lineno++;
406 					do {
407 						c = read_char();
408 					} while (c == ' ' || c == '\t');
409 				} else {
410 					Buf_AddChar(linebuf, '\\');
411 					if (c == '\\') {
412 						Buf_AddChar(linebuf, '\\');
413 						c = read_char();
414 					}
415 					if (c == EOF)
416 						return '\n';
417 					else
418 						return c;
419 				}
420 			}
421 		}
422 		if (c == '\n')
423 			current->origin.lineno++;
424 		else
425 			return c;
426 	}
427 }
428 
429 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
430  * the first tab), handles escaped newlines, and skips over uninteresting
431  * lines.
432  *
433  * The line number is incremented, which implies that continuation
434  * lines are numbered with the last line number (we could do better, at a
435  * price).
436  *
437  * Trivial comments are also removed, but we can't do more, as
438  * we don't know which lines are shell commands or not.  */
439 char *
Parse_ReadNormalLine(Buffer linebuf)440 Parse_ReadNormalLine(Buffer linebuf)
441 {
442 	int c;		/* the current character */
443 
444 	c = skip_empty_lines_and_read_char(linebuf);
445 
446 	if (c == EOF)
447 		return NULL;
448 	else {
449 		read_logical_line(linebuf, c);
450 		return Buf_Retrieve(linebuf);
451 	}
452 }
453 
454 unsigned long
Parse_Getlineno(void)455 Parse_Getlineno(void)
456 {
457 	return current ? current->origin.lineno : 0;
458 }
459 
460 const char *
Parse_Getfilename(void)461 Parse_Getfilename(void)
462 {
463 	return current ? current->origin.fname : NULL;
464 }
465 
466 void
Parse_SetLocation(Location * origin)467 Parse_SetLocation(Location *origin)
468 {
469 	post_parse = origin;
470 }
471 
472 void
Parse_FillLocation(Location * origin)473 Parse_FillLocation(Location *origin)
474 {
475 	if (post_parse) {
476 		*origin = *post_parse;
477 	} else {
478 		origin->lineno = Parse_Getlineno();
479 		origin->fname = Parse_Getfilename();
480 	}
481 }
482 
483 void
Parse_ReportErrors(void)484 Parse_ReportErrors(void)
485 {
486 	if (fatal_errors)
487 		exit(1);
488 	else
489 		assert(current == NULL);
490 }
491