1 /* $OpenBSD: lowparse.c,v 1.36 2023/09/04 11:35:11 espie Exp $ */
2
3 /* low-level parsing functions. */
4
5 /*
6 * Copyright (c) 1999,2000 Marc Espie.
7 *
8 * Extensive code changes for the OpenBSD project.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD
23 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <assert.h>
33 #include <stddef.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include "defines.h"
39 #include "buf.h"
40 #include "lowparse.h"
41 #include "error.h"
42 #include "lst.h"
43 #include "memory.h"
44 #include "pathnames.h"
45 #ifndef LOCATION_TYPE
46 #include "location.h"
47 #endif
48 #include "var.h"
49
50
/* Name of the variable to which new_input_file() appends, through
 * Var_Append(), the name of every file it creates a stream for. */
#define READ_MAKEFILES		"MAKEFILE_LIST"
52
53 /* Input stream structure: file or string.
54 * Files have str == NULL, F != NULL.
55 * Strings have F == NULL, str != NULL.
56 */
57 struct input_stream {
58 Location origin; /* Name of file and line number */
59 FILE *F; /* Open stream, or NULL if pure string. */
60 char *str; /* Input string, if F == NULL. */
61
62 /* Line buffer. */
63 char *ptr; /* Where we are. */
64 char *end; /* Don't overdo it. */
65 };
66
67 static struct input_stream *current; /* the input_stream being parsed. */
68
69 static LIST input_stack; /* Stack of input_stream waiting to be parsed
70 * (includes and loop reparses) */
71
72 /* record gnode location for proper reporting at runtime */
73 static Location *post_parse = NULL;
74
75 /* input_stream ctors.
76 *
77 * obj = new_input_file(filename, filehandle);
78 * Create input stream from filename, filehandle. */
79 static struct input_stream *new_input_file(const char *, FILE *);
80 /* obj = new_input_string(str, origin);
81 * Create input stream from str, origin. */
82 static struct input_stream *new_input_string(char *, const Location *);
83 /* free_input_stream(obj);
84 * Discard consumed input stream, closing files, freeing memory. */
85 static void free_input_stream(struct input_stream *);
86
87
/* Handling basic character reading.
 * c = read_char();
 *	Next character c from the current input stream, or EOF at end of
 *	stream.
 *
 *	The whole expansion is parenthesized so that the macro can be used
 *	safely inside any larger expression.  The buffered byte is converted
 *	through unsigned char (as getc() does), so that a 0xff byte can never
 *	compare equal to EOF on platforms where plain char is signed.
 *
 *	When the line buffer is exhausted (including the initial state where
 *	ptr and end are both NULL), grab_new_line_and_readchar() takes over. */
#define read_char() \
	(current->ptr < current->end ? \
	    (int)(unsigned char)*current->ptr++ : \
	    grab_new_line_and_readchar())
/* c = grab_new_line_and_readchar();
 *	Guts for read_char.  Grabs a new line off fgetln when we have
 *	consumed the current line and returns its first character, or EOF at
 *	end of stream. */
static int grab_new_line_and_readchar(void);
/* c = skip_to_end_of_line();
 *	Skips to the end of the current line, returns either '\n' or EOF. */
static int skip_to_end_of_line(void);
101
102
103 /* Helper functions to handle basic parsing. */
104 /* read_logical_line(buffer, firstchar);
105 * Grabs logical line into buffer, the first character has already been
106 * read into firstchar. */
107 static void read_logical_line(Buffer, int);
108
109 /* firstchar = ParseSkipEmptyLines(buffer);
110 * Scans lines, skipping empty lines. May put some characters into
111 * buffer, returns the first character useful to continue parsing
112 * (e.g., not a backslash or a space. */
113 static int skip_empty_lines_and_read_char(Buffer);
114
/* Directory prefix that simplify() strips from file names, and its length
 * (cached so that it is not recomputed for every file). */
const char *curdir;
size_t curdir_len;

/* Parse_setcurdir(dir);
 *	Record dir as the current directory prefix.  Only the pointer is
 *	stored, not a copy of the string, so dir has to stay valid for as
 *	long as it may be consulted. */
void
Parse_setcurdir(const char *dir)
{
	curdir_len = strlen(dir);
	curdir = dir;
}
124
/* bool = startswith(f, s, len);
 *	True if the first len characters of f match those of s, and the
 *	character that follows them in f is a '/': that is, if f names
 *	something located below the directory s (of length len). */
static bool
startswith(const char *f, const char *s, size_t len)
{
	if (strncmp(f, s, len) != 0)
		return false;
	return f[len] == '/';
}
130
131 static const char *
simplify(const char * filename)132 simplify(const char *filename)
133 {
134 if (startswith(filename, curdir, curdir_len))
135 return filename + curdir_len + 1;
136 else if (startswith(filename, _PATH_DEFSYSPATH,
137 sizeof(_PATH_DEFSYSPATH)-1)) {
138 size_t sz;
139 char *buf;
140 sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
141 buf = emalloc(sz);
142 snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
143 return buf;
144 } else
145 return filename;
146 }
147
148 static struct input_stream *
new_input_file(const char * name,FILE * stream)149 new_input_file(const char *name, FILE *stream)
150 {
151 struct input_stream *istream;
152
153 istream = emalloc(sizeof(*istream));
154 istream->origin.fname = simplify(name);
155 Var_Append(READ_MAKEFILES, name);
156 istream->str = NULL;
157 /* Naturally enough, we start reading at line 0. */
158 istream->origin.lineno = 0;
159 istream->F = stream;
160 istream->ptr = istream->end = NULL;
161 return istream;
162 }
163
164 static void
free_input_stream(struct input_stream * istream)165 free_input_stream(struct input_stream *istream)
166 {
167 if (istream->F) {
168 if (ferror(istream->F))
169 Parse_Error(PARSE_FATAL, "Read error");
170 if (fileno(istream->F) != STDIN_FILENO)
171 (void)fclose(istream->F);
172 }
173 free(istream->str);
174 /* Note we can't free the file names, as they are embedded in GN
175 * for error reports. */
176 free(istream);
177 }
178
179 static struct input_stream *
new_input_string(char * str,const Location * origin)180 new_input_string(char *str, const Location *origin)
181 {
182 struct input_stream *istream;
183
184 istream = emalloc(sizeof(*istream));
185 /* No malloc, name is always taken from an already existing istream
186 * and strings are used in for loops, so we need to reset the line
187 * counter to an appropriate value. */
188 istream->origin = *origin;
189 istream->F = NULL;
190 istream->ptr = istream->str = str;
191 istream->end = str + strlen(str);
192 return istream;
193 }
194
195
196 void
Parse_FromString(char * str,unsigned long lineno)197 Parse_FromString(char *str, unsigned long lineno)
198 {
199 Location origin;
200
201 origin.fname = current->origin.fname;
202 origin.lineno = lineno;
203 if (DEBUG(FOR))
204 (void)fprintf(stderr, "%s\n----\n", str);
205
206 Lst_Push(&input_stack, current);
207 assert(current != NULL);
208 current = new_input_string(str, &origin);
209 }
210
211
212 void
Parse_FromFile(const char * name,FILE * stream)213 Parse_FromFile(const char *name, FILE *stream)
214 {
215 if (current != NULL)
216 Lst_Push(&input_stack, current);
217 current = new_input_file(name, stream);
218 }
219
220 bool
Parse_NextFile(void)221 Parse_NextFile(void)
222 {
223 if (current != NULL)
224 free_input_stream(current);
225 current = Lst_Pop(&input_stack);
226 return current != NULL;
227 }
228
229 static int
grab_new_line_and_readchar(void)230 grab_new_line_and_readchar(void)
231 {
232 size_t len;
233
234 if (current->F) {
235 current->ptr = fgetln(current->F, &len);
236 if (current->ptr) {
237 current->end = current->ptr + len;
238 return *current->ptr++;
239 } else {
240 current->end = NULL;
241 }
242 }
243 return EOF;
244 }
245
246 static int
skip_to_end_of_line(void)247 skip_to_end_of_line(void)
248 {
249 if (current->F) {
250 if (current->end - current->ptr > 1)
251 current->ptr = current->end - 1;
252 if (*current->ptr == '\n')
253 return *current->ptr++;
254 return EOF;
255 } else {
256 int c;
257
258 do {
259 c = read_char();
260 } while (c != '\n' && c != EOF);
261 return c;
262 }
263 }
264
265
266 char *
Parse_ReadNextConditionalLine(Buffer linebuf)267 Parse_ReadNextConditionalLine(Buffer linebuf)
268 {
269 int c;
270
271 /* If first char isn't dot, skip to end of line, handling \ */
272 while ((c = read_char()) != '.') {
273 for (;c != '\n'; c = read_char()) {
274 if (c == '\\') {
275 c = read_char();
276 if (c == '\n')
277 current->origin.lineno++;
278 }
279 if (c == EOF)
280 /* Unclosed conditional, reported by cond.c */
281 return NULL;
282 }
283 current->origin.lineno++;
284 }
285
286 /* This is the line we need to copy */
287 return Parse_ReadUnparsedLine(linebuf, "conditional");
288 }
289
290 static void
read_logical_line(Buffer linebuf,int c)291 read_logical_line(Buffer linebuf, int c)
292 {
293 for (;;) {
294 if (c == '\n') {
295 current->origin.lineno++;
296 break;
297 }
298 if (c == EOF)
299 break;
300 Buf_AddChar(linebuf, c);
301 c = read_char();
302 while (c == '\\') {
303 c = read_char();
304 if (c == '\n') {
305 Buf_AddSpace(linebuf);
306 current->origin.lineno++;
307 do {
308 c = read_char();
309 } while (c == ' ' || c == '\t');
310 } else {
311 Buf_AddChar(linebuf, '\\');
312 if (c == '\\') {
313 Buf_AddChar(linebuf, '\\');
314 c = read_char();
315 }
316 break;
317 }
318 }
319 }
320 }
321
322 char *
Parse_ReadUnparsedLine(Buffer linebuf,const char * type)323 Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
324 {
325 int c;
326
327 Buf_Reset(linebuf);
328 c = read_char();
329 if (c == EOF) {
330 Parse_Error(PARSE_FATAL, "Unclosed %s", type);
331 return NULL;
332 }
333
334 /* Handle '\' at beginning of line, since \\n needs special treatment */
335 while (c == '\\') {
336 c = read_char();
337 if (c == '\n') {
338 current->origin.lineno++;
339 do {
340 c = read_char();
341 } while (c == ' ' || c == '\t');
342 } else {
343 Buf_AddChar(linebuf, '\\');
344 if (c == '\\') {
345 Buf_AddChar(linebuf, '\\');
346 c = read_char();
347 }
348 break;
349 }
350 }
351 read_logical_line(linebuf, c);
352
353 return Buf_Retrieve(linebuf);
354 }
355
356 /* This is a fairly complex function, but without it, we could not skip
357 * blocks of comments without reading them. */
358 static int
skip_empty_lines_and_read_char(Buffer linebuf)359 skip_empty_lines_and_read_char(Buffer linebuf)
360 {
361 int c; /* the current character */
362
363 for (;;) {
364 Buf_Reset(linebuf);
365 c = read_char();
366 /* Strip leading spaces, fold on '\n' */
367 if (c == ' ') {
368 do {
369 c = read_char();
370 } while (c == ' ' || c == '\t');
371 while (c == '\\') {
372 c = read_char();
373 if (c == '\n') {
374 current->origin.lineno++;
375 do {
376 c = read_char();
377 } while (c == ' ' || c == '\t');
378 } else {
379 Buf_AddChar(linebuf, '\\');
380 if (c == '\\') {
381 Buf_AddChar(linebuf, '\\');
382 c = read_char();
383 }
384 if (c == EOF)
385 return '\n';
386 else
387 return c;
388 }
389 }
390 assert(c != '\t');
391 }
392 if (c == '#')
393 c = skip_to_end_of_line();
394 /* Almost identical to spaces, except this occurs after
395 * comments have been taken care of, and we keep the tab
396 * itself. */
397 if (c == '\t') {
398 Buf_AddChar(linebuf, '\t');
399 do {
400 c = read_char();
401 } while (c == ' ' || c == '\t');
402 while (c == '\\') {
403 c = read_char();
404 if (c == '\n') {
405 current->origin.lineno++;
406 do {
407 c = read_char();
408 } while (c == ' ' || c == '\t');
409 } else {
410 Buf_AddChar(linebuf, '\\');
411 if (c == '\\') {
412 Buf_AddChar(linebuf, '\\');
413 c = read_char();
414 }
415 if (c == EOF)
416 return '\n';
417 else
418 return c;
419 }
420 }
421 }
422 if (c == '\n')
423 current->origin.lineno++;
424 else
425 return c;
426 }
427 }
428
429 /* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
430 * the first tab), handles escaped newlines, and skips over uninteresting
431 * lines.
432 *
433 * The line number is incremented, which implies that continuation
434 * lines are numbered with the last line number (we could do better, at a
435 * price).
436 *
437 * Trivial comments are also removed, but we can't do more, as
438 * we don't know which lines are shell commands or not. */
439 char *
Parse_ReadNormalLine(Buffer linebuf)440 Parse_ReadNormalLine(Buffer linebuf)
441 {
442 int c; /* the current character */
443
444 c = skip_empty_lines_and_read_char(linebuf);
445
446 if (c == EOF)
447 return NULL;
448 else {
449 read_logical_line(linebuf, c);
450 return Buf_Retrieve(linebuf);
451 }
452 }
453
454 unsigned long
Parse_Getlineno(void)455 Parse_Getlineno(void)
456 {
457 return current ? current->origin.lineno : 0;
458 }
459
460 const char *
Parse_Getfilename(void)461 Parse_Getfilename(void)
462 {
463 return current ? current->origin.fname : NULL;
464 }
465
466 void
Parse_SetLocation(Location * origin)467 Parse_SetLocation(Location *origin)
468 {
469 post_parse = origin;
470 }
471
472 void
Parse_FillLocation(Location * origin)473 Parse_FillLocation(Location *origin)
474 {
475 if (post_parse) {
476 *origin = *post_parse;
477 } else {
478 origin->lineno = Parse_Getlineno();
479 origin->fname = Parse_Getfilename();
480 }
481 }
482
483 void
Parse_ReportErrors(void)484 Parse_ReportErrors(void)
485 {
486 if (fatal_errors)
487 exit(1);
488 else
489 assert(current == NULL);
490 }
491