xref: /freebsd/lib/libfigpar/figpar.c (revision b00ab754)
1 /*-
2  * Copyright (c) 2002-2015 Devin Teske <dteske@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 
32 #include <ctype.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <fnmatch.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 
40 #include "figpar.h"
41 #include "string_m.h"
42 
43 struct figpar_config figpar_dummy_config = {0, NULL, {0}, NULL};
44 
45 /*
46  * Search for config option (struct figpar_config) in the array of config
47  * options, returning the struct whose directive matches the given parameter.
48  * If no match is found, a pointer to the static dummy array (above) is
49  * returned.
50  *
51  * This is to eliminate dependency on the index position of an item in the
52  * array, since the index position is more apt to be changed as code grows.
53  */
54 struct figpar_config *
55 get_config_option(struct figpar_config options[], const char *directive)
56 {
57 	uint32_t n;
58 
59 	/* Check arguments */
60 	if (options == NULL || directive == NULL)
61 		return (&figpar_dummy_config);
62 
63 	/* Loop through the array, return the index of the first match */
64 	for (n = 0; options[n].directive != NULL; n++)
65 		if (strcmp(options[n].directive, directive) == 0)
66 			return (&(options[n]));
67 
68 	/* Re-initialize the dummy variable in case it was written to */
69 	figpar_dummy_config.directive	= NULL;
70 	figpar_dummy_config.type	= 0;
71 	figpar_dummy_config.action	= NULL;
72 	figpar_dummy_config.value.u_num	= 0;
73 
74 	return (&figpar_dummy_config);
75 }
76 
77 /*
78  * Parse the configuration file at `path' and execute the `action' call-back
79  * functions for any directives defined by the array of config options (first
80  * argument).
81  *
82  * For unknown directives that are encountered, you can optionally pass a
83  * call-back function for the third argument to be called for unknowns.
84  *
85  * Returns zero on success; otherwise returns -1 and errno should be consulted.
86 */
87 int
88 parse_config(struct figpar_config options[], const char *path,
89     int (*unknown)(struct figpar_config *option, uint32_t line,
90     char *directive, char *value), uint16_t processing_options)
91 {
92 	uint8_t bequals;
93 	uint8_t bsemicolon;
94 	uint8_t case_sensitive;
95 	uint8_t comment = 0;
96 	uint8_t end;
97 	uint8_t found;
98 	uint8_t have_equals = 0;
99 	uint8_t quote;
100 	uint8_t require_equals;
101 	uint8_t strict_equals;
102 	char p[2];
103 	char *directive;
104 	char *t;
105 	char *value;
106 	int error;
107 	int fd;
108 	ssize_t r = 1;
109 	uint32_t dsize;
110 	uint32_t line = 1;
111 	uint32_t n;
112 	uint32_t vsize;
113 	uint32_t x;
114 	off_t charpos;
115 	off_t curpos;
116 	char rpath[PATH_MAX];
117 
118 	/* Sanity check: if no options and no unknown function, return */
119 	if (options == NULL && unknown == NULL)
120 		return (-1);
121 
122 	/* Processing options */
123 	bequals = (processing_options & FIGPAR_BREAK_ON_EQUALS) == 0 ? 0 : 1;
124 	bsemicolon =
125 		(processing_options & FIGPAR_BREAK_ON_SEMICOLON) == 0 ? 0 : 1;
126 	case_sensitive =
127 		(processing_options & FIGPAR_CASE_SENSITIVE) == 0 ? 0 : 1;
128 	require_equals =
129 		(processing_options & FIGPAR_REQUIRE_EQUALS) == 0 ? 0 : 1;
130 	strict_equals =
131 		(processing_options & FIGPAR_STRICT_EQUALS) == 0 ? 0 : 1;
132 
133 	/* Initialize strings */
134 	directive = value = 0;
135 	vsize = dsize = 0;
136 
137 	/* Resolve the file path */
138 	if (realpath(path, rpath) == 0)
139 		return (-1);
140 
141 	/* Open the file */
142 	if ((fd = open(rpath, O_RDONLY)) < 0)
143 		return (-1);
144 
145 	/* Read the file until EOF */
146 	while (r != 0) {
147 		r = read(fd, p, 1);
148 
149 		/* skip to the beginning of a directive */
150 		while (r != 0 && (isspace(*p) || *p == '#' || comment ||
151 		    (bsemicolon && *p == ';'))) {
152 			if (*p == '#')
153 				comment = 1;
154 			else if (*p == '\n') {
155 				comment = 0;
156 				line++;
157 			}
158 			r = read(fd, p, 1);
159 		}
160 		/* Test for EOF; if EOF then no directive was found */
161 		if (r == 0) {
162 			close(fd);
163 			return (0);
164 		}
165 
166 		/* Get the current offset */
167 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
168 		if (curpos == -1) {
169 			close(fd);
170 			return (-1);
171 		}
172 
173 		/* Find the length of the directive */
174 		for (n = 0; r != 0; n++) {
175 			if (isspace(*p))
176 				break;
177 			if (bequals && *p == '=') {
178 				have_equals = 1;
179 				break;
180 			}
181 			if (bsemicolon && *p == ';')
182 				break;
183 			r = read(fd, p, 1);
184 		}
185 
186 		/* Test for EOF, if EOF then no directive was found */
187 		if (n == 0 && r == 0) {
188 			close(fd);
189 			return (0);
190 		}
191 
192 		/* Go back to the beginning of the directive */
193 		error = (int)lseek(fd, curpos, SEEK_SET);
194 		if (error == (curpos - 1)) {
195 			close(fd);
196 			return (-1);
197 		}
198 
199 		/* Allocate and read the directive into memory */
200 		if (n > dsize) {
201 			if ((directive = realloc(directive, n + 1)) == NULL) {
202 				close(fd);
203 				return (-1);
204 			}
205 			dsize = n;
206 		}
207 		r = read(fd, directive, n);
208 
209 		/* Advance beyond the equals sign if appropriate/desired */
210 		if (bequals && *p == '=') {
211 			if (lseek(fd, 1, SEEK_CUR) != -1)
212 				r = read(fd, p, 1);
213 			if (strict_equals && isspace(*p))
214 				*p = '\n';
215 		}
216 
217 		/* Terminate the string */
218 		directive[n] = '\0';
219 
220 		/* Convert directive to lower case before comparison */
221 		if (!case_sensitive)
222 			strtolower(directive);
223 
224 		/* Move to what may be the start of the value */
225 		if (!(bsemicolon && *p == ';') &&
226 		    !(strict_equals && *p == '=')) {
227 			while (r != 0 && isspace(*p) && *p != '\n')
228 				r = read(fd, p, 1);
229 		}
230 
231 		/* An equals sign may have stopped us, should we eat it? */
232 		if (r != 0 && bequals && *p == '=' && !strict_equals) {
233 			have_equals = 1;
234 			r = read(fd, p, 1);
235 			while (r != 0 && isspace(*p) && *p != '\n')
236 				r = read(fd, p, 1);
237 		}
238 
239 		/* If no value, allocate a dummy value and jump to action */
240 		if (r == 0 || *p == '\n' || *p == '#' ||
241 		    (bsemicolon && *p == ';')) {
242 			/* Initialize the value if not already done */
243 			if (value == NULL && (value = malloc(1)) == NULL) {
244 				close(fd);
245 				return (-1);
246 			}
247 			value[0] = '\0';
248 			goto call_function;
249 		}
250 
251 		/* Get the current offset */
252 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
253 		if (curpos == -1) {
254 			close(fd);
255 			return (-1);
256 		}
257 
258 		/* Find the end of the value */
259 		quote = 0;
260 		end = 0;
261 		while (r != 0 && end == 0) {
262 			/* Advance to the next character if we know we can */
263 			if (*p != '\"' && *p != '#' && *p != '\n' &&
264 			    (!bsemicolon || *p != ';')) {
265 				r = read(fd, p, 1);
266 				continue;
267 			}
268 
269 			/*
270 			 * If we get this far, we've hit an end-key
271 			 */
272 
273 			/* Get the current offset */
274 			charpos = lseek(fd, 0, SEEK_CUR) - 1;
275 			if (charpos == -1) {
276 				close(fd);
277 				return (-1);
278 			}
279 
280 			/*
281 			 * Go back so we can read the character before the key
282 			 * to check if the character is escaped (which means we
283 			 * should continue).
284 			 */
285 			error = (int)lseek(fd, -2, SEEK_CUR);
286 			if (error == -3) {
287 				close(fd);
288 				return (-1);
289 			}
290 			r = read(fd, p, 1);
291 
292 			/*
293 			 * Count how many backslashes there are (an odd number
294 			 * means the key is escaped, even means otherwise).
295 			 */
296 			for (n = 1; *p == '\\'; n++) {
297 				/* Move back another offset to read */
298 				error = (int)lseek(fd, -2, SEEK_CUR);
299 				if (error == -3) {
300 					close(fd);
301 					return (-1);
302 				}
303 				r = read(fd, p, 1);
304 			}
305 
306 			/* Move offset back to the key and read it */
307 			error = (int)lseek(fd, charpos, SEEK_SET);
308 			if (error == (charpos - 1)) {
309 				close(fd);
310 				return (-1);
311 			}
312 			r = read(fd, p, 1);
313 
314 			/*
315 			 * If an even number of backslashes was counted meaning
316 			 * key is not escaped, we should evaluate what to do.
317 			 */
318 			if ((n & 1) == 1) {
319 				switch (*p) {
320 				case '\"':
321 					/*
322 				 	 * Flag current sequence of characters
323 					 * to follow as being quoted (hashes
324 					 * are not considered comments).
325 					 */
326 					quote = !quote;
327 					break;
328 				case '#':
329 					/*
330 					 * If we aren't in a quoted series, we
331 					 * just hit an inline comment and have
332 					 * found the end of the value.
333 					 */
334 					if (!quote)
335 						end = 1;
336 					break;
337 				case '\n':
338 					/*
339 					 * Newline characters must always be
340 					 * escaped, whether inside a quoted
341 					 * series or not, otherwise they
342 					 * terminate the value.
343 					 */
344 					end = 1;
345 				case ';':
346 					if (!quote && bsemicolon)
347 						end = 1;
348 					break;
349 				}
350 			} else if (*p == '\n')
351 				/* Escaped newline character. increment */
352 				line++;
353 
354 			/* Advance to the next character */
355 			r = read(fd, p, 1);
356 		}
357 
358 		/* Get the current offset */
359 		charpos = lseek(fd, 0, SEEK_CUR) - 1;
360 		if (charpos == -1) {
361 			close(fd);
362 			return (-1);
363 		}
364 
365 		/* Get the length of the value */
366 		n = (uint32_t)(charpos - curpos);
367 		if (r != 0) /* more to read, but don't read ending key */
368 			n--;
369 
370 		/* Move offset back to the beginning of the value */
371 		error = (int)lseek(fd, curpos, SEEK_SET);
372 		if (error == (curpos - 1)) {
373 			close(fd);
374 			return (-1);
375 		}
376 
377 		/* Allocate and read the value into memory */
378 		if (n > vsize) {
379 			if ((value = realloc(value, n + 1)) == NULL) {
380 				close(fd);
381 				return (-1);
382 			}
383 			vsize = n;
384 		}
385 		r = read(fd, value, n);
386 
387 		/* Terminate the string */
388 		value[n] = '\0';
389 
390 		/* Cut trailing whitespace off by termination */
391 		t = value + n;
392 		while (isspace(*--t))
393 			*t = '\0';
394 
395 		/* Escape the escaped quotes (replaceall is in string_m.c) */
396 		x = strcount(value, "\\\""); /* in string_m.c */
397 		if (x != 0 && (n + x) > vsize) {
398 			if ((value = realloc(value, n + x + 1)) == NULL) {
399 				close(fd);
400 				return (-1);
401 			}
402 			vsize = n + x;
403 		}
404 		if (replaceall(value, "\\\"", "\\\\\"") < 0) {
405 			/* Replace operation failed for some unknown reason */
406 			close(fd);
407 			return (-1);
408 		}
409 
410 		/* Remove all new line characters */
411 		if (replaceall(value, "\\\n", "") < 0) {
412 			/* Replace operation failed for some unknown reason */
413 			close(fd);
414 			return (-1);
415 		}
416 
417 		/* Resolve escape sequences */
418 		strexpand(value); /* in string_m.c */
419 
420 call_function:
421 		/* Abort if we're seeking only assignments */
422 		if (require_equals && !have_equals)
423 			return (-1);
424 
425 		found = have_equals = 0; /* reset */
426 
427 		/* If there are no options defined, call unknown and loop */
428 		if (options == NULL && unknown != NULL) {
429 			error = unknown(NULL, line, directive, value);
430 			if (error != 0) {
431 				close(fd);
432 				return (error);
433 			}
434 			continue;
435 		}
436 
437 		/* Loop through the array looking for a match for the value */
438 		for (n = 0; options[n].directive != NULL; n++) {
439 			error = fnmatch(options[n].directive, directive,
440 			    FNM_NOESCAPE);
441 			if (error == 0) {
442 				found = 1;
443 				/* Call function for array index item */
444 				if (options[n].action != NULL) {
445 					error = options[n].action(
446 					    &options[n],
447 					    line, directive, value);
448 					if (error != 0) {
449 						close(fd);
450 						return (error);
451 					}
452 				}
453 			} else if (error != FNM_NOMATCH) {
454 				/* An error has occurred */
455 				close(fd);
456 				return (-1);
457 			}
458 		}
459 		if (!found && unknown != NULL) {
460 			/*
461 			 * No match was found for the value we read from the
462 			 * file; call function designated for unknown values.
463 			 */
464 			error = unknown(NULL, line, directive, value);
465 			if (error != 0) {
466 				close(fd);
467 				return (error);
468 			}
469 		}
470 	}
471 
472 	close(fd);
473 	return (0);
474 }
475