1 /* parseconf.c - state machine-driven dynamic configuration file parser
2 
3    Copyright (C) 2002  Russell Kroll <rkroll@exploits.org>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 
20 /* parseconf, version 4.
21  *
22  * This one abandons the "callback" system introduced last time.  It
23  * didn't turn out as well as I had hoped - you got stuck "behind"
24  * parseconf too often.
25  *
26  * There is now a context buffer, and you call pconf_init to set it up.
27  * All subsequent calls must have it as the first argument.  There are
28  * three entry points for parsing lines.  You can have it read a file
29  * (pconf_file_begin and pconf_file_next), take lines directly from
30  * the caller (pconf_line), or go along a character at a time (pconf_char).
31  * The parsing is identical no matter how you feed it.
32  *
33  * Since there are no more callbacks, you take the successful return
34  * from the function and access ctx->arglist and ctx->numargs yourself.
35  * You must check for errors with pconf_parse_error before using them,
36  * since it might not be complete.  This lets the caller handle all
37  * error reporting that's nonfatal.
38  *
39  * Fatal errors are those that involve memory allocation.  If the user
40  * defines an error handler when calling pconf_init, that function will
41  * be called with the error message before parseconf exits.  By default
42  * it will just write the message to stderr before exiting.
43  *
44  * Input vs. Output:
45  *
46  * What it reads		--> What ends up in each argument
47  *
48  * this is a line 		--> "this" "is" "a" "line"
49  * this "is also" a line	--> "this" "is also" "a" "line"
50  * embedded\ space		--> "embedded space"
51  * embedded\\backslash		--> "embedded\backslash"
52  *
53  * Arguments are split by whitespace (isspace()) unless that whitespace
54  * occurs inside a "quoted pair like this".
55  *
56  * You can also escape the double quote (") character.  The backslash
57  * also allows you to join lines, allowing you to have logical lines
58  * that span physical lines, just like you can do in some shells.
59  *
60  * Lines normally end with a newline, but reaching EOF will also force
61  * parsing on what's been scanned so far.
62  *
63  * Design:
64  *
65  * Characters are read one at a time to drive the state machine.
66  * As words are completed (by hitting whitespace or ending a "" item),
67  * they are committed to the next buffer in the arglist.  realloc is
68  * used, so the buffer can grow to handle bigger words.
69  *
70  * The arglist also grows as necessary with a similar approach.  As a
71  * result, you can parse extremely long words and lines with an insane
72  * number of elements.
73  *
74  * Finally, there is argsize, which remembers how long each of the
75  * arglist elements are.  This is how we know when to expand them.
76  *
77  */
78 
79 #include "common.h"
80 
81 #include <ctype.h>
82 #include <errno.h>
83 #include <stdio.h>
84 #include <stdarg.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <unistd.h>
88 #include <fcntl.h>
89 
90 #include "parseconf.h"
91 #include "attribute.h"
92 
93 /* possible states */
94 
95 #define STATE_FINDWORDSTART	1
96 #define STATE_FINDEOL		2
97 #define STATE_QUOTECOLLECT	3
98 #define STATE_QC_LITERAL	4
99 #define STATE_COLLECT		5
100 #define STATE_COLLECTLITERAL	6
101 #define STATE_ENDOFLINE		7
102 #define STATE_PARSEERR		8
103 
104 static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
105 	__attribute__((noreturn));
106 
pconf_fatal(PCONF_CTX_t * ctx,const char * errtxt)107 static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
108 {
109 	if (ctx->errhandler)
110 		ctx->errhandler(errtxt);
111 	else
112 		fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);
113 
114 	exit(EXIT_FAILURE);
115 }
116 
/* Commit the word currently held in ctx->wordbuf as the next argument.
 *
 * The argument list and the per-argument size table grow by one entry
 * when this line has more arguments than any earlier one, and an
 * individual argument buffer grows when the word does not fit in it.
 * Buffers are never shrunk.  Any allocation failure is fatal.
 */
static void add_arg_word(PCONF_CTX_t *ctx)
{
	/* the slot that receives the word; claiming it bumps the count */
	const size_t	slot = ctx->numargs++;
	const size_t	wordlen = strlen(ctx->wordbuf);

	/* more arguments than ever before: extend both tables by one */
	if (ctx->numargs > ctx->maxargs) {
		ctx->maxargs = ctx->numargs;

		ctx->arglist = realloc(ctx->arglist,
			ctx->numargs * sizeof(char *));

		if (ctx->arglist == NULL)
			pconf_fatal(ctx, "realloc arglist failed");

		ctx->argsize = realloc(ctx->argsize,
			ctx->numargs * sizeof(size_t));

		if (ctx->argsize == NULL)
			pconf_fatal(ctx, "realloc argsize failed");

		/* the fresh slot has no storage yet */
		ctx->arglist[slot] = NULL;
		ctx->argsize[slot] = 0;
	}

	/* the buffer must hold the word plus its terminating NUL */
	if (ctx->argsize[slot] <= wordlen) {
		ctx->arglist[slot] = realloc(ctx->arglist[slot], wordlen + 1);

		if (ctx->arglist[slot] == NULL)
			pconf_fatal(ctx, "realloc arglist member failed");

		ctx->argsize[slot] = wordlen + 1;
	}

	/* copy the word, then zero everything after it in the buffer */
	memcpy(ctx->arglist[slot], ctx->wordbuf, wordlen);
	memset(ctx->arglist[slot] + wordlen, '\0',
		ctx->argsize[slot] - wordlen);
}
174 
/* Append the current character (ctx->ch) to the word being built.
 *
 * Characters outside 0x20..0x7f are reported on stderr and discarded.
 * Once the word has reached ctx->wordlen_limit (when that is nonzero)
 * further characters are silently ignored.  The word buffer grows in
 * steps of 8 bytes as needed; failure to grow it is fatal.
 */
static void addchar(PCONF_CTX_t *ctx)
{
	const size_t	used = strlen(ctx->wordbuf);

	/* CVE-2012-2944: only values from 0x20 through 0x7f are accepted */
	if (!((ctx->ch >= 0x20) && (ctx->ch <= 0x7f))) {
		fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
				ctx->ch);
		return;
	}

	/* limit reached: don't append any more */
	if ((ctx->wordlen_limit != 0) && (used >= ctx->wordlen_limit))
		return;

	/* keep one byte free for the terminating NUL */
	if (used >= (ctx->wordbufsize - 1)) {
		ctx->wordbufsize += 8;

		ctx->wordbuf = realloc(ctx->wordbuf, ctx->wordbufsize);

		if (ctx->wordbuf == NULL)
			pconf_fatal(ctx, "realloc wordbuf failed");

		/* the buffer may have moved, so re-derive the write position */
		ctx->wordptr = ctx->wordbuf + used;
	}

	ctx->wordptr[0] = (char)ctx->ch;
	ctx->wordptr[1] = '\0';
	ctx->wordptr++;
}
212 
/* Finish the word being built and reset the word buffer for the next one.
 *
 * The word becomes a new argument unless ctx->arg_limit is nonzero and
 * the line already holds that many arguments, in which case the word is
 * dropped.
 */
static void endofword(PCONF_CTX_t *ctx)
{
	const int	at_limit = (ctx->arg_limit != 0)
				&& (ctx->numargs >= ctx->arg_limit);

	/* only accept the word while there is still room for arguments */
	if (!at_limit)
		add_arg_word(ctx);

	/* rewind to an empty word in both cases */
	ctx->wordptr = ctx->wordbuf;
	ctx->wordptr[0] = '\0';
}
231 
232 /* look for the beginning of a word */
findwordstart(PCONF_CTX_t * ctx)233 static int findwordstart(PCONF_CTX_t *ctx)
234 {
235 	/* newline = the physical line is over, so the logical one is too */
236 	if (ctx->ch == 10)
237 		return STATE_ENDOFLINE;
238 
239 	/* the rest of the line is a comment */
240 	if (ctx->ch == '#')
241 		return STATE_FINDEOL;
242 
243 	/* space = not in a word yet, so loop back */
244 	if (isspace(ctx->ch))
245 		return STATE_FINDWORDSTART;
246 
247 	/* \ = literal = accept the next char blindly */
248 	if (ctx->ch == '\\')
249 		return STATE_COLLECTLITERAL;
250 
251 	/* " = begin word bounded by quotes */
252 	if (ctx->ch == '"')
253 		return STATE_QUOTECOLLECT;
254 
255 	/* at this point the word just started */
256 	addchar(ctx);
257 
258 	/* if the first character is a '=' this is considered a whole word */
259 	if (ctx->ch == '=') {
260 		endofword(ctx);
261 		return STATE_FINDWORDSTART;
262 	}
263 
264 	return STATE_COLLECT;
265 }
266 
267 /* eat characters until the end of the line is found */
findeol(PCONF_CTX_t * ctx)268 static int findeol(PCONF_CTX_t *ctx)
269 {
270 	/* newline = found it, so start a new line */
271 	if (ctx->ch == 10)
272 		return STATE_ENDOFLINE;
273 
274 	/* come back here */
275 	return STATE_FINDEOL;
276 }
277 
278 /* set up the error reporting details */
pconf_seterr(PCONF_CTX_t * ctx,const char * errmsg)279 static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
280 {
281 	snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);
282 
283 	ctx->error = 1;
284 }
285 
286 /* quote characters inside a word bounded by "quotes" */
quotecollect(PCONF_CTX_t * ctx)287 static int quotecollect(PCONF_CTX_t *ctx)
288 {
289 	/* user is trying to break us */
290 	if (ctx->ch == '#') {
291 		pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
292 		endofword(ctx);
293 
294 		/* this makes us drop all the way out of the caller */
295 		return STATE_PARSEERR;
296 	}
297 
298 	/* another " means we're done with this word */
299 	if (ctx->ch == '"') {
300 		endofword(ctx);
301 
302 		return STATE_FINDWORDSTART;
303 	}
304 
305 	/* literal - special case since it needs to return here */
306 	if (ctx->ch == '\\')
307 		return STATE_QC_LITERAL;
308 
309 	/* otherwise save it and loop back */
310 	addchar(ctx);
311 
312 	return STATE_QUOTECOLLECT;
313 }
314 
315 /* take almost anything literally, but return to quotecollect */
qc_literal(PCONF_CTX_t * ctx)316 static int qc_literal(PCONF_CTX_t *ctx)
317 {
318 	/* continue onto the next line of the file */
319 	if (ctx->ch == 10)
320 		return STATE_QUOTECOLLECT;
321 
322 	addchar(ctx);
323 	return STATE_QUOTECOLLECT;
324 }
325 
326 /* collect characters inside a word */
collect(PCONF_CTX_t * ctx)327 static int collect(PCONF_CTX_t *ctx)
328 {
329 	/* comment means the word is done, and skip to the end of the line */
330 	if (ctx->ch == '#') {
331 		endofword(ctx);
332 
333 		return STATE_FINDEOL;
334 	}
335 
336 	/* newline means the word is done, and the line is done */
337 	if (ctx->ch == 10) {
338 		endofword(ctx);
339 
340 		return STATE_ENDOFLINE;
341 	}
342 
343 	/* space means the word is done */
344 	if (isspace(ctx->ch)) {
345 		endofword(ctx);
346 
347 		return STATE_FINDWORDSTART;
348 	}
349 
350 	/* '=' means the word is done and the = is a single char word*/
351 	if (ctx->ch == '=') {
352 		endofword(ctx);
353 		findwordstart(ctx);
354 
355 		return STATE_FINDWORDSTART;
356 	}
357 
358 	/* \ = literal = accept the next char blindly */
359 	if (ctx->ch == '\\')
360 		return STATE_COLLECTLITERAL;
361 
362 	/* otherwise store it and come back for more */
363 	addchar(ctx);
364 	return STATE_COLLECT;
365 }
366 
367 /* take almost anything literally */
collectliteral(PCONF_CTX_t * ctx)368 static int collectliteral(PCONF_CTX_t *ctx)
369 {
370 	/* continue to the next line */
371 	if (ctx->ch == 10)
372 		return STATE_COLLECT;
373 
374 	addchar(ctx);
375 	return STATE_COLLECT;
376 }
377 
378 /* clean up memory before going back to the user */
free_storage(PCONF_CTX_t * ctx)379 static void free_storage(PCONF_CTX_t *ctx)
380 {
381 	unsigned int	i;
382 
383 	free(ctx->wordbuf);
384 
385 	/* clear out the individual words first */
386 	for (i = 0; i < ctx->maxargs; i++)
387 		free(ctx->arglist[i]);
388 
389 	free(ctx->arglist);
390 	free(ctx->argsize);
391 
392 	/* put things back to the initial state */
393 	ctx->arglist = NULL;
394 	ctx->argsize = NULL;
395 	ctx->numargs = 0;
396 	ctx->maxargs = 0;
397 }
398 
/* Prepare a context buffer for use by the other pconf_* functions.
 *
 * ctx        - caller-provided context; its previous contents are ignored
 * errhandler - called with the message on a fatal error, or NULL to have
 *              the message written to stderr instead
 *
 * Returns 1.  Failure to allocate the initial word buffer is fatal.
 */
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
	/* Install the handler before anything can fail: pconf_fatal() reads
	 * ctx->errhandler, and ctx may hold garbage at this point.
	 */
	ctx->errhandler = errhandler;

	/* set up the ctx elements */

	ctx->f = NULL;
	ctx->state = STATE_FINDWORDSTART;
	ctx->numargs = 0;
	ctx->maxargs = 0;
	ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
	ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
	ctx->linenum = 0;
	ctx->error = 0;
	ctx->arglist = NULL;
	ctx->argsize = NULL;

	/* start with a small, zero-filled (i.e. empty) word buffer */
	ctx->wordbufsize = 16;
	ctx->wordbuf = calloc(1, ctx->wordbufsize);

	if (!ctx->wordbuf)
		pconf_fatal(ctx, "malloc wordbuf failed");
	ctx->wordptr = ctx->wordbuf;

	/* only a fully initialized context gets the magic value */
	ctx->magic = PCONF_CTX_t_MAGIC;

	return 1;
}
426 
/* Check that ctx is non-NULL and carries the magic value set by pconf_init.
 *
 * Returns 1 when the context looks valid and 0 otherwise.  For a non-NULL
 * context with the wrong magic, an explanation is stored in ctx->errmsg.
 */
static int check_magic(PCONF_CTX_t *ctx)
{
	if (ctx == NULL)
		return 0;

	if (ctx->magic == PCONF_CTX_t_MAGIC)
		return 1;

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");
	return 0;
}
439 
/* Open the file fn for reading so pconf_file_next can parse it.
 *
 * Returns 1 on success.  Returns 0 when ctx is not valid, or when the
 * file cannot be opened, in which case the reason is stored in
 * ctx->errmsg.
 */
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
	if (!check_magic(ctx))
		return 0;

	ctx->f = fopen(fn, "r");

	if (ctx->f != NULL) {
		/* prevent fd leaking to child processes */
		fcntl(fileno(ctx->f), F_SETFD, FD_CLOEXEC);

		return 1;	/* OK */
	}

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
		fn, strerror(errno));

	return 0;
}
458 
/* Feed the current character (ctx->ch) to the handler for the current
 * state and store the state that handler returns.  A state without a
 * handler (end of line, parse error, or anything unknown) is left as is.
 */
static void parse_char(PCONF_CTX_t *ctx)
{
	int	(*handler)(PCONF_CTX_t *) = NULL;

	switch (ctx->state) {
	case STATE_FINDWORDSTART:
		handler = findwordstart;
		break;

	case STATE_FINDEOL:
		handler = findeol;
		break;

	case STATE_QUOTECOLLECT:
		handler = quotecollect;
		break;

	case STATE_QC_LITERAL:
		handler = qc_literal;
		break;

	case STATE_COLLECT:
		handler = collect;
		break;

	case STATE_COLLECTLITERAL:
		handler = collectliteral;
		break;

	default:
		break;
	}

	if (handler != NULL)
		ctx->state = handler(ctx);
}
487 
488 /* return 1 if an error occurred, but only do it once */
pconf_parse_error(PCONF_CTX_t * ctx)489 int pconf_parse_error(PCONF_CTX_t *ctx)
490 {
491 	if (!check_magic(ctx))
492 		return 0;
493 
494 	if (ctx->error == 1) {
495 		ctx->error = 0;
496 		return 1;
497 	}
498 
499 	return 0;
500 }
501 
502 /* clean up the ctx space */
pconf_finish(PCONF_CTX_t * ctx)503 void pconf_finish(PCONF_CTX_t *ctx)
504 {
505 	if (!check_magic(ctx))
506 		return;
507 
508 	if (ctx->f)
509 		fclose(ctx->f);
510 
511 	free_storage(ctx);
512 
513 	ctx->magic = 0;
514 }
515 
516 /* read from a file until a whole line is ready for use */
pconf_file_next(PCONF_CTX_t * ctx)517 int pconf_file_next(PCONF_CTX_t *ctx)
518 {
519 	if (!check_magic(ctx))
520 		return 0;
521 
522 	ctx->linenum++;
523 
524 	/* start over for the new line */
525 	ctx->numargs = 0;
526 	ctx->state = STATE_FINDWORDSTART;
527 
528 	while ((ctx->ch = fgetc(ctx->f)) != EOF) {
529 		parse_char(ctx);
530 
531 		if (ctx->state == STATE_PARSEERR)
532 			return 1;
533 
534 		if (ctx->state == STATE_ENDOFLINE)
535 			return 1;
536 	}
537 
538 	/* deal with files that don't end in a newline */
539 
540 	if (ctx->numargs != 0) {
541 
542 		/* still building a word? */
543 		if (ctx->wordptr != ctx->wordbuf)
544 			endofword(ctx);
545 
546 		return 1;
547 	}
548 
549 	/* finished with nothing left over */
550 	return 0;
551 }
552 
553 /* parse a provided line */
pconf_line(PCONF_CTX_t * ctx,const char * line)554 int pconf_line(PCONF_CTX_t *ctx, const char *line)
555 {
556 	size_t	i, linelen;
557 
558 	if (!check_magic(ctx))
559 		return 0;
560 
561 	ctx->linenum++;
562 
563 	/* start over for the new line */
564 	ctx->numargs = 0;
565 	ctx->state = STATE_FINDWORDSTART;
566 
567 	linelen = strlen(line);
568 
569 	for (i = 0; i < linelen; i++) {
570 		ctx->ch = line[i];
571 
572 		parse_char(ctx);
573 
574 		if (ctx->state == STATE_PARSEERR)
575 			return 1;
576 
577 		if (ctx->state == STATE_ENDOFLINE)
578 			return 1;
579 	}
580 
581 	/* deal with any lingering characters */
582 
583 	/* still building a word? */
584 	if (ctx->wordptr != ctx->wordbuf)
585 		endofword(ctx);		/* tie it off */
586 
587 	return 1;
588 }
589 
/* characters that get a backslash put in front of them by pconf_encode */
#define PCONF_ESCAPE "#\\\""

/* Copy src into dest, putting a backslash before every character listed
 * in PCONF_ESCAPE.
 *
 * src      - NUL-terminated string to encode
 * dest     - output buffer of at least destsize bytes
 * destsize - size of dest in bytes, including room for the final NUL
 *
 * The output is truncated when it does not fit.  A backslash is never
 * written without the character it escapes.  When destsize is at least 1
 * the result is always NUL-terminated; when it is 0, dest is not touched.
 *
 * Returns dest.
 */
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
	size_t	i, srclen, destlen, maxlen;

	if (destsize < 1)
		return dest;

	memset(dest, '\0', destsize);

	/* always leave room for a final NUL */
	maxlen = destsize - 1;
	srclen = strlen(src);
	destlen = 0;

	for (i = 0; i < srclen; i++) {
		if (strchr(PCONF_ESCAPE, src[i])) {

			/* If both won't fit, we're done.  This is phrased as
			 * "room left" (destlen never exceeds maxlen) so that
			 * nothing can wrap around when maxlen is 0.
			 */
			if (maxlen - destlen < 2)
				return dest;

			dest[destlen++] = '\\';
		}

		/* bail out when dest is full */
		if (destlen >= maxlen)
			return dest;

		dest[destlen++] = src[i];
	}

	return dest;
}
625 
626 /* parse input a character at a time */
pconf_char(PCONF_CTX_t * ctx,char ch)627 int pconf_char(PCONF_CTX_t *ctx, char ch)
628 {
629 	if (!check_magic(ctx))
630 		return -1;
631 
632 	/* if the last call finished a line, clean stuff up for another */
633 	if ((ctx->state == STATE_ENDOFLINE) || (ctx->state == STATE_PARSEERR)) {
634 		ctx->numargs = 0;
635 		ctx->state = STATE_FINDWORDSTART;
636 	}
637 
638 	ctx->ch = ch;
639 	parse_char(ctx);
640 
641 	if (ctx->state == STATE_ENDOFLINE)
642 		return 1;
643 
644 	if (ctx->state == STATE_PARSEERR)
645 		return -1;
646 
647 	return 0;
648 }
649