1 /* parseconf.c - state machine-driven dynamic configuration file parser
2 
3    Copyright (C) 2002  Russell Kroll <rkroll@exploits.org>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19 
20 /* parseconf, version 4.
21  *
22  * This one abandons the "callback" system introduced last time.  It
23  * didn't turn out as well as I had hoped - you got stuck "behind"
24  * parseconf too often.
25  *
26  * There is now a context buffer, and you call pconf_init to set it up.
27  * All subsequent calls must have it as the first argument.  There are
28  * three entry points for parsing lines.  You can have it read a file
29  * (pconf_file_begin and pconf_file_next), take lines directly from
30  * the caller (pconf_line), or go along a character at a time (pconf_char).
31  * The parsing is identical no matter how you feed it.
32  *
33  * Since there are no more callbacks, you take the successful return
34  * from the function and access ctx->arglist and ctx->numargs yourself.
35  * You must check for errors with pconf_parse_error before using them,
36  * since it might not be complete.  This lets the caller handle all
37  * error reporting that's nonfatal.
38  *
39  * Fatal errors are those that involve memory allocation.  If the user
40  * defines an error handler when calling pconf_init, that function will
41  * be called with the error message before parseconf exits.  By default
42  * it will just write the message to stderr before exiting.
43  *
44  * Input vs. Output:
45  *
46  * What it reads		--> What ends up in each argument
47  *
48  * this is a line 		--> "this" "is" "a" "line"
49  * this "is also" a line	--> "this" "is also" "a" "line"
50  * embedded\ space		--> "embedded space"
51  * embedded\\backslash		--> "embedded\backslash"
52  *
53  * Arguments are split by whitespace (isspace()) unless that whitespace
54  * occurs inside a "quoted pair like this".
55  *
56  * You can also escape the double quote (") character.  The backslash
57  * also allows you to join lines, allowing you to have logical lines
58  * that span physical lines, just like you can do in some shells.
59  *
60  * Lines normally end with a newline, but reaching EOF will also force
61  * parsing on what's been scanned so far.
62  *
63  * Design:
64  *
65  * Characters are read one at a time to drive the state machine.
66  * As words are completed (by hitting whitespace or ending a "" item),
67  * they are committed to the next buffer in the arglist.  realloc is
68  * used, so the buffer can grow to handle bigger words.
69  *
70  * The arglist also grows as necessary with a similar approach.  As a
71  * result, you can parse extremely long words and lines with an insane
72  * number of elements.
73  *
74  * Finally, there is argsize, which remembers how long each of the
75  * arglist elements is.  This is how we know when to expand them.
76  *
77  */
78 
79 #include <ctype.h>
80 #include <errno.h>
81 #include <stdio.h>
82 #include <stdarg.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <unistd.h>
86 
87 #include "parseconf.h"
88 
/*
 * Possible states of the parser.  parse_char() dispatches on ctx->state
 * and every handler returns the state to use for the next character.
 */
enum {
	STATE_FINDWORDSTART	= 1,	/* between words, looking for the next one */
	STATE_FINDEOL		= 2,	/* in a comment, skipping to the newline */
	STATE_QUOTECOLLECT	= 3,	/* collecting a word bounded by "quotes" */
	STATE_QC_LITERAL	= 4,	/* backslash seen inside a quoted word */
	STATE_COLLECT		= 5,	/* collecting an unquoted word */
	STATE_COLLECTLITERAL	= 6,	/* backslash seen in an unquoted word */
	STATE_ENDOFLINE		= 7,	/* a newline ended the logical line */
	STATE_PARSEERR		= 8	/* the line was abandoned after an error */
};
99 
/*
 * Report an unrecoverable error and terminate the process.
 *
 * The message is passed to ctx->errhandler when one is set; otherwise it
 * is written to stderr.  If that returns, exit(EXIT_FAILURE) follows.
 */
static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
{
	if (!ctx->errhandler) {
		/* no user handler: fall back to stderr */
		fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);
		exit(EXIT_FAILURE);
	}

	ctx->errhandler(errtxt);
	exit(EXIT_FAILURE);
}
109 
/*
 * Commit the word currently held in ctx->wordbuf as the next argument.
 *
 * ctx->numargs is incremented.  arglist/argsize gain a slot the first
 * time this many arguments are seen, and a slot's string storage is
 * enlarged whenever the word no longer fits in it.  Any allocation
 * failure is reported through pconf_fatal().
 */
static void add_arg_word(PCONF_CTX_t *ctx)
{
	int	slot;
	size_t	wordlen;

	/* the new word lands in the first unused slot */
	slot = ctx->numargs++;

	/* more arguments than ever before: grow both tables by one entry */
	if (ctx->numargs > ctx->maxargs) {
		ctx->maxargs = ctx->numargs;

		ctx->arglist = realloc(ctx->arglist,
			ctx->numargs * sizeof(char *));
		if (ctx->arglist == NULL)
			pconf_fatal(ctx, "realloc arglist failed");

		ctx->argsize = realloc(ctx->argsize,
			ctx->numargs * sizeof(size_t));
		if (ctx->argsize == NULL)
			pconf_fatal(ctx, "realloc argsize failed");

		/* a brand new slot owns no string storage yet */
		ctx->arglist[slot] = NULL;
		ctx->argsize[slot] = 0;
	}

	wordlen = strlen(ctx->wordbuf);

	/* the slot has to hold the word plus its terminating '\0' */
	if (ctx->argsize[slot] <= wordlen) {
		ctx->arglist[slot] = realloc(ctx->arglist[slot], wordlen + 1);
		if (ctx->arglist[slot] == NULL)
			pconf_fatal(ctx, "realloc arglist member failed");

		ctx->argsize[slot] = wordlen + 1;
	}

	/*
	 * Zero the whole slot, then lay the word over it: the zero fill
	 * is what provides the terminator.
	 */
	memset(ctx->arglist[slot], '\0', ctx->argsize[slot]);
	memcpy(ctx->arglist[slot], ctx->wordbuf, wordlen);
}
167 
/*
 * Append the current character (ctx->ch) to the word being built.
 *
 * Characters outside printable ASCII (Space through ~) are reported on
 * stderr and dropped.  Once the word has reached ctx->wordlen_limit
 * (when that limit is non-zero) further characters are silently
 * ignored.  The buffer grows 8 bytes at a time and is always left
 * '\0'-terminated; a failed realloc is reported via pconf_fatal().
 */
static void addchar(PCONF_CTX_t *ctx)
{
	size_t	wbuflen;

	/*
	 * CVE-2012-2944: only allow the subset Ascii charset from Space to ~
	 * (0x7f is DEL, a control character, so the upper bound is 0x7e)
	 */
	if ((ctx->ch < 0x20) || (ctx->ch > 0x7e)) {
		fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
				(unsigned int)ctx->ch);
		return;
	}

	/*
	 * wordptr always rests on the terminator of wordbuf, so the
	 * distance between the two is the current word length.
	 */
	wbuflen = (size_t)(ctx->wordptr - ctx->wordbuf);

	if (ctx->wordlen_limit != 0) {
		if (wbuflen >= ctx->wordlen_limit) {

			/* limit reached: don't append any more */
			return;
		}
	}

	/* allow for the null */
	if (wbuflen >= (ctx->wordbufsize - 1)) {
		ctx->wordbufsize += 8;

		ctx->wordbuf = realloc(ctx->wordbuf, ctx->wordbufsize);

		if (!ctx->wordbuf)
			pconf_fatal(ctx, "realloc wordbuf failed");

		/* repoint as wordbuf may have moved */
		ctx->wordptr = &ctx->wordbuf[wbuflen];
	}

	*ctx->wordptr++ = ctx->ch;
	*ctx->wordptr = '\0';
}
205 
/*
 * Finish the word in progress.
 *
 * The word is stored through add_arg_word() unless ctx->arg_limit is
 * non-zero and ctx->numargs has already reached it, in which case the
 * word is discarded.  The word buffer is emptied afterwards either way.
 */
static void endofword(PCONF_CTX_t *ctx)
{
	int	room_left;

	room_left = (ctx->arg_limit == 0) || (ctx->numargs < ctx->arg_limit);

	if (room_left)
		add_arg_word(ctx);

	/* rewind to an empty word */
	ctx->wordptr = ctx->wordbuf;
	*ctx->wordptr = '\0';
}
224 
225 /* look for the beginning of a word */
findwordstart(PCONF_CTX_t * ctx)226 static int findwordstart(PCONF_CTX_t *ctx)
227 {
228 	/* newline = the physical line is over, so the logical one is too */
229 	if (ctx->ch == 10)
230 		return STATE_ENDOFLINE;
231 
232 	/* the rest of the line is a comment */
233 	if (ctx->ch == '#')
234 		return STATE_FINDEOL;
235 
236 	/* space = not in a word yet, so loop back */
237 	if (isspace(ctx->ch))
238 		return STATE_FINDWORDSTART;
239 
240 	/* \ = literal = accept the next char blindly */
241 	if (ctx->ch == '\\')
242 		return STATE_COLLECTLITERAL;
243 
244 	/* " = begin word bounded by quotes */
245 	if (ctx->ch == '"')
246 		return STATE_QUOTECOLLECT;
247 
248 	/* at this point the word just started */
249 	addchar(ctx);
250 
251 	/* if the first character is a '=' this is considered a whole word */
252 	if (ctx->ch == '=') {
253 		endofword(ctx);
254 		return STATE_FINDWORDSTART;
255 	}
256 
257 	return STATE_COLLECT;
258 }
259 
260 /* eat characters until the end of the line is found */
findeol(PCONF_CTX_t * ctx)261 static int findeol(PCONF_CTX_t *ctx)
262 {
263 	/* newline = found it, so start a new line */
264 	if (ctx->ch == 10)
265 		return STATE_ENDOFLINE;
266 
267 	/* come back here */
268 	return STATE_FINDEOL;
269 }
270 
271 /* set up the error reporting details */
pconf_seterr(PCONF_CTX_t * ctx,const char * errmsg)272 static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
273 {
274 	snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);
275 
276 	ctx->error = 1;
277 }
278 
279 /* quote characters inside a word bounded by "quotes" */
quotecollect(PCONF_CTX_t * ctx)280 static int quotecollect(PCONF_CTX_t *ctx)
281 {
282 	/* user is trying to break us */
283 	if (ctx->ch == '#') {
284 		pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
285 		endofword(ctx);
286 
287 		/* this makes us drop all the way out of the caller */
288 		return STATE_PARSEERR;
289 	}
290 
291 	/* another " means we're done with this word */
292 	if (ctx->ch == '"') {
293 		endofword(ctx);
294 
295 		return STATE_FINDWORDSTART;
296 	}
297 
298 	/* literal - special case since it needs to return here */
299 	if (ctx->ch == '\\')
300 		return STATE_QC_LITERAL;
301 
302 	/* otherwise save it and loop back */
303 	addchar(ctx);
304 
305 	return STATE_QUOTECOLLECT;
306 }
307 
308 /* take almost anything literally, but return to quotecollect */
qc_literal(PCONF_CTX_t * ctx)309 static int qc_literal(PCONF_CTX_t *ctx)
310 {
311 	/* continue onto the next line of the file */
312 	if (ctx->ch == 10)
313 		return STATE_QUOTECOLLECT;
314 
315 	addchar(ctx);
316 	return STATE_QUOTECOLLECT;
317 }
318 
319 /* collect characters inside a word */
collect(PCONF_CTX_t * ctx)320 static int collect(PCONF_CTX_t *ctx)
321 {
322 	/* comment means the word is done, and skip to the end of the line */
323 	if (ctx->ch == '#') {
324 		endofword(ctx);
325 
326 		return STATE_FINDEOL;
327 	}
328 
329 	/* newline means the word is done, and the line is done */
330 	if (ctx->ch == 10) {
331 		endofword(ctx);
332 
333 		return STATE_ENDOFLINE;
334 	}
335 
336 	/* space means the word is done */
337 	if (isspace(ctx->ch)) {
338 		endofword(ctx);
339 
340 		return STATE_FINDWORDSTART;
341 	}
342 
343 	/* '=' means the word is done and the = is a single char word*/
344 	if (ctx->ch == '=') {
345 		endofword(ctx);
346 		findwordstart(ctx);
347 
348 		return STATE_FINDWORDSTART;
349 	}
350 
351 	/* \ = literal = accept the next char blindly */
352 	if (ctx->ch == '\\')
353 		return STATE_COLLECTLITERAL;
354 
355 	/* otherwise store it and come back for more */
356 	addchar(ctx);
357 	return STATE_COLLECT;
358 }
359 
360 /* take almost anything literally */
collectliteral(PCONF_CTX_t * ctx)361 static int collectliteral(PCONF_CTX_t *ctx)
362 {
363 	/* continue to the next line */
364 	if (ctx->ch == 10)
365 		return STATE_COLLECT;
366 
367 	addchar(ctx);
368 	return STATE_COLLECT;
369 }
370 
371 /* clean up memory before going back to the user */
free_storage(PCONF_CTX_t * ctx)372 static void free_storage(PCONF_CTX_t *ctx)
373 {
374 	unsigned int	i;
375 
376 	free(ctx->wordbuf);
377 
378 	/* clear out the individual words first */
379 	for (i = 0; i < ctx->maxargs; i++)
380 		free(ctx->arglist[i]);
381 
382 	free(ctx->arglist);
383 	free(ctx->argsize);
384 
385 	/* put things back to the initial state */
386 	ctx->arglist = NULL;
387 	ctx->argsize = NULL;
388 	ctx->numargs = 0;
389 	ctx->maxargs = 0;
390 }
391 
/*
 * Prepare a context buffer for use.
 *
 * ctx        - caller-provided context; every field used by the parser
 *              is initialised here
 * errhandler - called with the message on fatal (allocation) errors;
 *              when NULL, pconf_fatal() writes to stderr instead
 *
 * Returns 1.  Failure to allocate the word buffer is fatal and goes
 * through pconf_fatal().
 */
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
	/*
	 * Store the handler first: pconf_fatal() reads ctx->errhandler,
	 * and it may be called by the allocation check below.
	 */
	ctx->errhandler = errhandler;

	/* set up the ctx elements */

	ctx->f = NULL;
	ctx->state = STATE_FINDWORDSTART;
	ctx->numargs = 0;
	ctx->maxargs = 0;
	ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
	ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
	ctx->linenum = 0;
	ctx->error = 0;
	ctx->arglist = NULL;
	ctx->argsize = NULL;

	/* start with a small zero-filled word buffer; addchar() grows it */
	ctx->wordbufsize = 16;
	ctx->wordbuf = calloc(1, ctx->wordbufsize);

	if (!ctx->wordbuf)
		pconf_fatal(ctx, "malloc wordbuf failed");
	ctx->wordptr = ctx->wordbuf;

	/* only now is the context valid for the other entry points */
	ctx->magic = PCONF_CTX_t_MAGIC;

	return 1;
}
419 
/*
 * Verify that ctx is usable.  Returns 1 when ctx is non-NULL and carries
 * PCONF_CTX_t_MAGIC, 0 otherwise.  For a non-NULL ctx with the wrong
 * magic value, a message is also written to ctx->errmsg.
 */
static int check_magic(PCONF_CTX_t *ctx)
{
	if (ctx == NULL)
		return 0;

	if (ctx->magic == PCONF_CTX_t_MAGIC)
		return 1;

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");
	return 0;
}
432 
/*
 * Open the file named fn for reading and keep the stream in ctx->f.
 *
 * Returns 1 on success.  Returns 0 when ctx fails check_magic() or when
 * fopen() fails; in the latter case ctx->errmsg describes the failure.
 */
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
	if (!check_magic(ctx))
		return 0;

	ctx->f = fopen(fn, "r");

	if (ctx->f != NULL)
		return 1;	/* OK */

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
		fn, strerror(errno));

	return 0;
}
448 
/*
 * Run one step of the state machine: hand ctx->ch to the handler that
 * belongs to the current state and store the state it returns.  A state
 * without a handler here (such as STATE_ENDOFLINE or STATE_PARSEERR) is
 * left unchanged.
 */
static void parse_char(PCONF_CTX_t *ctx)
{
	int	next;

	switch (ctx->state) {
	case STATE_FINDWORDSTART:
		next = findwordstart(ctx);
		break;

	case STATE_FINDEOL:
		next = findeol(ctx);
		break;

	case STATE_QUOTECOLLECT:
		next = quotecollect(ctx);
		break;

	case STATE_QC_LITERAL:
		next = qc_literal(ctx);
		break;

	case STATE_COLLECT:
		next = collect(ctx);
		break;

	case STATE_COLLECTLITERAL:
		next = collectliteral(ctx);
		break;

	default:
		/* no handler for this state: nothing to do */
		return;
	}

	ctx->state = next;
}
477 
478 /* return 1 if an error occurred, but only do it once */
pconf_parse_error(PCONF_CTX_t * ctx)479 int pconf_parse_error(PCONF_CTX_t *ctx)
480 {
481 	if (!check_magic(ctx))
482 		return 0;
483 
484 	if (ctx->error == 1) {
485 		ctx->error = 0;
486 		return 1;
487 	}
488 
489 	return 0;
490 }
491 
492 /* clean up the ctx space */
pconf_finish(PCONF_CTX_t * ctx)493 void pconf_finish(PCONF_CTX_t *ctx)
494 {
495 	if (!check_magic(ctx))
496 		return;
497 
498 	if (ctx->f)
499 		fclose(ctx->f);
500 
501 	free_storage(ctx);
502 
503 	ctx->magic = 0;
504 }
505 
506 /* read from a file until a whole line is ready for use */
pconf_file_next(PCONF_CTX_t * ctx)507 int pconf_file_next(PCONF_CTX_t *ctx)
508 {
509 	if (!check_magic(ctx))
510 		return 0;
511 
512 	ctx->linenum++;
513 
514 	/* start over for the new line */
515 	ctx->numargs = 0;
516 	ctx->state = STATE_FINDWORDSTART;
517 
518 	while ((ctx->ch = fgetc(ctx->f)) != EOF) {
519 		parse_char(ctx);
520 
521 		if (ctx->state == STATE_PARSEERR)
522 			return 1;
523 
524 		if (ctx->state == STATE_ENDOFLINE)
525 			return 1;
526 	}
527 
528 	/* deal with files that don't end in a newline */
529 
530 	if (ctx->numargs != 0) {
531 
532 		/* still building a word? */
533 		if (ctx->wordptr != ctx->wordbuf)
534 			endofword(ctx);
535 
536 		return 1;
537 	}
538 
539 	/* finished with nothing left over */
540 	return 0;
541 }
542 
543 /* parse a provided line */
pconf_line(PCONF_CTX_t * ctx,const char * line)544 int pconf_line(PCONF_CTX_t *ctx, const char *line)
545 {
546 	size_t	i, linelen;
547 
548 	if (!check_magic(ctx))
549 		return 0;
550 
551 	ctx->linenum++;
552 
553 	/* start over for the new line */
554 	ctx->numargs = 0;
555 	ctx->state = STATE_FINDWORDSTART;
556 
557 	linelen = strlen(line);
558 
559 	for (i = 0; i < linelen; i++) {
560 		ctx->ch = line[i];
561 
562 		parse_char(ctx);
563 
564 		if (ctx->state == STATE_PARSEERR)
565 			return 1;
566 
567 		if (ctx->state == STATE_ENDOFLINE)
568 			return 1;
569 	}
570 
571 	/* deal with any lingering characters */
572 
573 	/* still building a word? */
574 	if (ctx->wordptr != ctx->wordbuf)
575 		endofword(ctx);		/* tie it off */
576 
577 	return 1;
578 }
579 
/* characters that get a backslash in front of them: # \ " */
#define PCONF_ESCAPE "#\\\""

/*
 * Copy src into dest, putting a backslash before every character listed
 * in PCONF_ESCAPE.
 *
 * src      - '\0'-terminated input string
 * dest     - output buffer
 * destsize - size of dest in bytes
 *
 * Returns dest.  With destsize of 0 nothing is written.  Otherwise the
 * result is always '\0'-terminated and is cut short when dest is too
 * small; an escaped pair is never split, it is either written whole or
 * not at all.
 */
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
	size_t	i, srclen, destlen, maxlen;

	if (destsize < 1)
		return dest;

	memset(dest, '\0', destsize);

	/* always leave room for the final '\0' */
	maxlen = destsize - 1;
	srclen = strlen(src);
	destlen = 0;

	for (i = 0; i < srclen; i++) {
		if (strchr(PCONF_ESCAPE, src[i])) {

			/*
			 * if they both won't fit, we're done
			 *
			 * destlen never exceeds maxlen, so this difference
			 * cannot wrap, not even when maxlen is 0.
			 */
			if (maxlen - destlen < 2)
				return dest;

			dest[destlen++] = '\\';
		}

		/* bail out when dest is full */
		if (destlen >= maxlen)
			return dest;

		dest[destlen++] = src[i];
	}

	return dest;
}
615 
616 /* parse input a character at a time */
pconf_char(PCONF_CTX_t * ctx,char ch)617 int pconf_char(PCONF_CTX_t *ctx, char ch)
618 {
619 	if (!check_magic(ctx))
620 		return -1;
621 
622 	/* if the last call finished a line, clean stuff up for another */
623 	if ((ctx->state == STATE_ENDOFLINE) || (ctx->state == STATE_PARSEERR)) {
624 		ctx->numargs = 0;
625 		ctx->state = STATE_FINDWORDSTART;
626 	}
627 
628 	ctx->ch = ch;
629 	parse_char(ctx);
630 
631 	if (ctx->state == STATE_ENDOFLINE)
632 		return 1;
633 
634 	if (ctx->state == STATE_PARSEERR)
635 		return -1;
636 
637 	return 0;
638 }
639