1 /* parseconf.c - state machine-driven dynamic configuration file parser
2
3 Copyright (C) 2002 Russell Kroll <rkroll@exploits.org>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 /* parseconf, version 4.
21 *
22 * This one abandons the "callback" system introduced last time. It
23 * didn't turn out as well as I had hoped - you got stuck "behind"
24 * parseconf too often.
25 *
26 * There is now a context buffer, and you call pconf_init to set it up.
 * All subsequent calls must have it as the first argument. There are
 * three entry points for parsing lines. You can have it read a file
29 * (pconf_file_begin and pconf_file_next), take lines directly from
30 * the caller (pconf_line), or go along a character at a time (pconf_char).
31 * The parsing is identical no matter how you feed it.
32 *
33 * Since there are no more callbacks, you take the successful return
34 * from the function and access ctx->arglist and ctx->numargs yourself.
35 * You must check for errors with pconf_parse_error before using them,
36 * since it might not be complete. This lets the caller handle all
37 * error reporting that's nonfatal.
38 *
39 * Fatal errors are those that involve memory allocation. If the user
40 * defines an error handler when calling pconf_init, that function will
41 * be called with the error message before parseconf exits. By default
42 * it will just write the message to stderr before exiting.
43 *
44 * Input vs. Output:
45 *
46 * What it reads --> What ends up in each argument
47 *
48 * this is a line --> "this" "is" "a" "line"
49 * this "is also" a line --> "this" "is also" "a" "line"
50 * embedded\ space --> "embedded space"
51 * embedded\\backslash --> "embedded\backslash"
52 *
53 * Arguments are split by whitespace (isspace()) unless that whitespace
54 * occurs inside a "quoted pair like this".
55 *
56 * You can also escape the double quote (") character. The backslash
57 * also allows you to join lines, allowing you to have logical lines
58 * that span physical lines, just like you can do in some shells.
59 *
60 * Lines normally end with a newline, but reaching EOF will also force
61 * parsing on what's been scanned so far.
62 *
63 * Design:
64 *
65 * Characters are read one at a time to drive the state machine.
66 * As words are completed (by hitting whitespace or ending a "" item),
67 * they are committed to the next buffer in the arglist. realloc is
68 * used, so the buffer can grow to handle bigger words.
69 *
70 * The arglist also grows as necessary with a similar approach. As a
71 * result, you can parse extremely long words and lines with an insane
72 * number of elements.
73 *
 * Finally, there is argsize, which remembers how many bytes are allocated
 * for each of the arglist elements. This is how we know when to expand them.
76 *
77 */
78
79 #include "common.h"
80
81 #include <ctype.h>
82 #include <errno.h>
83 #include <stdio.h>
84 #include <stdarg.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <unistd.h>
88 #include <fcntl.h>
89
90 #include "parseconf.h"
91 #include "attribute.h"
92
/*
 * Parser states.  ctx->state always holds one of these values; the
 * numeric values are the same as they have always been.
 */
enum {
	STATE_FINDWORDSTART  = 1,	/* between words, looking for the next one */
	STATE_FINDEOL        = 2,	/* skipping the remainder of the line */
	STATE_QUOTECOLLECT   = 3,	/* inside a "quoted" word */
	STATE_QC_LITERAL     = 4,	/* just saw a backslash inside quotes */
	STATE_COLLECT        = 5,	/* inside an unquoted word */
	STATE_COLLECTLITERAL = 6,	/* just saw a backslash outside quotes */
	STATE_ENDOFLINE      = 7,	/* a complete logical line is ready */
	STATE_PARSEERR       = 8	/* parsing of this line was aborted */
};
103
104 static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
105 __attribute__((noreturn));
106
pconf_fatal(PCONF_CTX_t * ctx,const char * errtxt)107 static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
108 {
109 if (ctx->errhandler)
110 ctx->errhandler(errtxt);
111 else
112 fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);
113
114 exit(EXIT_FAILURE);
115 }
116
/*
 * Commit the word currently held in ctx->wordbuf as the next argument.
 *
 * ctx->numargs is incremented and the word is copied into the slot it
 * previously indexed.  The arglist/argsize arrays grow by one element
 * whenever numargs exceeds maxargs, and the slot's own string storage
 * grows whenever the word (plus terminator) no longer fits.  Any
 * allocation failure is fatal (see pconf_fatal).
 */
static void add_arg_word(PCONF_CTX_t *ctx)
{
	const size_t slot = ctx->numargs++;
	const size_t wordlen = strlen(ctx->wordbuf);

	/* more arguments than we have ever seen: add one slot to both lists */
	if (ctx->numargs > ctx->maxargs) {
		ctx->maxargs = ctx->numargs;

		ctx->arglist = realloc(ctx->arglist,
			sizeof(char *) * ctx->numargs);
		if (ctx->arglist == NULL)
			pconf_fatal(ctx, "realloc arglist failed");

		ctx->argsize = realloc(ctx->argsize,
			sizeof(size_t) * ctx->numargs);
		if (ctx->argsize == NULL)
			pconf_fatal(ctx, "realloc argsize failed");

		/* a brand new slot owns no string storage yet */
		ctx->arglist[slot] = NULL;
		ctx->argsize[slot] = 0;
	}

	/* the slot is too small for this word plus its terminating NUL */
	if (ctx->argsize[slot] <= wordlen) {
		const size_t needed = wordlen + 1;

		ctx->arglist[slot] = realloc(ctx->arglist[slot], needed);
		if (ctx->arglist[slot] == NULL)
			pconf_fatal(ctx, "realloc arglist member failed");

		ctx->argsize[slot] = needed;
	}

	/*
	 * Zero the whole slot first: this both clears leftovers from a
	 * previous (longer) word and supplies the terminating NUL, since
	 * only the wordlen payload bytes are copied afterwards.
	 */
	memset(ctx->arglist[slot], '\0', ctx->argsize[slot]);
	memcpy(ctx->arglist[slot], ctx->wordbuf, wordlen);
}
174
/*
 * Append the current input character (ctx->ch) to the word being built.
 *
 * Characters outside printable ASCII are reported on stderr and
 * dropped.  When ctx->wordlen_limit is non-zero, characters beyond that
 * length are silently dropped.  The word buffer grows in 8 byte steps
 * as needed; an allocation failure is fatal (see pconf_fatal).
 */
static void addchar(PCONF_CTX_t *ctx)
{
	size_t wbuflen;

	/*
	 * CVE-2012-2944: only allow the subset of ASCII charset from
	 * Space (0x20) to ~ (0x7e).  DEL (0x7f) is a control character
	 * and is rejected like the others.
	 */
	if ((ctx->ch < 0x20) || (ctx->ch > 0x7e)) {
		fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
			(unsigned int)ctx->ch);
		return;
	}

	wbuflen = strlen(ctx->wordbuf);

	if (ctx->wordlen_limit != 0) {
		if (wbuflen >= ctx->wordlen_limit) {

			/* limit reached: don't append any more */
			return;
		}
	}

	/* allow for the null */
	if (wbuflen >= (ctx->wordbufsize - 1)) {
		ctx->wordbufsize += 8;

		ctx->wordbuf = realloc(ctx->wordbuf, ctx->wordbufsize);

		if (!ctx->wordbuf)
			pconf_fatal(ctx, "realloc wordbuf failed");

		/* repoint as wordbuf may have moved */
		ctx->wordptr = &ctx->wordbuf[wbuflen];
	}

	/* store the character and keep the buffer NUL-terminated */
	*ctx->wordptr++ = (char)ctx->ch;
	*ctx->wordptr = '\0';
}
212
/*
 * Finish the word currently being built.
 *
 * The word is committed to the argument list unless a non-zero
 * ctx->arg_limit has already been reached, in which case it is simply
 * discarded.  In both cases the word buffer is reset to an empty string
 * so the next word starts from scratch.
 */
static void endofword(PCONF_CTX_t *ctx)
{
	const int limit_reached =
		(ctx->arg_limit != 0) && (ctx->numargs >= ctx->arg_limit);

	if (!limit_reached)
		add_arg_word(ctx);

	/* rewind the word buffer */
	ctx->wordptr = ctx->wordbuf;
	*ctx->wordptr = '\0';
}
231
232 /* look for the beginning of a word */
findwordstart(PCONF_CTX_t * ctx)233 static int findwordstart(PCONF_CTX_t *ctx)
234 {
235 /* newline = the physical line is over, so the logical one is too */
236 if (ctx->ch == 10)
237 return STATE_ENDOFLINE;
238
239 /* the rest of the line is a comment */
240 if (ctx->ch == '#')
241 return STATE_FINDEOL;
242
243 /* space = not in a word yet, so loop back */
244 if (isspace(ctx->ch))
245 return STATE_FINDWORDSTART;
246
247 /* \ = literal = accept the next char blindly */
248 if (ctx->ch == '\\')
249 return STATE_COLLECTLITERAL;
250
251 /* " = begin word bounded by quotes */
252 if (ctx->ch == '"')
253 return STATE_QUOTECOLLECT;
254
255 /* at this point the word just started */
256 addchar(ctx);
257
258 /* if the first character is a '=' this is considered a whole word */
259 if (ctx->ch == '=') {
260 endofword(ctx);
261 return STATE_FINDWORDSTART;
262 }
263
264 return STATE_COLLECT;
265 }
266
267 /* eat characters until the end of the line is found */
findeol(PCONF_CTX_t * ctx)268 static int findeol(PCONF_CTX_t *ctx)
269 {
270 /* newline = found it, so start a new line */
271 if (ctx->ch == 10)
272 return STATE_ENDOFLINE;
273
274 /* come back here */
275 return STATE_FINDEOL;
276 }
277
278 /* set up the error reporting details */
pconf_seterr(PCONF_CTX_t * ctx,const char * errmsg)279 static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
280 {
281 snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);
282
283 ctx->error = 1;
284 }
285
286 /* quote characters inside a word bounded by "quotes" */
quotecollect(PCONF_CTX_t * ctx)287 static int quotecollect(PCONF_CTX_t *ctx)
288 {
289 /* user is trying to break us */
290 if (ctx->ch == '#') {
291 pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
292 endofword(ctx);
293
294 /* this makes us drop all the way out of the caller */
295 return STATE_PARSEERR;
296 }
297
298 /* another " means we're done with this word */
299 if (ctx->ch == '"') {
300 endofword(ctx);
301
302 return STATE_FINDWORDSTART;
303 }
304
305 /* literal - special case since it needs to return here */
306 if (ctx->ch == '\\')
307 return STATE_QC_LITERAL;
308
309 /* otherwise save it and loop back */
310 addchar(ctx);
311
312 return STATE_QUOTECOLLECT;
313 }
314
315 /* take almost anything literally, but return to quotecollect */
qc_literal(PCONF_CTX_t * ctx)316 static int qc_literal(PCONF_CTX_t *ctx)
317 {
318 /* continue onto the next line of the file */
319 if (ctx->ch == 10)
320 return STATE_QUOTECOLLECT;
321
322 addchar(ctx);
323 return STATE_QUOTECOLLECT;
324 }
325
326 /* collect characters inside a word */
collect(PCONF_CTX_t * ctx)327 static int collect(PCONF_CTX_t *ctx)
328 {
329 /* comment means the word is done, and skip to the end of the line */
330 if (ctx->ch == '#') {
331 endofword(ctx);
332
333 return STATE_FINDEOL;
334 }
335
336 /* newline means the word is done, and the line is done */
337 if (ctx->ch == 10) {
338 endofword(ctx);
339
340 return STATE_ENDOFLINE;
341 }
342
343 /* space means the word is done */
344 if (isspace(ctx->ch)) {
345 endofword(ctx);
346
347 return STATE_FINDWORDSTART;
348 }
349
350 /* '=' means the word is done and the = is a single char word*/
351 if (ctx->ch == '=') {
352 endofword(ctx);
353 findwordstart(ctx);
354
355 return STATE_FINDWORDSTART;
356 }
357
358 /* \ = literal = accept the next char blindly */
359 if (ctx->ch == '\\')
360 return STATE_COLLECTLITERAL;
361
362 /* otherwise store it and come back for more */
363 addchar(ctx);
364 return STATE_COLLECT;
365 }
366
367 /* take almost anything literally */
collectliteral(PCONF_CTX_t * ctx)368 static int collectliteral(PCONF_CTX_t *ctx)
369 {
370 /* continue to the next line */
371 if (ctx->ch == 10)
372 return STATE_COLLECT;
373
374 addchar(ctx);
375 return STATE_COLLECT;
376 }
377
378 /* clean up memory before going back to the user */
free_storage(PCONF_CTX_t * ctx)379 static void free_storage(PCONF_CTX_t *ctx)
380 {
381 unsigned int i;
382
383 free(ctx->wordbuf);
384
385 /* clear out the individual words first */
386 for (i = 0; i < ctx->maxargs; i++)
387 free(ctx->arglist[i]);
388
389 free(ctx->arglist);
390 free(ctx->argsize);
391
392 /* put things back to the initial state */
393 ctx->arglist = NULL;
394 ctx->argsize = NULL;
395 ctx->numargs = 0;
396 ctx->maxargs = 0;
397 }
398
/*
 * Prepare a context buffer for use by the other pconf_* functions.
 *
 * ctx        - caller-provided context to initialize
 * errhandler - called with the message on fatal (allocation) errors
 *              before the process exits; may be NULL, in which case the
 *              message is written to stderr instead
 *
 * Returns 1.  Failure to allocate the word buffer is fatal.
 */
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
	/*
	 * Install the error handler first: pconf_fatal() reads it, and it
	 * may be invoked below if the word buffer can't be allocated.
	 */
	ctx->errhandler = errhandler;

	/* set up the ctx elements */

	ctx->f = NULL;
	ctx->state = STATE_FINDWORDSTART;
	ctx->numargs = 0;
	ctx->maxargs = 0;
	ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
	ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
	ctx->linenum = 0;
	ctx->error = 0;
	ctx->arglist = NULL;
	ctx->argsize = NULL;

	/* start with a small, zero-filled (i.e. empty string) word buffer */
	ctx->wordbufsize = 16;
	ctx->wordbuf = calloc(1, ctx->wordbufsize);

	if (!ctx->wordbuf)
		pconf_fatal(ctx, "malloc wordbuf failed");
	ctx->wordptr = ctx->wordbuf;

	/* mark the context as valid for check_magic() */
	ctx->magic = PCONF_CTX_t_MAGIC;

	return 1;
}
426
/*
 * Validate a context pointer.
 *
 * Returns 1 when ctx is non-NULL and carries the expected magic value,
 * 0 otherwise.  On a magic mismatch an explanation is stored in
 * ctx->errmsg.
 */
static int check_magic(PCONF_CTX_t *ctx)
{
	if (ctx == NULL)
		return 0;

	if (ctx->magic == PCONF_CTX_t_MAGIC)
		return 1;

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");
	return 0;
}
439
/*
 * Open the file named fn for reading and attach it to the context so
 * that pconf_file_next() can read lines from it.
 *
 * Returns 1 on success.  Returns 0 if the context is invalid or the
 * file can't be opened; in the latter case ctx->errmsg describes the
 * failure.
 */
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
	if (!check_magic(ctx))
		return 0;

	ctx->f = fopen(fn, "r");

	if (ctx->f != NULL) {
		/* prevent fd leaking to child processes */
		fcntl(fileno(ctx->f), F_SETFD, FD_CLOEXEC);

		return 1;	/* OK */
	}

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
		fn, strerror(errno));

	return 0;
}
458
/*
 * Feed the current character (ctx->ch) to the handler for the current
 * state and store the state it returns.  States without a handler
 * (such as STATE_ENDOFLINE and STATE_PARSEERR) leave ctx untouched.
 */
static void parse_char(PCONF_CTX_t *ctx)
{
	int (*handler)(PCONF_CTX_t *) = NULL;

	switch (ctx->state) {
	case STATE_FINDWORDSTART:
		handler = findwordstart;
		break;

	case STATE_FINDEOL:
		handler = findeol;
		break;

	case STATE_QUOTECOLLECT:
		handler = quotecollect;
		break;

	case STATE_QC_LITERAL:
		handler = qc_literal;
		break;

	case STATE_COLLECT:
		handler = collect;
		break;

	case STATE_COLLECTLITERAL:
		handler = collectliteral;
		break;

	default:
		break;
	}

	if (handler != NULL)
		ctx->state = handler(ctx);
}
487
488 /* return 1 if an error occurred, but only do it once */
pconf_parse_error(PCONF_CTX_t * ctx)489 int pconf_parse_error(PCONF_CTX_t *ctx)
490 {
491 if (!check_magic(ctx))
492 return 0;
493
494 if (ctx->error == 1) {
495 ctx->error = 0;
496 return 1;
497 }
498
499 return 0;
500 }
501
502 /* clean up the ctx space */
pconf_finish(PCONF_CTX_t * ctx)503 void pconf_finish(PCONF_CTX_t *ctx)
504 {
505 if (!check_magic(ctx))
506 return;
507
508 if (ctx->f)
509 fclose(ctx->f);
510
511 free_storage(ctx);
512
513 ctx->magic = 0;
514 }
515
516 /* read from a file until a whole line is ready for use */
pconf_file_next(PCONF_CTX_t * ctx)517 int pconf_file_next(PCONF_CTX_t *ctx)
518 {
519 if (!check_magic(ctx))
520 return 0;
521
522 ctx->linenum++;
523
524 /* start over for the new line */
525 ctx->numargs = 0;
526 ctx->state = STATE_FINDWORDSTART;
527
528 while ((ctx->ch = fgetc(ctx->f)) != EOF) {
529 parse_char(ctx);
530
531 if (ctx->state == STATE_PARSEERR)
532 return 1;
533
534 if (ctx->state == STATE_ENDOFLINE)
535 return 1;
536 }
537
538 /* deal with files that don't end in a newline */
539
540 if (ctx->numargs != 0) {
541
542 /* still building a word? */
543 if (ctx->wordptr != ctx->wordbuf)
544 endofword(ctx);
545
546 return 1;
547 }
548
549 /* finished with nothing left over */
550 return 0;
551 }
552
553 /* parse a provided line */
pconf_line(PCONF_CTX_t * ctx,const char * line)554 int pconf_line(PCONF_CTX_t *ctx, const char *line)
555 {
556 size_t i, linelen;
557
558 if (!check_magic(ctx))
559 return 0;
560
561 ctx->linenum++;
562
563 /* start over for the new line */
564 ctx->numargs = 0;
565 ctx->state = STATE_FINDWORDSTART;
566
567 linelen = strlen(line);
568
569 for (i = 0; i < linelen; i++) {
570 ctx->ch = line[i];
571
572 parse_char(ctx);
573
574 if (ctx->state == STATE_PARSEERR)
575 return 1;
576
577 if (ctx->state == STATE_ENDOFLINE)
578 return 1;
579 }
580
581 /* deal with any lingering characters */
582
583 /* still building a word? */
584 if (ctx->wordptr != ctx->wordbuf)
585 endofword(ctx); /* tie it off */
586
587 return 1;
588 }
589
/* characters that must be preceded by a backslash when encoded */
#define PCONF_ESCAPE "#\\\""

/*
 * Copy src into dest, putting a backslash in front of every character
 * listed in PCONF_ESCAPE so that the result can be parsed back into the
 * original text.
 *
 * src      - NUL-terminated string to encode
 * dest     - output buffer
 * destsize - size of dest in bytes
 *
 * dest is zero-filled first and is always NUL-terminated when destsize
 * is at least 1.  Output is truncated when it doesn't fit; an escape
 * pair is never split.  Returns dest (untouched if destsize is 0).
 */
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
	size_t i, srclen, destlen, maxlen;

	if (destsize < 1)
		return dest;

	memset(dest, '\0', destsize);

	/* always leave room for a final NUL */
	maxlen = destsize - 1;
	srclen = strlen(src);
	destlen = 0;

	for (i = 0; i < srclen; i++) {
		if (strchr(PCONF_ESCAPE, src[i])) {

			/*
			 * If they both won't fit, we're done.  destlen never
			 * exceeds maxlen, so this subtraction can't wrap
			 * (unlike "maxlen - 1" when maxlen is 0).
			 */
			if (maxlen - destlen < 2)
				return dest;

			dest[destlen++] = '\\';
		}

		/* bail out when dest is full */
		if (destlen >= maxlen)
			return dest;

		dest[destlen++] = src[i];
	}

	return dest;
}
625
626 /* parse input a character at a time */
pconf_char(PCONF_CTX_t * ctx,char ch)627 int pconf_char(PCONF_CTX_t *ctx, char ch)
628 {
629 if (!check_magic(ctx))
630 return -1;
631
632 /* if the last call finished a line, clean stuff up for another */
633 if ((ctx->state == STATE_ENDOFLINE) || (ctx->state == STATE_PARSEERR)) {
634 ctx->numargs = 0;
635 ctx->state = STATE_FINDWORDSTART;
636 }
637
638 ctx->ch = ch;
639 parse_char(ctx);
640
641 if (ctx->state == STATE_ENDOFLINE)
642 return 1;
643
644 if (ctx->state == STATE_PARSEERR)
645 return -1;
646
647 return 0;
648 }
649