1 /* parseconf.c - state machine-driven dynamic configuration file parser
2
3 Copyright (C) 2002 Russell Kroll <rkroll@exploits.org>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 /* parseconf, version 4.
21 *
22 * This one abandons the "callback" system introduced last time. It
23 * didn't turn out as well as I had hoped - you got stuck "behind"
24 * parseconf too often.
25 *
26 * There is now a context buffer, and you call pconf_init to set it up.
27 * All subsequent calls must have it as the first argument. There are
 * three entry points for parsing lines. You can have it read a file
29 * (pconf_file_begin and pconf_file_next), take lines directly from
30 * the caller (pconf_line), or go along a character at a time (pconf_char).
31 * The parsing is identical no matter how you feed it.
32 *
33 * Since there are no more callbacks, you take the successful return
34 * from the function and access ctx->arglist and ctx->numargs yourself.
35 * You must check for errors with pconf_parse_error before using them,
36 * since it might not be complete. This lets the caller handle all
37 * error reporting that's nonfatal.
38 *
39 * Fatal errors are those that involve memory allocation. If the user
40 * defines an error handler when calling pconf_init, that function will
41 * be called with the error message before parseconf exits. By default
42 * it will just write the message to stderr before exiting.
43 *
44 * Input vs. Output:
45 *
46 * What it reads --> What ends up in each argument
47 *
48 * this is a line --> "this" "is" "a" "line"
49 * this "is also" a line --> "this" "is also" "a" "line"
50 * embedded\ space --> "embedded space"
51 * embedded\\backslash --> "embedded\backslash"
52 *
53 * Arguments are split by whitespace (isspace()) unless that whitespace
54 * occurs inside a "quoted pair like this".
55 *
56 * You can also escape the double quote (") character. The backslash
57 * also allows you to join lines, allowing you to have logical lines
58 * that span physical lines, just like you can do in some shells.
59 *
60 * Lines normally end with a newline, but reaching EOF will also force
61 * parsing on what's been scanned so far.
62 *
63 * Design:
64 *
65 * Characters are read one at a time to drive the state machine.
66 * As words are completed (by hitting whitespace or ending a "" item),
67 * they are committed to the next buffer in the arglist. realloc is
68 * used, so the buffer can grow to handle bigger words.
69 *
70 * The arglist also grows as necessary with a similar approach. As a
71 * result, you can parse extremely long words and lines with an insane
72 * number of elements.
73 *
 * Finally, there is argsize, which remembers how long each of the
 * arglist elements is. This is how we know when to expand them.
76 *
77 */
78
79 #include <ctype.h>
80 #include <errno.h>
81 #include <stdio.h>
82 #include <stdarg.h>
83 #include <stdlib.h>
84 #include <string.h>
85 #include <unistd.h>
86
87 #include "parseconf.h"
88
/* States of the character-driven parser.  The numeric values are the
 * same ones the state handlers return and parse_char() dispatches on.
 */
enum {
	STATE_FINDWORDSTART	= 1,	/* between words, looking for the next one */
	STATE_FINDEOL		= 2,	/* inside a comment, skipping to the newline */
	STATE_QUOTECOLLECT	= 3,	/* gathering characters of a "quoted" word */
	STATE_QC_LITERAL	= 4,	/* character right after a \ inside quotes */
	STATE_COLLECT		= 5,	/* gathering characters of a bare word */
	STATE_COLLECTLITERAL	= 6,	/* character right after a \ outside quotes */
	STATE_ENDOFLINE		= 7,	/* a complete line has been parsed */
	STATE_PARSEERR		= 8	/* parsing stopped because of an error */
};
99
/* Report an unrecoverable error and terminate the process.
 *
 * The message is handed to the handler stored in ctx->errhandler when
 * one is set; otherwise it is printed to stderr.  In both cases the
 * program then exits with EXIT_FAILURE, so this function never returns.
 */
static void pconf_fatal(PCONF_CTX_t *ctx, const char *errtxt)
{
	if (!ctx->errhandler) {
		fprintf(stderr, "parseconf: fatal error: %s\n", errtxt);
		exit(EXIT_FAILURE);
	}

	ctx->errhandler(errtxt);
	exit(EXIT_FAILURE);
}
109
/* Commit the word currently held in ctx->wordbuf as the next argument.
 *
 * The argument list (arglist) and the per-slot capacity list (argsize)
 * grow by one entry whenever more arguments are seen than ever before,
 * and a slot's string storage grows whenever the new word does not fit
 * in what was allocated for that slot earlier.  Any allocation failure
 * is fatal.
 */
static void add_arg_word(PCONF_CTX_t *ctx)
{
	int slot;
	size_t wordlen;

	/* the new word lands in the first unused slot */
	slot = ctx->numargs;
	ctx->numargs++;

	/* first time we have this many arguments: add one entry to both lists */
	if (ctx->numargs > ctx->maxargs) {
		ctx->maxargs = ctx->numargs;

		ctx->arglist = realloc(ctx->arglist,
			sizeof(char *) * ctx->numargs);
		if (ctx->arglist == NULL)
			pconf_fatal(ctx, "realloc arglist failed");

		ctx->argsize = realloc(ctx->argsize,
			sizeof(size_t) * ctx->numargs);
		if (ctx->argsize == NULL)
			pconf_fatal(ctx, "realloc argsize failed");

		/* a brand new slot owns no storage yet */
		ctx->arglist[slot] = NULL;
		ctx->argsize[slot] = 0;
	}

	wordlen = strlen(ctx->wordbuf);

	/* the slot must hold the word plus its terminator */
	if (ctx->argsize[slot] <= wordlen) {
		const size_t capacity = wordlen + 1;

		ctx->arglist[slot] = realloc(ctx->arglist[slot], capacity);
		if (ctx->arglist[slot] == NULL)
			pconf_fatal(ctx, "realloc arglist member failed");

		ctx->argsize[slot] = capacity;
	}

	/* zero the whole slot, then lay the word over the front of it;
	 * the zero fill provides the terminator */
	memset(ctx->arglist[slot], '\0', ctx->argsize[slot]);
	memcpy(ctx->arglist[slot], ctx->wordbuf, wordlen);
}
167
/* Append the current character (ctx->ch) to the word being built.
 *
 * Characters outside printable ASCII are discarded with a note on
 * stderr.  When ctx->wordlen_limit is nonzero, characters beyond that
 * many are silently dropped.  The word buffer grows in 8 byte steps as
 * needed; failing to grow it is fatal.
 */
static void addchar(PCONF_CTX_t *ctx)
{
	size_t wbuflen;

	/* CVE-2012-2944: only allow the ASCII subset from Space (0x20) to
	 * ~ (0x7e).  DEL (0x7f) is a control character and is rejected too. */
	if ((ctx->ch < 0x20) || (ctx->ch > 0x7e)) {
		/* %x expects an unsigned int */
		fprintf(stderr, "addchar: discarding invalid character (0x%02x)!\n",
			(unsigned int)ctx->ch);
		return;
	}

	wbuflen = strlen(ctx->wordbuf);

	/* limit reached: don't append any more */
	if ((ctx->wordlen_limit != 0) && (wbuflen >= ctx->wordlen_limit))
		return;

	/* need room for this character plus the terminating NUL */
	if (wbuflen >= (ctx->wordbufsize - 1)) {
		char *grown;

		grown = realloc(ctx->wordbuf, ctx->wordbufsize + 8);

		if (!grown)
			pconf_fatal(ctx, "realloc wordbuf failed");

		ctx->wordbuf = grown;
		ctx->wordbufsize += 8;

		/* repoint as wordbuf may have moved */
		ctx->wordptr = &ctx->wordbuf[wbuflen];
	}

	*ctx->wordptr++ = ctx->ch;
	*ctx->wordptr = '\0';
}
205
/* Finish the word being built.
 *
 * The word is committed to the argument list unless a nonzero
 * ctx->arg_limit has already been reached, in which case it is simply
 * dropped.  Either way the word buffer is emptied for the next word.
 */
static void endofword(PCONF_CTX_t *ctx)
{
	const int limit_hit = (ctx->arg_limit != 0)
		&& (ctx->numargs >= ctx->arg_limit);

	if (!limit_hit)
		add_arg_word(ctx);

	/* rewind the word buffer */
	ctx->wordptr = ctx->wordbuf;
	*ctx->wordptr = '\0';
}
224
225 /* look for the beginning of a word */
findwordstart(PCONF_CTX_t * ctx)226 static int findwordstart(PCONF_CTX_t *ctx)
227 {
228 /* newline = the physical line is over, so the logical one is too */
229 if (ctx->ch == 10)
230 return STATE_ENDOFLINE;
231
232 /* the rest of the line is a comment */
233 if (ctx->ch == '#')
234 return STATE_FINDEOL;
235
236 /* space = not in a word yet, so loop back */
237 if (isspace(ctx->ch))
238 return STATE_FINDWORDSTART;
239
240 /* \ = literal = accept the next char blindly */
241 if (ctx->ch == '\\')
242 return STATE_COLLECTLITERAL;
243
244 /* " = begin word bounded by quotes */
245 if (ctx->ch == '"')
246 return STATE_QUOTECOLLECT;
247
248 /* at this point the word just started */
249 addchar(ctx);
250
251 /* if the first character is a '=' this is considered a whole word */
252 if (ctx->ch == '=') {
253 endofword(ctx);
254 return STATE_FINDWORDSTART;
255 }
256
257 return STATE_COLLECT;
258 }
259
260 /* eat characters until the end of the line is found */
findeol(PCONF_CTX_t * ctx)261 static int findeol(PCONF_CTX_t *ctx)
262 {
263 /* newline = found it, so start a new line */
264 if (ctx->ch == 10)
265 return STATE_ENDOFLINE;
266
267 /* come back here */
268 return STATE_FINDEOL;
269 }
270
271 /* set up the error reporting details */
pconf_seterr(PCONF_CTX_t * ctx,const char * errmsg)272 static void pconf_seterr(PCONF_CTX_t *ctx, const char *errmsg)
273 {
274 snprintf(ctx->errmsg, PCONF_ERR_LEN, "%s", errmsg);
275
276 ctx->error = 1;
277 }
278
279 /* quote characters inside a word bounded by "quotes" */
quotecollect(PCONF_CTX_t * ctx)280 static int quotecollect(PCONF_CTX_t *ctx)
281 {
282 /* user is trying to break us */
283 if (ctx->ch == '#') {
284 pconf_seterr(ctx, "Unbalanced word due to unescaped # in quotes");
285 endofword(ctx);
286
287 /* this makes us drop all the way out of the caller */
288 return STATE_PARSEERR;
289 }
290
291 /* another " means we're done with this word */
292 if (ctx->ch == '"') {
293 endofword(ctx);
294
295 return STATE_FINDWORDSTART;
296 }
297
298 /* literal - special case since it needs to return here */
299 if (ctx->ch == '\\')
300 return STATE_QC_LITERAL;
301
302 /* otherwise save it and loop back */
303 addchar(ctx);
304
305 return STATE_QUOTECOLLECT;
306 }
307
308 /* take almost anything literally, but return to quotecollect */
qc_literal(PCONF_CTX_t * ctx)309 static int qc_literal(PCONF_CTX_t *ctx)
310 {
311 /* continue onto the next line of the file */
312 if (ctx->ch == 10)
313 return STATE_QUOTECOLLECT;
314
315 addchar(ctx);
316 return STATE_QUOTECOLLECT;
317 }
318
319 /* collect characters inside a word */
collect(PCONF_CTX_t * ctx)320 static int collect(PCONF_CTX_t *ctx)
321 {
322 /* comment means the word is done, and skip to the end of the line */
323 if (ctx->ch == '#') {
324 endofword(ctx);
325
326 return STATE_FINDEOL;
327 }
328
329 /* newline means the word is done, and the line is done */
330 if (ctx->ch == 10) {
331 endofword(ctx);
332
333 return STATE_ENDOFLINE;
334 }
335
336 /* space means the word is done */
337 if (isspace(ctx->ch)) {
338 endofword(ctx);
339
340 return STATE_FINDWORDSTART;
341 }
342
343 /* '=' means the word is done and the = is a single char word*/
344 if (ctx->ch == '=') {
345 endofword(ctx);
346 findwordstart(ctx);
347
348 return STATE_FINDWORDSTART;
349 }
350
351 /* \ = literal = accept the next char blindly */
352 if (ctx->ch == '\\')
353 return STATE_COLLECTLITERAL;
354
355 /* otherwise store it and come back for more */
356 addchar(ctx);
357 return STATE_COLLECT;
358 }
359
360 /* take almost anything literally */
collectliteral(PCONF_CTX_t * ctx)361 static int collectliteral(PCONF_CTX_t *ctx)
362 {
363 /* continue to the next line */
364 if (ctx->ch == 10)
365 return STATE_COLLECT;
366
367 addchar(ctx);
368 return STATE_COLLECT;
369 }
370
371 /* clean up memory before going back to the user */
free_storage(PCONF_CTX_t * ctx)372 static void free_storage(PCONF_CTX_t *ctx)
373 {
374 unsigned int i;
375
376 free(ctx->wordbuf);
377
378 /* clear out the individual words first */
379 for (i = 0; i < ctx->maxargs; i++)
380 free(ctx->arglist[i]);
381
382 free(ctx->arglist);
383 free(ctx->argsize);
384
385 /* put things back to the initial state */
386 ctx->arglist = NULL;
387 ctx->argsize = NULL;
388 ctx->numargs = 0;
389 ctx->maxargs = 0;
390 }
391
/* Prepare a context buffer for parsing.
 *
 * ctx        - caller-provided context; every field used by the parser
 *              is set here
 * errhandler - called with the message on fatal errors; may be NULL, in
 *              which case the message goes to stderr
 *
 * Returns 1.  Failure to allocate the initial word buffer is fatal.
 */
int pconf_init(PCONF_CTX_t *ctx, void errhandler(const char *))
{
	/* install the handler first: pconf_fatal() below reads it, and the
	 * rest of ctx may still be uninitialized at that point */
	ctx->errhandler = errhandler;

	/* set up the ctx elements */

	ctx->f = NULL;
	ctx->state = STATE_FINDWORDSTART;
	ctx->numargs = 0;
	ctx->maxargs = 0;
	ctx->arg_limit = PCONF_DEFAULT_ARG_LIMIT;
	ctx->wordlen_limit = PCONF_DEFAULT_WORDLEN_LIMIT;
	ctx->linenum = 0;
	ctx->error = 0;
	ctx->arglist = NULL;
	ctx->argsize = NULL;

	/* start with a small, zeroed (i.e. empty string) word buffer */
	ctx->wordbufsize = 16;
	ctx->wordbuf = calloc(1, ctx->wordbufsize);

	if (!ctx->wordbuf)
		pconf_fatal(ctx, "malloc wordbuf failed");

	ctx->wordptr = ctx->wordbuf;

	/* mark the context as valid for check_magic() */
	ctx->magic = PCONF_CTX_t_MAGIC;

	return 1;
}
419
/* Verify that ctx looks like an initialized context.
 *
 * Returns 1 when ctx is non-NULL and carries the expected magic value,
 * 0 otherwise.  For a non-NULL ctx with the wrong magic, an explanatory
 * message is written to ctx->errmsg.
 */
static int check_magic(PCONF_CTX_t *ctx)
{
	if (ctx && (ctx->magic == PCONF_CTX_t_MAGIC))
		return 1;

	if (ctx)
		snprintf(ctx->errmsg, PCONF_ERR_LEN, "Invalid ctx buffer");

	return 0;
}
432
/* Open the file named fn for reading and attach it to the context.
 *
 * Returns 1 on success.  Returns 0 when ctx is not a valid context or
 * the file cannot be opened; in the latter case ctx->errmsg describes
 * the failure.
 */
int pconf_file_begin(PCONF_CTX_t *ctx, const char *fn)
{
	if (!check_magic(ctx))
		return 0;

	ctx->f = fopen(fn, "r");

	if (ctx->f != NULL)
		return 1;	/* OK */

	snprintf(ctx->errmsg, PCONF_ERR_LEN, "Can't open %s: %s",
		fn, strerror(errno));

	return 0;
}
448
/* Feed the current character (ctx->ch) to the handler for the current
 * state and store the state that handler returns.  States without a
 * handler (end of line, parse error, anything unknown) are left alone.
 */
static void parse_char(PCONF_CTX_t *ctx)
{
	int next;

	switch (ctx->state) {
	case STATE_FINDWORDSTART:
		next = findwordstart(ctx);
		break;

	case STATE_FINDEOL:
		next = findeol(ctx);
		break;

	case STATE_QUOTECOLLECT:
		next = quotecollect(ctx);
		break;

	case STATE_QC_LITERAL:
		next = qc_literal(ctx);
		break;

	case STATE_COLLECT:
		next = collect(ctx);
		break;

	case STATE_COLLECTLITERAL:
		next = collectliteral(ctx);
		break;

	default:
		/* nothing to run for this state */
		return;
	}

	ctx->state = next;
}
477
478 /* return 1 if an error occurred, but only do it once */
pconf_parse_error(PCONF_CTX_t * ctx)479 int pconf_parse_error(PCONF_CTX_t *ctx)
480 {
481 if (!check_magic(ctx))
482 return 0;
483
484 if (ctx->error == 1) {
485 ctx->error = 0;
486 return 1;
487 }
488
489 return 0;
490 }
491
492 /* clean up the ctx space */
pconf_finish(PCONF_CTX_t * ctx)493 void pconf_finish(PCONF_CTX_t *ctx)
494 {
495 if (!check_magic(ctx))
496 return;
497
498 if (ctx->f)
499 fclose(ctx->f);
500
501 free_storage(ctx);
502
503 ctx->magic = 0;
504 }
505
506 /* read from a file until a whole line is ready for use */
pconf_file_next(PCONF_CTX_t * ctx)507 int pconf_file_next(PCONF_CTX_t *ctx)
508 {
509 if (!check_magic(ctx))
510 return 0;
511
512 ctx->linenum++;
513
514 /* start over for the new line */
515 ctx->numargs = 0;
516 ctx->state = STATE_FINDWORDSTART;
517
518 while ((ctx->ch = fgetc(ctx->f)) != EOF) {
519 parse_char(ctx);
520
521 if (ctx->state == STATE_PARSEERR)
522 return 1;
523
524 if (ctx->state == STATE_ENDOFLINE)
525 return 1;
526 }
527
528 /* deal with files that don't end in a newline */
529
530 if (ctx->numargs != 0) {
531
532 /* still building a word? */
533 if (ctx->wordptr != ctx->wordbuf)
534 endofword(ctx);
535
536 return 1;
537 }
538
539 /* finished with nothing left over */
540 return 0;
541 }
542
543 /* parse a provided line */
pconf_line(PCONF_CTX_t * ctx,const char * line)544 int pconf_line(PCONF_CTX_t *ctx, const char *line)
545 {
546 size_t i, linelen;
547
548 if (!check_magic(ctx))
549 return 0;
550
551 ctx->linenum++;
552
553 /* start over for the new line */
554 ctx->numargs = 0;
555 ctx->state = STATE_FINDWORDSTART;
556
557 linelen = strlen(line);
558
559 for (i = 0; i < linelen; i++) {
560 ctx->ch = line[i];
561
562 parse_char(ctx);
563
564 if (ctx->state == STATE_PARSEERR)
565 return 1;
566
567 if (ctx->state == STATE_ENDOFLINE)
568 return 1;
569 }
570
571 /* deal with any lingering characters */
572
573 /* still building a word? */
574 if (ctx->wordptr != ctx->wordbuf)
575 endofword(ctx); /* tie it off */
576
577 return 1;
578 }
579
/* characters that get a backslash put in front of them by pconf_encode */
#define PCONF_ESCAPE "#\\\""

/* Copy src into dest, putting a backslash before every '#', backslash
 * and double quote.
 *
 * src      - NUL-terminated input string
 * dest     - output buffer of destsize bytes
 * destsize - size of dest; nothing is written when it is 0
 *
 * The result is always NUL-terminated (dest is zero-filled first) and
 * is cut short when it does not fit.  An escaped character is never
 * split: if the backslash and the character do not both fit, neither
 * is written.  Returns dest.
 */
char *pconf_encode(const char *src, char *dest, size_t destsize)
{
	size_t i, srclen, destlen, maxlen;

	if (destsize < 1)
		return dest;

	memset(dest, '\0', destsize);

	/* always leave room for the final NUL */
	maxlen = destsize - 1;
	srclen = strlen(src);
	destlen = 0;

	for (i = 0; i < srclen; i++) {
		/* an escaped character takes two bytes, a plain one takes one */
		const size_t need = strchr(PCONF_ESCAPE, src[i]) ? 2 : 1;

		/* destlen never exceeds maxlen, so the subtraction cannot wrap
		 * (unlike "maxlen - 1", which wraps when destsize is 1) */
		if ((maxlen - destlen) < need)
			return dest;

		if (need == 2)
			dest[destlen++] = '\\';

		dest[destlen++] = src[i];
	}

	return dest;
}
615
616 /* parse input a character at a time */
pconf_char(PCONF_CTX_t * ctx,char ch)617 int pconf_char(PCONF_CTX_t *ctx, char ch)
618 {
619 if (!check_magic(ctx))
620 return -1;
621
622 /* if the last call finished a line, clean stuff up for another */
623 if ((ctx->state == STATE_ENDOFLINE) || (ctx->state == STATE_PARSEERR)) {
624 ctx->numargs = 0;
625 ctx->state = STATE_FINDWORDSTART;
626 }
627
628 ctx->ch = ch;
629 parse_char(ctx);
630
631 if (ctx->state == STATE_ENDOFLINE)
632 return 1;
633
634 if (ctx->state == STATE_PARSEERR)
635 return -1;
636
637 return 0;
638 }
639