1 %{
2 /**
3  * This file is part of the Detox package.
4  *
5  * Copyright (c) Doug Harple <detox.dharple@gmail.com>
6  *
7  * For the full copyright and license information, please view the LICENSE
8  * file that was distributed with this source code.
9  */
10 
11 #include "config.h"
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include "detox.h"
18 #include "clean_string.h"
19 #include "config_file.h"
20 
/*
 * Parser state shared between the grammar actions and the helper functions
 * at the bottom of this file.  (With apologies for the terse names.)
 */

/* Head and tail of the list of named sequences collected so far. */
static struct detox_sequence_list *cf_sl_ret;
static struct detox_sequence_list *cf_sl_current;

/* Head and tail of the cleaner chain for the sequence being parsed. */
static struct detox_sequence_entry *cf_seq_ret;
static struct detox_sequence_entry *cf_seq_current;

/* Head and tail of the list of ignore entries collected so far. */
static struct detox_ignore_entry *cf_ignore_ret;
static struct detox_ignore_entry *cf_ignore_current;

/* Scratch pointer used by actions while building a cleaner's options. */
static struct clean_string_options *csopts;

/* Name captured by the sequence_open rule. */
static char *current_name = NULL;

/* Path handed to parse_config_file(); used in error messages. */
static char *current_filename = NULL;

/* The main_options argument of parse_config_file(). */
static struct detox_options *current_options;

/* Helpers invoked from the grammar actions; defined after the grammar. */
void cf_append_sequence_list(void);
void cf_append_sequence_entry(void *ptr, void *opts);
void cf_append_ignore_entry(int token, void *str);

/* Parser error callback. */
void yyerror(char *s);

/* Scanner entry point, provided by the lexer. */
int yylex(void);
40 
41 %}
42 
/* Semantic value carried by tokens and nonterminals. */
%union {
	char *string;				/* text of a QSTRING or ID token */
	int cmd;				/* value for keyword tokens */
	struct detox_sequence_entry *seq;	/* sequence entry; no symbol declared in this file uses it */
	int nvalue;				/* number carried by an NVALUE token */
}
49 
/* Tokens that carry text. */
%token <string> QSTRING
%token <string> ID

/* Keywords that open the two kinds of top-level rule. */
%token <cmd> SEQUENCE
%token <cmd> IGNORE

/* Cleaner keywords usable inside a sequence. */
%token <cmd> UNCGI
%token <cmd> ISO8859_1
%token <cmd> UTF_8
%token <cmd> SAFE
%token <cmd> WIPEUP
%token <cmd> MAX_LENGTH
%token <cmd> LOWER

/* Option keywords. */
%token <cmd> FILENAME
%token <cmd> REMOVE_TRAILING
%token <cmd> LENGTH

/* Block delimiters and statement terminator. */
%token <cmd> OPEN
%token <cmd> CLOSE
%token <cmd> EOL

/* Numeric literal. */
%token <nvalue> NVALUE

/* The string nonterminal yields text, like QSTRING and ID. */
%type <string> string
59 
60 %%
61 
/* A config file is a possibly empty series of rules. */
configfile: /* empty */
	| configfile rule
	;
66 
/* Each top-level rule is either a sequence definition or an ignore block. */
rule: sequence
	| ignore
	;
71 
/* A sequence is a named header, one or more methods, and a closing line. */
sequence: sequence_open method_list sequence_close
	;

/* Remember the sequence name until the sequence is closed. */
sequence_open: SEQUENCE string OPEN
		{
			current_name = $2;
		}
	;

/* On close, store the methods collected so far under the remembered name. */
sequence_close: CLOSE EOL
		{
			cf_append_sequence_list();
		}
	;
80 
/* One or more methods, in the order they appear in the file. */
method_list: method
	| method_list method
	;
84 
/*
 * A single method line.  UNCGI and LOWER take no options and are appended
 * here; the remaining cleaners have their own rules, which do the
 * appending themselves.
 */
method: UNCGI EOL
		{
			cf_append_sequence_entry(&clean_uncgi, NULL);
		}
	| LOWER EOL
		{
			cf_append_sequence_entry(&clean_lower, NULL);
		}
	| wipeup EOL
	| iso8859_1 EOL
	| utf_8 EOL
	| safe EOL
	| max_length EOL
	;
99 
/*
 * ISO8859_1 method.  Accepts the bare keyword, an empty option block, or an
 * option block with a FILENAME whose string is stored in
 * clean_string_options.filename.  Without a FILENAME the cleaner is
 * appended with no options.
 */
iso8859_1: ISO8859_1 { cf_append_sequence_entry(&clean_iso8859_1, NULL); }
	|
	ISO8859_1 OPEN CLOSE { cf_append_sequence_entry(&clean_iso8859_1, NULL); }
	|
	ISO8859_1 OPEN FILENAME string EOL CLOSE {
		csopts = calloc(1, sizeof(*csopts));
		if (csopts == NULL) {
			/* yyerror() reports the problem and exits. */
			yyerror("out of memory");
		}
		csopts->filename = $4;

		cf_append_sequence_entry(&clean_iso8859_1, csopts);
	}
	;
112 
/*
 * UTF_8 method.  Accepts the bare keyword, an empty option block, or an
 * option block with a FILENAME whose string is stored in
 * clean_string_options.filename.  Without a FILENAME the cleaner is
 * appended with no options.
 */
utf_8: UTF_8 { cf_append_sequence_entry(&clean_utf_8, NULL); }
	|
	UTF_8 OPEN CLOSE { cf_append_sequence_entry(&clean_utf_8, NULL); }
	|
	UTF_8 OPEN FILENAME string EOL CLOSE {
		csopts = calloc(1, sizeof(*csopts));
		if (csopts == NULL) {
			/* yyerror() reports the problem and exits. */
			yyerror("out of memory");
		}
		csopts->filename = $4;

		cf_append_sequence_entry(&clean_utf_8, csopts);
	}
	;
125 
/*
 * SAFE method.  Accepts the bare keyword, an empty option block, or an
 * option block with a FILENAME whose string is stored in
 * clean_string_options.filename.  Without a FILENAME the cleaner is
 * appended with no options.
 */
safe: SAFE { cf_append_sequence_entry(&clean_safe, NULL); }
	|
	SAFE OPEN CLOSE { cf_append_sequence_entry(&clean_safe, NULL); }
	|
	SAFE OPEN FILENAME string EOL CLOSE {
		csopts = calloc(1, sizeof(*csopts));
		if (csopts == NULL) {
			/* yyerror() reports the problem and exits. */
			yyerror("out of memory");
		}
		csopts->filename = $4;

		cf_append_sequence_entry(&clean_safe, csopts);
	}
	;
138 
/*
 * WIPEUP method.  The bare keyword and the empty option block take the
 * remove_trailing setting from the options passed to parse_config_file();
 * when that setting is off the cleaner is appended with no options.  An
 * explicit REMOVE_TRAILING line always turns the setting on.
 */
wipeup:	WIPEUP {
		csopts = NULL;
		if (current_options->remove_trailing) {
			csopts = calloc(1, sizeof(*csopts));
			if (csopts == NULL) {
				/* yyerror() reports the problem and exits. */
				yyerror("out of memory");
			}
			csopts->remove_trailing = 1;
		}

		cf_append_sequence_entry(&clean_wipeup, csopts);
	}
	|
	WIPEUP OPEN CLOSE {
		csopts = NULL;
		if (current_options->remove_trailing) {
			csopts = calloc(1, sizeof(*csopts));
			if (csopts == NULL) {
				/* yyerror() reports the problem and exits. */
				yyerror("out of memory");
			}
			csopts->remove_trailing = 1;
		}

		cf_append_sequence_entry(&clean_wipeup, csopts);
	}
	|
	WIPEUP OPEN REMOVE_TRAILING EOL CLOSE {
		csopts = calloc(1, sizeof(*csopts));
		if (csopts == NULL) {
			/* yyerror() reports the problem and exits. */
			yyerror("out of memory");
		}
		csopts->remove_trailing = 1;

		cf_append_sequence_entry(&clean_wipeup, csopts);
	}
	;
173 
/*
 * MAX_LENGTH method.  Accepts the bare keyword, an empty option block, or
 * an option block with a LENGTH whose number is stored in
 * clean_string_options.max_length.  Without a LENGTH the cleaner is
 * appended with no options.
 */
max_length: MAX_LENGTH	{ cf_append_sequence_entry(&clean_max_length, NULL); }
	|
	MAX_LENGTH OPEN CLOSE { cf_append_sequence_entry(&clean_max_length, NULL); }
	|
	MAX_LENGTH OPEN LENGTH NVALUE EOL CLOSE {
		csopts = calloc(1, sizeof(*csopts));
		if (csopts == NULL) {
			/* yyerror() reports the problem and exits. */
			yyerror("out of memory");
		}
		/*
		 * NOTE(review): NVALUE is an int; a negative value would wrap
		 * to a very large size_t here.  Confirm the scanner only
		 * produces non-negative numbers.
		 */
		csopts->max_length = (size_t)$4;

		cf_append_sequence_entry(&clean_max_length, csopts);
	}
	;
186 
/* An ignore block: an opening line, one or more entries, a closing line. */
ignore: ignore_open ignore_list ignore_close
	;

/* Opening of an ignore block; no action is needed. */
ignore_open: IGNORE OPEN
	;

/* Closing of an ignore block; entries were already stored one by one. */
ignore_close: CLOSE EOL
	;

/* One or more ignore entries. */
ignore_list: ignore_filename
	| ignore_list ignore_filename
	;
199 
/* A single ignore entry: store the given string as a filename to ignore. */
ignore_filename: FILENAME string EOL
		{
			cf_append_ignore_entry(FILENAME, $2);
		}
	;
204 
/* A string is either a QSTRING or an ID token; its text passes through. */
string: QSTRING
		{
			$$ = $1;
		}
	| ID
		{
			$$ = $1;
		}
	;
209 
210 %%
211 
212 extern FILE *yyin;
213 extern FILE *yyout;
214 
parse_config_file(char * filename,struct detox_parse_results * previous_results,struct detox_options * main_options)215 struct detox_parse_results *parse_config_file(char *filename, struct detox_parse_results *previous_results, struct detox_options *main_options) {
216 	struct detox_parse_results *ret = NULL;
217 
218 	current_filename = filename;
219 	current_options = main_options;
220 
221 	/*
222 	 * XXX - Should we be closing the default yyin/yyout?  If so, should we
223 	 * be setting them to NULL at the end of this function?
224 	 */
225 
226 	yyin = fopen(filename, "r");
227 	if (yyin == NULL) {
228 		return previous_results;
229 	}
230 	yyout = fopen("/dev/null", "w");
231 
232 	/*
233 	 * Initialize the return variable
234 	 */
235 
236 	if (previous_results) {
237 		ret = previous_results;
238 	}
239 	else {
240 		ret = malloc(sizeof(struct detox_parse_results));
241 		memset(ret, 0, sizeof(struct detox_parse_results));
242 	}
243 
244 	/*
245 	 * Initialize the sequence list
246 	 */
247 
248 	cf_sl_ret = NULL;
249 	cf_sl_current = NULL;
250 
251 	if (previous_results && previous_results->sequences) {
252 		cf_sl_ret = previous_results->sequences;
253 		cf_sl_current = cf_sl_ret;
254 		while (cf_sl_current->next != NULL) {
255 			cf_sl_current = cf_sl_current->next;
256 		}
257 	}
258 
259 	/*
260 	 * Initialize the ignore list
261 	 */
262 
263 	cf_ignore_ret = NULL;
264 	cf_ignore_current = NULL;
265 
266 	if (previous_results && previous_results->files_to_ignore) {
267 		cf_ignore_ret = previous_results->files_to_ignore;
268 		cf_ignore_current = cf_ignore_ret;
269 		while (cf_ignore_current->next != NULL) {
270 			cf_ignore_current = cf_ignore_current->next;
271 		}
272 	}
273 
274 	/*
275 	 * Reset the sequence entry holding vars
276 	 */
277 
278 	cf_seq_ret = NULL;
279 	cf_seq_current = NULL;
280 
281 	do {
282 		yyparse();
283 	}
284 	while (!feof(yyin));
285 
286 	fclose(yyin);
287 	fclose(yyout);
288 
289 	/*
290 	 * Populate returns
291 	 */
292 
293 	ret->sequences = cf_sl_ret;
294 	ret->files_to_ignore = cf_ignore_ret;
295 
296 	return ret;
297 }
298 
yyerror(char * s)299 void yyerror(char *s) {
300 	/*
301 	 * XXX - Is extern valid here?  Does it do what I'm expecting?
302 	 */
303 	extern char *yytext;
304 
305 	fprintf(stderr, "detox: error parsing config file %s: %s\n", current_filename, s);
306 	fprintf(stderr, "\tline %d", config_file_lineno);
307 	if (yytext != NULL) {
308 		fprintf(stderr, ": %s", yytext);
309 	}
310 	fprintf(stderr, "\n");
311 	exit(EXIT_FAILURE);
312 }
313 
314 
cf_append_sequence_list(void)315 void cf_append_sequence_list(void) {
316 	struct detox_sequence_list *work;
317 
318 	if (current_name == NULL) {
319 		current_name = strdup("default");
320 	}
321 
322 	work = NULL;
323 
324 	if (cf_sl_ret != NULL) {
325 		work = cf_sl_ret;
326 
327 		while (work != NULL) {
328 			if (strcmp(work->name, current_name) == 0) {
329 				break;
330 			}
331 
332 			work = work->next;
333 		}
334 
335 	}
336 
337 	if (work != NULL) {
338 		/*
339 		 * XXX - Free Old Tree
340 		 */
341 	}
342 	else {
343 		work = malloc(sizeof(struct detox_sequence_list));
344 		memset(work, 0, sizeof(struct detox_sequence_list));
345 
346 		work->name = strdup(current_name);
347 
348 		/*
349 		 * Append to the tree first.  If we don't, we could create a
350 		 * circular reference.
351 		 */
352 		if (cf_sl_ret == NULL) {
353 			cf_sl_ret = cf_sl_current = work;
354 		}
355 		else {
356 			cf_sl_current->next = work;
357 			cf_sl_current = work;
358 		}
359 
360 	}
361 
362 	work->head = cf_seq_ret;
363 	work->source_filename = strdup(current_filename);
364 	cf_seq_ret = cf_seq_current = NULL;
365 
366 }
367 
368 
cf_append_sequence_entry(void * ptr,void * opts)369 void cf_append_sequence_entry(void *ptr, void *opts) {
370 	struct detox_sequence_entry *work;
371 
372 	work = malloc(sizeof(struct detox_sequence_entry));
373 	memset(work, 0, sizeof(struct detox_sequence_entry));
374 
375 	work->cleaner = ptr;
376 	work->options = opts;
377 
378 	if (cf_seq_ret == NULL) {
379 		cf_seq_ret = cf_seq_current = work;
380 	}
381 	else {
382 		cf_seq_current->next = work;
383 		cf_seq_current = work;
384 	}
385 }
386 
387 
cf_append_ignore_entry(int token,void * str)388 void cf_append_ignore_entry(int token, void *str) {
389 	struct detox_ignore_entry *work;
390 
391 	work = malloc(sizeof(struct detox_ignore_entry));
392 	memset(work, 0, sizeof(struct detox_ignore_entry));
393 
394 	switch(token) {
395 		case FILENAME:
396 			work->filename = str;
397 			break;
398 
399 		default:
400 			break;
401 	}
402 
403 	if (cf_ignore_ret == NULL) {
404 		cf_ignore_ret = cf_ignore_current = work;
405 	}
406 	else {
407 		cf_ignore_current->next = work;
408 		cf_ignore_current = work;
409 	}
410 }
411 
412 
413