1 /*
2 AWFFull - A Webalizer Fork, Full o' features
3
4 parser.c
5 parsing log lines and individual records
6
7 Copyright (C) 1997-2001 Bradford L. Barrett (brad@mrunix.net)
8 Copyright (C) 2004-2008 by Stephen McInerney (spm@stedee.id.au)
9 Copyright (C) 2006 by John Heaton (john@manchester.ac.uk)
10
11 This file is part of AWFFull.
12
13 AWFFull is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 AWFFull is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with AWFFull. If not, see <http://www.gnu.org/licenses/>.
25
26 This software uses the gd graphics library, which is copyright by
27 Quest Protein Database Center, Cold Spring Harbor Labs. Please
28 see the documentation supplied with the library for additional
29 information and license terms, or visit www.boutell.com/gd/ for the
30 most recent version of the library and supporting documentation.
31
32 */
33
34 #include "awffull.h" /* main header */
35
36 /* internal function prototypes */
37 static int parse_record_web(char *, struct log_struct *);
38 static int parse_record_ftp(char *, struct log_struct *);
39 static int parse_record_squid(char *, struct log_struct *);
40
41 static int identify_log_format(char *);
42 static void re_compile_all_regexes(void); /* Use at first run - compiles all used regex's */
43 static void re_compile_failed(int, const char *, char *); /* Display a failed RE Compile & where */
44 static void re_check_errors(int); /* After an RE check, deal with any errors */
45 static void error_substring_extract(int, int); /* Error when we fail on getting a substring */
46
47 static pcre *cmp_log_regexp = NULL; /* Main compiled RE - use as pointer only to one of the below */
48 static pcre *cmp_log_regexp_clf = NULL; /* CLF compiled RE */
49 static pcre *cmp_log_regexp_combined = NULL; /* Combined compiled RE */
50 static pcre *cmp_log_regexp_combined_enhanced = NULL; /* Enhanced Combined compiled RE */
51 static pcre *cmp_log_regexp_xferlog = NULL; /* FTP, xferlog format compiled RE */
52 static pcre *cmp_log_regexp_squid = NULL; /* SQUID format compiled RE */
53 static pcre *cmp_log_regexp_domino = NULL; /* Lotus Domino v6 format compiled RE */
54
55 //pcre_extra *studied_log_regexp = NULL;
56
57
58 /*********************************************/
59 /* PARSE_RECORD - uhhh, you know... */
60 /*********************************************/
61
62 int
parse_record(char * buffer,struct log_struct * log_ptr)63 parse_record(char *buffer, struct log_struct *log_ptr)
64 //parse_record(char *buffer)
65 {
66 int auto_log_type = 0;
67
68 /* clear out structure */
69 // memset(&log_rec, 0, sizeof(struct log_struct));
70
71 if (cmp_log_regexp == NULL) {
72 re_compile_all_regexes();
73 if (g_settings.settings.log_type == LOG_AUTO) {
74 auto_log_type = identify_log_format(buffer);
75 if (auto_log_type > 0) {
76 g_settings.settings.log_type = auto_log_type;
77 } else {
78 ERRVPRINT(VERBOSE0, "%s\n", _("Cannot recognise log format. Manually configure \"LogType\" in the config file."));
79 exit(1);
80 }
81 }
82 switch (g_settings.settings.log_type) {
83 case LOG_FTP:
84 cmp_log_regexp = cmp_log_regexp_xferlog;
85 break;
86 case LOG_SQUID:
87 cmp_log_regexp = cmp_log_regexp_squid;
88 break;
89 case LOG_CLF:
90 cmp_log_regexp = cmp_log_regexp_clf;
91 break;
92 case LOG_COMBINED:
93 cmp_log_regexp = cmp_log_regexp_combined;
94 break;
95 case LOG_DOMINO:
96 cmp_log_regexp = cmp_log_regexp_domino;
97 break;
98 default:
99 ERRVPRINT(VERBOSE0, "%s %d\n", _("Unknown LOG Type Setting.:"), g_settings.settings.log_type);
100 exit(1);
101 }
102 }
103
104 /* call appropriate handler */
105 switch (g_settings.settings.log_type) {
106 default:
107 case LOG_CLF:
108 case LOG_COMBINED:
109 return parse_record_web(buffer, log_ptr);
110 break; /* clf */
111 case LOG_FTP:
112 return parse_record_ftp(buffer, log_ptr);
113 break; /* ftp */
114 case LOG_SQUID:
115 return parse_record_squid(buffer, log_ptr);
116 break; /* squid */
117 }
118 }
119
120 /*********************************************/
121 /* PARSE_RECORD_FTP - ftp log handler */
122 /*********************************************/
123 static int
parse_record_ftp(char * buffer,struct log_struct * log_rec)124 parse_record_ftp(char *buffer, struct log_struct *log_rec)
125 {
126 int ovector[OVECCOUNT]; /* RE substring offsets array */
127 int rc; /* RE Check return value */
128 int copy_substr_rtn; /* RE Check return from pcre_copy_substring */
129
130 int buffer_length;
131
132 char tmp_bytes[25 + 1];
133 char completion_status[2 + 1];
134
135 buffer_length = (int) strlen(buffer);
136 rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
137 /* check for RE matching errors */
138 if (rc < 0) {
139 re_check_errors(rc);
140 return (0);
141 }
142
143 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 1, log_rec->datetime, 29);
144 if (copy_substr_rtn < 0) {
145 error_substring_extract(copy_substr_rtn, 1);
146 return (0);
147 }
148
149 /* Ignore time taken (in seconds) for now... */
150
151 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 3, log_rec->hostname, MAXHOST);
152 if (copy_substr_rtn < 0) {
153 error_substring_extract(copy_substr_rtn, 3);
154 return (0);
155 }
156
157 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 4, tmp_bytes, 20);
158 if (copy_substr_rtn < 0) {
159 error_substring_extract(copy_substr_rtn, 4);
160 return (0);
161 }
162 log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
163
164 /* URL */
165 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 5, log_rec->url, MAXURL);
166 if (copy_substr_rtn < 0) {
167 error_substring_extract(copy_substr_rtn, 5);
168 return (0);
169 }
170
171 /* Ignore Transfer Type */
172 /* Ignore special-action-flag */
173 /* Ignore Direction */
174
175 /* User */
176 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 10, log_rec->ident, MAXIDENT);
177 if (copy_substr_rtn < 0) {
178 error_substring_extract(copy_substr_rtn, 10);
179 return (0);
180 }
181
182 /* Completion Status - fake to a 200 or 206 */
183 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 14, completion_status, 2);
184 if (copy_substr_rtn < 0) {
185 error_substring_extract(copy_substr_rtn, 14);
186 return (0);
187 }
188 if (completion_status[0] == 'i') {
189 log_rec->resp_code = 206;
190 } else {
191 /* == c */
192 log_rec->resp_code = 200;
193 }
194
195 return (1);
196 }
197
198 /*********************************************
199 * PARSE_RECORD_WEB - web log handler *
200 * parse with pcre *
201 *********************************************/
202 static int
parse_record_web(char * buffer,struct log_struct * log_rec)203 parse_record_web(char *buffer, struct log_struct *log_rec)
204 {
205 int ovector[OVECCOUNT]; /* RE substring offsets array */
206 int rc; /* RE Check return value */
207 int copy_substr_rtn; /* RE Check return from pcre_copy_substring */
208
209 int buffer_length;
210
211 char tmp_status[5 + 1];
212 char tmp_bytes[20 + 1];
213
214 buffer_length = (int) strlen(buffer);
215 // rc = pcre_exec (cmp_log_regexp, studied_log_regexp, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
216 rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
217 /* check for RE matching errors */
218 if (rc < 0) {
219 /* First see if a normal enhanced regex will work.
220 * If this fails, then see if we can get a Domino style match
221 * If this works - switch to Domino Logs,
222 * If Fails - Boom.
223 */
224 if ((cmp_log_regexp != cmp_log_regexp_domino) && (cmp_log_regexp == cmp_log_regexp_combined) && (g_settings.settings.log_type == LOG_COMBINED)) {
225 /* Attempt an enhanced log match */
226 VPRINT(VERBOSE1, "%s\n", _("Attempting COMBINED_ENHANCED Regular Expression"));
227 rc = pcre_exec(cmp_log_regexp_combined_enhanced, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
228
229 /* Didn't work. Try Domino? */
230 if (rc < 0 && g_settings.flags.force_log_type == false) {
231 /* Try a domino log format first - if is, switch to using domino checks instead */
232 VPRINT(VERBOSE1, "%s\n", _("Attempting COMBINED_DOMINO Regular Expression"));
233 rc = pcre_exec(cmp_log_regexp_domino, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
234 if (rc >= 0) {
235 /* Successfully matched as a Domino Log, apply domino RE from here on */
236 /* FIXME: The default domino RegEx is perhaps not as quick - is based on the ENHANCED */
237 VPRINT(VERBOSE1, "%s\n", _("Switching to DOMINO log format"));
238 cmp_log_regexp = cmp_log_regexp_domino;
239 }
240 }
241 }
242 if (rc < 0) {
243 re_check_errors(rc);
244 return (0);
245 }
246 }
247
248 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_ADDRESS, log_rec->hostname, MAXHOST - 1);
249 if (copy_substr_rtn < 0) {
250 error_substring_extract(copy_substr_rtn, LF_NCSA_ADDRESS);
251 return (0);
252 }
253
254 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_AUTHUSER, log_rec->ident, MAXIDENT - 1);
255 if (copy_substr_rtn < 0) {
256 error_substring_extract(copy_substr_rtn, LF_NCSA_USER);
257 return (0);
258 }
259
260 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_DATE_TIME, log_rec->datetime, MAXDATETIME - 1);
261 if (copy_substr_rtn < 0) {
262 error_substring_extract(copy_substr_rtn, LF_NCSA_DATE_TIME);
263 return (0);
264 }
265
266 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_URL, log_rec->url, MAXURL - 1);
267 if (copy_substr_rtn < 0) {
268 error_substring_extract(copy_substr_rtn, LF_NCSA_URL);
269 return (0);
270 }
271
272 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_STATUS, tmp_status, 5);
273 if (copy_substr_rtn < 0) {
274 error_substring_extract(copy_substr_rtn, LF_NCSA_STATUS);
275 return (0);
276 }
277 log_rec->resp_code = atoi(tmp_status);
278
279 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_BYTES, tmp_bytes, 20);
280 if (copy_substr_rtn < 0) {
281 error_substring_extract(copy_substr_rtn, LF_NCSA_BYTES);
282 return (0);
283 }
284 log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
285
286 if (g_settings.settings.log_type == LOG_COMBINED) {
287 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_REFERER, log_rec->refer, MAXREF - 1);
288 if (copy_substr_rtn < 0) {
289 error_substring_extract(copy_substr_rtn, LF_NCSA_REFERER);
290 return (0);
291 }
292 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_BROWSER, log_rec->agent, MAXAGENT - 1);
293 if (copy_substr_rtn < 0) {
294 error_substring_extract(copy_substr_rtn, LF_NCSA_BROWSER);
295 return (0);
296 }
297 }
298 return (1);
299 }
300
301
302 /*********************************************/
303 /* PARSE_RECORD_SQUID - squid log handler */
304 /*********************************************/
305 static int
parse_record_squid(char * buffer,struct log_struct * log_rec)306 parse_record_squid(char *buffer, struct log_struct *log_rec)
307 {
308 int ovector[OVECCOUNT]; /* RE substring offsets array */
309 int rc; /* RE Check return value */
310 int copy_substr_rtn; /* RE Check return from pcre_copy_substring */
311
312 int buffer_length;
313
314 char tmp_bytes[25 + 1];
315
316 buffer_length = (int) strlen(buffer);
317 rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
318 /* check for RE matching errors */
319 if (rc < 0) {
320 re_check_errors(rc);
321 return (0);
322 }
323
324 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 1, log_rec->datetime, 29);
325 if (copy_substr_rtn < 0) {
326 error_substring_extract(copy_substr_rtn, 1);
327 return (0);
328 }
329
330 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 4, log_rec->hostname, MAXHOST - 1);
331 if (copy_substr_rtn < 0) {
332 error_substring_extract(copy_substr_rtn, 1);
333 return (0);
334 }
335
336 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 6, tmp_bytes, 20);
337 if (copy_substr_rtn < 0) {
338 error_substring_extract(copy_substr_rtn, 1);
339 return (0);
340 }
341 log_rec->resp_code = strtoul(tmp_bytes, NULL, 10);
342
343 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 7, tmp_bytes, 20);
344 if (copy_substr_rtn < 0) {
345 error_substring_extract(copy_substr_rtn, 1);
346 return (0);
347 }
348 log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
349
350 copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 9, log_rec->url, MAXURL - 1);
351 if (copy_substr_rtn < 0) {
352 error_substring_extract(copy_substr_rtn, 1);
353 return (0);
354 }
355
356 return (1);
357 }
358
359 /************************************************************************
360 * parse_check_not_page *
361 * *
362 * Given a URL (field from a log line) *
363 * determine if this URL is a page or not *
364 * *
365 * This function is the opposite of parse_is_page, and is only called *
366 * from same. It takes the linked list of "NotPageType" and applies *
367 * and RE against a log line built from that list. *
368 * *
369 * A successfull match means that this line is NOT a page *
370 * *
371 * Arguments: *
372 * ----------- *
373 * char *url - A URL field, typically: log_rec->url *
374 * *
375 * Returns: *
376 * ----------- *
377 * Boolean. True if, yes this is a page *
378 * False is not a page. ie Successfull match. *
379 * *
380 ************************************************************************/
381 bool
parse_check_not_page(char * url)382 parse_check_not_page(char *url)
383 {
384 char regex_page[MAX_RE_LENGTH + 1] = ""; /* Hold the PAGE RE */
385 static pcre *cmp_regex_page = NULL; /* NotPage compiled RE */
386
387 const char *error; /* RE error pointer, offset */
388 int erroffset; /* RE error value */
389 int str_length, tmp_length;
390 int rc; /* RE Check return value */
391 static int max_type_length = 0;
392 char *str_start = url;
393
394 LISTPTR lptr = not_page_type;
395
396 /* Compile both RegEx's */
397 if (cmp_regex_page == NULL) {
398 /* Build the RegEx first, loop thru the PageType list & reverse */
399 strcat(regex_page, "\\.(");
400 while (lptr != NULL) {
401 strcat(regex_page, lptr->string);
402 str_length = strlen(lptr->string);
403 if (str_length > max_type_length) {
404 max_type_length = str_length;
405 }
406 lptr = lptr->next;
407 if (lptr != NULL) {
408 strcat(regex_page, "|");
409 }
410 }
411 strcat(regex_page, ")$");
412 VPRINT(VERBOSE2, "PCRE: New NotPAGE RegEx: '%s', Max: %d\n", regex_page, max_type_length);
413
414 /* Compile the RegEx */
415 cmp_regex_page = pcre_compile(regex_page, 0, &error, &erroffset, NULL);
416 VPRINT(VERBOSE2, "PCRE: Compile PAGE%s", "\n");
417 if (cmp_regex_page == NULL) {
418 re_compile_failed(erroffset, error, regex_page);
419 }
420 max_type_length++; /* Increase by 1 for starting '.' */
421 }
422
423 str_length = strlen(url);
424 if (str_length < max_type_length) {
425 tmp_length = str_length;
426 } else {
427 tmp_length = max_type_length;
428 str_start = url + str_length - max_type_length;
429 }
430 VPRINT(VERBOSE4, " Was: '%s', Is: %s\n", url, str_start);
431
432 rc = pcre_exec(cmp_regex_page, NULL, str_start, tmp_length, 0, 0, NULL, 0);
433 /* check for RE matching */
434 if (rc >= 0) {
435 /* Have matched! */
436 return (false);
437 }
438 return (true);
439 }
440
441 /************************************************************************
442 * parse_is_page *
443 * *
444 * Given a URL (field from a log line) *
445 * determine if this URL is a page or not *
446 * *
447 * Logic: *
448 * ----------- *
449 * The RegEx and URL provided are reversed for efficiency - we only *
450 * want to match the end of a URL, not the entire thing. *
451 * Firstly builds the RegEx. Does this by reversing the provided *
452 * PageType config options, and wrappering appropriate RE around.
453 * *
454 * Arguments: *
455 * ----------- *
456 * char *url - A URL field, typically: log_rec->url *
457 * *
458 * Returns: *
459 * ----------- *
460 * Boolean. True if, yes this is a page *
461 * False in all other cases. Including invalid args. *
462 * *
463 ************************************************************************/
464
465 bool
parse_is_page(char * url)466 parse_is_page(char *url)
467 {
468 char regex_page[MAX_RE_LENGTH + 1] = ""; /* Hold the PAGE RE */
469 static pcre *cmp_regex_page = NULL; /* Page compiled RE */
470
471 const char *error; /* RE error pointer, offset */
472 int erroffset; /* RE error value */
473 int str_length;
474 int rc; /* RE Check return value */
475 char reverse[MAXURL + 1] = "";
476 int i, j = 0;
477 LISTPTR lptr;
478
479 if (not_page_type != NULL) {
480 return (parse_check_not_page(url));
481 }
482
483 lptr = page_type;
484
485 /* Compile both RegEx's */
486 if (cmp_regex_page == NULL) {
487 /* Build the RegEx first, loop thru the PageType list & reverse */
488 strcat(regex_page, "^(\\/|(");
489 while (lptr != NULL) {
490 str_length = (int) strlen(lptr->string);
491 j = 0;
492 for (i = str_length - 1; i >= 0; i--) {
493 if (lptr->string[i] == '*') {
494 reverse[j] = '.';
495 j++;
496 }
497 reverse[j] = lptr->string[i];
498 j++;
499 }
500 reverse[j] = '\0';
501 strcat(regex_page, reverse);
502 lptr = lptr->next;
503 if (lptr != NULL) {
504 strcat(regex_page, "|");
505 }
506 }
507 strcat(regex_page, ")\\.|[^./]+\\/)");
508 VPRINT(VERBOSE2, "PCRE: New PAGE RegEx: '%s'\n", regex_page);
509
510 /* Compile the RegEx */
511 cmp_regex_page = pcre_compile(regex_page, 0, &error, &erroffset, NULL);
512 VPRINT(VERBOSE2, "PCRE: Compile PAGE%s", "\n");
513 if (cmp_regex_page == NULL) {
514 re_compile_failed(erroffset, error, regex_page);
515 }
516 }
517
518 str_length = (int) strlen(url);
519 j = 0;
520 for (i = str_length - 1; i >= 0; i--) {
521 /* Use pointer math - faster at loss of clarity */
522 *(reverse + j) = *(url + i);
523 j++;
524 }
525 *(reverse + j) = '\0'; /* Probably not needed as we provide the length... */
526
527 rc = pcre_exec(cmp_regex_page, NULL, reverse, str_length, 0, 0, NULL, 0);
528 /* check for RE matching */
529 if (rc >= 0) {
530 /* Have matched! */
531 return (true);
532 }
533 return (false);
534 } /* parse_is_page */
535
536
537 /************************************************************************
538 * identify_log_format *
539 * *
540 * Attempt to identify the type of log format we've been given. *
541 * Returns the LOG_type as defined in awffull.h *
542 * returns -1 if unknown. *
543 * *
544 * Requires a line of the log to attempt to process *
545 ************************************************************************/
546 static int
identify_log_format(char * buffer)547 identify_log_format(char *buffer)
548 {
549 int ovector[OVECCOUNT]; /* RE substring offsets array */
550 int rc; /* RE Check return value */
551 int buffer_length;
552
553
554 buffer_length = (int) strlen(buffer);
555
556 /* Check for COMBINED */
557 rc = pcre_exec(cmp_log_regexp_combined, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
558 if (rc >= 0) {
559 /* Matches against COMBINED */
560 VPRINT(VERBOSE1, "%s\n", _("Using COMBINED Log Format"));
561 return (LOG_COMBINED);
562 }
563
564 /* Check for COMBINED_DOMINO */
565 /* If the first line is a non logged in user, it'll probably register as COMBINED... */
566 rc = pcre_exec(cmp_log_regexp_domino, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
567 if (rc >= 0) {
568 /* Matches against COMBINED_DOMINO */
569 VPRINT(VERBOSE1, "%s\n", _("Using COMBINED_DOMINO Log Format"));
570 return (LOG_DOMINO);
571 }
572
573 rc = pcre_exec(cmp_log_regexp_clf, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
574 if (rc >= 0) {
575 /* Matches against COMBINED */
576 VPRINT(VERBOSE1, "%s\n", _("Using CLF Log Format"));
577 return (LOG_CLF);
578 }
579
580 rc = pcre_exec(cmp_log_regexp_xferlog, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
581 if (rc >= 0) {
582 /* Matches against FTP/XFERLOG */
583 VPRINT(VERBOSE1, "%s\n", _("Using FTP/XFERLOG Log Format"));
584 /* Invalid tables for this log type. Zero them away and hence not display. */
585 g_settings.top.agents = 0;
586 g_settings.top.refs = 0;
587 return (LOG_FTP);
588 }
589
590 rc = pcre_exec(cmp_log_regexp_squid, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
591 if (rc >= 0) {
592 /* Matches against SQUID */
593 VPRINT(VERBOSE1, "%s\n", _("Using SQUID Log Format"));
594 /* Invalid tables for this log type. Zero them away and hence not display. */
595 g_settings.top.agents = 0;
596 g_settings.top.refs = 0;
597 return (LOG_SQUID);
598 }
599
600 VPRINT(VERBOSE1, "%s\n", _("Unrecognised Log Format"));
601 return (-1); /* Failed to match any, unknown format */
602 }
603
604
605 /************************************************************************
606 * re_compile_all_regexs *
607 * *
608 * Does what the name says, in a single function we compile all *
609 * possibly used Regular expressions. *
610 * Either forcibly exits on any failure, or happily finishes. *
611 * No values needed or returned. *
612 * *
613 * Assigns the RE's to the various globals: *
614 * cmp_log_regexp_* *
615 ************************************************************************/
616 static void
re_compile_all_regexes(void)617 re_compile_all_regexes(void)
618 {
619 char log_regexp_clf[MAX_RE_LENGTH] = PATTERN_CLF;
620 char log_regexp_combined[MAX_RE_LENGTH] = PATTERN_COMBINED;
621 char log_regexp_combined_enhanced[MAX_RE_LENGTH] = PATTERN_COMBINED_ENHANCED;
622 char log_regexp_xferlog[MAX_RE_LENGTH] = PATTERN_XFERLOG;
623 char log_regexp_squid[MAX_RE_LENGTH] = PATTERN_SQUID;
624 char log_regexp_domino[MAX_RE_LENGTH] = PATTERN_DOMINO;
625
626 const char *error; /* RE error pointer, offset */
627 int erroffset; /* RE error value */
628
629 /* CLF */
630 cmp_log_regexp_clf = pcre_compile(log_regexp_clf, 0, &error, &erroffset, NULL);
631 VPRINT(VERBOSE2, "PCRE: Compile CLF%s", "\n");
632 if (cmp_log_regexp_clf == NULL) {
633 re_compile_failed(erroffset, error, log_regexp_clf);
634 }
635
636 /* Combined */
637 cmp_log_regexp_combined = pcre_compile(log_regexp_combined, 0, &error, &erroffset, NULL);
638 VPRINT(VERBOSE2, "PCRE: Compile COMBINED%s", "\n");
639 if (cmp_log_regexp_combined == NULL) {
640 re_compile_failed(erroffset, error, log_regexp_combined);
641 }
642
643 /* Enhanced Combined */
644 cmp_log_regexp_combined_enhanced = pcre_compile(log_regexp_combined_enhanced, 0, &error, &erroffset, NULL);
645 VPRINT(VERBOSE2, "PCRE: Compile COMBINED_ENHANCED%s", "\n");
646 if (cmp_log_regexp_combined_enhanced == NULL) {
647 re_compile_failed(erroffset, error, log_regexp_combined_enhanced);
648 }
649
650 /* FTP XFERLOG */
651 cmp_log_regexp_xferlog = pcre_compile(log_regexp_xferlog, 0, &error, &erroffset, NULL);
652 VPRINT(VERBOSE2, "PCRE: Compile PATTERN_XFERLOG%s", "\n");
653 if (cmp_log_regexp_xferlog == NULL) {
654 re_compile_failed(erroffset, error, log_regexp_xferlog);
655 }
656
657 /* SQUID LOG */
658 cmp_log_regexp_squid = pcre_compile(log_regexp_squid, 0, &error, &erroffset, NULL);
659 VPRINT(VERBOSE2, "PCRE: Compile PATTERN_SQUID%s", "\n");
660 if (cmp_log_regexp_squid == NULL) {
661 re_compile_failed(erroffset, error, log_regexp_squid);
662 }
663
664 /* DOMINO LOG */
665 cmp_log_regexp_domino = pcre_compile(log_regexp_domino, 0, &error, &erroffset, NULL);
666 VPRINT(VERBOSE2, "PCRE: Compile PATTERN_COMBINED_DOMINO%s", "\n");
667 if (cmp_log_regexp_domino == NULL) {
668 re_compile_failed(erroffset, error, log_regexp_domino);
669 }
670 }
671
672
673 /************************************************************************
674 * re_check_errors *
675 * *
676 * After an RE check, deal with any errors *
677 * err: value returned from pcre_exec *
678 * str_ptr: String that failed to match *
679 ************************************************************************/
680 static void
re_check_errors(int err)681 re_check_errors(int err)
682 {
683
684 /* Matching failed: handle error cases */
685 switch (err) {
686 case PCRE_ERROR_NOMATCH:
687 ERRVPRINT(VERBOSE1, "%s", _("Warning: No Regular Expression Match. "));
688 break;
689 /* Leave out the more explicit failure messages - we show the number, so can be found.
690 case PCRE_ERROR_NULL:
691 case PCRE_ERROR_BADOPTION:
692 case PCRE_ERROR_BADMAGIC:
693 case PCRE_ERROR_UNKNOWN_NODE:
694 case PCRE_ERROR_NOMEMORY:
695 case PCRE_ERROR_NOSUBSTRING:
696 case PCRE_ERROR_MATCHLIMIT:
697 case PCRE_ERROR_CALLOUT:
698 case PCRE_ERROR_BADUTF8:
699 case PCRE_ERROR_BADUTF8_OFFSET:
700 case PCRE_ERROR_PARTIAL:
701 case PCRE_ERROR_BAD_PARTIAL:
702 case PCRE_ERROR_INTERNAL:
703 case PCRE_ERROR_BADCOUNT:
704 */
705 default:
706 ERRVPRINT(VERBOSE1, "%s %d\n", _("Warning: Regular Expression Error:"), err);
707 break;
708 }
709 }
710
711
712 /************************************************************************
713 * re_compile_failed *
714 * *
715 * Display a failed RE Compile & where *
716 * FATAL failure. Will exit the run. *
717 ************************************************************************/
718 static void
re_compile_failed(int err,const char * err_offset,char * re_str)719 re_compile_failed(int err, const char *err_offset, char *re_str)
720 {
721 ERRVPRINT(VERBOSE0, "%s %d %s\n", _("FATAL ERROR! PCRE compilation failed at offset"), err, err_offset);
722 ERRVPRINT(VERBOSE0, "%s %s\n", _(" Using Regular Expression:"), re_str);
723 exit(1); /* FIXME - table of exit codes! */
724 }
725
726
727 /************************************************************************
728 * error_substring_extract *
729 * *
730 * Display a failed substring extraction *
731 * Error Only, as this should have failed the RE *
732 ************************************************************************/
733 static void
error_substring_extract(int err,int substr_idx)734 error_substring_extract(int err, int substr_idx)
735 {
736 ERRVPRINT(VERBOSE1, "%s %d\n", _("Error: Failed to extract substring:"), substr_idx);
737 switch (err) {
738 case PCRE_ERROR_NOMEMORY:
739 ERRVPRINT(VERBOSE2, " PCRE: Insufficient Memory\n");
740 break;
741 case PCRE_ERROR_NOSUBSTRING:
742 ERRVPRINT(VERBOSE2, " PCRE: Substring doesn't exist.\n");
743 break;
744 default:
745 ERRVPRINT(VERBOSE2, " Unknown PCRE Error: %d\n", err);
746 break;
747 }
748 }
749
750
751 /************************************************************************
752 ************************************************************************
753 * END OF FILE *
754 ************************************************************************/
755