1 /*
2     AWFFull - A Webalizer Fork, Full o' features
3 
4     parser.c
5         parsing log lines and individual records
6 
7     Copyright (C) 1997-2001  Bradford L. Barrett (brad@mrunix.net)
8     Copyright (C) 2004-2008 by Stephen McInerney (spm@stedee.id.au)
9     Copyright (C) 2006 by John Heaton (john@manchester.ac.uk)
10 
11     This file is part of AWFFull.
12 
13     AWFFull is free software: you can redistribute it and/or modify
14     it under the terms of the GNU General Public License as published by
15     the Free Software Foundation, either version 3 of the License, or
16     (at your option) any later version.
17 
18     AWFFull is distributed in the hope that it will be useful,
19     but WITHOUT ANY WARRANTY; without even the implied warranty of
20     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21     GNU General Public License for more details.
22 
23     You should have received a copy of the GNU General Public License
24     along with AWFFull.  If not, see <http://www.gnu.org/licenses/>.
25 
26     This software uses the gd graphics library, which is copyright by
27     Quest Protein Database Center, Cold Spring Harbor Labs.  Please
28     see the documentation supplied with the library for additional
29     information and license terms, or visit www.boutell.com/gd/ for the
30     most recent version of the library and supporting documentation.
31 
32 */
33 
34 #include "awffull.h"                            /* main header              */
35 
36 /* internal function prototypes */
37 static int parse_record_web(char *, struct log_struct *);
38 static int parse_record_ftp(char *, struct log_struct *);
39 static int parse_record_squid(char *, struct log_struct *);
40 
41 static int identify_log_format(char *);
42 static void re_compile_all_regexes(void);       /* Use at first run - compiles all used regex's */
43 static void re_compile_failed(int, const char *, char *);       /* Display a failed RE Compile & where */
44 static void re_check_errors(int);               /* After an RE check, deal with any errors */
45 static void error_substring_extract(int, int);  /* Error when we fail on getting a substring */
46 
47 static pcre *cmp_log_regexp = NULL;             /* Main compiled RE - use as pointer only to one of the below */
48 static pcre *cmp_log_regexp_clf = NULL;         /* CLF compiled RE */
49 static pcre *cmp_log_regexp_combined = NULL;    /* Combined compiled RE */
50 static pcre *cmp_log_regexp_combined_enhanced = NULL;   /* Enhanced Combined compiled RE */
51 static pcre *cmp_log_regexp_xferlog = NULL;     /* FTP, xferlog format compiled RE */
52 static pcre *cmp_log_regexp_squid = NULL;       /* SQUID format compiled RE */
53 static pcre *cmp_log_regexp_domino = NULL;      /* Lotus Domino v6 format compiled RE */
54 
55 //pcre_extra *studied_log_regexp = NULL;
56 
57 
58 /*********************************************/
59 /* PARSE_RECORD - uhhh, you know...          */
60 /*********************************************/
61 
62 int
parse_record(char * buffer,struct log_struct * log_ptr)63 parse_record(char *buffer, struct log_struct *log_ptr)
64 //parse_record(char *buffer)
65 {
66     int auto_log_type = 0;
67 
68     /* clear out structure */
69 //    memset(&log_rec, 0, sizeof(struct log_struct));
70 
71     if (cmp_log_regexp == NULL) {
72         re_compile_all_regexes();
73         if (g_settings.settings.log_type == LOG_AUTO) {
74             auto_log_type = identify_log_format(buffer);
75             if (auto_log_type > 0) {
76                 g_settings.settings.log_type = auto_log_type;
77             } else {
78                 ERRVPRINT(VERBOSE0, "%s\n", _("Cannot recognise log format. Manually configure \"LogType\" in the config file."));
79                 exit(1);
80             }
81         }
82         switch (g_settings.settings.log_type) {
83         case LOG_FTP:
84             cmp_log_regexp = cmp_log_regexp_xferlog;
85             break;
86         case LOG_SQUID:
87             cmp_log_regexp = cmp_log_regexp_squid;
88             break;
89         case LOG_CLF:
90             cmp_log_regexp = cmp_log_regexp_clf;
91             break;
92         case LOG_COMBINED:
93             cmp_log_regexp = cmp_log_regexp_combined;
94             break;
95         case LOG_DOMINO:
96             cmp_log_regexp = cmp_log_regexp_domino;
97             break;
98         default:
99             ERRVPRINT(VERBOSE0, "%s %d\n", _("Unknown LOG Type Setting.:"), g_settings.settings.log_type);
100             exit(1);
101         }
102     }
103 
104     /* call appropriate handler */
105     switch (g_settings.settings.log_type) {
106     default:
107     case LOG_CLF:
108     case LOG_COMBINED:
109         return parse_record_web(buffer, log_ptr);
110         break;                                  /* clf   */
111     case LOG_FTP:
112         return parse_record_ftp(buffer, log_ptr);
113         break;                                  /* ftp   */
114     case LOG_SQUID:
115         return parse_record_squid(buffer, log_ptr);
116         break;                                  /* squid */
117     }
118 }
119 
120 /*********************************************/
121 /* PARSE_RECORD_FTP - ftp log handler        */
122 /*********************************************/
123 static int
parse_record_ftp(char * buffer,struct log_struct * log_rec)124 parse_record_ftp(char *buffer, struct log_struct *log_rec)
125 {
126     int ovector[OVECCOUNT];                     /* RE substring offsets array */
127     int rc;                                     /* RE Check return value */
128     int copy_substr_rtn;                        /* RE Check return from pcre_copy_substring */
129 
130     int buffer_length;
131 
132     char tmp_bytes[25 + 1];
133     char completion_status[2 + 1];
134 
135     buffer_length = (int) strlen(buffer);
136     rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
137     /* check for RE matching errors */
138     if (rc < 0) {
139         re_check_errors(rc);
140         return (0);
141     }
142 
143     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 1, log_rec->datetime, 29);
144     if (copy_substr_rtn < 0) {
145         error_substring_extract(copy_substr_rtn, 1);
146         return (0);
147     }
148 
149     /* Ignore time taken (in seconds) for now... */
150 
151     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 3, log_rec->hostname, MAXHOST);
152     if (copy_substr_rtn < 0) {
153         error_substring_extract(copy_substr_rtn, 3);
154         return (0);
155     }
156 
157     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 4, tmp_bytes, 20);
158     if (copy_substr_rtn < 0) {
159         error_substring_extract(copy_substr_rtn, 4);
160         return (0);
161     }
162     log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
163 
164     /* URL */
165     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 5, log_rec->url, MAXURL);
166     if (copy_substr_rtn < 0) {
167         error_substring_extract(copy_substr_rtn, 5);
168         return (0);
169     }
170 
171     /* Ignore Transfer Type */
172     /* Ignore special-action-flag */
173     /* Ignore Direction */
174 
175     /* User */
176     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 10, log_rec->ident, MAXIDENT);
177     if (copy_substr_rtn < 0) {
178         error_substring_extract(copy_substr_rtn, 10);
179         return (0);
180     }
181 
182     /* Completion Status - fake to a 200 or 206 */
183     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 14, completion_status, 2);
184     if (copy_substr_rtn < 0) {
185         error_substring_extract(copy_substr_rtn, 14);
186         return (0);
187     }
188     if (completion_status[0] == 'i') {
189         log_rec->resp_code = 206;
190     } else {
191         /* == c */
192         log_rec->resp_code = 200;
193     }
194 
195     return (1);
196 }
197 
198 /*********************************************
199  * PARSE_RECORD_WEB - web log handler        *
200  * parse with pcre							 *
201  *********************************************/
202 static int
parse_record_web(char * buffer,struct log_struct * log_rec)203 parse_record_web(char *buffer, struct log_struct *log_rec)
204 {
205     int ovector[OVECCOUNT];                     /* RE substring offsets array */
206     int rc;                                     /* RE Check return value */
207     int copy_substr_rtn;                        /* RE Check return from pcre_copy_substring */
208 
209     int buffer_length;
210 
211     char tmp_status[5 + 1];
212     char tmp_bytes[20 + 1];
213 
214     buffer_length = (int) strlen(buffer);
215 //    rc = pcre_exec (cmp_log_regexp, studied_log_regexp, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
216     rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
217     /* check for RE matching errors */
218     if (rc < 0) {
219         /* First see if a normal enhanced regex will work.
220          * If this fails, then see if we can get a Domino style match
221          *   If this works - switch to Domino Logs,
222          *   If Fails - Boom.
223          */
224         if ((cmp_log_regexp != cmp_log_regexp_domino) && (cmp_log_regexp == cmp_log_regexp_combined) && (g_settings.settings.log_type == LOG_COMBINED)) {
225             /* Attempt an enhanced log match */
226             VPRINT(VERBOSE1, "%s\n", _("Attempting COMBINED_ENHANCED Regular Expression"));
227             rc = pcre_exec(cmp_log_regexp_combined_enhanced, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
228 
229             /* Didn't work. Try Domino? */
230             if (rc < 0 && g_settings.flags.force_log_type == false) {
231                 /* Try a domino log format first - if is, switch to using domino checks instead */
232                 VPRINT(VERBOSE1, "%s\n", _("Attempting COMBINED_DOMINO Regular Expression"));
233                 rc = pcre_exec(cmp_log_regexp_domino, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
234                 if (rc >= 0) {
235                     /* Successfully matched as a Domino Log, apply domino RE from here on */
236                     /* FIXME: The default domino RegEx is perhaps not as quick - is based on the ENHANCED */
237                     VPRINT(VERBOSE1, "%s\n", _("Switching to DOMINO log format"));
238                     cmp_log_regexp = cmp_log_regexp_domino;
239                 }
240             }
241         }
242         if (rc < 0) {
243             re_check_errors(rc);
244             return (0);
245         }
246     }
247 
248     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_ADDRESS, log_rec->hostname, MAXHOST - 1);
249     if (copy_substr_rtn < 0) {
250         error_substring_extract(copy_substr_rtn, LF_NCSA_ADDRESS);
251         return (0);
252     }
253 
254     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_AUTHUSER, log_rec->ident, MAXIDENT - 1);
255     if (copy_substr_rtn < 0) {
256         error_substring_extract(copy_substr_rtn, LF_NCSA_USER);
257         return (0);
258     }
259 
260     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_DATE_TIME, log_rec->datetime, MAXDATETIME - 1);
261     if (copy_substr_rtn < 0) {
262         error_substring_extract(copy_substr_rtn, LF_NCSA_DATE_TIME);
263         return (0);
264     }
265 
266     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_URL, log_rec->url, MAXURL - 1);
267     if (copy_substr_rtn < 0) {
268         error_substring_extract(copy_substr_rtn, LF_NCSA_URL);
269         return (0);
270     }
271 
272     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_STATUS, tmp_status, 5);
273     if (copy_substr_rtn < 0) {
274         error_substring_extract(copy_substr_rtn, LF_NCSA_STATUS);
275         return (0);
276     }
277     log_rec->resp_code = atoi(tmp_status);
278 
279     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_BYTES, tmp_bytes, 20);
280     if (copy_substr_rtn < 0) {
281         error_substring_extract(copy_substr_rtn, LF_NCSA_BYTES);
282         return (0);
283     }
284     log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
285 
286     if (g_settings.settings.log_type == LOG_COMBINED) {
287         copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_REFERER, log_rec->refer, MAXREF - 1);
288         if (copy_substr_rtn < 0) {
289             error_substring_extract(copy_substr_rtn, LF_NCSA_REFERER);
290             return (0);
291         }
292         copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, LF_NCSA_BROWSER, log_rec->agent, MAXAGENT - 1);
293         if (copy_substr_rtn < 0) {
294             error_substring_extract(copy_substr_rtn, LF_NCSA_BROWSER);
295             return (0);
296         }
297     }
298     return (1);
299 }
300 
301 
302 /*********************************************/
303 /* PARSE_RECORD_SQUID - squid log handler    */
304 /*********************************************/
305 static int
parse_record_squid(char * buffer,struct log_struct * log_rec)306 parse_record_squid(char *buffer, struct log_struct *log_rec)
307 {
308     int ovector[OVECCOUNT];                     /* RE substring offsets array */
309     int rc;                                     /* RE Check return value */
310     int copy_substr_rtn;                        /* RE Check return from pcre_copy_substring */
311 
312     int buffer_length;
313 
314     char tmp_bytes[25 + 1];
315 
316     buffer_length = (int) strlen(buffer);
317     rc = pcre_exec(cmp_log_regexp, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
318     /* check for RE matching errors */
319     if (rc < 0) {
320         re_check_errors(rc);
321         return (0);
322     }
323 
324     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 1, log_rec->datetime, 29);
325     if (copy_substr_rtn < 0) {
326         error_substring_extract(copy_substr_rtn, 1);
327         return (0);
328     }
329 
330     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 4, log_rec->hostname, MAXHOST - 1);
331     if (copy_substr_rtn < 0) {
332         error_substring_extract(copy_substr_rtn, 1);
333         return (0);
334     }
335 
336     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 6, tmp_bytes, 20);
337     if (copy_substr_rtn < 0) {
338         error_substring_extract(copy_substr_rtn, 1);
339         return (0);
340     }
341     log_rec->resp_code = strtoul(tmp_bytes, NULL, 10);
342 
343     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 7, tmp_bytes, 20);
344     if (copy_substr_rtn < 0) {
345         error_substring_extract(copy_substr_rtn, 1);
346         return (0);
347     }
348     log_rec->xfer_size = strtoul(tmp_bytes, NULL, 10);
349 
350     copy_substr_rtn = pcre_copy_substring(buffer, ovector, rc, 9, log_rec->url, MAXURL - 1);
351     if (copy_substr_rtn < 0) {
352         error_substring_extract(copy_substr_rtn, 1);
353         return (0);
354     }
355 
356     return (1);
357 }
358 
359 /************************************************************************
360  * parse_check_not_page                                                 *
361  *                                                                      *
362  * Given a URL (field from a log line)                                  *
363  *   determine if this URL is a page or not                             *
364  *                                                                      *
365  * This function is the opposite of parse_is_page, and is only called   *
366  * from same. It takes the linked list of "NotPageType" and applies     *
367  * and RE against a log line built from that list.                      *
368  *                                                                      *
369  * A successfull match means that this line is NOT a page               *
370  *                                                                      *
371  * Arguments:                                                           *
372  * -----------                                                          *
373  * char *url      -  A URL field, typically: log_rec->url                *
374  *                                                                      *
375  * Returns:                                                             *
376  * -----------                                                          *
377  * Boolean.  True if, yes this is a page                                *
378  *           False is not a page. ie Successfull match.                 *
379  *                                                                      *
380  ************************************************************************/
381 bool
parse_check_not_page(char * url)382 parse_check_not_page(char *url)
383 {
384     char regex_page[MAX_RE_LENGTH + 1] = "";    /* Hold the PAGE RE */
385     static pcre *cmp_regex_page = NULL;         /* NotPage compiled RE */
386 
387     const char *error;                          /* RE error pointer, offset */
388     int erroffset;                              /* RE error value */
389     int str_length, tmp_length;
390     int rc;                                     /* RE Check return value */
391     static int max_type_length = 0;
392     char *str_start = url;
393 
394     LISTPTR lptr = not_page_type;
395 
396     /* Compile both RegEx's */
397     if (cmp_regex_page == NULL) {
398         /* Build the RegEx first, loop thru the PageType list & reverse */
399         strcat(regex_page, "\\.(");
400         while (lptr != NULL) {
401             strcat(regex_page, lptr->string);
402             str_length = strlen(lptr->string);
403             if (str_length > max_type_length) {
404                 max_type_length = str_length;
405             }
406             lptr = lptr->next;
407             if (lptr != NULL) {
408                 strcat(regex_page, "|");
409             }
410         }
411         strcat(regex_page, ")$");
412         VPRINT(VERBOSE2, "PCRE: New NotPAGE RegEx: '%s',  Max: %d\n", regex_page, max_type_length);
413 
414         /* Compile the RegEx */
415         cmp_regex_page = pcre_compile(regex_page, 0, &error, &erroffset, NULL);
416         VPRINT(VERBOSE2, "PCRE: Compile PAGE%s", "\n");
417         if (cmp_regex_page == NULL) {
418             re_compile_failed(erroffset, error, regex_page);
419         }
420         max_type_length++;                      /* Increase by 1 for starting '.' */
421     }
422 
423     str_length = strlen(url);
424     if (str_length < max_type_length) {
425         tmp_length = str_length;
426     } else {
427         tmp_length = max_type_length;
428         str_start = url + str_length - max_type_length;
429     }
430     VPRINT(VERBOSE4, "  Was: '%s', Is: %s\n", url, str_start);
431 
432     rc = pcre_exec(cmp_regex_page, NULL, str_start, tmp_length, 0, 0, NULL, 0);
433     /* check for RE matching */
434     if (rc >= 0) {
435         /* Have matched! */
436         return (false);
437     }
438     return (true);
439 }
440 
441 /************************************************************************
442  * parse_is_page                                                        *
443  *                                                                      *
444  * Given a URL (field from a log line)                                  *
445  *   determine if this URL is a page or not                             *
446  *                                                                      *
447  * Logic:                                                               *
448  * -----------                                                          *
449  * The RegEx and URL provided are reversed for efficiency - we only     *
450  *  want to match the end of a URL, not the entire thing.               *
451  * Firstly builds the RegEx. Does this by reversing the provided        *
452  *  PageType config options, and wrappering appropriate RE around.
453  *                                                                      *
454  * Arguments:                                                           *
455  * -----------                                                          *
456  * char *url      -  A URL field, typically: log_rec->url                *
457  *                                                                      *
458  * Returns:                                                             *
459  * -----------                                                          *
460  * Boolean.  True if, yes this is a page                                *
461  *           False in all other cases. Including invalid args.          *
462  *                                                                      *
463  ************************************************************************/
464 
465 bool
parse_is_page(char * url)466 parse_is_page(char *url)
467 {
468     char regex_page[MAX_RE_LENGTH + 1] = "";    /* Hold the PAGE RE */
469     static pcre *cmp_regex_page = NULL;         /* Page compiled RE */
470 
471     const char *error;                          /* RE error pointer, offset */
472     int erroffset;                              /* RE error value */
473     int str_length;
474     int rc;                                     /* RE Check return value */
475     char reverse[MAXURL + 1] = "";
476     int i, j = 0;
477     LISTPTR lptr;
478 
479     if (not_page_type != NULL) {
480         return (parse_check_not_page(url));
481     }
482 
483     lptr = page_type;
484 
485     /* Compile both RegEx's */
486     if (cmp_regex_page == NULL) {
487         /* Build the RegEx first, loop thru the PageType list & reverse */
488         strcat(regex_page, "^(\\/|(");
489         while (lptr != NULL) {
490             str_length = (int) strlen(lptr->string);
491             j = 0;
492             for (i = str_length - 1; i >= 0; i--) {
493                 if (lptr->string[i] == '*') {
494                     reverse[j] = '.';
495                     j++;
496                 }
497                 reverse[j] = lptr->string[i];
498                 j++;
499             }
500             reverse[j] = '\0';
501             strcat(regex_page, reverse);
502             lptr = lptr->next;
503             if (lptr != NULL) {
504                 strcat(regex_page, "|");
505             }
506         }
507         strcat(regex_page, ")\\.|[^./]+\\/)");
508         VPRINT(VERBOSE2, "PCRE: New PAGE RegEx: '%s'\n", regex_page);
509 
510         /* Compile the RegEx */
511         cmp_regex_page = pcre_compile(regex_page, 0, &error, &erroffset, NULL);
512         VPRINT(VERBOSE2, "PCRE: Compile PAGE%s", "\n");
513         if (cmp_regex_page == NULL) {
514             re_compile_failed(erroffset, error, regex_page);
515         }
516     }
517 
518     str_length = (int) strlen(url);
519     j = 0;
520     for (i = str_length - 1; i >= 0; i--) {
521         /* Use pointer math - faster at loss of clarity */
522         *(reverse + j) = *(url + i);
523         j++;
524     }
525     *(reverse + j) = '\0';                      /* Probably not needed as we provide the length... */
526 
527     rc = pcre_exec(cmp_regex_page, NULL, reverse, str_length, 0, 0, NULL, 0);
528     /* check for RE matching */
529     if (rc >= 0) {
530         /* Have matched! */
531         return (true);
532     }
533     return (false);
534 }                                               /* parse_is_page */
535 
536 
537 /************************************************************************
538  * identify_log_format                                                  *
539  *                                                                      *
540  * Attempt to identify the type of log format we've been given.         *
541  * Returns the LOG_type as defined in awffull.h                         *
542  * returns -1 if unknown.                                               *
543  *                                                                      *
544  * Requires a line of the log to attempt to process                     *
545  ************************************************************************/
546 static int
identify_log_format(char * buffer)547 identify_log_format(char *buffer)
548 {
549     int ovector[OVECCOUNT];                     /* RE substring offsets array */
550     int rc;                                     /* RE Check return value */
551     int buffer_length;
552 
553 
554     buffer_length = (int) strlen(buffer);
555 
556     /* Check for COMBINED */
557     rc = pcre_exec(cmp_log_regexp_combined, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
558     if (rc >= 0) {
559         /* Matches against COMBINED */
560         VPRINT(VERBOSE1, "%s\n", _("Using COMBINED Log Format"));
561         return (LOG_COMBINED);
562     }
563 
564     /* Check for COMBINED_DOMINO */
565     /* If the first line is a non logged in user, it'll probably register as COMBINED... */
566     rc = pcre_exec(cmp_log_regexp_domino, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
567     if (rc >= 0) {
568         /* Matches against COMBINED_DOMINO */
569         VPRINT(VERBOSE1, "%s\n", _("Using COMBINED_DOMINO Log Format"));
570         return (LOG_DOMINO);
571     }
572 
573     rc = pcre_exec(cmp_log_regexp_clf, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
574     if (rc >= 0) {
575         /* Matches against COMBINED */
576         VPRINT(VERBOSE1, "%s\n", _("Using CLF Log Format"));
577         return (LOG_CLF);
578     }
579 
580     rc = pcre_exec(cmp_log_regexp_xferlog, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
581     if (rc >= 0) {
582         /* Matches against FTP/XFERLOG */
583         VPRINT(VERBOSE1, "%s\n", _("Using FTP/XFERLOG Log Format"));
584         /* Invalid tables for this log type. Zero them away and hence not display. */
585         g_settings.top.agents = 0;
586         g_settings.top.refs = 0;
587         return (LOG_FTP);
588     }
589 
590     rc = pcre_exec(cmp_log_regexp_squid, NULL, buffer, buffer_length, 0, 0, ovector, OVECCOUNT);
591     if (rc >= 0) {
592         /* Matches against SQUID */
593         VPRINT(VERBOSE1, "%s\n", _("Using SQUID Log Format"));
594         /* Invalid tables for this log type. Zero them away and hence not display. */
595         g_settings.top.agents = 0;
596         g_settings.top.refs = 0;
597         return (LOG_SQUID);
598     }
599 
600     VPRINT(VERBOSE1, "%s\n", _("Unrecognised Log Format"));
601     return (-1);                                /* Failed to match any, unknown format */
602 }
603 
604 
605 /************************************************************************
606  * re_compile_all_regexs                                                *
607  *                                                                      *
608  * Does what the name says, in a single function we compile all         *
609  *  possibly used Regular expressions.                                  *
610  * Either forcibly exits on any failure, or happily finishes.           *
611  * No values needed or returned.                                        *
612  *                                                                      *
613  * Assigns the RE's to the various globals:                             *
614  *   cmp_log_regexp_*                                                   *
615  ************************************************************************/
616 static void
re_compile_all_regexes(void)617 re_compile_all_regexes(void)
618 {
619     char log_regexp_clf[MAX_RE_LENGTH] = PATTERN_CLF;
620     char log_regexp_combined[MAX_RE_LENGTH] = PATTERN_COMBINED;
621     char log_regexp_combined_enhanced[MAX_RE_LENGTH] = PATTERN_COMBINED_ENHANCED;
622     char log_regexp_xferlog[MAX_RE_LENGTH] = PATTERN_XFERLOG;
623     char log_regexp_squid[MAX_RE_LENGTH] = PATTERN_SQUID;
624     char log_regexp_domino[MAX_RE_LENGTH] = PATTERN_DOMINO;
625 
626     const char *error;                          /* RE error pointer, offset */
627     int erroffset;                              /* RE error value */
628 
629     /* CLF */
630     cmp_log_regexp_clf = pcre_compile(log_regexp_clf, 0, &error, &erroffset, NULL);
631     VPRINT(VERBOSE2, "PCRE: Compile CLF%s", "\n");
632     if (cmp_log_regexp_clf == NULL) {
633         re_compile_failed(erroffset, error, log_regexp_clf);
634     }
635 
636     /* Combined */
637     cmp_log_regexp_combined = pcre_compile(log_regexp_combined, 0, &error, &erroffset, NULL);
638     VPRINT(VERBOSE2, "PCRE: Compile COMBINED%s", "\n");
639     if (cmp_log_regexp_combined == NULL) {
640         re_compile_failed(erroffset, error, log_regexp_combined);
641     }
642 
643     /* Enhanced Combined */
644     cmp_log_regexp_combined_enhanced = pcre_compile(log_regexp_combined_enhanced, 0, &error, &erroffset, NULL);
645     VPRINT(VERBOSE2, "PCRE: Compile COMBINED_ENHANCED%s", "\n");
646     if (cmp_log_regexp_combined_enhanced == NULL) {
647         re_compile_failed(erroffset, error, log_regexp_combined_enhanced);
648     }
649 
650     /* FTP XFERLOG */
651     cmp_log_regexp_xferlog = pcre_compile(log_regexp_xferlog, 0, &error, &erroffset, NULL);
652     VPRINT(VERBOSE2, "PCRE: Compile PATTERN_XFERLOG%s", "\n");
653     if (cmp_log_regexp_xferlog == NULL) {
654         re_compile_failed(erroffset, error, log_regexp_xferlog);
655     }
656 
657     /* SQUID LOG */
658     cmp_log_regexp_squid = pcre_compile(log_regexp_squid, 0, &error, &erroffset, NULL);
659     VPRINT(VERBOSE2, "PCRE: Compile PATTERN_SQUID%s", "\n");
660     if (cmp_log_regexp_squid == NULL) {
661         re_compile_failed(erroffset, error, log_regexp_squid);
662     }
663 
664     /* DOMINO LOG */
665     cmp_log_regexp_domino = pcre_compile(log_regexp_domino, 0, &error, &erroffset, NULL);
666     VPRINT(VERBOSE2, "PCRE: Compile PATTERN_COMBINED_DOMINO%s", "\n");
667     if (cmp_log_regexp_domino == NULL) {
668         re_compile_failed(erroffset, error, log_regexp_domino);
669     }
670 }
671 
672 
673 /************************************************************************
674  * re_check_errors                                                      *
675  *                                                                      *
676  * After an RE check, deal with any errors                              *
677  * err: value returned from pcre_exec                                   *
678  * str_ptr: String that failed to match                                 *
679  ************************************************************************/
680 static void
re_check_errors(int err)681 re_check_errors(int err)
682 {
683 
684     /* Matching failed: handle error cases */
685     switch (err) {
686     case PCRE_ERROR_NOMATCH:
687         ERRVPRINT(VERBOSE1, "%s", _("Warning: No Regular Expression Match. "));
688         break;
689         /*  Leave out the more explicit failure messages - we show the number, so can be found.
690            case PCRE_ERROR_NULL:
691            case PCRE_ERROR_BADOPTION:
692            case PCRE_ERROR_BADMAGIC:
693            case PCRE_ERROR_UNKNOWN_NODE:
694            case PCRE_ERROR_NOMEMORY:
695            case PCRE_ERROR_NOSUBSTRING:
696            case PCRE_ERROR_MATCHLIMIT:
697            case PCRE_ERROR_CALLOUT:
698            case PCRE_ERROR_BADUTF8:
699            case PCRE_ERROR_BADUTF8_OFFSET:
700            case PCRE_ERROR_PARTIAL:
701            case PCRE_ERROR_BAD_PARTIAL:
702            case PCRE_ERROR_INTERNAL:
703            case PCRE_ERROR_BADCOUNT:
704          */
705     default:
706         ERRVPRINT(VERBOSE1, "%s %d\n", _("Warning: Regular Expression Error:"), err);
707         break;
708     }
709 }
710 
711 
712 /************************************************************************
713  * re_compile_failed                                                    *
714  *                                                                      *
715  * Display a failed RE Compile & where                                  *
716  * FATAL failure. Will exit the run.                                    *
717  ************************************************************************/
718 static void
re_compile_failed(int err,const char * err_offset,char * re_str)719 re_compile_failed(int err, const char *err_offset, char *re_str)
720 {
721     ERRVPRINT(VERBOSE0, "%s %d %s\n", _("FATAL ERROR! PCRE compilation failed at offset"), err, err_offset);
722     ERRVPRINT(VERBOSE0, "%s %s\n", _("  Using Regular Expression:"), re_str);
723     exit(1);                                    /* FIXME - table of exit codes! */
724 }
725 
726 
727 /************************************************************************
728  * error_substring_extract                                              *
729  *                                                                      *
730  * Display a failed substring extraction                                *
731  * Error Only, as this should have failed the RE                        *
732  ************************************************************************/
733 static void
error_substring_extract(int err,int substr_idx)734 error_substring_extract(int err, int substr_idx)
735 {
736     ERRVPRINT(VERBOSE1, "%s %d\n", _("Error: Failed to extract substring:"), substr_idx);
737     switch (err) {
738     case PCRE_ERROR_NOMEMORY:
739         ERRVPRINT(VERBOSE2, "  PCRE: Insufficient Memory\n");
740         break;
741     case PCRE_ERROR_NOSUBSTRING:
742         ERRVPRINT(VERBOSE2, "  PCRE: Substring doesn't exist.\n");
743         break;
744     default:
745         ERRVPRINT(VERBOSE2, "  Unknown PCRE Error: %d\n", err);
746         break;
747     }
748 }
749 
750 
751 /************************************************************************
752  ************************************************************************
753  *                      END OF FILE                                     *
754  ************************************************************************/
755