1#############################################################################
2# Copyright (c) 2015 Balabit
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms of the GNU General Public License version 2 as published
6# by the Free Software Foundation, or (at your option) any later version.
7#
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU General Public License for more details.
12#
13# You should have received a copy of the GNU General Public License
14# along with this program; if not, write to the Free Software
15# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16#
17# As an additional exemption you are allowed to compile & link against the
18# OpenSSL libraries as published by the OpenSSL project. See the file
19# COPYING for details.
20#
21#############################################################################
22
23# Parse apache access.log
24#
25# Formats recognized:
26#
27# LogFormat "%v:%p %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" vhost_combined
28#    virtualhost:443 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"
29#
30# LogFormat "%v:%p %h %l %u %t \"%r\" %>s %b" vhost_common
31#    virtualhost:443 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
32#
33# LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" combined
34#    127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"
35#
36# LogFormat "%h %l %u %t \"%r\" %>s %b" common
37#    127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
block parser apache-accesslog-parser-vhost(prefix() template()) {
    # Handles the vhost_combined and vhost_common layouts, i.e. lines that
    # start with a vhost:port token ahead of the usual access log fields.
    channel {
        # Only accept input whose first token looks like name:port followed
        # by a space; anything else is left to the non-vhost parser.
        filter {
            match("^[A-Za-z0-9\-\._]+:[0-9]+ " template(`template`));
        };

        parser {
            # Step 1: split the line on spaces. Double quotes and square
            # brackets keep the request, referrer, agent and timestamp
            # fields together. The leading vhost:port token is stored
            # unprefixed in the temporary column named 2.
            csv-parser(
                template(`template`)
                delimiters(" ")
                quote-pairs('""[]')
                dialect(escape-double-char)
                flags(strip-whitespace)
                columns(
                    "2",
                    "`prefix`clientip",
                    "`prefix`ident",
                    "`prefix`auth",
                    "`prefix`timestamp",
                    "`prefix`rawrequest",
                    "`prefix`response",
                    "`prefix`bytes",
                    "`prefix`referrer",
                    "`prefix`agent"
                )
            );

            # Step 2: break the temporary column apart at the colon to
            # obtain the prefixed vhost and port values.
            csv-parser(
                template("$2")
                delimiters(":")
                dialect(escape-none)
                prefix(`prefix`)
                columns(
                    "vhost",
                    "port"
                )
            );
        };
    };
};
63
64# combined & common format without vhost
65# LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
66# LogFormat "%h %l %u %t \"%r\" %>s %b" common
block parser apache-accesslog-parser-combined(prefix() template()) {
    # Splits a line that has no leading vhost:port token into space
    # separated fields. Double quotes and square brackets group the
    # request, referrer, agent and timestamp fields. Every column name
    # receives the caller supplied prefix.
    channel {
        parser {
            csv-parser(
                template(`template`)
                prefix(`prefix`)
                delimiters(" ")
                quote-pairs('""[]')
                dialect(escape-double-char)
                flags(strip-whitespace)
                columns(
                    "clientip",
                    "ident",
                    "auth",
                    "timestamp",
                    "rawrequest",
                    "response",
                    "bytes",
                    "referrer",
                    "agent"
                )
            );
        };
    };
};
83
block parser apache-accesslog-parser(prefix(".apache.") template("${MESSAGE}")) {
    # Entry point: parses an Apache access log line taken from the template
    # argument and stores the extracted fields under the given prefix,
    # following a logstash-like schema:
    # https://github.com/elastic/logstash/blob/v1.4.2/patterns/grok-patterns#L90
    channel {

        # Try the formats that begin with vhost:port first and fall back
        # to the standard formats when that branch does not match.
        if {
            parser {
                apache-accesslog-parser-vhost(
                    prefix(`prefix`)
                    template(`template`)
                );
            };
        } else {
            parser {
                apache-accesslog-parser-combined(
                    prefix(`prefix`)
                    template(`template`)
                );
            };
        };

        # Munge values to match the Kibana/elastic schema. These steps are
        # common to all supported formats.
        parser {
            # Break the raw request line into verb, request and httpversion.
            csv-parser(
                template("${`prefix`rawrequest}")
                prefix(`prefix`)
                dialect(escape-none)
                flags(strip-whitespace)
                delimiters(" ")
                columns(
                    "verb",
                    "request",
                    "httpversion"
                )
            );

            # Interpret the timestamp field, e.g. 10/Oct/2000:13:55:36 -0700
            date-parser(
                format("%d/%b/%Y:%H:%M:%S %z")
                template("${`prefix`timestamp}")
            );
        };

        # Strip the leading HTTP/ from httpversion and keep what follows it.
        rewrite {
            subst(
                "^HTTP/(.*)$",
                "$1",
                value("`prefix`httpversion")
            );
        };
    };
};
118