1#!@PERL@
2use strict;
3use warnings;
4use DBI;
5use English qw( -no_match_vars );
6use Getopt::Long;
7use Pod::Usage;
8
9$|=1;
10
11=pod
12
13=head1 NAME
14
15log_db_daemon - Database logging daemon for Squid
16
17Version 0.5.
18
19=head1 SYNOPSIS
20
21log_db_daemon DSN [options]
22
23=head1 DESCRIPTION
24
25This program writes Squid access.log entries to a database.
26Presently only accepts the B<squid> native log format.
27
28The script has been developed and tested in the following environment:
29
30=over 4
31
32=item squid-2.7 Squid-3.2
33
34=item mysql 5.0.26 and 5.1
35
36=item perl 5.8.8
37
38=item OpenSUSE 10.2
39
40=back
41
42=head1 OPTIONS
43
44=over 12
45
46=item   B<DSN>
47
48Database DSN encoded as a path. This is sent as the access_log file path.
49
50Sample configuration:
51  access_log daemon:/host/database/table/username/password squid
52
53  to leave a parameter unspecified use a double slash:
54  access_log daemon://database/table/username/password squid
55
56Default "DBI:mysql:database=squid"
57
58=item   B<--debug>
59
60Write debug info to stderr.
61
62=back
63
64=head1 CONFIGURATION
65
66=head2 Squid configuration
67
68=head3 access_log directive
69
70The path to the access log file is used to provide the database connection parameters.
71
72  access_log daemon:/mysql_host:port/database/table/username/password squid
73
74The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.
75
76The last parameter tells squid which log format to use when writing lines to the log daemon.
77Presently B<squid> format is supported.
78
79=over 4
80
81=item mysql_host:port
82
83Host where the mysql server is running. If left empty, 'localhost' is assumed.
84
85=item database
86
87Name of the database to connect to. If left empty, 'squid_log' is assumed.
88
89=item table
90
91Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.
92
93=item username
94
95Username to use when connecting to the database. If left empty, 'squid' is assumed.
96
97=item password
98
99Password to use when connecting to the database. If left empty, no password is used.
100
101=back
102
103To leave all fields to their default values, you can use a single slash:
104
105  access_log daemon:/ squid
106
107To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings:
108
109  access_log daemon://///password squid
110
111=head3 logfile_daemon directive
112
113This is the current way of telling squid where the logfile daemon resides.
114
115  logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl
116
117The script must be copied to the location specified in the directive.
118
119=head2 Database configuration
120
121Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'.
122
123=head3 Database
124
125Create the database:
126
127  CREATE DATABASE squid_log;
128
129=head3 User
130
131Create the user:
132
133  GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
134  FLUSH PRIVILEGES;
135
136Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries.
137
138=head3 Table
139
140The Daemon will attempt to initialize this table if none exists when it starts.
141
142The table created should look like:
143
144  CREATE TABLE access_log (
145    id                   INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
146    time_since_epoch     DECIMAL(15,3),
147    time_response        INTEGER,
148    ip_client            CHAR(15),
149    ip_server            CHAR(15),
150    http_status_code     VARCHAR(10),
151    http_reply_size      INTEGER,
152    http_method          VARCHAR(20),
153    http_url             TEXT,
154    http_username        VARCHAR(20),
155    http_mime_type       VARCHAR(50),
156    squid_hier_status    VARCHAR(20),
157    squid_request_status VARCHAR(20)
158  );
159
160=head1 DATA EXTRACTION
161
162=head2 Sample queries.
163
164=over 4
165
166=item Clients accessing the cache
167
168  SELECT DISTINCT ip_client FROM access_log;
169
170=item Number of request per day
171
172  SELECT
173    DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
174    COUNT(*) AS num_of_requests
175  FROM access_log
176  GROUP BY 1
177  ORDER BY 1;
178
179=item Request status count
180
181To obtain the raw count of each request status:
182
183  SELECT squid_request_status, COUNT(*) AS n
184  FROM access_log
185  GROUP BY squid_request_status
186  ORDER BY 2 DESC;
187
188To calculate the percentage of each request status:
189
190  SELECT
191    squid_request_status,
192    (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
193  FROM access_log
194  GROUP BY squid_request_status
195  ORDER BY 2 DESC;
196
197To distinguish only between HITs and MISSes:
198
199  SELECT
200    'hits',
201    (SELECT COUNT(*)
202    FROM access_log
203    WHERE squid_request_status LIKE '%HIT%')
204    /
205    (SELECT COUNT(*) FROM access_log)*100
206    AS percentage
207  UNION
208  SELECT
209    'misses',
210    (SELECT COUNT(*)
211    FROM access_log
212    WHERE squid_request_status LIKE '%MISS%')
213    /
214    (SELECT COUNT(*) FROM access_log)*100
215    AS percentage;
216
217=item Response time ranges
218
219  SELECT
220    '0..500',
221    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
222  FROM access_log
223  WHERE time_response >= 0 AND time_response < 500
224  UNION
225  SELECT
226    '500..1000',
227    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
228  FROM access_log
229  WHERE time_response >= 500 AND time_response < 1000
230  UNION
231  SELECT
232    '1000..2000',
233    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
234  FROM access_log
235  WHERE time_response >= 1000 AND time_response < 2000
236  UNION
237  SELECT
238    '>= 2000',
239    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
240  FROM access_log
241  WHERE time_response >= 2000;
242
243=item Traffic by mime type
244
245  SELECT
246    http_mime_type,
247    SUM(http_reply_size) as total_bytes
248  FROM access_log
249  GROUP BY http_mime_type
250  ORDER BY 2 DESC;
251
252=item Traffic by client
253
254  SELECT
255    ip_client,
256    SUM(http_reply_size) AS total_bytes
257  FROM access_log
258  GROUP BY 1
259  ORDER BY 2 DESC;
260
261=back
262
263=head1 KNOWN ISSUES
264
265=head2 Speed issues
266
267The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.
268
269Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column.
270
271=head2 Table cleanup
272
273This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem.
274
275One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to calculate those values.
276
277Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands.
278
279=head2 Testing
280
281This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs.
282
283=head1 AUTHOR
284
285This program was written by
286I<Marcello Romani <marcello.romani@libero.it>> ,
287I<Amos Jeffries <amosjeffries@squid-cache.org>>
288
289=head1 COPYRIGHT
290
291 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
292 *
293 * Squid software is distributed under GPLv2+ license and includes
294 * contributions from numerous individuals and organizations.
295 * Please see the COPYING and CONTRIBUTORS files for details.
296
297Copyright (C) 2008 by Marcello Romani
298
299This library is free software; you can redistribute it and/or modify
300it under the same terms as Perl itself, either Perl version 5.8.8 or,
301at your option, any later version of Perl 5 you may have available.
302
303=head1 QUESTIONS
304
305Questions on the usage of this program can be sent to the I<Squid Users mailing list <squid-users@lists.squid-cache.org>>
306
307=head1 REPORTING BUGS
308
309Bug reports need to be made in English.
310See http://wiki.squid-cache.org/SquidFaq/BugReporting for details of what you need to include with your bug report.
311
312Report bugs or bug fixes using http://bugs.squid-cache.org/
313
314Report serious security bugs to I<Squid Bugs <squid-bugs@lists.squid-cache.org>>
315
316Report ideas for new improvements to the I<Squid Developers mailing list <squid-dev@lists.squid-cache.org>>
317
318=head1 SEE ALSO
319
320squid (8), GPL (7),
321
322The Squid FAQ wiki http://wiki.squid-cache.org/SquidFaq
323
324The Squid Configuration Manual http://www.squid-cache.org/Doc/config/
325
326=cut
327
328# the first argument to this script is the log file path describing the DSN
329my $log_file = shift;
330
331# others may be options
332my $debug = 0;
333GetOptions(
334        'debug' => \$debug,
335        );
336
337
338# utility routine to print messages on stderr (so they appear in cache log)
339# without using warn, which would clutter the log with source line numbers
340sub log_info {
341    my $msg = shift;
342    print STDERR "$msg\n";
343}
344
345# we use logfile to pass database access information to this script
346# sample configuration:
347# access_log daemon:/host/database/table/username/password squid
348# to let a parmeter unspecified, e.g. the database host, use a double slash:
349# access_log daemon://database/table/username/password squid
350my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms;
351
352if ( !$host ) {
353    $host = 'localhost';
354    log_info("Database host not specified. Using $host.");
355}
356
357if ( !$database ) {
358    $database = 'squid_log';
359    log_info("Database name not specified. Using $database.");
360}
361
362if ( !$table ) {
363    $table = 'access_log';
364    log_info("Table parameter not specified. Using $table.");
365}
366
367if ( !$user ) {
368    $user = 'squid';
369    log_info("User parameter not specified. Using $user.");
370}
371
372if ( !$pass ) {
373    log_info('No password specified. Connecting with NO password.');
374}
375
376# fields that we should have in the table
377# Order here must match the order of fields in the Log format and parse() output array.
378my @db_fields = qw(
379    id
380    time_since_epoch
381    time_response
382    ip_client
383    squid_request_status
384    http_status_code
385    http_reply_size
386    http_method
387    http_url
388    http_username
389    squid_hier_status
390    ip_server
391    http_mime_type
392);
393
394# perform db connection
395my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : "");
396my $dbh;
397my $sth;
398eval {
399    warn "Connecting... dsn='$dsn', username='$user', password='...'";
400    $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 });
401};
402if ($EVAL_ERROR) {
403    die "Cannot connect to database: $DBI::errstr";
404}
405
406
407# a simple test to assure the specified table exists
408eval {
409    my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
410    my $sth = $dbh->prepare($q);
411    $sth->execute;
412};
413if ($EVAL_ERROR) {
414    # run a query to create the table of required syntax
415    my $create_query = 'CREATE TABLE ' . $table . ' (' .
416    " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
417    " time_since_epoch     DECIMAL(15,3)," .
418    " time_response        INTEGER," .
419    " ip_client            CHAR(15)," .
420    " ip_server            CHAR(15)," .
421    " http_status_code     VARCHAR(10)," .
422    " http_reply_size      INTEGER," .
423    " http_method          VARCHAR(20)," .
424    " http_url             TEXT," .
425    " http_username        VARCHAR(20)," .
426    " http_mime_type       VARCHAR(50)," .
427    " squid_request_status VARCHAR(50)," .
428    " squid_hier_status    VARCHAR(20)" .
429    ");" ;
430
431    $sth = $dbh->prepare($create_query);
432    $sth->execute;
433    # test again and fail hard if it is still broken.
434    eval {
435        my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
436        my $sth = $dbh->prepare($q);
437        $sth->execute;
438    };
439    if ($EVAL_ERROR) {
440        die "Error initializing database table: $EVAL_ERROR";
441    };
442}
443# test
444
445# for better performance, prepare the statement at startup
446eval {
447    my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')';
448    #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
449    $sth = $dbh->prepare($q);
450};
451if ($EVAL_ERROR) {
452    die "Error while preparing sql statement: $EVAL_ERROR";
453}
454
455sub parse($) {
456    my ($line) = @_;
457    my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/;
458}
459
460# main loop
461while (my $line = <>) {
462    chomp $line;
463
464    my $cmd = substr($line, 0, 1);      # extract command byte
465
466    if ( $cmd eq 'L' ) {
467        my @log_entry = parse($line);
468        eval {                  # we catch db errors to avoid crashing squid in case something goes wrong...
469            $sth->execute(@log_entry) or die $sth->errstr
470        };
471        if ( $EVAL_ERROR ) {    # leave a trace of the error in the logs
472            warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')';
473        }
474    }
475}
476
477$dbh->disconnect();
478
479__END__
480