1#!@PERL@ 2use strict; 3use warnings; 4use DBI; 5use English qw( -no_match_vars ); 6use Getopt::Long; 7use Pod::Usage; 8 9$|=1; 10 11=pod 12 13=head1 NAME 14 15log_db_daemon - Database logging daemon for Squid 16 17Version 0.5. 18 19=head1 SYNOPSIS 20 21log_db_daemon DSN [options] 22 23=head1 DESCRIPTION 24 25This program writes Squid access.log entries to a database. 26Presently only accepts the B<squid> native log format. 27 28The script has been developed and tested in the following environment: 29 30=over 4 31 32=item squid-2.7 Squid-3.2 33 34=item mysql 5.0.26 and 5.1 35 36=item perl 5.8.8 37 38=item OpenSUSE 10.2 39 40=back 41 42=head1 OPTIONS 43 44=over 12 45 46=item B<DSN> 47 48Database DSN encoded as a path. This is sent as the access_log file path. 49 50Sample configuration: 51 access_log daemon:/host/database/table/username/password squid 52 53 to leave a parameter unspecified use a double slash: 54 access_log daemon://database/table/username/password squid 55 56Default "DBI:mysql:database=squid" 57 58=item B<--debug> 59 60Write debug info to stderr. 61 62=back 63 64=head1 CONFIGURATION 65 66=head2 Squid configuration 67 68=head3 access_log directive 69 70The path to the access log file is used to provide the database connection parameters. 71 72 access_log daemon:/mysql_host:port/database/table/username/password squid 73 74The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging. 75 76The last parameter tells squid which log format to use when writing lines to the log daemon. 77Presently B<squid> format is supported. 78 79=over 4 80 81=item mysql_host:port 82 83Host where the mysql server is running. If left empty, 'localhost' is assumed. 84 85=item database 86 87Name of the database to connect to. If left empty, 'squid_log' is assumed. 88 89=item table 90 91Name of the database table where log lines are stored. If left empty, 'access_log' is assumed. 92 93=item username 94 95Username to use when connecting to the database. If left empty, 'squid' is assumed. 96 97=item password 98 99Password to use when connecting to the database. If left empty, no password is used. 100 101=back 102 103To leave all fields to their default values, you can use a single slash: 104 105 access_log daemon:/ squid 106 107To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings: 108 109 access_log daemon://///password squid 110 111=head3 logfile_daemon directive 112 113This is the current way of telling squid where the logfile daemon resides. 114 115 logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl 116 117The script must be copied to the location specified in the directive. 118 119=head2 Database configuration 120 121Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'. 122 123=head3 Database 124 125Create the database: 126 127 CREATE DATABASE squid_log; 128 129=head3 User 130 131Create the user: 132 133 GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid'; 134 FLUSH PRIVILEGES; 135 136Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries. 137 138=head3 Table 139 140The Daemon will attempt to initialize this table if none exists when it starts. 141 142The table created should look like: 143 144 CREATE TABLE access_log ( 145 id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, 146 time_since_epoch DECIMAL(15,3), 147 time_response INTEGER, 148 ip_client CHAR(15), 149 ip_server CHAR(15), 150 http_status_code VARCHAR(10), 151 http_reply_size INTEGER, 152 http_method VARCHAR(20), 153 http_url TEXT, 154 http_username VARCHAR(20), 155 http_mime_type VARCHAR(50), 156 squid_hier_status VARCHAR(20), 157 squid_request_status VARCHAR(20) 158 ); 159 160=head1 DATA EXTRACTION 161 162=head2 Sample queries. 163 164=over 4 165 166=item Clients accessing the cache 167 168 SELECT DISTINCT ip_client FROM access_log; 169 170=item Number of request per day 171 172 SELECT 173 DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day, 174 COUNT(*) AS num_of_requests 175 FROM access_log 176 GROUP BY 1 177 ORDER BY 1; 178 179=item Request status count 180 181To obtain the raw count of each request status: 182 183 SELECT squid_request_status, COUNT(*) AS n 184 FROM access_log 185 GROUP BY squid_request_status 186 ORDER BY 2 DESC; 187 188To calculate the percentage of each request status: 189 190 SELECT 191 squid_request_status, 192 (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage 193 FROM access_log 194 GROUP BY squid_request_status 195 ORDER BY 2 DESC; 196 197To distinguish only between HITs and MISSes: 198 199 SELECT 200 'hits', 201 (SELECT COUNT(*) 202 FROM access_log 203 WHERE squid_request_status LIKE '%HIT%') 204 / 205 (SELECT COUNT(*) FROM access_log)*100 206 AS percentage 207 UNION 208 SELECT 209 'misses', 210 (SELECT COUNT(*) 211 FROM access_log 212 WHERE squid_request_status LIKE '%MISS%') 213 / 214 (SELECT COUNT(*) FROM access_log)*100 215 AS percentage; 216 217=item Response time ranges 218 219 SELECT 220 '0..500', 221 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage 222 FROM access_log 223 WHERE time_response >= 0 AND time_response < 500 224 UNION 225 SELECT 226 '500..1000', 227 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage 228 FROM access_log 229 WHERE time_response >= 500 AND time_response < 1000 230 UNION 231 SELECT 232 '1000..2000', 233 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage 234 FROM access_log 235 WHERE time_response >= 1000 AND time_response < 2000 236 UNION 237 SELECT 238 '>= 2000', 239 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage 240 FROM access_log 241 WHERE time_response >= 2000; 242 243=item Traffic by mime type 244 245 SELECT 246 http_mime_type, 247 SUM(http_reply_size) as total_bytes 248 FROM access_log 249 GROUP BY http_mime_type 250 ORDER BY 2 DESC; 251 252=item Traffic by client 253 254 SELECT 255 ip_client, 256 SUM(http_reply_size) AS total_bytes 257 FROM access_log 258 GROUP BY 1 259 ORDER BY 2 DESC; 260 261=back 262 263=head1 KNOWN ISSUES 264 265=head2 Speed issues 266 267The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script. 268 269Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column. 270 271=head2 Table cleanup 272 273This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem. 274 275One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to calculate those values. 276 277Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands. 278 279=head2 Testing 280 281This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs. 282 283=head1 AUTHOR 284 285This program was written by 286I<Marcello Romani <marcello.romani@libero.it>> , 287I<Amos Jeffries <amosjeffries@squid-cache.org>> 288 289=head1 COPYRIGHT 290 291 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors 292 * 293 * Squid software is distributed under GPLv2+ license and includes 294 * contributions from numerous individuals and organizations. 295 * Please see the COPYING and CONTRIBUTORS files for details. 296 297Copyright (C) 2008 by Marcello Romani 298 299This library is free software; you can redistribute it and/or modify 300it under the same terms as Perl itself, either Perl version 5.8.8 or, 301at your option, any later version of Perl 5 you may have available. 302 303=head1 QUESTIONS 304 305Questions on the usage of this program can be sent to the I<Squid Users mailing list <squid-users@lists.squid-cache.org>> 306 307=head1 REPORTING BUGS 308 309Bug reports need to be made in English. 310See http://wiki.squid-cache.org/SquidFaq/BugReporting for details of what you need to include with your bug report. 311 312Report bugs or bug fixes using http://bugs.squid-cache.org/ 313 314Report serious security bugs to I<Squid Bugs <squid-bugs@lists.squid-cache.org>> 315 316Report ideas for new improvements to the I<Squid Developers mailing list <squid-dev@lists.squid-cache.org>> 317 318=head1 SEE ALSO 319 320squid (8), GPL (7), 321 322The Squid FAQ wiki http://wiki.squid-cache.org/SquidFaq 323 324The Squid Configuration Manual http://www.squid-cache.org/Doc/config/ 325 326=cut 327 328# the first argument to this script is the log file path describing the DSN 329my $log_file = shift; 330 331# others may be options 332my $debug = 0; 333GetOptions( 334 'debug' => \$debug, 335 ); 336 337 338# utility routine to print messages on stderr (so they appear in cache log) 339# without using warn, which would clutter the log with source line numbers 340sub log_info { 341 my $msg = shift; 342 print STDERR "$msg\n"; 343} 344 345# we use logfile to pass database access information to this script 346# sample configuration: 347# access_log daemon:/host/database/table/username/password squid 348# to let a parmeter unspecified, e.g. the database host, use a double slash: 349# access_log daemon://database/table/username/password squid 350my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms; 351 352if ( !$host ) { 353 $host = 'localhost'; 354 log_info("Database host not specified. Using $host."); 355} 356 357if ( !$database ) { 358 $database = 'squid_log'; 359 log_info("Database name not specified. Using $database."); 360} 361 362if ( !$table ) { 363 $table = 'access_log'; 364 log_info("Table parameter not specified. Using $table."); 365} 366 367if ( !$user ) { 368 $user = 'squid'; 369 log_info("User parameter not specified. Using $user."); 370} 371 372if ( !$pass ) { 373 log_info('No password specified. Connecting with NO password.'); 374} 375 376# fields that we should have in the table 377# Order here must match the order of fields in the Log format and parse() output array. 378my @db_fields = qw( 379 id 380 time_since_epoch 381 time_response 382 ip_client 383 squid_request_status 384 http_status_code 385 http_reply_size 386 http_method 387 http_url 388 http_username 389 squid_hier_status 390 ip_server 391 http_mime_type 392); 393 394# perform db connection 395my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : ""); 396my $dbh; 397my $sth; 398eval { 399 warn "Connecting... dsn='$dsn', username='$user', password='...'"; 400 $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 }); 401}; 402if ($EVAL_ERROR) { 403 die "Cannot connect to database: $DBI::errstr"; 404} 405 406 407# a simple test to assure the specified table exists 408eval { 409 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1"; 410 my $sth = $dbh->prepare($q); 411 $sth->execute; 412}; 413if ($EVAL_ERROR) { 414 # run a query to create the table of required syntax 415 my $create_query = 'CREATE TABLE ' . $table . ' (' . 416 " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," . 417 " time_since_epoch DECIMAL(15,3)," . 418 " time_response INTEGER," . 419 " ip_client CHAR(15)," . 420 " ip_server CHAR(15)," . 421 " http_status_code VARCHAR(10)," . 422 " http_reply_size INTEGER," . 423 " http_method VARCHAR(20)," . 424 " http_url TEXT," . 425 " http_username VARCHAR(20)," . 426 " http_mime_type VARCHAR(50)," . 427 " squid_request_status VARCHAR(50)," . 428 " squid_hier_status VARCHAR(20)" . 429 ");" ; 430 431 $sth = $dbh->prepare($create_query); 432 $sth->execute; 433 # test again and fail hard if it is still broken. 434 eval { 435 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1"; 436 my $sth = $dbh->prepare($q); 437 $sth->execute; 438 }; 439 if ($EVAL_ERROR) { 440 die "Error initializing database table: $EVAL_ERROR"; 441 }; 442} 443# test 444 445# for better performance, prepare the statement at startup 446eval { 447 my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')'; 448 #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)"); 449 $sth = $dbh->prepare($q); 450}; 451if ($EVAL_ERROR) { 452 die "Error while preparing sql statement: $EVAL_ERROR"; 453} 454 455sub parse($) { 456 my ($line) = @_; 457 my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/; 458} 459 460# main loop 461while (my $line = <>) { 462 chomp $line; 463 464 my $cmd = substr($line, 0, 1); # extract command byte 465 466 if ( $cmd eq 'L' ) { 467 my @log_entry = parse($line); 468 eval { # we catch db errors to avoid crashing squid in case something goes wrong... 469 $sth->execute(@log_entry) or die $sth->errstr 470 }; 471 if ( $EVAL_ERROR ) { # leave a trace of the error in the logs 472 warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')'; 473 } 474 } 475} 476 477$dbh->disconnect(); 478 479__END__ 480