1use strict; 2#use warnings; # Not a default module in perl 5.005 3 4use vars qw($VERSION %IRSSI); 5 6$VERSION = '1.2'; 7%IRSSI = ( 8 authors => 'bwolf', 9 contact => 'bwolf@geekmind.org', 10 name => 'urlplot', 11 description => 'URL grabber with HTML generation and cmd execution', 12 license => 'BSD', 13 url => 'http://www.geekmind.net', 14 changed => 'Sun Jun 16 14:00:13 CEST 2002' 15); 16 17# To read the documentation you may use one of the following commands: 18# 19# pod2man urlplot.pl | nroff -man | more 20# pod2text urlplot.pl | more 21# pod2man urlplot.pl | troff -man -Tps -t > urlplot.ps 22 23=head1 NAME 24 25urlplot 26 27=head1 SYNOPSIS 28 29All URL loggers suck. This one just sucks less. 30 31=head1 DESCRIPTION 32 33urlplot watches your channels for URLs and creates nice HTML logfiles of it. 34Actually it parses normal text and topic changes for URLs. Internally it uses 35two caches to prevent flooding and logging of duplicate URLs. As an additional 36feature urlplot can create CSV datafiles. Logfiles can be created for all 37channels and for separate channels. Logging can be allowed and denied on a per 38channel/nick basis. A lockfile is used to protect the caches and logfiles from 39accessing them by multiple irssi instances. A command allows you to send a 40logged URL to your webbrowser of choice. 41 42The format of the CSV logfiles is as follows: 43date nick channel url 44 45=head1 GETTING STARTED 46 47Copy urlplot.pl intoF< $HOME/.irssi/scripts> and create the necessary 48directories withC< mkdir -p>F< $HOME/.irssi/urlplot/urls>. 49Look for the settingsC< url_log_basedir> andC< url_db_basedir> if you want to 50change the directories urlplot will populate with files. 51Follow the documentation and configure urlplot to fit your needs. 52 53=head1 COMMANDS 54 55=head2 /url <integer> 56 57Executes the commandC< url_command> with an URL from the cache as its 58argument. If no number has been specified it defaults to nth URL logged which 59references the most recently logged URL. 60 61=head2 /url -list 62 63Displays a list of all logged URLs. 64 65=head2 /url -clearcache 66 67Clears the cache databases. 68 69=head /url -showlog 70 71ExecutesC< url_command> withC< url_navigate> as its argument. It can be used 72to display the main logfile in your favourite webbrowser. 73 74=head1 SETTINGS 75 76=head2 Pathnames 77 78Please note that you can't use $HOME or any environment variables in the 79settings because irssi/urlplot isn't a shell ;) 80 81=head2 /set url_command <string> 82 83Command to be executed to display an URL (see /url). The command string should 84contain the sequence C<__URL__> which will be replaced by a certain URL. 85 86The default is: 87C< mozilla -remote "openURL(__URL__)" E<gt> /dev/null 2E<gt>&1 || \ > 88C< mozilla "__URL__"& > 89 90This will send a certain URL to mozilla or it will start mozilla if it is not 91already there. The string can be anything. For example I use the following: 92C< ssh host /home/user/bin/mozopenurl "'__URL__'" >/dev/null 2>&1 &> 93where mozopenurl is a shell script that contains similar code as the mozilla 94-remote example above. 95 96=head2 /set url_cache_max <integer> 97 98Specifies the maximum count of items which will be held in the persisten URL 99caches. A value of zero disables automatic cache resizing (round-robbin). The 100default is to keep the last 90 URLs. 101 102=head2 /set url_log_basedir <path> 103 104Specifies the logging base directory used to create the log files beneath it. 105The default isF< $HOME/.irssi/urlplot/urls/>. You have to create directories 106by yourself:C< mkdir -p>F< $HOME/.irssi/urlplot/urls>. 107 108=head2 /set url_log_file_name <relative-filename> 109 110Defines the filename of the full logfile. It will be passed to I< 111strftime(3)>. This can be usefull to create logfiles with a timestamp. 112The file will be created relative toC< url_log_basedir>. The default 113isF< ircurls.html>. 114 115=head2 /set url_chan_prefix <string> 116 117Defines the filename prefix for channel logfiles. The leadingC< # >of the 118channel name will be replaced by this prefix. It will be passed to 119I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The 120default isF< chan_>. 121 122=head2 /set url_chan_logging <bool> 123 124Enables or disable channel logging globally. 125The default isC< ON>. 126 127=head2 /set url_log_csv_file_name <relative-filename> 128 129Defines the filename of the full CSV logfile. It will be passed to 130I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The 131default isF< ircurls.csv>. 132 133=head2 /set url_log_csv_file_max_size <integer> 134 135Defines the maximum size of the full CSV logfile. If it reaches the specified 136maximum size in bytes it will be simply resized to zero. The default isC< 30*1024> 137bytes. 138 139=head2 /set url_log_csv_separator <string> 140 141Defines the separator used as a delimeter for the fields of the CSV files. 142The default isC< |>. 143 144=head2 /set url_csv_logging <bool> 145 146Conditionally turns on or off CSV logging for the full logfile. The default 147isC< OFF>. 148 149=head2 /set url_csv_chan_logging <bool> 150 151Conditionally turns on or off CSV logging of the channel logfiles. The default isC< OFF>. 152 153=head2 /set url_time_format <string> 154 155Specifies the time format that will be passed toI< strftime(3)> to produce an 156ASCII representation of the time/date when an URL was grabbed. It will be used 157in the logfiles. The default isC< %Y:%m:%d - %H:%M:%S>. 158 159=head2 /set url_log_file_max_size <integer> 160 161Defines the maximum size of the full logfile and the channel logfile. If it 162reaches the specified maximum size in bytes it will be simply resized to zero. 163The default isC< 30*1024> bytes. 164 165=head2 /set url_log_file_autoreload_time <integer> 166 167Intervall in seconds used for the HTML logfile header. The logfile reloads 168itself every N seconds. The default isC< 90> seconds. 169 170=head2 /set url_db_basedir <path> 171 172Specifies the database base directory where two database files and a lockfile 173will be created. The default isF< $HOME/.irssi/urlplot>. You have to create 174the directory by yourself. 175 176=head2 /set url_db_cache_a_filename <relative-filename> 177 178Defines the filename of the index URL database. The file will be created 179relative toC< url_db_basedir>. The default isF< a_cache>. 180 181=head2 /set url_db_cache_h_filename <relative-filename> 182 183Defines the filename of the hash URL database. The file will be created 184relative toC< url_db_basedir>. The default isF< h_cache>. 185 186=head2 /set url_db_lock_filename <relative-filename> 187 188Defines the filename of the lockfile used to lock all logfiles and the cache 189databases. It will be created relative toC< url_db_basedir>. The default 190isF< lockfile>. 191 192=head2 /set url_policy_default <allow|deny> 193 194Specifies the default policy that will be used to decide if logging ist 195permitted for a certain nick or channel. This can be eitherC< allow> 196orC< deny>. If you set this toC< deny> you will have to allow explicitly those 197channels and nicks for which logging should be permitted. In contrast if you 198set it to allow, you can deny logging for certain nicks and channels. 199The keysC< url_policy_chans> andC< url_policy_nicks> control the allow, deny 200behaviour depending onC< url_policy_default>. The default isC< allow> which 201permits logging of all channels and nicks. 202 203=head2 /set url_policy_chans <string> 204 205Specifies those channels for whoom logging is permitted or denied. Multiple 206channels may be specified by usingC< ,>C< ;>C< :> or a space to separate the 207items. 208 209=head2 /set url_policy_nicks <string> 210 211SeeC< url_policy_chans> and replace the word channel by nick. 212 213=head2 /set url_navigate <string> 214 215ExecutesC< url_command> withC< url_navigate> as its argument. It can be used 216to display the main logfile in your favourite webbrowser. Because you may pass 217this command at anytime to your webbrowser it will not be passed to strftime. 218Thus you can only specify a static file here. 219 220=head1 AUTHOR 221 222Marcus Geiger <bwolf@geekmind.org> 223 224=cut 225 226use integer; 227use Irssi; 228use POSIX qw(strftime); 229use Fcntl qw(:DEFAULT :flock); 230use DB_File; 231 232# Regexps 233sub URL_SCHEME_REGEX() { '(http|ftp|https|news|irc)' } 234sub URL_GUESS_REGEX() { '(www|ftp)' } 235sub URL_BASE_REGEX() { '[a-z0-9_\-+\\/:?%.&!~;,=\#<>]' } 236 237# Other 238sub BACKWARD_SEEK_BYTES() { 130 } 239sub LOG_FILE_MARKER() { '<!-- bottom-line -->' } 240 241# Keys for settings 242sub KEY_URL_COMMAND() { 'url_command' } 243sub KEY_URL_CACHE_MAX() { 'url_cache_max' } 244sub KEY_URL_LOG_BASEDIR() { 'url_log_basedir' } 245sub KEY_URL_LOG_FILE_NAME() { 'url_log_file_name' } 246sub KEY_URL_CHAN_PREFIX() { 'url_chan_prefix' } 247sub KEY_URL_CHAN_LOGGING() { 'url_chan_logging' } 248sub KEY_URL_LOG_CSV_FILE_NAME() { 'url_log_csv_file_name' } 249sub KEY_URL_LOG_CSV_FILE_MAX_SIZE() { 'url_log_csv_file_max_size' } 250sub KEY_URL_LOG_CSV_SEPARATOR() { 'url_log_csv_separator' } 251sub KEY_URL_CSV_LOGGING() { 'url_csv_logging' } 252sub KEY_URL_CSV_CHAN_LOGGING() { 'url_csv_chan_logging' } 253sub KEY_URL_TIME_FORMAT() { 'url_time_format' } 254sub KEY_URL_LOG_FILE_MAX_SIZE() { 'url_log_file_max_size' } 255sub KEY_URL_LOG_FILE_AUTORELOAD_TIME() { 'url_log_file_autoreload_time' } 256sub KEY_URL_DB_BASEDIR() { 'url_db_basedir' } 257sub KEY_URL_DB_CACHE_A_FILENAME() { 'url_db_cache_a_filename' } 258sub KEY_URL_DB_CACHE_H_FILENAME() { 'url_db_cache_h_filename' } 259sub KEY_URL_DB_LOCK_FILENAME() { 'url_db_lock_filename' } 260sub KEY_URL_POLICY_DEFAULT() { 'url_policy_default' } 261sub KEY_URL_POLICY_CHANS() { 'url_policy_chans' } 262sub KEY_URL_POLICY_NICKS() { 'url_policy_nicks' } 263sub KEY_URL_NAVIGATE() { 'url_navigate' } 264 265# Defaults 266sub DEF_URL_COMMAND() { 267 'mozilla -remote "openURL(__URL__)" > /dev/null 2>&1 || mozilla "__URL__"&' } 268sub DEF_URL_CACHE_MAX() { 90 } 269sub DEF_URL_LOG_FILE_AUTORELOAD_TIME() { 120 } 270sub DEF_URL_TIME_FORMAT() { '%Y:%m:%d - %H:%M:%S' } 271sub DEF_URL_DO_FILE_RESIZE() { '0' } 272sub DEF_URL_LOG_FILE_MAX_SIZE() { 1024 * 30 } 273sub DEF_URL_LOG_BASEDIR() { '.irssi/urlplot/urls/' } 274sub DEF_URL_LOG_FILE_NAME() { 'ircurls.html' } 275sub DEF_URL_CHAN_PREFIX() { 'chan_' } 276sub DEF_URL_CHAN_LOGGING() { '1' } 277sub DEF_URL_LOG_CSV_FILE_NAME() { 'ircurls.csv' } 278sub DEF_URL_LOG_CSV_FILE_MAX_SIZE() { 1024 * 30 } 279sub DEF_URL_LOG_CSV_SEPARATOR() { '|' } 280sub DEF_URL_CSV_LOGGING() { '' } 281sub DEF_URL_CSV_CHAN_LOGGING() { '' } 282sub DEF_URL_DB_BASEDIR() { '.irssi/urlplot/' } 283sub DEF_URL_DB_CACHE_A_FILENAME() { 'a_cache' } 284sub DEF_URL_DB_CACHE_H_FILENAME() { 'h_cache' } 285sub DEF_URL_DB_LOCK_FILENAME() { 'lockfile' } 286sub DEF_URL_POLICY_DEFAULT() { 'allow' } 287sub DEF_URL_POLICY_CHANS() { '' } 288sub DEF_URL_POLICY_NICKS() { '' } 289sub DEF_URL_NAVIGATE() { '.irssi/urlplot/urls/ircurls.html' } 290 291sub print_full_log_file_template { 292 my ($fh, $reload) = @_; 293 print $fh <<EOT; 294<?xml version="1.0" encoding="iso-8859-1"?> 295 <!DOCTYPE html 296 PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 297 "DTD/xhtml1-strict.dtd"> 298<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 299 <head> 300 <title>IRC-URLs</title> 301 <meta http-equiv="cache-control" content="no-cache" /> 302 <meta http-equiv="refresh" content="$reload;" /> 303 <style type="text/css"> 304 <!-- 305 .small { font-size: small; } 306 .xsmall { font-size: x-small; } 307 --> 308 </style> 309 </head> 310 <body> 311 <h1>IRC-URLs</h1> 312 <p class="xsmall"> 313 Visit <a href="http://www.geekmind.net">geekmind.net</a> 314 </p> 315 <p>This page reloads itself every $reload seconds.</p> 316 <p> 317 <a name="top" /> 318 <a class="small" href="#bottom">Page bottom</a> 319 <br /> 320 <br /> 321 </p> 322 <table rules="rows" frame="void" width="100%" cellpadding="5"> 323 <tr align="left"> 324 <th><b>Date/Time</b></th> 325 <th><b>Nick</b></th> 326 <th><b>Channel/Nick</b></th> 327 <th><b>URL</b></th> 328 </tr> 329EOT 330} 331 332sub print_chan_log_file_template { 333 my ($fh, $reload, $channel, $full_log) = @_; 334 print $fh <<EOT; 335<?xml version="1.0" encoding="iso-8859-1"?> 336 <!DOCTYPE html 337 PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 338 "DTD/xhtml1-strict.dtd"> 339<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 340 <head> 341 <title>IRC-URLs of $channel</title> 342 <meta http-equiv="cache-control" content="no-cache" /> 343 <meta http-equiv="refresh" content="$reload;" /> 344 <style type="text/css"> 345 <!-- 346 .small { font-size: small; } 347 .xsmall { font-size: x-small; } 348 --> 349 </style> 350 </head> 351 <body> 352 <h1>IRC-URLs of $channel</h1> 353 <p class="xsmall"> 354 Visit <a href="http://www.geekmind.net">geekmind.net</a> 355 </p> 356 <p>This page reloads itself every $reload seconds.</p> 357 <p><a href="$full_log">Complete</a> listing.</p> 358 <p> 359 <a name="top" /> 360 <a class="small" href="#bottom">Page bottom</a> 361 <br /> 362 <br /> 363 </p> 364 <table rules="rows" frame="void" width="100%" cellpadding="5"> 365 <tr align="left"> 366 <th><b>Date/Time</b></th> 367 <th><b>Nick</b></th> 368 <th><b>URL</b></th> 369 </tr> 370EOT 371} 372 373sub LOG_FILE_TAIL () { 374 return <<"EOT"; 375 376 @{[ LOG_FILE_MARKER ]} 377 </table> 378 <p> 379 <a class="small" href="#top">Page top</a> 380 <a name="bottom" /> 381 </p> 382 </body> 383</html> 384EOT 385} 386 387sub print_chan_log_file_entry { 388 my ($fh, $date, $nick, $channel, $url) = @_; 389 print $fh <<EOURL; 390 <tr> 391 <td>$date</td> 392 <td><em>$nick</em></td> 393 <td><a href=\"$url\">$url</a></td> 394 </tr> 395EOURL 396 print $fh LOG_FILE_TAIL; 397}; 398 399sub print_full_log_file_entry { 400 my ($fh, $date, $nick, $channel, $chan_log, $url) = @_; 401 print $fh <<EOURL; 402 <tr> 403 <td>$date</td> 404 <td><em>$nick</em></td> 405 <td><a href="$chan_log">$channel</a></td> 406 <td><a href=\"$url\">$url</a></td> 407 </tr> 408EOURL 409 print $fh LOG_FILE_TAIL; 410} 411 412sub p_error { # Error printing (directly to the current window) 413 Irssi::print("urlplot: @_"); 414} 415 416sub p_normal { # Normal printing (to the msg window) 417 Irssi::print("@_", MSGLEVEL_MSGS+MSGLEVEL_NOHILIGHT); 418} 419 420sub scan_url { 421 my $rawtext = shift; 422 return $1 if $rawtext =~ m|(@{[ URL_SCHEME_REGEX ]}://@{[ URL_BASE_REGEX ]}+)|io; 423 # The URL misses a scheme, try to be smart 424 if ($rawtext =~ m|@{[ URL_GUESS_REGEX ]}\.@{[ URL_BASE_REGEX ]}+|io) { 425 my $preserve = $&; 426 return "http://$preserve" if $1 =~ /^www/; 427 return "ftp://$preserve" if $1 =~ /^ftp/; 428 } 429 return undef; 430} 431 432sub aquire_lock { 433 my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR) 434 || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}"; 435 my $lockfile = Irssi::settings_get_str(KEY_URL_DB_LOCK_FILENAME) 436 || die "missing setting for @{[ KEY_URL_DB_LOCK_FILENAME ]}"; 437 438 local *LOCK_F; 439 my $fh; 440 $db_base .= '/' if $db_base !~ m#/$#; 441 $lockfile = "${db_base}${lockfile}"; 442 443 die "directory $db_base doesn't exist or isn't readable" 444 unless -d $db_base and -r $db_base; 445 446 sysopen(LOCK_F, $lockfile, O_RDONLY | O_CREAT) 447 || die "can't open/create lockfile $lockfile: $!"; 448 flock(LOCK_F, LOCK_EX | LOCK_NB) 449 || die "can't exclusively lock $lockfile: $!"; 450 # Can't pass back localized typeglob reference 451 $fh = *LOCK_F; 452 return $fh; 453} 454 455sub open_caches { 456 my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR) 457 || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}"; 458 my $dbfile_a = Irssi::settings_get_str(KEY_URL_DB_CACHE_A_FILENAME) 459 || die "missing setting for @{[ KEY_URL_DB_CACHE_A_FILENAME ]}"; 460 my $dbfile_h = Irssi::settings_get_str(KEY_URL_DB_CACHE_H_FILENAME) 461 || die "missing setting for @{[ KEY_URL_DB_CACHE_H_FILENAME ]}"; 462 463 my (@cache, %cache); 464 $db_base .= '/' if $db_base !~ m#/$#; 465 $dbfile_a = "${db_base}${dbfile_a}"; 466 $dbfile_h = "${db_base}${dbfile_h}"; 467 468 die "directory $db_base doesn't exist or isn't readable" 469 unless -d $db_base and -r $db_base; 470 471 tie @cache, 'DB_File', $dbfile_a, O_RDWR | O_CREAT, 0666, $DB_RECNO 472 or die "can't tie urlcache db $dbfile_a: $!"; 473 tie %cache, 'DB_File', $dbfile_h, O_RDWR | O_CREAT, 0666 474 or die "can't tie urlcache db $dbfile_h: $!"; 475 return \(@cache, %cache); 476} 477 478sub create_chan_template { 479 my ($full_log, $file, $channel) = @_; 480 my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME); 481 local *FH; 482 open(FH, ">", $file) 483 || die "can't create logfile $file: $!"; 484 print_chan_log_file_template(\*FH, $reload, $channel, $full_log); 485 print FH LOG_FILE_TAIL; 486 close(FH); 487} 488 489sub create_full_template { 490 my $file = shift; 491 my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME); 492 local *FH; 493 open(FH, ">", $file) 494 || die "can't create logfile $file: $!"; 495 print_full_log_file_template(\*FH, $reload); 496 print FH LOG_FILE_TAIL; 497 close(FH); 498} 499 500sub create_csv_file { 501 my $file = shift; 502 open(FH, ">", $file) 503 || die "can't create $file: $!"; 504 close FH; 505} 506 507sub log_csv { 508 my $csv_log = shift; 509 my $sep = Irssi::settings_get_str(KEY_URL_LOG_CSV_SEPARATOR); 510 my $fields = join $sep, @_; 511 local *FH; 512 open(FH, ">>", $csv_log) 513 || die "can't open $csv_log: $!"; 514 print FH "$fields\n"; 515 close FH; 516} 517 518sub position_log_file { 519 my $file = shift; 520 my ($fh, $pos, $buf, @lines, $off, $got_it); 521 local *FH; 522 my $hint = "Conside manual removal of this file"; 523 sysopen(FH, $file, O_RDWR) 524 || die "can't open $file: $!"; 525 $pos = sysseek(FH, 0, 2) 526 || die "can't seek to EOF in $file. ${hint}: $!"; 527 $pos -= BACKWARD_SEEK_BYTES; 528 sysseek(FH, $pos, 0) 529 || die "can't seek backwards to $pos in $file. ${hint}: $!"; 530 sysread(FH, $buf, 2048) 531 || die "can't read rest of $file. ${hint}: $!"; 532 $off = 0; 533 @lines = split /\n/, $buf; 534 for (@lines) { 535 $off += length; 536 $off += 1; 537 chomp; 538 next if /^$/; 539 if (/@{[ LOG_FILE_MARKER ]}/io) { 540 $got_it = 1; 541 $off -= length; 542 $off -= 1; 543 last; 544 } 545 } 546 die "Can't locate @{[ LOG_FILE_MARKER ]} in $file. ${hint}" 547 unless $got_it; 548 $pos += $off; 549 sysseek(FH, $pos, 0) 550 || die "Can't seek to $pos in $file. ${hint}: $!"; 551 # Can't pass back localized typeglob reference 552 $fh = *FH; 553 return $fh; 554} 555 556sub log_url { 557 my ($nick, $channel, $url) = @_; 558 my $log_base = Irssi::settings_get_str(KEY_URL_LOG_BASEDIR) 559 || die "missing setting for @{[ KEY_URL_LOG_BASEDIR ]}"; 560 my $fullfile = Irssi::settings_get_str(KEY_URL_LOG_FILE_NAME) 561 || die "missing setting for @{[ KEY_URL_LOG_FILE_NAME ]}"; 562 my $csvfile = Irssi::settings_get_str(KEY_URL_LOG_CSV_FILE_NAME) 563 || die "missing setting for @{[ KEY_URL_LOG_CSV_FILE_NAME ]}"; 564 my $csv_max = Irssi::settings_get_int(KEY_URL_LOG_CSV_FILE_MAX_SIZE); 565 my $csv_logging = Irssi::settings_get_bool(KEY_URL_CSV_LOGGING); 566 my $csv_chan_logging = Irssi::settings_get_bool(KEY_URL_CSV_CHAN_LOGGING); 567 my $time_fmt = Irssi::settings_get_str(KEY_URL_TIME_FORMAT) 568 || die "missing setting for @{[ KEY_URL_TIME_FORMAT ]}"; 569 my $max = Irssi::settings_get_int(KEY_URL_LOG_FILE_MAX_SIZE); 570 my $chan_prefix = Irssi::settings_get_str(KEY_URL_CHAN_PREFIX) 571 || die "missing setting for @{[ KEY_URL_CHAN_PREFIX ]}"; 572 my $chan_logging = Irssi::settings_get_bool(KEY_URL_CHAN_LOGGING); 573 574 my @curr_time = localtime(time()); 575 $log_base .= '/' if $log_base !~ m#/$#; 576 577 die "directory $log_base doesn't exist or isn't readable" 578 unless -d $log_base and -r $log_base; 579 580 # Make channel filename 581 my $tmp = POSIX::strftime($chan_prefix, @curr_time); 582 my $chan_fname = lc $channel; 583 $chan_fname =~ s/^#/$tmp/; 584 my $chan_log = "${log_base}${chan_fname}.html"; 585 586 # Make full filename 587 $tmp = POSIX::strftime($fullfile, @curr_time); 588 my $full_fname = $tmp; 589 my $full_log = $log_base . $tmp; 590 591 # Replace spaces in date string to show up as ' ' to prevent line 592 # breaks. 593 my $date = POSIX::strftime($time_fmt, @curr_time); 594 my $html_date = $date; 595 $html_date =~ s/ /\ /g; 596 597 my $fh; 598 599 # Channel logging 600 if ($chan_logging) { 601 create_chan_template $full_fname, $chan_log, $channel 602 if not -r $chan_log or ($max > 0 and (stat($chan_log))[7] > $max); 603 $fh = undef; 604 $fh = position_log_file $chan_log; 605 print_chan_log_file_entry($fh, $html_date, $nick, $channel, $url); 606 close $fh; 607 } 608 609 # Full logging 610 create_full_template $full_log 611 if not -r $full_log or ($max > 0 and (stat($full_log))[7] > $max); 612 $fh = undef; 613 $fh = position_log_file $full_log; 614 print_full_log_file_entry($fh, $html_date, $nick, $channel, 615 "${chan_fname}.html", $url); 616 close $fh; 617 618 # CSV logging 619 if ($csv_logging) { 620 $tmp = POSIX::strftime($csvfile, @curr_time); 621 my $log = $log_base . $tmp; 622 create_csv_file $log 623 if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max); 624 log_csv($log, $date, $nick, $channel, $url); 625 } 626 627 # CSV channel logging 628 if ($csv_chan_logging) { 629 my $log = "${log_base}${chan_fname}.csv"; 630 create_csv_file $log 631 if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max); 632 log_csv($log, $date, $nick, $channel, $url); 633 } 634} 635 636sub mk_home($) { 637 my $arg = shift; 638 return "$ENV{HOME}/$arg"; 639} 640 641sub logging_permited { 642 my ($nick, $chan_or_nick) = @_; 643 my $default_policy = Irssi::settings_get_str(KEY_URL_POLICY_DEFAULT) 644 || die "missing setting for @{[ KEY_URL_POLICY_DEFAULT ]}"; 645 my $chans = Irssi::settings_get_str(KEY_URL_POLICY_CHANS); 646 my $nicks = Irssi::settings_get_str(KEY_URL_POLICY_NICKS); 647 my @policy_chans = split /[,;: ]/, $chans; 648 my @policy_nicks = split /[,;: ]/, $nicks; 649 my $permit; 650 651 if ($default_policy eq 'deny') { 652 # logging must be explicitly permited 653 $permit = 0; 654 for (@policy_chans) { 655 return 1 if $_ eq $chan_or_nick; 656 } 657 for (@policy_nicks) { 658 return 1 if $_ eq $nick; 659 } 660 } elsif ($default_policy eq 'allow') { 661 # logging must be explicitly denied 662 $permit = 1; 663 for (@policy_chans) { 664 return 0 if $_ eq $chan_or_nick; 665 } 666 for (@policy_nicks) { 667 return 0 if $_ eq $nick; 668 } 669 } else { 670 p_error("setting @{[ KEY_URL_POLICY_DEFAULT ]} can be either " . 671 "'allow' or 'deny'"); 672 return undef; 673 } 674 return $permit; 675} 676 677sub do_locked { 678 my $f = shift or die "missing function argument " . caller; 679 my $lockf; 680 eval { $lockf = aquire_lock }; 681 if ($@) { 682 p_error("$@"); 683 return; 684 } 685 eval { $f->(@_) }; 686 p_error("$@") if $@; 687 eval { close $lockf }; 688} 689 690sub do_with_caches { 691 my $f = shift or die "missing function argument " . caller; 692 my ($cache_a, $cache_h) = (); 693 eval { ($cache_a, $cache_h) = open_caches }; 694 if ($@) { 695 p_error("$@"); 696 eval { untie %$cache_h } if defined $cache_h; 697 eval { untie @$cache_a } if defined $cache_a; 698 return; 699 } 700 eval { $f->($cache_a, $cache_h, @_) }; 701 p_error("$@") if $@; 702 eval { untie %$cache_h }; 703 eval { untie @$cache_a }; 704} 705 706sub url_msg_log { 707 my ($cache_a, $cache_h, $nick, $chan_or_nick, $url) = @_; 708 my ($cache_size, $tmp); 709 my $max_cache = Irssi::settings_get_int(KEY_URL_CACHE_MAX); 710 711 unless (exists $cache_h->{$url}) { 712 $cache_size = scalar(@$cache_a) + 1; 713 $cache_h->{$url} = '1'; 714 # push the URL to the end of the file seems to work better on 715 # some systems in contrast to unshift. 716 push @$cache_a, $url; 717 if ($max_cache > 0 && $cache_size > $max_cache) { 718 $tmp = shift @$cache_a; 719 delete $cache_h->{$tmp}; 720 } 721 log_url($nick, $chan_or_nick, $url); 722 } 723} 724 725sub url_topic { 726 my ($server, $channel, $topic, $nick, $hostmask) = @_; 727 url_message($server, $topic, $nick, $hostmask, $channel); 728} 729 730sub url_message { 731 my ($server, $rawtext, $nick, $hostmask, $channel) = @_; 732 my ($url, $permit, $chan_or_nick); 733 734 if (defined($url = scan_url($rawtext))) { 735 $chan_or_nick = defined $channel ? $channel : $server->{nick}; 736 if (defined($permit = logging_permited($nick, $chan_or_nick)) && $permit) { 737 do_locked(\&do_with_caches, \&url_msg_log, $nick, $chan_or_nick, $url); 738 } 739 } 740} 741 742sub url_cmd_show { 743 my ($cache_a, $cache_h) = @_; 744 my $n = 0; 745 p_normal("urlplot: total of " . scalar(@$cache_a) . " URLs"); 746 foreach my $url (@$cache_a) { 747 p_normal(sprintf("%02d - %s", $n++, $url)); 748 } 749} 750 751sub url_cmd_clearcaches { 752 my ($cache_a, $cache_h) = @_; 753 @$cache_a = (); 754 %$cache_h = (); 755} 756 757sub url_cmd_real_navigate { 758 my ($url) = @_; 759 die 'no URLs captured so far' unless $url; 760 my $url_cmd = Irssi::settings_get_str(KEY_URL_COMMAND) 761 || die "missing setting for @{[ KEY_URL_COMMAND ]}"; 762 unless ($url_cmd =~ s/__URL__/$url/g) { 763 die "setting url_cmd doesn't contain an URL placeholder '__URL__'"; 764 } 765 system($url_cmd); 766} 767 768sub url_cmd_navigate { 769 my ($cache_a, $cache_h, $n) = @_; 770 my ($len, $url) = scalar @$cache_a; 771 unless (defined $n) { 772 $n = $len > 0 ? $len - 1 : $len; 773 } 774 die "no such URL; I've only $len" unless $n < $len; 775 $url = $cache_a->[$n]; 776 die 'no URLs captured so far' unless $url; 777 url_cmd_real_navigate $url; 778} 779 780sub url_command { 781 my ($data, $server, $witem) = @_; 782 $_ = $data; 783 if (/^-list/) { 784 do_locked(\&do_with_caches, \&url_cmd_show); 785 } elsif (/^-clearcache/) { 786 do_locked(\&do_with_caches, \&url_cmd_clearcaches); 787 } elsif (/^-showlog/) { 788 my $nav_url = Irssi::settings_get_str(KEY_URL_NAVIGATE) 789 || die "missing setting for @{[ KEY_URL_NAVIGATE ]}"; 790 url_cmd_real_navigate $nav_url; 791 } else { 792 my $n; 793 if (/^(\d+)/) { 794 $n = $1; 795 if ($n < 0) { 796 p_error("argument must be a positive integer"); 797 return; 798 } 799 } elsif (/^$/) { 800 $n = undef; 801 } else { 802 p_error("usage for /url [-list|-showlog|-clearcache|<digit>]"); 803 return; 804 } 805 do_locked(\&do_with_caches, \&url_cmd_navigate, $n); 806 } 807} 808 809Irssi::signal_add_last('message public', 'url_message'); 810Irssi::signal_add_last('message private', 'url_message'); 811Irssi::signal_add_last('message topic', 'url_topic'); 812Irssi::command_bind('url', 'url_command'); 813 814Irssi::settings_add_str('misc', KEY_URL_COMMAND, DEF_URL_COMMAND); 815Irssi::settings_add_int('misc', KEY_URL_CACHE_MAX, DEF_URL_CACHE_MAX); 816Irssi::settings_add_str('misc', KEY_URL_LOG_BASEDIR, mk_home(DEF_URL_LOG_BASEDIR)); 817Irssi::settings_add_str('misc', KEY_URL_LOG_FILE_NAME, DEF_URL_LOG_FILE_NAME); 818Irssi::settings_add_str('misc', KEY_URL_CHAN_PREFIX, DEF_URL_CHAN_PREFIX); 819Irssi::settings_add_bool('misc', KEY_URL_CHAN_LOGGING, DEF_URL_CHAN_LOGGING); 820Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_FILE_NAME, DEF_URL_LOG_CSV_FILE_NAME); 821Irssi::settings_add_int('misc', KEY_URL_LOG_CSV_FILE_MAX_SIZE, DEF_URL_LOG_CSV_FILE_MAX_SIZE); 822Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_SEPARATOR, DEF_URL_LOG_CSV_SEPARATOR); 823Irssi::settings_add_bool('misc', KEY_URL_CSV_LOGGING, DEF_URL_CSV_LOGGING); 824Irssi::settings_add_bool('misc', KEY_URL_CSV_CHAN_LOGGING, DEF_URL_CSV_CHAN_LOGGING); 825Irssi::settings_add_str('misc', KEY_URL_TIME_FORMAT, DEF_URL_TIME_FORMAT); 826Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_MAX_SIZE, DEF_URL_LOG_FILE_MAX_SIZE); 827Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_AUTORELOAD_TIME, 828 DEF_URL_LOG_FILE_AUTORELOAD_TIME); 829Irssi::settings_add_str('misc', KEY_URL_DB_BASEDIR, mk_home(DEF_URL_DB_BASEDIR)); 830Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_A_FILENAME, DEF_URL_DB_CACHE_A_FILENAME); 831Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_H_FILENAME, DEF_URL_DB_CACHE_H_FILENAME); 832Irssi::settings_add_str('misc', KEY_URL_DB_LOCK_FILENAME, DEF_URL_DB_LOCK_FILENAME); 833 834Irssi::settings_add_str('misc', KEY_URL_POLICY_DEFAULT, DEF_URL_POLICY_DEFAULT); 835Irssi::settings_add_str('misc', KEY_URL_POLICY_CHANS, DEF_URL_POLICY_CHANS); 836Irssi::settings_add_str('misc', KEY_URL_POLICY_NICKS, DEF_URL_POLICY_NICKS); 837Irssi::settings_add_str('misc', KEY_URL_NAVIGATE, 'file://' . mk_home(DEF_URL_NAVIGATE)); 838 839# 840# $Log$ 841# 842