1# url grabber, yes it sucks 2# 3# infected with the gpl virus 4# 5# Thomas Graf <tgraf@europe.com> 6# 7# version: 0.2 8# 9# Commands: 10# 11# /URL LIST 12# /URL CLEAR 13# /URL OPEN [<nr>] 14# /URL QUOTE [<nr>] 15# /URL HEAD [<nr>] !! Blocking !! 16# /HEAD <url> !! Blocking !! 17# 18# Config Values 19# 20# [url logfile] 21# url_log log urls to url_log_file 22# url_log_file file to save urls 23# url_log_format format in url logfile 24# url_log_timestamp format of timestamp in url logfile 25# 26# [url log in memory] 27# url_log_browser command to execute to open url, %f will be replaced with the url 28# url_log_size keep that many urls in the list 29# 30# [http head stuff] 31# url_head_format format of HEAD output 32# url_auto_head do a head on every url received 33# url_auto_head_format format of auto head output 34# 35# 36# Database installation 37# - create database and user 38# - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, 39# time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255)); 40# or similiar :) 41# 42# 43# todo: 44# 45# - fix XXX marks 46# - xml output? 47# - don't output "bytes" if content-length is not available 48# - prefix with http:// if no prefix is given 49 50use strict; 51use Irssi; 52use Irssi::Irc; 53use vars qw($VERSION %IRSSI); 54 55$VERSION = "0.2"; 56%IRSSI = ( 57 authors => 'Thomas Graf', 58 contact => 'irssi@reeler.org', 59 name => 'url_log', 60 description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.', 61 license => 'GNU GPLv2 or later', 62 url => 'http://irssi.reeler.org/url/', 63); 64 65use LWP; 66use LWP::UserAgent; 67use HTTP::Status; 68use DBI; 69 70use POSIX qw(strftime); 71 72my @urls; 73my $user_agent = new LWP::UserAgent; 74 75$user_agent->agent("IrssiUrlLog/0.2"); 76 77# hmm... stolen.. 78# -verbatim- import expand 79sub expand { 80 my ($string, %format) = @_; 81 my ($exp, $repl); 82 $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format)); 83 return $string; 84} 85# -verbatim- end 86 87sub print_msg 88{ 89 Irssi::active_win()->print("@_"); 90} 91 92# 93# open url in brower using url_log_brower command 94# 95sub open_url 96{ 97 my ($data) = @_; 98 99 my ($nick, $target, $url) = split(/ /, $data); 100 101 my $pid = fork(); 102 103 if ($pid) { 104 Irssi::pidwait_add($pid); 105 } elsif (defined $pid) { # $pid is zero here if defined 106 my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url); 107 # XXX use exec 108 system $data; 109 exit; 110 } else { 111 # weird fork error 112 print_msg "Can't fork: $!"; 113 } 114} 115 116sub head 117{ 118 my ($url) = @_; 119 my $req = new HTTP::Request HEAD => $url; 120 my $res = $user_agent->request($req); 121 return $res; 122} 123 124# 125# do a HEAD 126# 127sub do_head 128{ 129 my ($url) = @_; 130 131 my $res = head($url); 132 133 if ($res->code ne RC_OK) { 134 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" . 135 $res->status_line()); 136 } else { 137 138 my $t = expand(Irssi::settings_get_str("url_head_format"), 139 "u", $url, 140 "t", scalar $res->content_type, 141 "l", scalar $res->content_length, 142 "s", scalar $res->server); 143 144 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t); 145 } 146} 147 148# 149# called if url is detected, should do a HEAD and print a 1-liner 150# 151sub do_auto_head 152{ 153 my ($url, $window) = @_; 154 155 return if ($url !~ /^http:\/\//); 156 157 my $res = head($url); 158 159 if ($res->code ne RC_OK) { 160 $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line()); 161 } else { 162 163 my $t = expand(Irssi::settings_get_str("url_auto_head_format"), 164 "u", $url, 165 "c", $res->code, 166 "t", scalar $res->content_type, 167 "l", scalar $res->content_length, 168 "s", scalar $res->server); 169 170 $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t); 171 } 172} 173 174# 175# log url to file 176# 177sub log_to_file 178{ 179 my ($nick, $target, $text) = @_; 180 my ($lfile) = glob Irssi::settings_get_str("url_log_file"); 181 182 if ( open(LFD, ">>", $lfile) ) { 183 184 my %h = { 185 time => time, 186 nick => $nick, 187 target => $target, 188 url => $text 189 }; 190 191 print LFD expand(Irssi::settings_get_str("url_log_format"), 192 "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime), 193 "n", $nick, 194 "t", $target, 195 "u", $text), "\n"; 196 197 close LFD; 198 } else { 199 print_msg "Warning: Unable to open file $lfile $!"; 200 } 201} 202 203 204# 205# log url to database 206# 207sub log_to_database 208{ 209 my ($nick, $target, $text) = @_; 210 211 # this is quite expensive, but... 212 my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"), 213 Irssi::settings_get_str("url_log_db_user"), 214 Irssi::settings_get_str("url_log_db_password")) 215 or print_msg "Can't connect to database " . $DBI::errstr; 216 217 if ($dbh) { 218 219 my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," . 220 $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")"; 221 222 $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr; 223 224 $dbh->disconnect(); 225 } 226} 227 228# 229# head command handler 230# 231sub sig_head 232{ 233 my ($cmd_line, $server, $win_item) = @_; 234 my @args = split(' ', $cmd_line); 235 236 my $url; 237 238 if (@args <= 0) { 239 240 if ($#urls eq 0) { 241 return; 242 } 243 244 $url = $urls[$#urls]; 245 $url =~ s/^.*?\s.*?\s//; 246 } else { 247 $url = lc(shift(@args)); 248 } 249 250 do_head($url); 251} 252 253# 254# msg handler 255# 256sub sig_msg 257{ 258 my ($server, $data, $nick, $address) = @_; 259 my ($target, $text) = split(/ :/, $data, 2); 260 261 # very special, but better than just \w::/* and www.* 262 while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) { 263 264 return if ($1 =~ /^\.\./); 265 266 push @urls, "$nick $target $2"; 267 268 # XXX resize correctly if delta is > 1 269 if ($#urls >= Irssi::settings_get_int("url_log_size")) { 270 shift @urls; 271 } 272 273 my $ischannel = $server->ischannel($target); 274 my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS; 275 $target = $nick unless $ischannel; 276 my $window = $server->window_find_closest($target, $level); 277 278 if ( Irssi::settings_get_bool("url_log_auto_head") ) { 279 do_auto_head($2, $window); 280 } 281 282 if ( Irssi::settings_get_bool("url_log") ) { 283 log_to_file($nick, $target, $2); 284 } 285 286 if ( Irssi::settings_get_bool("url_log_db") ) { 287 log_to_database($nick, $target, $2); 288 } 289 } 290} 291 292sub print_url_list_item 293{ 294 my ($n, $data) = @_; 295 my ($src, $dst, $url) = split(/ /, $data); 296 297 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url); 298} 299 300# 301# url command handler 302# 303sub sig_url 304{ 305 my ($cmd_line, $server, $win_item) = @_; 306 my @args = split(' ', $cmd_line); 307 308 if (@args <= 0) { 309 print_msg "URL LIST [<nr>] list all url(s)"; 310 print_msg " OPEN [<nr>] open url in browser"; 311 print_msg " QUOTE [<nr>] quote url (print to current channel)"; 312 print_msg " HEAD send HEAD to server"; 313 print_msg " CLEAR clear url list"; 314 return; 315 } 316 317 my $action = lc(shift(@args)); 318 319 if ($action eq "list") { 320 321 if (@args > 0) { 322 my $i = shift(@args); 323 print_url_list_item($i, $urls[$i]); 324 } else { 325 my $i = 0; 326 foreach my $l (@urls) { 327 print_url_list_item($i, $l); 328 $i++; 329 } 330 } 331 332 } elsif($action eq "open") { 333 334 my $i = $#urls; 335 if (@args > 0) { 336 $i = shift(@args); 337 } 338 open_url($urls[$i]); 339 340 } elsif ($action eq "quote") { 341 342 my $i = $#urls; 343 if (@args > 0) { 344 $i = shift(@args); 345 } 346 Irssi::active_win()->command("SAY URL: " . $urls[$i]); 347 348 } elsif ($action eq "clear") { 349 350 splice @urls; 351 352 } elsif ($action eq "head") { 353 354 my $i = $#urls; 355 if (@args > 0) { 356 $i = shift(@args); 357 } 358 my $url = $urls[$i]; 359 $url =~ s/^.*?\s.*?\s//; 360 361 do_head($url); 362 363 } else { 364 print_msg "Unknown action"; 365 } 366} 367 368 369Irssi::command_bind('head', 'sig_head'); 370Irssi::command_bind('url', 'sig_url'); 371Irssi::signal_add_first('event privmsg', 'sig_msg'); 372 373Irssi::settings_add_bool("url_log", "url_log", 1); 374Irssi::settings_add_bool("url_log", "url_log_auto_head", 1); 375Irssi::settings_add_bool("url_log", "url_log_db", 0); 376Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost'); 377Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url'); 378Irssi::settings_add_str("url_log", "url_log_db_password", 'nada'); 379Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url"); 380Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c'); 381Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u'); 382Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null'); 383Irssi::settings_add_int("url_log", "url_log_size", 25); 384Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes'); 385Irssi::settings_add_str("url_log", "url_head_format", ' 386Content-Type: %t 387Length: %l bytes 388Server: %s'); 389 390 391Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-', 392 'url_auto_head', '[%gHEAD%n] $0-', 393 'url_list', '[$0] $1 %W$2%n $3-']); 394