1# url grabber, yes it sucks
2#
3# infected with the gpl virus
4#
5# Thomas Graf <tgraf@europe.com>
6#
7# version: 0.2
8#
9# Commands:
10#
11#   /URL LIST
12#   /URL CLEAR
13#   /URL OPEN [<nr>]
14#   /URL QUOTE [<nr>]
15#   /URL HEAD [<nr>]            !! Blocking !!
16#   /HEAD <url>                 !! Blocking !!
17#
18# Config Values
19#
20# [url logfile]
21#  url_log                log urls to url_log_file
22#  url_log_file           file to save urls
23#  url_log_format         format in url logfile
24#  url_log_timestamp      format of timestamp in url logfile
25#
26# [url log in memory]
27#  url_log_browser        command to execute to open url, %f will be replaced with the url
28#  url_log_size           keep that many urls in the list
29#
30# [http head stuff]
31#  url_head_format        format of HEAD output
32#  url_auto_head          do a head on every url received
33#  url_auto_head_format   format of auto head output
34#
35#
36# Database installation
37# - create database and user
38# - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
39#      time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255));
40#   or similiar :)
41#
42#
43# todo:
44#
45#  - fix XXX marks
46#  - xml output?
47#  - don't output "bytes" if content-length is not available
48#  - prefix with http:// if no prefix is given
49
50use strict;
51use Irssi;
52use Irssi::Irc;
53use vars qw($VERSION %IRSSI);
54
55$VERSION = "0.2";
56%IRSSI = (
57    authors     => 'Thomas Graf',
58    contact     => 'irssi@reeler.org',
59    name        => 'url_log',
60    description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.',
61    license     => 'GNU GPLv2 or later',
62    url         => 'http://irssi.reeler.org/url/',
63);
64
65use LWP;
66use LWP::UserAgent;
67use HTTP::Status;
68use DBI;
69
70use POSIX qw(strftime);
71
72my @urls;
73my $user_agent = new LWP::UserAgent;
74
75$user_agent->agent("IrssiUrlLog/0.2");
76
77# hmm... stolen..
78# -verbatim- import expand
79sub expand {
80  my ($string, %format) = @_;
81  my ($exp, $repl);
82  $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format));
83  return $string;
84}
85# -verbatim- end
86
87sub print_msg
88{
89    Irssi::active_win()->print("@_");
90}
91
92#
93# open url in brower using url_log_brower command
94#
95sub open_url
96{
97    my ($data) = @_;
98
99    my ($nick, $target, $url) = split(/ /, $data);
100
101    my $pid = fork();
102
103    if ($pid) {
104        Irssi::pidwait_add($pid);
105    } elsif (defined $pid) { # $pid is zero here if defined
106        my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url);
107        # XXX use exec
108        system $data;
109        exit;
110    } else {
111        # weird fork error
112        print_msg "Can't fork: $!";
113    }
114}
115
116sub head
117{
118    my ($url) = @_;
119    my $req = new HTTP::Request HEAD => $url;
120    my $res = $user_agent->request($req);
121    return $res;
122}
123
124#
125# do a HEAD
126#
127sub do_head
128{
129    my ($url) = @_;
130
131    my $res = head($url);
132
133    if ($res->code ne RC_OK) {
134        Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" .
135            $res->status_line());
136    } else {
137
138        my $t = expand(Irssi::settings_get_str("url_head_format"),
139           "u", $url,
140           "t", scalar $res->content_type,
141           "l", scalar $res->content_length,
142           "s", scalar $res->server);
143
144        Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t);
145    }
146}
147
148#
149# called if url is detected, should do a HEAD and print a 1-liner
150#
151sub do_auto_head
152{
153    my ($url, $window) = @_;
154
155    return if ($url !~ /^http:\/\//);
156
157    my $res = head($url);
158
159    if ($res->code ne RC_OK) {
160        $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line());
161    } else {
162
163        my $t = expand(Irssi::settings_get_str("url_auto_head_format"),
164           "u", $url,
165           "c", $res->code,
166           "t", scalar $res->content_type,
167           "l", scalar $res->content_length,
168           "s", scalar $res->server);
169
170        $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t);
171    }
172}
173
174#
175# log url to file
176#
177sub log_to_file
178{
179    my ($nick, $target, $text) = @_;
180    my ($lfile) = glob Irssi::settings_get_str("url_log_file");
181
182    if ( open(LFD, ">>", $lfile) ) {
183
184        my %h = {
185            time => time,
186            nick => $nick,
187            target => $target,
188            url => $text
189        };
190
191        print LFD expand(Irssi::settings_get_str("url_log_format"),
192          "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime),
193          "n", $nick,
194          "t", $target,
195          "u", $text), "\n";
196
197        close LFD;
198    } else {
199        print_msg "Warning: Unable to open file $lfile $!";
200    }
201}
202
203
204#
205# log url to database
206#
207sub log_to_database
208{
209    my ($nick, $target, $text) = @_;
210
211    # this is quite expensive, but...
212    my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"),
213                           Irssi::settings_get_str("url_log_db_user"),
214                           Irssi::settings_get_str("url_log_db_password"))
215    or print_msg "Can't connect to database " . $DBI::errstr;
216
217    if ($dbh) {
218
219        my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," .
220          $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")";
221
222        $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr;
223
224        $dbh->disconnect();
225    }
226}
227
228#
229# head command handler
230#
231sub sig_head
232{
233    my ($cmd_line, $server, $win_item) = @_;
234    my @args = split(' ', $cmd_line);
235
236    my $url;
237
238    if (@args <= 0) {
239
240        if ($#urls eq 0) {
241            return;
242        }
243
244        $url = $urls[$#urls];
245        $url =~ s/^.*?\s.*?\s//;
246    } else {
247        $url = lc(shift(@args));
248    }
249
250    do_head($url);
251}
252
253#
254# msg handler
255#
256sub sig_msg
257{
258    my ($server, $data, $nick, $address) = @_;
259    my ($target, $text) = split(/ :/, $data, 2);
260
261    # very special, but better than just \w::/* and www.*
262    while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) {
263
264        return if ($1 =~ /^\.\./);
265
266        push @urls, "$nick $target $2";
267
268        # XXX resize correctly if delta is > 1
269        if ($#urls >= Irssi::settings_get_int("url_log_size")) {
270            shift @urls;
271        }
272
273        my $ischannel = $server->ischannel($target);
274        my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS;
275        $target = $nick unless $ischannel;
276        my $window = $server->window_find_closest($target, $level);
277
278        if ( Irssi::settings_get_bool("url_log_auto_head") ) {
279            do_auto_head($2, $window);
280        }
281
282        if ( Irssi::settings_get_bool("url_log") ) {
283            log_to_file($nick, $target, $2);
284        }
285
286        if ( Irssi::settings_get_bool("url_log_db") ) {
287            log_to_database($nick, $target, $2);
288        }
289    }
290}
291
292sub print_url_list_item
293{
294    my ($n, $data) = @_;
295    my ($src, $dst, $url) = split(/ /, $data);
296
297    Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url);
298}
299
300#
301# url command handler
302#
303sub sig_url
304{
305    my ($cmd_line, $server, $win_item) = @_;
306    my @args = split(' ', $cmd_line);
307
308    if (@args <= 0) {
309        print_msg "URL LIST [<nr>]       list all url(s)";
310        print_msg "    OPEN [<nr>]       open url in browser";
311        print_msg "    QUOTE [<nr>]      quote url (print to current channel)";
312        print_msg "    HEAD              send HEAD to server";
313        print_msg "    CLEAR             clear url list";
314        return;
315    }
316
317    my $action = lc(shift(@args));
318
319    if ($action eq "list") {
320
321        if (@args > 0) {
322            my $i = shift(@args);
323            print_url_list_item($i, $urls[$i]);
324        } else {
325            my $i = 0;
326            foreach my $l (@urls) {
327                print_url_list_item($i, $l);
328                $i++;
329            }
330        }
331
332    } elsif($action eq "open") {
333
334        my $i = $#urls;
335        if (@args > 0) {
336            $i = shift(@args);
337        }
338        open_url($urls[$i]);
339
340    } elsif ($action eq "quote") {
341
342        my $i = $#urls;
343        if (@args > 0) {
344            $i = shift(@args);
345        }
346        Irssi::active_win()->command("SAY URL: " . $urls[$i]);
347
348    } elsif ($action eq "clear") {
349
350        splice @urls;
351
352    } elsif ($action eq "head") {
353
354        my $i = $#urls;
355        if (@args > 0) {
356            $i = shift(@args);
357        }
358        my $url = $urls[$i];
359        $url =~ s/^.*?\s.*?\s//;
360
361        do_head($url);
362
363    } else {
364        print_msg "Unknown action";
365    }
366}
367
368
369Irssi::command_bind('head', 'sig_head');
370Irssi::command_bind('url', 'sig_url');
371Irssi::signal_add_first('event privmsg', 'sig_msg');
372
373Irssi::settings_add_bool("url_log", "url_log", 1);
374Irssi::settings_add_bool("url_log", "url_log_auto_head", 1);
375Irssi::settings_add_bool("url_log", "url_log_db", 0);
376Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost');
377Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url');
378Irssi::settings_add_str("url_log", "url_log_db_password", 'nada');
379Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url");
380Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c');
381Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u');
382Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null');
383Irssi::settings_add_int("url_log", "url_log_size", 25);
384Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes');
385Irssi::settings_add_str("url_log", "url_head_format", '
386Content-Type: %t
387Length:       %l bytes
388Server:       %s');
389
390
391Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-',
392                       'url_auto_head', '[%gHEAD%n] $0-',
393                       'url_list', '[$0] $1 %W$2%n $3-']);
394