1use strict;
2#use warnings;	# Not a default module in perl 5.005
3
4use vars qw($VERSION %IRSSI);
5
6$VERSION = '1.2';
7%IRSSI = (
8	authors		=> 'bwolf',
9	contact		=> 'bwolf@geekmind.org',
10	name		=> 'urlplot',
11	description	=> 'URL grabber with HTML generation and cmd execution',
12	license		=> 'BSD',
13	url		=> 'http://www.geekmind.net',
14	changed		=> 'Sun Jun 16 14:00:13 CEST 2002'
15);
16
17# To read the documentation you may use one of the following commands:
18#
19# pod2man urlplot.pl | nroff -man | more
20# pod2text urlplot.pl | more
21# pod2man urlplot.pl | troff -man -Tps -t > urlplot.ps
22
23=head1 NAME
24
25urlplot
26
27=head1 SYNOPSIS
28
29All URL loggers suck. This one just sucks less.
30
31=head1 DESCRIPTION
32
33urlplot watches your channels for URLs and creates nice HTML logfiles of it.
34Actually it parses normal text and topic changes for URLs. Internally it uses
35two caches to prevent flooding and logging of duplicate URLs. As an additional
36feature urlplot can create CSV datafiles. Logfiles can be created for all
37channels and for separate channels. Logging can be allowed and denied on a per
38channel/nick basis. A lockfile is used to protect the caches and logfiles from
39accessing them by multiple irssi instances. A command allows you to send a
40logged URL to your webbrowser of choice.
41
42The format of the CSV logfiles is as follows:
43date nick channel url
44
45=head1 GETTING STARTED
46
47Copy urlplot.pl intoF< $HOME/.irssi/scripts> and create the necessary
48directories withC< mkdir -p>F< $HOME/.irssi/urlplot/urls>.
49Look for the settingsC< url_log_basedir> andC< url_db_basedir> if you want to
50change the directories urlplot will populate with files.
51Follow the documentation and configure urlplot to fit your needs.
52
53=head1 COMMANDS
54
55=head2 /url <integer>
56
57Executes the commandC< url_command> with an URL from the cache as its
58argument. If no number has been specified it defaults to nth URL logged which
59references the most recently logged URL.
60
61=head2 /url -list
62
63Displays a list of all logged URLs.
64
65=head2 /url -clearcache
66
67Clears the cache databases.
68
69=head /url -showlog
70
71ExecutesC< url_command> withC< url_navigate> as its argument. It can be used
72to display the main logfile in your favourite webbrowser.
73
74=head1 SETTINGS
75
76=head2 Pathnames
77
78Please note that you can't use $HOME or any environment variables in the
79settings because irssi/urlplot isn't a shell ;)
80
81=head2 /set url_command <string>
82
83Command to be executed to display an URL (see /url). The command string should
84contain the sequence C<__URL__> which will be replaced by a certain URL.
85
86The default is:
87C< mozilla -remote "openURL(__URL__)" E<gt> /dev/null 2E<gt>&1 || \ >
88C< mozilla "__URL__"& >
89
90This will send a certain URL to mozilla or it will start mozilla if it is not
91already there. The string can be anything. For example I use the following:
92C< ssh host /home/user/bin/mozopenurl "'__URL__'" >/dev/null 2>&1 &>
93where mozopenurl is a shell script that contains similar code as the mozilla
94-remote example above.
95
96=head2 /set url_cache_max <integer>
97
98Specifies the maximum count of items which will be held in the persisten URL
99caches. A value of zero disables automatic cache resizing (round-robbin). The
100default is to keep the last 90 URLs.
101
102=head2 /set url_log_basedir <path>
103
104Specifies the logging base directory used to create the log files beneath it.
105The default isF< $HOME/.irssi/urlplot/urls/>. You have to create directories
106by yourself:C< mkdir -p>F< $HOME/.irssi/urlplot/urls>.
107
108=head2 /set url_log_file_name <relative-filename>
109
110Defines the filename of the full logfile.  It will be passed to I<
111strftime(3)>. This can be usefull to create logfiles with a timestamp.
112The file will be created relative toC< url_log_basedir>. The default
113isF< ircurls.html>.
114
115=head2 /set url_chan_prefix <string>
116
117Defines the filename prefix for channel logfiles. The leadingC< # >of the
118channel name will be replaced by this prefix. It will be passed to
119I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
120default isF< chan_>.
121
122=head2 /set url_chan_logging <bool>
123
124Enables or disable channel logging globally.
125The default isC< ON>.
126
127=head2 /set url_log_csv_file_name <relative-filename>
128
129Defines the filename of the full CSV logfile. It will be passed to
130I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
131default isF< ircurls.csv>.
132
133=head2 /set url_log_csv_file_max_size <integer>
134
135Defines the maximum size of the full CSV logfile. If it reaches the specified
136maximum size in bytes it will be simply resized to zero. The default isC< 30*1024>
137bytes.
138
139=head2 /set url_log_csv_separator <string>
140
141Defines the separator used as a delimeter for the fields of the CSV files.
142The default isC< |>.
143
144=head2 /set url_csv_logging <bool>
145
146Conditionally turns on or off CSV logging for the full logfile. The default
147isC< OFF>.
148
149=head2 /set url_csv_chan_logging <bool>
150
151Conditionally turns on or off CSV logging of the channel logfiles. The default isC< OFF>.
152
153=head2 /set url_time_format <string>
154
155Specifies the time format that will be passed toI< strftime(3)> to produce an
156ASCII representation of the time/date when an URL was grabbed. It will be used
157in the logfiles. The default isC< %Y:%m:%d - %H:%M:%S>.
158
159=head2 /set url_log_file_max_size <integer>
160
161Defines the maximum size of the full logfile and the channel logfile. If it
162reaches the specified maximum size in bytes it will be simply resized to zero.
163The default isC< 30*1024> bytes.
164
165=head2 /set url_log_file_autoreload_time <integer>
166
167Intervall in seconds used for the HTML logfile header. The logfile reloads
168itself every N seconds. The default isC< 90> seconds.
169
170=head2 /set url_db_basedir <path>
171
172Specifies the database base directory where two database files and a lockfile
173will be created. The default isF< $HOME/.irssi/urlplot>. You have to create
174the directory by yourself.
175
176=head2 /set url_db_cache_a_filename <relative-filename>
177
178Defines the filename of the index URL database. The file will be created
179relative toC< url_db_basedir>. The default isF< a_cache>.
180
181=head2 /set url_db_cache_h_filename <relative-filename>
182
183Defines the filename of the hash URL database. The file will be created
184relative toC< url_db_basedir>. The default isF< h_cache>.
185
186=head2 /set url_db_lock_filename <relative-filename>
187
188Defines the filename of the lockfile used to lock all logfiles and the cache
189databases. It will be created relative toC< url_db_basedir>. The default
190isF< lockfile>.
191
192=head2 /set url_policy_default <allow|deny>
193
194Specifies the default policy that will be used to decide if logging ist
195permitted for a certain nick or channel. This can be eitherC< allow>
196orC< deny>. If you set this toC< deny> you will have to allow explicitly those
197channels and nicks for which logging should be permitted. In contrast if you
198set it to allow, you can deny logging for certain nicks and channels.
199The keysC< url_policy_chans> andC< url_policy_nicks> control the allow, deny
200behaviour depending onC< url_policy_default>. The default isC< allow> which
201permits logging of all channels and nicks.
202
203=head2 /set url_policy_chans <string>
204
205Specifies those channels for whoom logging is permitted or denied. Multiple
206channels may be specified by usingC< ,>C< ;>C< :> or a space to separate the
207items.
208
209=head2 /set url_policy_nicks <string>
210
211SeeC< url_policy_chans> and replace the word channel by nick.
212
213=head2 /set url_navigate <string>
214
215ExecutesC< url_command> withC< url_navigate> as its argument. It can be used
216to display the main logfile in your favourite webbrowser. Because you may pass
217this command at anytime to your webbrowser it will not be passed to strftime.
218Thus you can only specify a static file here.
219
220=head1 AUTHOR
221
222Marcus Geiger <bwolf@geekmind.org>
223
224=cut
225
226use integer;
227use Irssi;
228use POSIX qw(strftime);
229use Fcntl qw(:DEFAULT :flock);
230use DB_File;
231
232# Regexps
233sub URL_SCHEME_REGEX()			{ '(http|ftp|https|news|irc)' }
234sub URL_GUESS_REGEX()			{ '(www|ftp)' }
235sub URL_BASE_REGEX()			{ '[a-z0-9_\-+\\/:?%.&!~;,=\#<>]' }
236
237# Other
238sub BACKWARD_SEEK_BYTES()		{ 130 }
239sub LOG_FILE_MARKER()			{ '<!-- bottom-line -->' }
240
241# Keys for settings
242sub KEY_URL_COMMAND()			{ 'url_command' }
243sub KEY_URL_CACHE_MAX()			{ 'url_cache_max' }
244sub KEY_URL_LOG_BASEDIR()		{ 'url_log_basedir' }
245sub KEY_URL_LOG_FILE_NAME()		{ 'url_log_file_name' }
246sub KEY_URL_CHAN_PREFIX()		{ 'url_chan_prefix' }
247sub KEY_URL_CHAN_LOGGING()		{ 'url_chan_logging' }
248sub KEY_URL_LOG_CSV_FILE_NAME()		{ 'url_log_csv_file_name' }
249sub KEY_URL_LOG_CSV_FILE_MAX_SIZE() 	{ 'url_log_csv_file_max_size' }
250sub KEY_URL_LOG_CSV_SEPARATOR()		{ 'url_log_csv_separator' }
251sub KEY_URL_CSV_LOGGING()		{ 'url_csv_logging' }
252sub KEY_URL_CSV_CHAN_LOGGING()		{ 'url_csv_chan_logging' }
253sub KEY_URL_TIME_FORMAT()		{ 'url_time_format' }
254sub KEY_URL_LOG_FILE_MAX_SIZE()		{ 'url_log_file_max_size' }
255sub KEY_URL_LOG_FILE_AUTORELOAD_TIME()	{ 'url_log_file_autoreload_time' }
256sub KEY_URL_DB_BASEDIR()		{ 'url_db_basedir' }
257sub KEY_URL_DB_CACHE_A_FILENAME()	{ 'url_db_cache_a_filename' }
258sub KEY_URL_DB_CACHE_H_FILENAME()	{ 'url_db_cache_h_filename' }
259sub KEY_URL_DB_LOCK_FILENAME()		{ 'url_db_lock_filename' }
260sub KEY_URL_POLICY_DEFAULT()		{ 'url_policy_default' }
261sub KEY_URL_POLICY_CHANS()		{ 'url_policy_chans' }
262sub KEY_URL_POLICY_NICKS()		{ 'url_policy_nicks' }
263sub KEY_URL_NAVIGATE()			{ 'url_navigate' }
264
265# Defaults
266sub DEF_URL_COMMAND() {
267	'mozilla -remote "openURL(__URL__)" > /dev/null 2>&1 || mozilla "__URL__"&' }
268sub DEF_URL_CACHE_MAX()			{ 90 }
269sub DEF_URL_LOG_FILE_AUTORELOAD_TIME()	{ 120 }
270sub DEF_URL_TIME_FORMAT()		{ '%Y:%m:%d - %H:%M:%S' }
271sub DEF_URL_DO_FILE_RESIZE()		{ '0' }
272sub DEF_URL_LOG_FILE_MAX_SIZE()		{ 1024 * 30 }
273sub DEF_URL_LOG_BASEDIR()		{ '.irssi/urlplot/urls/' }
274sub DEF_URL_LOG_FILE_NAME()		{ 'ircurls.html' }
275sub DEF_URL_CHAN_PREFIX()		{ 'chan_' }
276sub DEF_URL_CHAN_LOGGING()		{ '1' }
277sub DEF_URL_LOG_CSV_FILE_NAME()		{ 'ircurls.csv' }
278sub DEF_URL_LOG_CSV_FILE_MAX_SIZE()	{ 1024 * 30 }
279sub DEF_URL_LOG_CSV_SEPARATOR()		{ '|' }
280sub DEF_URL_CSV_LOGGING()		{ '' }
281sub DEF_URL_CSV_CHAN_LOGGING()		{ '' }
282sub DEF_URL_DB_BASEDIR()		{ '.irssi/urlplot/' }
283sub DEF_URL_DB_CACHE_A_FILENAME()	{ 'a_cache' }
284sub DEF_URL_DB_CACHE_H_FILENAME()	{ 'h_cache' }
285sub DEF_URL_DB_LOCK_FILENAME()		{ 'lockfile' }
286sub DEF_URL_POLICY_DEFAULT()		{ 'allow' }
287sub DEF_URL_POLICY_CHANS()		{ '' }
288sub DEF_URL_POLICY_NICKS()		{ '' }
289sub DEF_URL_NAVIGATE()			{ '.irssi/urlplot/urls/ircurls.html' }
290
291sub print_full_log_file_template {
292	my ($fh, $reload) = @_;
293	print $fh <<EOT;
294<?xml version="1.0" encoding="iso-8859-1"?>
295	<!DOCTYPE html
296		PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
297		"DTD/xhtml1-strict.dtd">
298<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
299	<head>
300		<title>IRC-URLs</title>
301		<meta http-equiv="cache-control" content="no-cache" />
302		<meta http-equiv="refresh" content="$reload;" />
303		<style type="text/css">
304		<!--
305			.small { font-size: small; }
306			.xsmall { font-size: x-small; }
307		-->
308		</style>
309	</head>
310	<body>
311		<h1>IRC-URLs</h1>
312		<p class="xsmall">
313			Visit <a href="http://www.geekmind.net">geekmind.net</a>
314		</p>
315		<p>This page reloads itself every $reload seconds.</p>
316		<p>
317			<a name="top" />
318			<a class="small" href="#bottom">Page bottom</a>
319			<br />
320			<br />
321		</p>
322		<table rules="rows" frame="void" width="100%" cellpadding="5">
323			<tr align="left">
324				<th><b>Date/Time</b></th>
325				<th><b>Nick</b></th>
326				<th><b>Channel/Nick</b></th>
327				<th><b>URL</b></th>
328			</tr>
329EOT
330}
331
332sub print_chan_log_file_template {
333	my ($fh, $reload, $channel, $full_log) = @_;
334	print $fh <<EOT;
335<?xml version="1.0" encoding="iso-8859-1"?>
336	<!DOCTYPE html
337		PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
338		"DTD/xhtml1-strict.dtd">
339<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
340	<head>
341		<title>IRC-URLs of $channel</title>
342		<meta http-equiv="cache-control" content="no-cache" />
343		<meta http-equiv="refresh" content="$reload;" />
344		<style type="text/css">
345		<!--
346			.small { font-size: small; }
347			.xsmall { font-size: x-small; }
348		-->
349		</style>
350	</head>
351	<body>
352		<h1>IRC-URLs of $channel</h1>
353		<p class="xsmall">
354			Visit <a href="http://www.geekmind.net">geekmind.net</a>
355		</p>
356		<p>This page reloads itself every $reload seconds.</p>
357		<p><a href="$full_log">Complete</a> listing.</p>
358		<p>
359			<a name="top" />
360			<a class="small" href="#bottom">Page bottom</a>
361			<br />
362			<br />
363		</p>
364		<table rules="rows" frame="void" width="100%" cellpadding="5">
365			<tr align="left">
366				<th><b>Date/Time</b></th>
367				<th><b>Nick</b></th>
368				<th><b>URL</b></th>
369			</tr>
370EOT
371}
372
373sub LOG_FILE_TAIL () {
374	return <<"EOT";
375
376			@{[ LOG_FILE_MARKER ]}
377		</table>
378		<p>
379			<a class="small" href="#top">Page top</a>
380			<a name="bottom" />
381		</p>
382	</body>
383</html>
384EOT
385}
386
387sub print_chan_log_file_entry {
388	my ($fh, $date, $nick, $channel, $url) = @_;
389	print $fh <<EOURL;
390			<tr>
391				<td>$date</td>
392				<td><em>$nick</em></td>
393				<td><a href=\"$url\">$url</a></td>
394			</tr>
395EOURL
396	print $fh LOG_FILE_TAIL;
397};
398
399sub print_full_log_file_entry {
400	my ($fh, $date, $nick, $channel, $chan_log, $url) = @_;
401	print $fh <<EOURL;
402			<tr>
403				<td>$date</td>
404				<td><em>$nick</em></td>
405				<td><a href="$chan_log">$channel</a></td>
406				<td><a href=\"$url\">$url</a></td>
407			</tr>
408EOURL
409	print $fh LOG_FILE_TAIL;
410}
411
412sub p_error { # Error printing (directly to the current window)
413	Irssi::print("urlplot: @_");
414}
415
416sub p_normal { # Normal printing (to the msg window)
417	Irssi::print("@_", MSGLEVEL_MSGS+MSGLEVEL_NOHILIGHT);
418}
419
420sub scan_url {
421	my $rawtext = shift;
422	return $1 if $rawtext =~ m|(@{[ URL_SCHEME_REGEX ]}://@{[ URL_BASE_REGEX ]}+)|io;
423	# The URL misses a scheme, try to be smart
424	if ($rawtext =~ m|@{[ URL_GUESS_REGEX ]}\.@{[ URL_BASE_REGEX ]}+|io) {
425		my $preserve = $&;
426		return "http://$preserve" if $1 =~ /^www/;
427		return "ftp://$preserve"  if $1 =~ /^ftp/;
428	}
429	return undef;
430}
431
432sub aquire_lock {
433	my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
434		|| die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
435	my $lockfile = Irssi::settings_get_str(KEY_URL_DB_LOCK_FILENAME)
436		|| die "missing setting for @{[ KEY_URL_DB_LOCK_FILENAME ]}";
437
438	local *LOCK_F;
439	my $fh;
440	$db_base .= '/' if $db_base !~ m#/$#;
441	$lockfile = "${db_base}${lockfile}";
442
443	die "directory $db_base doesn't exist or isn't readable"
444		unless -d $db_base and -r $db_base;
445
446	sysopen(LOCK_F, $lockfile, O_RDONLY | O_CREAT)
447		|| die "can't open/create lockfile $lockfile: $!";
448	flock(LOCK_F, LOCK_EX | LOCK_NB)
449		|| die "can't exclusively lock $lockfile: $!";
450	# Can't pass back localized typeglob reference
451	$fh = *LOCK_F;
452	return $fh;
453}
454
455sub open_caches {
456	my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
457		|| die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
458	my $dbfile_a = Irssi::settings_get_str(KEY_URL_DB_CACHE_A_FILENAME)
459		|| die "missing setting for @{[ KEY_URL_DB_CACHE_A_FILENAME ]}";
460	my $dbfile_h = Irssi::settings_get_str(KEY_URL_DB_CACHE_H_FILENAME)
461		|| die "missing setting for @{[ KEY_URL_DB_CACHE_H_FILENAME ]}";
462
463	my (@cache, %cache);
464	$db_base .= '/' if $db_base !~ m#/$#;
465	$dbfile_a = "${db_base}${dbfile_a}";
466	$dbfile_h = "${db_base}${dbfile_h}";
467
468	die "directory $db_base doesn't exist or isn't readable"
469		unless -d $db_base and -r $db_base;
470
471	tie @cache, 'DB_File', $dbfile_a, O_RDWR | O_CREAT, 0666, $DB_RECNO
472		or die "can't tie urlcache db $dbfile_a: $!";
473	tie %cache, 'DB_File', $dbfile_h, O_RDWR | O_CREAT, 0666
474		or die "can't tie urlcache db $dbfile_h: $!";
475	return \(@cache, %cache);
476}
477
478sub create_chan_template {
479	my ($full_log, $file, $channel) = @_;
480	my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
481	local *FH;
482	open(FH, ">", $file)
483		|| die "can't create logfile $file: $!";
484	print_chan_log_file_template(\*FH, $reload, $channel, $full_log);
485	print FH LOG_FILE_TAIL;
486	close(FH);
487}
488
489sub create_full_template {
490	my $file = shift;
491	my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
492	local *FH;
493	open(FH, ">", $file)
494		|| die "can't create logfile $file: $!";
495	print_full_log_file_template(\*FH, $reload);
496	print FH LOG_FILE_TAIL;
497	close(FH);
498}
499
500sub create_csv_file {
501	my $file = shift;
502	open(FH, ">", $file)
503		|| die "can't create $file: $!";
504	close FH;
505}
506
507sub log_csv {
508	my $csv_log = shift;
509	my $sep = Irssi::settings_get_str(KEY_URL_LOG_CSV_SEPARATOR);
510	my $fields = join $sep, @_;
511	local *FH;
512	open(FH, ">>", $csv_log)
513		|| die "can't open $csv_log: $!";
514	print FH "$fields\n";
515	close FH;
516}
517
518sub position_log_file {
519	my $file = shift;
520	my ($fh, $pos, $buf, @lines, $off, $got_it);
521	local *FH;
522	my $hint = "Conside manual removal of this file";
523	sysopen(FH, $file, O_RDWR)
524		|| die "can't open $file: $!";
525	$pos = sysseek(FH, 0, 2)
526		|| die "can't seek to EOF in $file. ${hint}: $!";
527	$pos -= BACKWARD_SEEK_BYTES;
528	sysseek(FH, $pos, 0)
529		|| die "can't seek backwards to $pos in $file. ${hint}: $!";
530	sysread(FH, $buf, 2048)
531		|| die "can't read rest of $file. ${hint}: $!";
532	$off = 0;
533	@lines = split /\n/, $buf;
534	for (@lines) {
535		$off += length;
536		$off += 1;
537		chomp;
538		next if /^$/;
539		if (/@{[ LOG_FILE_MARKER ]}/io) {
540			$got_it = 1;
541			$off -= length;
542			$off -= 1;
543			last;
544		}
545	}
546	die "Can't locate @{[ LOG_FILE_MARKER ]} in $file. ${hint}"
547		unless $got_it;
548	$pos += $off;
549	sysseek(FH, $pos, 0)
550		|| die "Can't seek to $pos in $file. ${hint}: $!";
551	# Can't pass back localized typeglob reference
552	$fh = *FH;
553	return $fh;
554}
555
556sub log_url {
557	my ($nick, $channel, $url) = @_;
558	my $log_base =  Irssi::settings_get_str(KEY_URL_LOG_BASEDIR)
559		|| die "missing setting for @{[ KEY_URL_LOG_BASEDIR ]}";
560	my $fullfile = Irssi::settings_get_str(KEY_URL_LOG_FILE_NAME)
561		|| die "missing setting for @{[ KEY_URL_LOG_FILE_NAME ]}";
562	my $csvfile = Irssi::settings_get_str(KEY_URL_LOG_CSV_FILE_NAME)
563		|| die "missing setting for @{[ KEY_URL_LOG_CSV_FILE_NAME ]}";
564	my $csv_max = Irssi::settings_get_int(KEY_URL_LOG_CSV_FILE_MAX_SIZE);
565	my $csv_logging = Irssi::settings_get_bool(KEY_URL_CSV_LOGGING);
566	my $csv_chan_logging = Irssi::settings_get_bool(KEY_URL_CSV_CHAN_LOGGING);
567	my $time_fmt = Irssi::settings_get_str(KEY_URL_TIME_FORMAT)
568		|| die "missing setting for @{[ KEY_URL_TIME_FORMAT ]}";
569	my $max = Irssi::settings_get_int(KEY_URL_LOG_FILE_MAX_SIZE);
570	my $chan_prefix = Irssi::settings_get_str(KEY_URL_CHAN_PREFIX)
571		|| die "missing setting for @{[ KEY_URL_CHAN_PREFIX ]}";
572	my $chan_logging = Irssi::settings_get_bool(KEY_URL_CHAN_LOGGING);
573
574	my @curr_time = localtime(time());
575	$log_base .= '/' if $log_base !~ m#/$#;
576
577	die "directory $log_base doesn't exist or isn't readable"
578		unless -d $log_base and -r $log_base;
579
580	# Make channel filename
581	my $tmp = POSIX::strftime($chan_prefix, @curr_time);
582	my $chan_fname = lc $channel;
583	$chan_fname =~ s/^#/$tmp/;
584	my $chan_log = "${log_base}${chan_fname}.html";
585
586	# Make full filename
587	$tmp = POSIX::strftime($fullfile, @curr_time);
588	my $full_fname = $tmp;
589	my $full_log = $log_base . $tmp;
590
591	# Replace spaces in date string to show up as '&#160;' to prevent line
592	# breaks.
593	my $date = POSIX::strftime($time_fmt, @curr_time);
594	my $html_date = $date;
595	$html_date =~ s/ /\&#160;/g;
596
597	my $fh;
598
599	# Channel logging
600	if ($chan_logging) {
601		create_chan_template $full_fname, $chan_log, $channel
602			if not -r $chan_log or ($max > 0 and (stat($chan_log))[7] > $max);
603		$fh = undef;
604		$fh = position_log_file $chan_log;
605		print_chan_log_file_entry($fh, $html_date, $nick, $channel, $url);
606		close $fh;
607	}
608
609	# Full logging
610	create_full_template $full_log
611		if not -r $full_log or ($max > 0 and (stat($full_log))[7] > $max);
612	$fh = undef;
613	$fh = position_log_file $full_log;
614	print_full_log_file_entry($fh, $html_date, $nick, $channel,
615		"${chan_fname}.html", $url);
616	close $fh;
617
618	# CSV logging
619	if ($csv_logging) {
620		$tmp = POSIX::strftime($csvfile, @curr_time);
621		my $log = $log_base . $tmp;
622		create_csv_file $log
623			if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max);
624		log_csv($log, $date, $nick, $channel, $url);
625	}
626
627	# CSV channel logging
628	if ($csv_chan_logging) {
629		my $log = "${log_base}${chan_fname}.csv";
630		create_csv_file $log
631			if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max);
632		log_csv($log, $date, $nick, $channel, $url);
633	}
634}
635
636sub mk_home($) {
637	my $arg = shift;
638	return "$ENV{HOME}/$arg";
639}
640
641sub logging_permited {
642	my ($nick, $chan_or_nick) = @_;
643	my $default_policy = Irssi::settings_get_str(KEY_URL_POLICY_DEFAULT)
644		|| die "missing setting for @{[ KEY_URL_POLICY_DEFAULT ]}";
645	my $chans = Irssi::settings_get_str(KEY_URL_POLICY_CHANS);
646	my $nicks = Irssi::settings_get_str(KEY_URL_POLICY_NICKS);
647	my @policy_chans = split /[,;: ]/, $chans;
648	my @policy_nicks = split /[,;: ]/, $nicks;
649	my $permit;
650
651	if ($default_policy eq 'deny') {
652		# logging must be explicitly permited
653		$permit = 0;
654		for (@policy_chans) {
655			return 1 if $_ eq $chan_or_nick;
656		}
657		for (@policy_nicks) {
658			return 1 if $_ eq $nick;
659		}
660	} elsif ($default_policy eq 'allow') {
661		# logging must be explicitly denied
662		$permit = 1;
663		for (@policy_chans) {
664			return 0 if $_ eq $chan_or_nick;
665		}
666		for (@policy_nicks) {
667			return 0 if $_ eq $nick;
668		}
669	} else {
670		p_error("setting @{[ KEY_URL_POLICY_DEFAULT ]} can be either " .
671			"'allow' or 'deny'");
672		return undef;
673	}
674	return $permit;
675}
676
677sub do_locked {
678	my $f = shift or die "missing function argument " . caller;
679	my $lockf;
680	eval { $lockf = aquire_lock };
681	if ($@) {
682		p_error("$@");
683		return;
684	}
685	eval { $f->(@_) };
686	p_error("$@") if $@;
687	eval { close $lockf };
688}
689
690sub do_with_caches {
691	my $f = shift or die "missing function argument " . caller;
692	my ($cache_a, $cache_h) = ();
693	eval { ($cache_a, $cache_h) = open_caches };
694	if ($@) {
695		p_error("$@");
696		eval { untie %$cache_h } if defined $cache_h;
697		eval { untie @$cache_a } if defined $cache_a;
698		return;
699	}
700	eval { $f->($cache_a, $cache_h, @_) };
701	p_error("$@") if $@;
702	eval { untie %$cache_h };
703	eval { untie @$cache_a };
704}
705
706sub url_msg_log {
707	my ($cache_a, $cache_h, $nick, $chan_or_nick, $url) = @_;
708	my ($cache_size, $tmp);
709	my $max_cache = Irssi::settings_get_int(KEY_URL_CACHE_MAX);
710
711	unless (exists $cache_h->{$url}) {
712		$cache_size = scalar(@$cache_a) + 1;
713		$cache_h->{$url} = '1';
714		# push the URL to the end of the file seems to work better on
715		# some systems in contrast to unshift.
716		push @$cache_a, $url;
717		if ($max_cache > 0 && $cache_size > $max_cache) {
718			$tmp = shift @$cache_a;
719			delete $cache_h->{$tmp};
720		}
721		log_url($nick, $chan_or_nick, $url);
722	}
723}
724
725sub url_topic {
726	my ($server, $channel, $topic, $nick, $hostmask) = @_;
727	url_message($server, $topic, $nick, $hostmask, $channel);
728}
729
730sub url_message {
731	my ($server, $rawtext, $nick, $hostmask, $channel) = @_;
732	my ($url, $permit, $chan_or_nick);
733
734	if (defined($url = scan_url($rawtext))) {
735		$chan_or_nick = defined $channel ? $channel : $server->{nick};
736		if (defined($permit = logging_permited($nick, $chan_or_nick)) && $permit) {
737			do_locked(\&do_with_caches, \&url_msg_log, $nick, $chan_or_nick, $url);
738		}
739	}
740}
741
742sub url_cmd_show {
743	my ($cache_a, $cache_h) = @_;
744	my $n = 0;
745	p_normal("urlplot: total of " . scalar(@$cache_a) . " URLs");
746	foreach my $url (@$cache_a) {
747		 p_normal(sprintf("%02d - %s", $n++, $url));
748	}
749}
750
751sub url_cmd_clearcaches {
752	my ($cache_a, $cache_h) = @_;
753	@$cache_a = ();
754	%$cache_h = ();
755}
756
757sub url_cmd_real_navigate {
758	my ($url) = @_;
759	die 'no URLs captured so far' unless $url;
760	my $url_cmd = Irssi::settings_get_str(KEY_URL_COMMAND)
761		|| die "missing setting for @{[ KEY_URL_COMMAND ]}";
762	unless ($url_cmd =~ s/__URL__/$url/g) {
763		die "setting url_cmd doesn't contain an URL placeholder '__URL__'";
764	}
765	system($url_cmd);
766}
767
768sub url_cmd_navigate {
769	my ($cache_a, $cache_h, $n) = @_;
770	my ($len, $url) = scalar @$cache_a;
771	unless (defined $n) {
772		$n = $len > 0 ? $len - 1 : $len;
773	}
774	die "no such URL; I've only $len" unless $n < $len;
775	$url = $cache_a->[$n];
776	die 'no URLs captured so far' unless $url;
777	url_cmd_real_navigate $url;
778}
779
780sub url_command {
781	my ($data, $server, $witem) = @_;
782	$_ = $data;
783	if (/^-list/) {
784		do_locked(\&do_with_caches, \&url_cmd_show);
785	} elsif (/^-clearcache/) {
786		do_locked(\&do_with_caches, \&url_cmd_clearcaches);
787	} elsif (/^-showlog/) {
788		my $nav_url = Irssi::settings_get_str(KEY_URL_NAVIGATE)
789			|| die "missing setting for @{[ KEY_URL_NAVIGATE ]}";
790		url_cmd_real_navigate $nav_url;
791	} else {
792		my $n;
793		if (/^(\d+)/) {
794			$n = $1;
795			if ($n < 0) {
796				p_error("argument must be a positive integer");
797				return;
798			}
799		} elsif (/^$/) {
800			$n = undef;
801		} else {
802			p_error("usage for /url [-list|-showlog|-clearcache|<digit>]");
803			return;
804		}
805		do_locked(\&do_with_caches, \&url_cmd_navigate, $n);
806	}
807}
808
809Irssi::signal_add_last('message public', 'url_message');
810Irssi::signal_add_last('message private', 'url_message');
811Irssi::signal_add_last('message topic', 'url_topic');
812Irssi::command_bind('url', 'url_command');
813
814Irssi::settings_add_str('misc', KEY_URL_COMMAND, DEF_URL_COMMAND);
815Irssi::settings_add_int('misc', KEY_URL_CACHE_MAX, DEF_URL_CACHE_MAX);
816Irssi::settings_add_str('misc', KEY_URL_LOG_BASEDIR, mk_home(DEF_URL_LOG_BASEDIR));
817Irssi::settings_add_str('misc', KEY_URL_LOG_FILE_NAME, DEF_URL_LOG_FILE_NAME);
818Irssi::settings_add_str('misc', KEY_URL_CHAN_PREFIX, DEF_URL_CHAN_PREFIX);
819Irssi::settings_add_bool('misc', KEY_URL_CHAN_LOGGING, DEF_URL_CHAN_LOGGING);
820Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_FILE_NAME, DEF_URL_LOG_CSV_FILE_NAME);
821Irssi::settings_add_int('misc', KEY_URL_LOG_CSV_FILE_MAX_SIZE, DEF_URL_LOG_CSV_FILE_MAX_SIZE);
822Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_SEPARATOR, DEF_URL_LOG_CSV_SEPARATOR);
823Irssi::settings_add_bool('misc', KEY_URL_CSV_LOGGING, DEF_URL_CSV_LOGGING);
824Irssi::settings_add_bool('misc', KEY_URL_CSV_CHAN_LOGGING, DEF_URL_CSV_CHAN_LOGGING);
825Irssi::settings_add_str('misc', KEY_URL_TIME_FORMAT, DEF_URL_TIME_FORMAT);
826Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_MAX_SIZE, DEF_URL_LOG_FILE_MAX_SIZE);
827Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_AUTORELOAD_TIME,
828				DEF_URL_LOG_FILE_AUTORELOAD_TIME);
829Irssi::settings_add_str('misc', KEY_URL_DB_BASEDIR, mk_home(DEF_URL_DB_BASEDIR));
830Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_A_FILENAME, DEF_URL_DB_CACHE_A_FILENAME);
831Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_H_FILENAME, DEF_URL_DB_CACHE_H_FILENAME);
832Irssi::settings_add_str('misc', KEY_URL_DB_LOCK_FILENAME, DEF_URL_DB_LOCK_FILENAME);
833
834Irssi::settings_add_str('misc', KEY_URL_POLICY_DEFAULT, DEF_URL_POLICY_DEFAULT);
835Irssi::settings_add_str('misc', KEY_URL_POLICY_CHANS, DEF_URL_POLICY_CHANS);
836Irssi::settings_add_str('misc', KEY_URL_POLICY_NICKS, DEF_URL_POLICY_NICKS);
837Irssi::settings_add_str('misc', KEY_URL_NAVIGATE, 'file://' . mk_home(DEF_URL_NAVIGATE));
838
839#
840# $Log$
841#
842