1# TAB complete words from dictionary
2# for irssi 0.7.99 by Timo Sirainen
# Greatly modified by Erkki Seppälä to build dictionary of said words
4
5use strict;
6use Irssi;
7
# Script version and descriptive metadata, held in package variables
# (declared with "use vars" so they are package globals, not file lexicals).
# NOTE(review): presumably read by Irssi's script loader -- confirm.
use vars qw($VERSION %IRSSI);
$VERSION = "0.1";
%IRSSI = (
    authors     => "Erkki Sepp�l�",
    contact     => "flux\@inside.org",
    name        => "IRC Completion",
    description => "Adds words from IRC to your tab-completion list, plus fixes typos",
    license     => "Public Domain",
    url         => "http://xulfad.inside.org/~flux/software/irssi/",
    changed     => "Thu Feb  7 22:45:55 EET 2002"
);
19
20
# Accepted words in the order they were seen; sig_message pushes onto the end
# and shifts from the front once the configured word limit is exceeded.
my @wordHistory;
# word => number of occurrences currently held in @wordHistory.
my %words;
# word => counter for words that crossed the "permanent" threshold.
# Nothing in this file removes entries from it.
my %permanent;

# Default values for the settings registered at the bottom of the file.
# The handlers re-read the live settings on every call, so these only seed
# the defaults.
# Characters that make up a word; everything else separates words.
my $wordChars = join("", ('a'..'z', '0'..'9', '��'));
# Maximum number of entries kept in @wordHistory.
my $maxWords = 5000;
# Words shorter than this are ignored.
my $minWordLength = 4;
# Words longer than this are ignored.
my $maxWordLength = 80;
# Words longer than this get no typo variants generated.
my $maxTypoLength = 10;
# Percentage of the word limit a word's count must exceed to become permanent.
my $permanentThreshold = 1;

# misspelling => [index of the correct word, reference count].
my %typoWords;
# Next index to hand out in addCorrectWord; indices start at 1.
my $correctWordCounter = 1;
# index => [word, reference count].
my %correctWordsByIndex;
# word => index (reverse lookup of %correctWordsByIndex).
my %correctWordsByWord;
36
# Record one more occurrence of a correctly spelled word.
#
# Arguments: $word - the word as it appeared in a message.
# Returns:   the numeric index identifying $word in %correctWordsByIndex;
#            the existing index if the word is already known, otherwise a
#            newly allocated one.
#
# %correctWordsByIndex maps index => [word, reference count] and
# %correctWordsByWord is the reverse lookup word => index.
sub addCorrectWord {
  my ($word) = @_;

  # Use "||" here: "or" binds looser than "=", so the former
  # "my $index = ... or 0" never applied the default and left $index undef
  # for unknown words.
  my $index = $correctWordsByWord{$word} || 0;
  if ($index > 0) {
    ++$correctWordsByIndex{$index}->[1];
    return $index;
  }

  # First sighting: allocate the next free index with a reference count of 1.
  $index = $correctWordCounter++;
  $correctWordsByIndex{$index} = [$word, 1];
  $correctWordsByWord{$word} = $index;
  return $index;
}
50
# Drop one occurrence of a correctly spelled word.
#
# Arguments: $word - a word previously registered with addCorrectWord.
# Returns:   nothing useful.
#
# The word's reference count in %correctWordsByIndex is decremented; when it
# reaches zero the word is removed from both lookup tables.  Unknown words
# are ignored instead of autovivifying a bogus [undef, -1] entry under an
# undefined index.
sub delCorrectWord {
  my ($word) = @_;
  my $index = $correctWordsByWord{$word};
  return unless defined $index;

  my $entry = $correctWordsByIndex{$index};
  # A missing entry means the two tables are out of sync; clean up the stale
  # reverse mapping as well.
  if (!$entry || --$entry->[1] <= 0) {
    delete $correctWordsByWord{$word};
    delete $correctWordsByIndex{$index};
  }
  return;
}
60
# Signal handler registered for "complete word".
#
# Arguments: $complist   - array reference of completion candidates; it is
#                          rewritten in place.
#            $window, $linestart, $want_space - unused here.
#            $word       - the partial word being completed.
#
# The resulting list is, in order and without duplicates:
#   1. the correct spelling if $word is a known typo,
#   2. the candidates that were already in @$complist,
#   3. every dictionary word (permanent or recent) that starts with $word.
sub sig_complete {
  my ($complist, $window, $word, $linestart, $want_space) = @_;

  my @candidates;

  # Look the typo up under the raw word; the old code looked it up after
  # regex-escaping, which missed any word containing a non-ASCII character.
  if (exists $typoWords{$word}) {
    my $index = $typoWords{$word}->[0];
    # The referenced correct word may already have been evicted; skip it then
    # rather than offering an undefined completion.
    if (defined $index && exists $correctWordsByIndex{$index}) {
      my $correctWord = $correctWordsByIndex{$index}->[0];
      push @candidates, $correctWord if defined $correctWord;
    }
  }

  # \Q...\E keeps characters typed by the user from being read as regex syntax.
  push @candidates, @$complist,
    grep { /^\Q$word\E/ } (keys %permanent, keys %words);

  # Order-preserving de-duplication.  The previous implementation sorted the
  # numeric positions as strings, which scrambled the order as soon as there
  # were more than ten candidates.
  my %seen;
  @$complist = grep { !$seen{$_}++ } @candidates;
  return;
}
84
# Build misspellings of $word made by leaving characters out.
#
# Arguments: $word    - the correctly spelled word.
#            $changes - how many characters to remove in total; values above
#                       1 recurse on each shortened word.
# Returns:   a list of shortened words (duplicates are not removed).
#
# The final character is never the one removed: the loop stops one position
# short of the end of the word.
sub generate_drops {
  my ($word, $changes) = @_;
  my @variants;
  foreach my $pos (0 .. length($word) - 2) {
    my $shorter = $word;
    substr($shorter, $pos, 1, "");
    push @variants,
      $changes > 1 ? generate_drops($shorter, $changes - 1) : $shorter;
  }
  return @variants;
}
99
# Build misspellings of $word made by swapping two neighbouring characters.
#
# Arguments: $word    - the correctly spelled word.
#            $changes - when greater than 1, each swapped word is further
#                       passed to generate_drops with $changes - 1.
#                       NOTE(review): delegating to generate_drops rather
#                       than recursing looks like it may be a copy-paste
#                       slip -- confirm intent.
# Returns:   a list of variants, one per adjacent pair (duplicates and
#            variants equal to $word itself are not filtered out).
sub generate_translations {
  my ($word, $changes) = @_;
  my @variants;
  foreach my $pos (1 .. length($word) - 1) {
    # Swap the characters at $pos - 1 and $pos.
    my $swapped = $word;
    substr($swapped, $pos - 1, 2, scalar reverse(substr($word, $pos - 1, 2)));
    if ($changes > 1) {
      push @variants, generate_drops($swapped, $changes - 1);
    }
    else {
      push @variants, $swapped;
    }
  }
  return @variants;
}
113
# Produce the misspellings tracked for $word.
#
# Arguments: $word - the correctly spelled word (any further arguments are
#                    ignored).
# Returns:   the single-character drops followed by the adjacent-character
#            swaps of $word, or an empty list when $word is longer than the
#            'irccomplete_maximum_typo_length' setting.
sub generate_typos {
  my ($word) = @_;
  my $lengthLimit = Irssi::settings_get_int('irccomplete_maximum_typo_length');

  return () if length($word) > $lengthLimit;
  return (generate_drops($word, 1), generate_translations($word));
}
125
# Message signal handler: harvest words from a line of text.
#
# Arguments: $server  - unused.
#            $message - the text to scan.
# Returns:   1.
#
# $message is split on every character outside the configured word
# characters.  For each word within the configured length limits this:
#   * counts it in %words and promotes it to %permanent once its count
#     exceeds the configured percentage of the history size,
#   * appends it to @wordHistory and registers it via addCorrectWord,
#   * registers each of its typo variants in %typoWords,
#   * evicts the oldest history entries while the history is over the limit,
#     undoing the bookkeeping above for each evicted word.
sub sig_message {
  my ($server, $message) = @_;
  my $maxWords = Irssi::settings_get_int('irccomplete_words');
  my $minWordLength = Irssi::settings_get_int('irccomplete_minimum_length');
  my $maxWordLength = Irssi::settings_get_int('irccomplete_maximum_length');
  my $wordChars = Irssi::settings_get_str("irccomplete_word_characters");
  my $permanentThreshold = Irssi::settings_get_int('irccomplete_permanent_percent');

  foreach my $word (split(/[^$wordChars]/, $message)) {
    next if length($word) < $minWordLength || length($word) > $maxWordLength;

    if (++$words{$word} > $permanentThreshold / 100.0 * $maxWords) {
      if (++$permanent{$word} == 1) {
        #Irssi::printformat(MSGLEVEL_CLIENTNOTICE, 'irccomplete_permanent', $word);
        Irssi::print "Added $word to the list of permanent words";
      }
    }

    push @wordHistory, $word;
    my $wordIndex = addCorrectWord($word);
    foreach my $misword (generate_typos($word, 1)) {
      if (!exists $typoWords{$misword}) {
        # The first word to produce this typo owns the correction.
        $typoWords{$misword} = [$wordIndex, 1];
      } else {
        ++$typoWords{$misword}->[1];
      }
    }

    while (@wordHistory > $maxWords) {
      my $expired = shift @wordHistory;
      if (exists $words{$expired} && --$words{$expired} <= 0) {
        delete $words{$expired};
      }
      foreach my $misword (generate_typos($expired, 1)) {
        # The typo set can differ from the one generated on insertion (for
        # example when the typo length setting changed in between); never
        # autovivify a negative-count entry for a typo that is not tracked.
        next unless exists $typoWords{$misword};
        if (--$typoWords{$misword}->[1] <= 0) {
          delete $typoWords{$misword};
        }
      }
      delCorrectWord($expired);
    }
  }

  return 1;
}
168
# Command handler bound to "irccomplete_typowords": dump internal state.
#
# Prints four lines through Irssi::print:
#   1. the sizes of the history, typo table and both correct-word tables,
#   2. every typo as "typo|index",
#   3. every correct-word entry as "index:[word|count]",
#   4. every reverse mapping as "word:index".
# Returns 1.
sub cmd_typowords {
  my $summary = sprintf("%d words, %d typowords, %dx%d correct words",
                        scalar(@wordHistory),
                        scalar(keys %typoWords),
                        scalar(keys %correctWordsByWord),
                        scalar(keys %correctWordsByIndex));
  Irssi::print($summary);

  my @typoParts;
  foreach my $typo (keys %typoWords) {
    push @typoParts, $typo . "|" . $typoWords{$typo}->[0] . " ";
  }
  Irssi::print(join("", @typoParts));

  my @indexParts;
  foreach my $index (keys %correctWordsByIndex) {
    push @indexParts,
      $index . ":[" . join("|", @{$correctWordsByIndex{$index}}) . "] ";
  }
  Irssi::print(join("", @indexParts));

  my @wordParts;
  foreach my $known (keys %correctWordsByWord) {
    push @wordParts, $known . ":" . $correctWordsByWord{$known} . " ";
  }
  Irssi::print(join("", @wordParts));

  return 1;
}
195
# Format used by the (currently commented-out) printformat call in sig_message.
Irssi::theme_register(['irccomplete_permanent', 'Added $1 to the list of permanent words']);

# User-tunable settings, seeded with the file-level defaults.
Irssi::settings_add_str("misc", "irccomplete_word_characters", $wordChars);
Irssi::settings_add_int("misc", "irccomplete_words", $maxWords);
Irssi::settings_add_int("misc", "irccomplete_minimum_length", $minWordLength);
Irssi::settings_add_int("misc", "irccomplete_maximum_length", $maxWordLength);
Irssi::settings_add_int("misc", "irccomplete_maximum_typo_length", $maxTypoLength);
Irssi::settings_add_int("misc", "irccomplete_permanent_percent", $permanentThreshold);

# These signals all pass the message text as their second argument, which is
# what sig_message expects.
foreach my $sig ("message public", "message private",
                 "message own_public", "message own_private") {
#foreach my $sig ("message own_public", "message own_private") {
  Irssi::signal_add($sig, "sig_message");
}

# "message topic" passes (server, channel, topic, nick, address): the text is
# the THIRD argument.  Hooking sig_message up directly made it harvest words
# from the channel name instead of the topic, so adapt the arguments here.
sub sig_topic {
  my ($server, $channel, $topic) = @_;
  return sig_message($server, $topic);
}
Irssi::signal_add("message topic", "sig_topic");

Irssi::signal_add_last('complete word', 'sig_complete');

Irssi::command_bind("irccomplete_typowords", "cmd_typowords");
214