1;;; navi2ch-spamfilter.el --- Navi2ch interface for spamfilter.el
2
3;; Copyright (C) 2003, 2004 by Navi2ch Project
4;; Copyright (C) 2003 http://pc.2ch.net/test/read.cgi/unix/1065246418/38
5
6;; Author: http://pc.2ch.net/test/read.cgi/unix/1065246418/38
7;; Keywords: 2ch, network, spam
8
9;; This file is free software; you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation; either version 2, or (at your option)
12;; any later version.
13
14;; This file is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs; see the file COPYING.  If not, write to
21;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22;; Boston, MA 02111-1307, USA.
23
24;;; Commentary:
25
26;; $B;H$$J}(B
27;; http://www.geocities.co.jp/SiliconValley-PaloAlto/7043/#spamfilter.el
28;; $B$+$i(B spamfilter $B%Q%C%1!<%8$rF~<j$7$F%$%s%9%H!<%k$7$F$*$/!#(B
29;; ~/.navi2ch/init.el $B$G(B
30;; (require 'navi2ch-spamfilter)
31;; $B$H$G$b$7$F!"(BNavi2ch $B$N<B9T;~$KFI$_9~$`$h$&$K$9$k!#(B
32;; $B$^$:$O%3!<%Q%9$N3X=,$r$9$k!#E,Ev$J%9%l$r3+$-!"(Bspam $B$r(B "d" $B$7$F(B hide
33;; $B%^!<%/$rIU$1!"(Bspam $B0J30$O(B hide $B%b!<%I$G(B "d" $B$7$F(B hide $B%^!<%/$,IU$$(B
34;; $B$F$J$$>uBV$K$9$k!#$3$N>uBV$G(B M-x navi2ch-article-register-to-corpus
35;; $B$H$7!"%^!<%/$K1~$8$F%3!<%Q%9$KEPO?$7$F$d$k!#(B
36;; $B$"$kDxEY%3!<%Q%9$,0i$C$?$i(B ~/.navi2ch/init.el $B$K(B
37;; (navi2ch-spamf-enable) $B$r2C$($F<+F0E*$K%U%#%k%?$9$k$h$&$K$9$k!#(B
38;; $B8mG'<1$,>/$J$/$J$C$?$i(B navi2ch-article-auto-spam-register-by-filter
39;; $B$r(B non-nil $B$K@_Dj$7!"%U%#%k%?$N7k2L$K1~$8$F%3!<%Q%9$K:FEPO?$9$k!#(B
40
41;;; Code:
42(defconst navi2ch-spamfilter-ident
43  "$Id$")
44
45(eval-when-compile (require 'cl))
46(require 'spamfilter)
47(require 'navi2ch)
48
49(defconst navi2ch-spamf-preferred-major 1)
50(defconst navi2ch-spamf-preferred-minor 10)
51
52(unless (and (boundp 'spamf-cvs-id)
53	     (string-match "\\([0-9]+\\)\\.\\([0-9]+\\)" spamf-cvs-id)
54	     (let ((major (string-to-number (match-string 1 spamf-cvs-id)))
55		   (minor (string-to-number (match-string 2 spamf-cvs-id))))
56	       (or (> major navi2ch-spamf-preferred-major)
57		   (and (= major navi2ch-spamf-preferred-major)
58			(>= minor navi2ch-spamf-preferred-minor)))))
59  (error "Use spamfilter.el revision %d.%d or later."
60	 navi2ch-spamf-preferred-major
61	 navi2ch-spamf-preferred-minor))
62
63(defvar navi2ch-article-bayesian-save-file-name
64  (expand-file-name "spamfilter" navi2ch-directory))
65
66(defvar navi2ch-article-auto-spam-register-by-filter nil
67  "non-nil $B$N>l9g!"%U%#%k%?$N7k2L$r%3!<%Q%9$K:FEPO?$9$k!#(B
68$B<+F0E*$K1#$5$l$?%l%9$N(B spam $B$C$]$5$OA}2C$7!"1#$5$l$J$+$C$?%l%9$N(B spam
69$B$C$]$5$O8:>/$9$k!#(B
70non-nil $B$N$^$^$G8mH=Dj$rJ|CV$9$k$H8m$C$?3X=,$r$7$F$7$^$&$N$GCm0U!#(B")
71
72(defvar navi2ch-article-manual-spam-ratio
73  (if navi2ch-article-auto-spam-register-by-filter 2 1)
74  "$B<jF0$G(B spam $BEPO?(B / $B2r=|$r$7$?:]$NG\N(!#(B
75navi2ch-article-message-filter-by-bayesian $B$G<+F0EPO?$9$k>l9g$O(B 2 $B0J>e(B
76$B$K$7$F$*$$$?J}$,$$$$$H;W$&!#(B")
77
78(defvar navi2ch-article-register-normal-message-as-good t
79  "non-nil $B$N>l9g!"%^!<%/$NIU$$$F$J$$%l%9$b(B good $B$H$7$FEPO?$9$k!#(B
80`navi2ch-article-register-to-corpus' $B$b;2>H!#(B")
81
82(defvar navi2ch-spamf-good-corpus
83  (make-spamf-corpus :name "navi2ch-spamf-good-corpus"
84		     :table (make-hash-table :test #'eq)
85		     :message-count 0))
86
87(defvar navi2ch-spamf-bad-corpus
88  (make-spamf-corpus :name "navi2ch-spamf-bad-corpus"
89		     :table (make-hash-table :test #'eq)
90		     :message-count 0))
91
92(defvar navi2ch-article-before-save-corpus-hook nil)
93
94(defvar navi2ch-spamf-additional-token-flag nil
95  "non-nil $B$N>l9g!"%l%9$NF|;~!"HV9fEy$r%H!<%/%s$H$7$FDI2C$9$k!#(B
96$B7P83$G$O!"%3!<%Q%9$,Bg$-$/$J$k3d$K8z2L$OGv$$$H;W$o$l$k!#(B")
97
98(dolist (map (list navi2ch-article-mode-map navi2ch-popup-article-mode-map))
99  (define-key map "\C-c\C-g"
100    'navi2ch-article-add-message-filter-by-bayesian-good)
101  (define-key map "\C-c\C-b"
102    'navi2ch-article-add-message-filter-by-bayesian-spam)
103  (define-key map "\C-c\C-s"
104    'navi2ch-article-show-spam-probability))
105
106(defsubst navi2ch-spamf-register-token (corpus token)
107  (spamf-increase-word-count corpus (spamf-intern token))
108  (incf (spamf-corpus-message-count corpus)))
109
110(defsubst navi2ch-spamf-register-good-token (token)
111  (interactive "MToken: ")
112  (navi2ch-spamf-register-token navi2ch-spamf-good-corpus token))
113
114(defsubst navi2ch-spamf-register-spam-token (token)
115  (interactive "MToken: ")
116  (navi2ch-spamf-register-token navi2ch-spamf-bad-corpus token))
117
118(defsubst navi2ch-spamf-register-token-list (corpus list)
119  (dolist (token list)
120    (spamf-increase-word-count corpus (spamf-intern token)))
121  (incf (spamf-corpus-message-count corpus)))
122
123(defsubst navi2ch-spamf-register-good-token-list (list)
124  (navi2ch-spamf-register-token-list navi2ch-spamf-good-corpus list))
125
126(defsubst navi2ch-spamf-register-spam-token-list (list)
127  (navi2ch-spamf-register-token-list navi2ch-spamf-bad-corpus list))
128
129(defun navi2ch-article-bayesian-tokenizer (alist)
130  (nconc
131   (funcall spamf-tokenize-string-function
132	    (cdr (assq 'data alist)))
133   (if navi2ch-spamf-additional-token-flag
134       (mapcar (lambda (str)
135		 (concat "date:" str))
136	       (split-string (cdr (assq 'date alist)) "[ $B!!(B]+")))
137   (if (string-match "$B"!(B[^ ]+" (cdr (assq 'name alist)))
138       (list (concat "trip:" (match-string 0 (cdr (assq 'name alist))))))
139   (let ((number (or (cdr (assq 'number alist))
140		     (navi2ch-article-get-current-number))))
141     (when (and navi2ch-spamf-additional-token-flag
142		(numberp number))
143       (list (concat "num:"  (number-to-string number)))))
144   (list
145    (concat "mail:" (cdr (assq 'mail alist)))
146    (concat "name:" (cdr (assq 'name alist))))))
147
148(defsubst navi2ch-article-tokenize-current-message ()
149  (navi2ch-article-bayesian-tokenizer
150   (navi2ch-article-get-message
151    (navi2ch-article-get-current-number))))
152
153(defun navi2ch-article-add-message-filter-by-bayesian-good ()
154  (interactive)
155  (dotimes (i navi2ch-article-manual-spam-ratio)
156    (navi2ch-spamf-register-good-token-list
157     (navi2ch-article-tokenize-current-message))))
158
159(defun navi2ch-article-add-message-filter-by-bayesian-spam ()
160  (interactive)
161  (dotimes (i navi2ch-article-manual-spam-ratio)
162    (navi2ch-spamf-register-spam-token-list
163     (navi2ch-article-tokenize-current-message))))
164
165(defsubst navi2ch-article-spam-probability (token)
166  (spamf-sum-spam-probability
167   (mapcar #'cdr (spamf-cutoff-words token
168				     spamf-cutoff-words-limit
169				     navi2ch-spamf-good-corpus
170				     navi2ch-spamf-bad-corpus))))
171
172(defun navi2ch-article-show-spam-probability (&optional prefix)
173  "$B%l%9$N(B spam $B$C$]$5$rI=<($9$k!#(B"
174  (interactive "P")
175  (let* ((token (navi2ch-article-tokenize-current-message))
176	 (prob (navi2ch-article-spam-probability token)))
177    (if prefix
178	(with-output-to-temp-buffer "*spam probability*"
179	  (princ (format "Spam probability: %f\n\n" prob))
180	  (dolist (pair (spamf-cutoff-words token
181					    spamf-cutoff-words-limit
182					    navi2ch-spamf-good-corpus
183					    navi2ch-spamf-bad-corpus))
184	    (prin1 (cons (symbol-name (car pair)) (cdr pair)))
185	    (princ "\n")))
186      (message "Spam probability: %f" prob))))
187
188(defun navi2ch-article-message-filter-by-bayesian (alist)
189  (let ((token (navi2ch-article-bayesian-tokenizer alist)))
190    (if (> (navi2ch-article-spam-probability token)
191	   spamf-spamness-limit)
192	'hide)))
193
194(defun navi2ch-article-save-corpus ()
195  (run-hooks 'navi2ch-article-before-save-corpus-hook)
196  (message "Saving corpus file...")
197  (spamf-save-corpus navi2ch-article-bayesian-save-file-name
198		     navi2ch-spamf-good-corpus
199		     navi2ch-spamf-bad-corpus)
200  (message "Saving corpus file...done"))
201
202(defun navi2ch-article-load-corpus ()
203  (message "Loading corpus file...")
204  (spamf-load-corpus navi2ch-article-bayesian-save-file-name)
205  (message "Loading corpus file...done"))
206
207(defun navi2ch-article-register-to-corpus ()
208  "$B%l%9$N%^!<%/$K1~$8$F8=:_$N%9%l$r%3!<%Q%9$KEPO?$9$k!#(B
209important $B%^!<%/$N%l%9$r(B good $B$K!"(Bhide $B%^!<%/$N%l%9$r(B bad $B$KEPO?$9$k!#(B
210`navi2ch-article-register-normal-message-as-good' $B$,(B non-nil $B$N>l9g$O(B
211$B%^!<%/$NIU$$$F$J$$%l%9$b(B good $B$KEPO?$9$k!#(B"
212  (interactive)
213  (let ((hide (cdr (assq 'hide navi2ch-article-current-article)))
214	(imp (cdr (assq 'important navi2ch-article-current-article))))
215    (dolist (x navi2ch-article-message-list)
216      (let ((num (car x))
217	    (alist (cdr x)))
218	(if (stringp alist)
219	    (setq alist (navi2ch-article-parse-message alist)))
220	(message "registering...%d" num)
221	(cond ((memq num hide)
222	       (navi2ch-spamf-register-spam-token-list
223		(navi2ch-article-bayesian-tokenizer alist)))
224	      ((or (memq num imp)
225		   navi2ch-article-register-normal-message-as-good)
226	       (navi2ch-spamf-register-good-token-list
227		(navi2ch-article-bayesian-tokenizer alist))))))
228    (message "registering...done")))
229
230;; hook and advice
231(add-hook 'navi2ch-save-status-hook #'navi2ch-article-save-corpus)
232(add-hook 'navi2ch-hook #'navi2ch-article-load-corpus)
233
234(defadvice navi2ch-article-apply-message-filters
235  (after navi2ch-article-register-spam-by-filter (alist))
236  (when navi2ch-article-auto-spam-register-by-filter
237    (let ((token (navi2ch-article-bayesian-tokenizer alist)))
238      (if (eq ad-return-value 'hide)
239	  (navi2ch-spamf-register-spam-token-list token)
240	(navi2ch-spamf-register-good-token-list token))))
241  ad-return-value)
242
243(defadvice navi2ch-article-hide-message
244  (before navi2ch-article-hide-message-as-spam)
245  (navi2ch-article-add-message-filter-by-bayesian-spam))
246
247(defadvice navi2ch-article-cancel-hide-message
248  (before navi2ch-article-cancel-hide-message-as-good)
249  (navi2ch-article-add-message-filter-by-bayesian-good))
250
251(defadvice navi2ch-article-next-message
252  (before navi2ch-article-next-message-as-good)
253  (unless navi2ch-article-hide-mode
254    (navi2ch-article-add-message-filter-by-bayesian-good)))
255
256(defconst navi2ch-spamf-advice-list
257  '((navi2ch-article-apply-message-filters
258     after
259     navi2ch-article-register-spam-by-filter)
260    (navi2ch-article-hide-message
261     before
262     navi2ch-article-hide-message-as-spam)
263    (navi2ch-article-cancel-hide-message
264     before
265     navi2ch-article-cancel-hide-message-as-good)
266    ;; (navi2ch-article-next-message
267    ;;  before
268    ;;  navi2ch-article-next-message-as-good)
269    ))
270
271(defun navi2ch-spamf-enable ()
272  "spamfilter $B$rM-8z$K$9$k!#(B
273$B%Y%$%8%"%s%U%#%k%?$K$h$k<+F0%U%#%k%?%j%s%0!"%-!<$K$h$k<+F0%9%3%"%j%s%0$,(B
274$BM-8z$K$J$k!#(B"
275  (interactive)
276  (dolist (advice navi2ch-spamf-advice-list)
277    (apply #'ad-enable-advice advice)
278    (ad-activate (car advice)))
279  (add-to-list 'navi2ch-article-message-filter-list
280	       #'navi2ch-article-message-filter-by-bayesian))
281
282(defun navi2ch-spamf-disable ()
283  "spamfilter $B$rL58z$K$9$k!#(B
284$B%Y%$%8%"%s%U%#%k%?$K$h$k<+F0%U%#%k%?%j%s%0!"%-!<$K$h$k<+F0%9%3%"%j%s%0$,(B
285$BL58z$K$J$k!#(B"
286  (interactive)
287  (dolist (advice navi2ch-spamf-advice-list)
288    (apply #'ad-disable-advice advice)
289    (ad-activate (car advice)))
290  (setq navi2ch-article-message-filter-list
291	(delq #'navi2ch-article-message-filter-by-bayesian
292	      navi2ch-article-message-filter-list)))
293
294(provide 'navi2ch-spamfilter)
295;;; navi2ch-spamfilter.el ends here
296