1;;; navi2ch-spamfilter.el --- Navi2ch interface for spamfilter.el 2 3;; Copyright (C) 2003, 2004 by Navi2ch Project 4;; Copyright (C) 2003 http://pc.2ch.net/test/read.cgi/unix/1065246418/38 5 6;; Author: http://pc.2ch.net/test/read.cgi/unix/1065246418/38 7;; Keywords: 2ch, network, spam 8 9;; This file is free software; you can redistribute it and/or modify 10;; it under the terms of the GNU General Public License as published by 11;; the Free Software Foundation; either version 2, or (at your option) 12;; any later version. 13 14;; This file is distributed in the hope that it will be useful, 15;; but WITHOUT ANY WARRANTY; without even the implied warranty of 16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17;; GNU General Public License for more details. 18 19;; You should have received a copy of the GNU General Public License 20;; along with GNU Emacs; see the file COPYING. If not, write to 21;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22;; Boston, MA 02111-1307, USA. 23 24;;; Commentary: 25 26;; $B;H$$J}(B 27;; http://www.geocities.co.jp/SiliconValley-PaloAlto/7043/#spamfilter.el 28;; $B$+$i(B spamfilter $B%Q%C%1!<%8$rF~<j$7$F%$%s%9%H!<%k$7$F$*$/!#(B 29;; ~/.navi2ch/init.el $B$G(B 30;; (require 'navi2ch-spamfilter) 31;; $B$H$G$b$7$F!"(BNavi2ch $B$N<B9T;~$KFI$_9~$`$h$&$K$9$k!#(B 32;; $B$^$:$O%3!<%Q%9$N3X=,$r$9$k!#E,Ev$J%9%l$r3+$-!"(Bspam $B$r(B "d" $B$7$F(B hide 33;; $B%^!<%/$rIU$1!"(Bspam $B0J30$O(B hide $B%b!<%I$G(B "d" $B$7$F(B hide $B%^!<%/$,IU$$(B 34;; $B$F$J$$>uBV$K$9$k!#$3$N>uBV$G(B M-x navi2ch-article-register-to-corpus 35;; $B$H$7!"%^!<%/$K1~$8$F%3!<%Q%9$KEPO?$7$F$d$k!#(B 36;; $B$"$kDxEY%3!<%Q%9$,0i$C$?$i(B ~/.navi2ch/init.el $B$K(B 37;; (navi2ch-spamf-enable) $B$r2C$($F<+F0E*$K%U%#%k%?$9$k$h$&$K$9$k!#(B 38;; $B8mG'<1$,>/$J$/$J$C$?$i(B navi2ch-article-auto-spam-register-by-filter 39;; $B$r(B non-nil $B$K@_Dj$7!"%U%#%k%?$N7k2L$K1~$8$F%3!<%Q%9$K:FEPO?$9$k!#(B 40 41;;; Code: 42(defconst navi2ch-spamfilter-ident 43 "$Id$") 44 45(eval-when-compile (require 'cl)) 46(require 'spamfilter) 47(require 'navi2ch) 48 49(defconst navi2ch-spamf-preferred-major 1) 50(defconst navi2ch-spamf-preferred-minor 10) 51 52(unless (and (boundp 'spamf-cvs-id) 53 (string-match "\\([0-9]+\\)\\.\\([0-9]+\\)" spamf-cvs-id) 54 (let ((major (string-to-number (match-string 1 spamf-cvs-id))) 55 (minor (string-to-number (match-string 2 spamf-cvs-id)))) 56 (or (> major navi2ch-spamf-preferred-major) 57 (and (= major navi2ch-spamf-preferred-major) 58 (>= minor navi2ch-spamf-preferred-minor))))) 59 (error "Use spamfilter.el revision %d.%d or later." 60 navi2ch-spamf-preferred-major 61 navi2ch-spamf-preferred-minor)) 62 63(defvar navi2ch-article-bayesian-save-file-name 64 (expand-file-name "spamfilter" navi2ch-directory)) 65 66(defvar navi2ch-article-auto-spam-register-by-filter nil 67 "non-nil $B$N>l9g!"%U%#%k%?$N7k2L$r%3!<%Q%9$K:FEPO?$9$k!#(B 68$B<+F0E*$K1#$5$l$?%l%9$N(B spam $B$C$]$5$OA}2C$7!"1#$5$l$J$+$C$?%l%9$N(B spam 69$B$C$]$5$O8:>/$9$k!#(B 70non-nil $B$N$^$^$G8mH=Dj$rJ|CV$9$k$H8m$C$?3X=,$r$7$F$7$^$&$N$GCm0U!#(B") 71 72(defvar navi2ch-article-manual-spam-ratio 73 (if navi2ch-article-auto-spam-register-by-filter 2 1) 74 "$B<jF0$G(B spam $BEPO?(B / $B2r=|$r$7$?:]$NG\N(!#(B 75navi2ch-article-message-filter-by-bayesian $B$G<+F0EPO?$9$k>l9g$O(B 2 $B0J>e(B 76$B$K$7$F$*$$$?J}$,$$$$$H;W$&!#(B") 77 78(defvar navi2ch-article-register-normal-message-as-good t 79 "non-nil $B$N>l9g!"%^!<%/$NIU$$$F$J$$%l%9$b(B good $B$H$7$FEPO?$9$k!#(B 80`navi2ch-article-register-to-corpus' $B$b;2>H!#(B") 81 82(defvar navi2ch-spamf-good-corpus 83 (make-spamf-corpus :name "navi2ch-spamf-good-corpus" 84 :table (make-hash-table :test #'eq) 85 :message-count 0)) 86 87(defvar navi2ch-spamf-bad-corpus 88 (make-spamf-corpus :name "navi2ch-spamf-bad-corpus" 89 :table (make-hash-table :test #'eq) 90 :message-count 0)) 91 92(defvar navi2ch-article-before-save-corpus-hook nil) 93 94(defvar navi2ch-spamf-additional-token-flag nil 95 "non-nil $B$N>l9g!"%l%9$NF|;~!"HV9fEy$r%H!<%/%s$H$7$FDI2C$9$k!#(B 96$B7P83$G$O!"%3!<%Q%9$,Bg$-$/$J$k3d$K8z2L$OGv$$$H;W$o$l$k!#(B") 97 98(dolist (map (list navi2ch-article-mode-map navi2ch-popup-article-mode-map)) 99 (define-key map "\C-c\C-g" 100 'navi2ch-article-add-message-filter-by-bayesian-good) 101 (define-key map "\C-c\C-b" 102 'navi2ch-article-add-message-filter-by-bayesian-spam) 103 (define-key map "\C-c\C-s" 104 'navi2ch-article-show-spam-probability)) 105 106(defsubst navi2ch-spamf-register-token (corpus token) 107 (spamf-increase-word-count corpus (spamf-intern token)) 108 (incf (spamf-corpus-message-count corpus))) 109 110(defsubst navi2ch-spamf-register-good-token (token) 111 (interactive "MToken: ") 112 (navi2ch-spamf-register-token navi2ch-spamf-good-corpus token)) 113 114(defsubst navi2ch-spamf-register-spam-token (token) 115 (interactive "MToken: ") 116 (navi2ch-spamf-register-token navi2ch-spamf-bad-corpus token)) 117 118(defsubst navi2ch-spamf-register-token-list (corpus list) 119 (dolist (token list) 120 (spamf-increase-word-count corpus (spamf-intern token))) 121 (incf (spamf-corpus-message-count corpus))) 122 123(defsubst navi2ch-spamf-register-good-token-list (list) 124 (navi2ch-spamf-register-token-list navi2ch-spamf-good-corpus list)) 125 126(defsubst navi2ch-spamf-register-spam-token-list (list) 127 (navi2ch-spamf-register-token-list navi2ch-spamf-bad-corpus list)) 128 129(defun navi2ch-article-bayesian-tokenizer (alist) 130 (nconc 131 (funcall spamf-tokenize-string-function 132 (cdr (assq 'data alist))) 133 (if navi2ch-spamf-additional-token-flag 134 (mapcar (lambda (str) 135 (concat "date:" str)) 136 (split-string (cdr (assq 'date alist)) "[ $B!!(B]+"))) 137 (if (string-match "$B"!(B[^ ]+" (cdr (assq 'name alist))) 138 (list (concat "trip:" (match-string 0 (cdr (assq 'name alist)))))) 139 (let ((number (or (cdr (assq 'number alist)) 140 (navi2ch-article-get-current-number)))) 141 (when (and navi2ch-spamf-additional-token-flag 142 (numberp number)) 143 (list (concat "num:" (number-to-string number))))) 144 (list 145 (concat "mail:" (cdr (assq 'mail alist))) 146 (concat "name:" (cdr (assq 'name alist)))))) 147 148(defsubst navi2ch-article-tokenize-current-message () 149 (navi2ch-article-bayesian-tokenizer 150 (navi2ch-article-get-message 151 (navi2ch-article-get-current-number)))) 152 153(defun navi2ch-article-add-message-filter-by-bayesian-good () 154 (interactive) 155 (dotimes (i navi2ch-article-manual-spam-ratio) 156 (navi2ch-spamf-register-good-token-list 157 (navi2ch-article-tokenize-current-message)))) 158 159(defun navi2ch-article-add-message-filter-by-bayesian-spam () 160 (interactive) 161 (dotimes (i navi2ch-article-manual-spam-ratio) 162 (navi2ch-spamf-register-spam-token-list 163 (navi2ch-article-tokenize-current-message)))) 164 165(defsubst navi2ch-article-spam-probability (token) 166 (spamf-sum-spam-probability 167 (mapcar #'cdr (spamf-cutoff-words token 168 spamf-cutoff-words-limit 169 navi2ch-spamf-good-corpus 170 navi2ch-spamf-bad-corpus)))) 171 172(defun navi2ch-article-show-spam-probability (&optional prefix) 173 "$B%l%9$N(B spam $B$C$]$5$rI=<($9$k!#(B" 174 (interactive "P") 175 (let* ((token (navi2ch-article-tokenize-current-message)) 176 (prob (navi2ch-article-spam-probability token))) 177 (if prefix 178 (with-output-to-temp-buffer "*spam probability*" 179 (princ (format "Spam probability: %f\n\n" prob)) 180 (dolist (pair (spamf-cutoff-words token 181 spamf-cutoff-words-limit 182 navi2ch-spamf-good-corpus 183 navi2ch-spamf-bad-corpus)) 184 (prin1 (cons (symbol-name (car pair)) (cdr pair))) 185 (princ "\n"))) 186 (message "Spam probability: %f" prob)))) 187 188(defun navi2ch-article-message-filter-by-bayesian (alist) 189 (let ((token (navi2ch-article-bayesian-tokenizer alist))) 190 (if (> (navi2ch-article-spam-probability token) 191 spamf-spamness-limit) 192 'hide))) 193 194(defun navi2ch-article-save-corpus () 195 (run-hooks 'navi2ch-article-before-save-corpus-hook) 196 (message "Saving corpus file...") 197 (spamf-save-corpus navi2ch-article-bayesian-save-file-name 198 navi2ch-spamf-good-corpus 199 navi2ch-spamf-bad-corpus) 200 (message "Saving corpus file...done")) 201 202(defun navi2ch-article-load-corpus () 203 (message "Loading corpus file...") 204 (spamf-load-corpus navi2ch-article-bayesian-save-file-name) 205 (message "Loading corpus file...done")) 206 207(defun navi2ch-article-register-to-corpus () 208 "$B%l%9$N%^!<%/$K1~$8$F8=:_$N%9%l$r%3!<%Q%9$KEPO?$9$k!#(B 209important $B%^!<%/$N%l%9$r(B good $B$K!"(Bhide $B%^!<%/$N%l%9$r(B bad $B$KEPO?$9$k!#(B 210`navi2ch-article-register-normal-message-as-good' $B$,(B non-nil $B$N>l9g$O(B 211$B%^!<%/$NIU$$$F$J$$%l%9$b(B good $B$KEPO?$9$k!#(B" 212 (interactive) 213 (let ((hide (cdr (assq 'hide navi2ch-article-current-article))) 214 (imp (cdr (assq 'important navi2ch-article-current-article)))) 215 (dolist (x navi2ch-article-message-list) 216 (let ((num (car x)) 217 (alist (cdr x))) 218 (if (stringp alist) 219 (setq alist (navi2ch-article-parse-message alist))) 220 (message "registering...%d" num) 221 (cond ((memq num hide) 222 (navi2ch-spamf-register-spam-token-list 223 (navi2ch-article-bayesian-tokenizer alist))) 224 ((or (memq num imp) 225 navi2ch-article-register-normal-message-as-good) 226 (navi2ch-spamf-register-good-token-list 227 (navi2ch-article-bayesian-tokenizer alist)))))) 228 (message "registering...done"))) 229 230;; hook and advice 231(add-hook 'navi2ch-save-status-hook #'navi2ch-article-save-corpus) 232(add-hook 'navi2ch-hook #'navi2ch-article-load-corpus) 233 234(defadvice navi2ch-article-apply-message-filters 235 (after navi2ch-article-register-spam-by-filter (alist)) 236 (when navi2ch-article-auto-spam-register-by-filter 237 (let ((token (navi2ch-article-bayesian-tokenizer alist))) 238 (if (eq ad-return-value 'hide) 239 (navi2ch-spamf-register-spam-token-list token) 240 (navi2ch-spamf-register-good-token-list token)))) 241 ad-return-value) 242 243(defadvice navi2ch-article-hide-message 244 (before navi2ch-article-hide-message-as-spam) 245 (navi2ch-article-add-message-filter-by-bayesian-spam)) 246 247(defadvice navi2ch-article-cancel-hide-message 248 (before navi2ch-article-cancel-hide-message-as-good) 249 (navi2ch-article-add-message-filter-by-bayesian-good)) 250 251(defadvice navi2ch-article-next-message 252 (before navi2ch-article-next-message-as-good) 253 (unless navi2ch-article-hide-mode 254 (navi2ch-article-add-message-filter-by-bayesian-good))) 255 256(defconst navi2ch-spamf-advice-list 257 '((navi2ch-article-apply-message-filters 258 after 259 navi2ch-article-register-spam-by-filter) 260 (navi2ch-article-hide-message 261 before 262 navi2ch-article-hide-message-as-spam) 263 (navi2ch-article-cancel-hide-message 264 before 265 navi2ch-article-cancel-hide-message-as-good) 266 ;; (navi2ch-article-next-message 267 ;; before 268 ;; navi2ch-article-next-message-as-good) 269 )) 270 271(defun navi2ch-spamf-enable () 272 "spamfilter $B$rM-8z$K$9$k!#(B 273$B%Y%$%8%"%s%U%#%k%?$K$h$k<+F0%U%#%k%?%j%s%0!"%-!<$K$h$k<+F0%9%3%"%j%s%0$,(B 274$BM-8z$K$J$k!#(B" 275 (interactive) 276 (dolist (advice navi2ch-spamf-advice-list) 277 (apply #'ad-enable-advice advice) 278 (ad-activate (car advice))) 279 (add-to-list 'navi2ch-article-message-filter-list 280 #'navi2ch-article-message-filter-by-bayesian)) 281 282(defun navi2ch-spamf-disable () 283 "spamfilter $B$rL58z$K$9$k!#(B 284$B%Y%$%8%"%s%U%#%k%?$K$h$k<+F0%U%#%k%?%j%s%0!"%-!<$K$h$k<+F0%9%3%"%j%s%0$,(B 285$BL58z$K$J$k!#(B" 286 (interactive) 287 (dolist (advice navi2ch-spamf-advice-list) 288 (apply #'ad-disable-advice advice) 289 (ad-activate (car advice))) 290 (setq navi2ch-article-message-filter-list 291 (delq #'navi2ch-article-message-filter-by-bayesian 292 navi2ch-article-message-filter-list))) 293 294(provide 'navi2ch-spamfilter) 295;;; navi2ch-spamfilter.el ends here 296