1;;; sb-tcup.el --- shimbun backend for www.tcup.com
2
3;; Copyright (C) 2001, 2002, 2005, 2019 Yuuichi Teranishi <teranisi@gohome.org>
4
5;; Author: Yuuichi Teranishi <teranisi@gohome.org>
6;; Keywords: news
7
8;; This file is a part of shimbun.
9
10;; This program is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; This program is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with this program; see the file COPYING.  If not, write to
22;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301, USA.
24
25;;; Commentary:
26
27;; Original was http://homepage2.nifty.com/strlcat/nnshimbun-tcup.el
28
29;;; Code:
30
31(require 'shimbun)
32
33(eval-and-compile
34  (luna-define-class shimbun-tcup (shimbun) (content-hash))
35  (luna-define-internal-accessors 'shimbun-tcup))
36
37(defvar shimbun-tcup-group-alist
38  '(("yutopia" "http://6116.teacup.com/yutopia/bbs2")
39    ("meadow" "http://6629.teacup.com/yutopia/bbs2")
40    ("skk" "http://6718.teacup.com/yutopia/bbs2"))
41  "An alist of tcup bbs shimbun group definition.
42Each element looks like:
43
44 (NAME URL SUBJECT-REGEXP FROM-START-REGEXP DATE-START-REGEXP
45           BODY-START-REGEXP BODY-END-REGEXP).
46
47Each element have a following default value:
48
49SUBJECT-REGEXP: `shimbun-tcup-subject-regexp'
50FROM-START-REGEXP: `shimbun-tcup-from-start-regexp'
51DATE-START-REGEXP: `shimbun-tcup-date-start-regexp'
52BODY-START-REGEXP: `shimbun-tcup-body-start-regexp'
53BODY-END-REGEXP: `shimbun-tcup-body-end-regexp'")
54
55(defvar shimbun-tcup-subject-regexp
56  (let ((s0 "[\t\n ]*")
57	(s1 "[\t\n ]+"))
58    (concat "<font" s1 "size=\"?4\"?[^>]*>" s0 "<b>" s0 "\\([^<]+\\)"
59	    s0 "</b>" s0 "</font>"))
60  "Default regexp for subject.
61 This have a one parenthesized expression match for subject.")
62(defvar shimbun-tcup-from-start-regexp "投稿者:[\t\n ]*"
63  "Default regexp for from start string.")
64(defvar shimbun-tcup-date-start-regexp "投稿日:[\t\n ]*"
65  "Default regexp for date start string.")
66(defvar shimbun-tcup-body-start-regexp
67  "<blockquote>\\([\t\n ]*<[^>]+>\\)*[\t\n ]*"
68  "Default regexp for body start string.")
69(defvar shimbun-tcup-body-end-regexp
70  "[\t\n ]*\\(<[^>]+>[\t\n ]*\\)*</blockquote>"
71  "Default regexp for body end string.")
72
73(defvar shimbun-tcup-content-hash-length 31)
74(defvar shimbun-tcup-x-face-alist
75  '(("yutopia" . "X-Face: ,Em61:vG$KP!G`Q]ZsO\\@&g`VXE-kicRnKs\"Wd'ZSF\
76Q*O'i6OJ2(U$x6/gytz:<jCUn+&*e\n 8$BTg.~1,7OS%tjW#ty4Cp7x%6SD;aNfn(ugAN\
77CC]q(-foA:@ULvLAJz_oeP1@a~C+Bxc3I\\+^W<%n\n y,z@:VoRoJXl'E`kX]3i1m;+I`")
78    ("meadow" . "X-Face: xo];SyM=kg&iWSACakk9gGth>s`0KE!+n9}l[&W\
79SG!QUj`15/+hzWfCvZ\\`R!i<c8{QI=hw\n Ez}CH&IOYewgffOCh5jTPWx/ehA\\\
80:Qe[;P>8re^8`\\8omn]t;P~wC{X%Y$q/f!zC%IG1RVFj~Jf`c6\n t98[2O!+vg\
81w!!gb8HQ,s0F*e6f*xs\"HR}{':>)Q_|+67gobo%?|n_SdjfzLI6kJ(T;q{+?p?")))
82
83(luna-define-method initialize-instance :after ((shimbun shimbun-tcup)
84						&rest init-args)
85  (shimbun-tcup-set-content-hash-internal
86   shimbun
87   (make-vector shimbun-tcup-content-hash-length 0))
88  shimbun)
89
90(luna-define-method shimbun-groups ((shimbun shimbun-tcup))
91  (mapcar 'car shimbun-tcup-group-alist))
92
93(luna-define-method shimbun-index-url ((shimbun shimbun-tcup))
94  (cadr (assoc (shimbun-current-group-internal shimbun)
95	       shimbun-tcup-group-alist)))
96
97(defun shimbun-tcup-get-group-key (group)
98  "Returns (hostname . board-id)"
99  (let ((url (cadr (assoc group
100			  shimbun-tcup-group-alist))))
101    (cond
102     ((string-match "\\(\\`\\|://\\)\\([^:/]+\\)/\\([^/]+\\)/bbs"
103		    url)		; "http://6718.teacup.com/yutopia/bbs"
104      (cons (match-string 2 url)	; "6718.teacup.com"
105	    (match-string 3 url)))	; "yutopia"
106     ((string-match "\\(\\`\\|://\\)www[^/]+/\\([0-9]+\\)/\\([^/]+\\)\\.html"
107		    url)		; "http://www67.tcup.com/6718/yutopia.html"
108      (cons (concat
109	     (match-string 2 url)	; "6718"
110	     ".teacup.com")
111	    (match-string 3 url)))	; "yutopia"
112     (t
113      nil))))
114
115(defun shimbun-tcup-stime-to-time (stime)
116  (let (a b c)
117    (setq a (length stime))
118    (setq b (- (string-to-number (substring stime 0 (- a 4))) 9))
119    (setq c (+ (string-to-number (substring stime (- a 4) a))
120	       (* (% b 4096) 10000)
121	       90000))
122    (list (+ (* (/ b 4096) 625) (/ c 65536)) (% c 65536))))
123
124(defun shimbun-tcup-make-time ()
125  (let (yr mon day hr min sec dow tm)
126    (looking-at
127     "\\([ 0-9]+\\)月\\([ 0-9]+\\)日(\\(.\\))\\([ 0-9]+\\)時\\([ 0-9]+\\)分\\([ 0-9]+\\)秒")
128    (setq mon (string-to-number (match-string 1))
129	  day (string-to-number (match-string 2))
130	  dow (match-string 3)
131	  hr  (string-to-number (match-string 4))
132	  min (string-to-number (match-string 5))
133	  sec (string-to-number (match-string 6)))
134    (setq dow (string-match dow "日月火水木金土"))
135    (setq yr (nth 5 (decode-time (current-time))))
136    (setq tm (encode-time sec min hr day mon yr))
137    (while (not (eq dow (nth 6 (decode-time tm))))
138      (setq yr (1- yr))
139      (setq tm (encode-time sec min hr day mon yr)))
140    tm))
141
142(defun shimbun-tcup-make-id (stime group)
143  (let ((keys (shimbun-tcup-get-group-key group)))
144    (format "<%s.%s@%s>"		; "<1576232885.yutopia@6718.teacup.com>"
145	    stime (cdr keys) (car keys))))
146
147(luna-define-method shimbun-get-headers ((shimbun shimbun-tcup)
148					 &optional range)
149;;;<DEBUG>
150;;  (shimbun-tcup-get-headers shimbun range))
151;;
152;;(defun shimbun-tcup-get-headers (shimbun range)
153;;;</DEBUG>
154  (let* ((case-fold-search t)
155	 (group (shimbun-current-group-internal shimbun))
156	 (param (assoc group shimbun-tcup-group-alist))
157	 (subject-regexp (or (nth 2 param) shimbun-tcup-subject-regexp))
158	 (from-regexp (or (nth 3 param) shimbun-tcup-from-start-regexp))
159	 (date-regexp (or (nth 4 param) shimbun-tcup-date-start-regexp))
160	 (body-st-regexp (or (nth 5 param) shimbun-tcup-body-start-regexp))
161	 (body-end-regexp (or (nth 6 param) shimbun-tcup-body-end-regexp))
162	 (index-url (shimbun-index-url shimbun))
163	 headers from subject date id url stime st body)
164    (catch 'stop
165      (while t
166	(while (re-search-forward subject-regexp nil t)
167	  (setq subject (match-string 1))
168	  (re-search-forward from-regexp)
169	  (setq from
170		(cond
171		 ((looking-at "<b><a href=\"mailto:\\([^\"]+\\)\">\\([^<]+\\)<")
172		  (concat (match-string 2) " <" (match-string 1) ">"))
173		 ((looking-at "\\(<[^>]+>[\t\n ]*\\)+\\([^<]+\\)[\t\n ]*<")
174		  (match-string 2))
175		 (t "(none)")))
176	  (re-search-forward date-regexp nil t)
177	  (cond ((looking-at "[^,]+, Time: \\([^ ]+\\) ")
178		 (setq stime (shimbun-tcup-stime-to-time (match-string 1)))
179		 ;; The clock has been advanced for nine hours in the
180		 ;; level-1 bulletin boards.
181		 (when (string-match "/bbs/?\\'\\|/bbs\\?" index-url)
182		   (let ((ms (car stime))
183			 (ls (cadr stime)))
184		     (setq ls (- ls 32400))
185		     (cond ((< ls 0)
186			    (setq stime (list (1- ms) (+ ls 65536))))
187			   ((>= ls 65536)
188			    (setq stime (list (1+ ms) (- ls 65536))))
189			   (t
190			    (setq stime (list ms ls)))))))
191		((looking-at "\\([^<]+\\)<")
192		 (setq stime (shimbun-tcup-make-time)))
193		(t
194		 (setq stime (current-time))))
195	  (let ((system-time-locale "C"))
196	    (setq date (format-time-string "%d %b %Y %T %z" stime)))
197	  (setq stime (format "%05d%05d" (car stime) (cadr stime)))
198	  (setq id (shimbun-tcup-make-id stime group))
199	  (when (shimbun-search-id shimbun id)
200	    (throw 'stop nil))
201	  (re-search-forward body-st-regexp)
202	  (setq st (match-end 0))
203	  (re-search-forward body-end-regexp)
204	  (setq body (concat (buffer-substring st (match-beginning 0)) "\n"))
205	  (forward-line 1)
206	  (setq url
207		(if (looking-at "<a[^>]+>[^<]+</a>")
208		    (concat (match-string 0) "\n<p>\n")
209		  ""))
210	  (set (intern stime (shimbun-tcup-content-hash-internal shimbun))
211	       (concat body "<p>\n" url))
212	  (push (shimbun-make-header
213		 0
214		 (shimbun-mime-encode-string subject)
215		 (shimbun-mime-encode-string from)
216		 date id "" 0 0 stime)
217		headers))
218	(goto-char (point-min))
219	(if (re-search-forward "<a[\t\n ]+href=\"\\([^\"]+\\)\"[^>]*>[\t\n ]*\
220次のページ[\t\n ]*</a>"
221			       nil t)
222	    (progn
223	      (shimbun-retrieve-url
224	       (prog1
225		   (shimbun-expand-url (match-string 1) index-url)
226		 (erase-buffer))
227	       t)
228	      (goto-char (point-min)))
229	  (throw 'stop nil))))
230    headers))
231
232(luna-define-method shimbun-article ((shimbun shimbun-tcup) header
233				     &optional outbuf)
234;;;<DEBUG>
235;;  (shimbun-tcup-article shimbun header outbuf))
236;;
237;;(defun shimbun-tcup-article (shimbun header outbuf)
238;;;</DEBUG>
239  (when (shimbun-current-group-internal shimbun)
240    (with-current-buffer (or outbuf (current-buffer))
241      (insert
242       (with-temp-buffer
243	 (let ((sym (intern-soft (shimbun-header-xref header)
244				 (shimbun-tcup-content-hash-internal
245				  shimbun))))
246	   (if (boundp sym)
247	       (insert (or (symbol-value sym) "")))
248	   (goto-char (point-min))
249	   (shimbun-header-insert shimbun header)
250	   (insert "Content-Type: " "text/html"
251		   "; charset=ISO-2022-JP\n"
252		   "MIME-Version: 1.0\n")
253	   (insert "\n")
254	   (encode-coding-string
255	    (buffer-string)
256	    (mime-charset-to-coding-system "ISO-2022-JP"))))))))
257
258(provide 'sb-tcup)
259
260;;; sb-tcup.el ends here
261