1 /*
2     cache.* - nntp header cache code
3     Copyright (C) 1999-2004  Matthew Mueller <donut AT dakotacom.net>
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #ifndef _CACHE_H_
20 #define _CACHE_H_
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 #include <sys/types.h>
25 #include <ctype.h>
26 #include <string>
27 #include <map>
28 #include "_hash_map.h"
29 #include <list>
30 #include <vector>
31 #include "file.h"
32 #include "log.h"
33 
34 #include "stlhelp.h"
35 #include "etree.h"
36 
37 #include "rcount.h"
38 
39 #include "server.h"
40 
41 #include "nrange.h"
42 #include "dupe_file.h"
43 
// Version tag string for the header cache.
// NOTE(review): presumably written to / compared against the start of cache files - confirm in cache.cc.
#define CACHE_VERSION "NGET5"
// NOTE(review): looks like a revision number for the cache's sort order - confirm where it is consumed.
#define CACHE_SORTVER (1)
46 
// List of reference strings attached to a post (one entry per reference).
typedef std::vector<std::string> t_references;

// Numeric identifier type returned by c_nntp_file::getfileid().
typedef unsigned long t_id;

// Fields that identify a (possibly multipart) post.  Both individual headers
// (c_nntp_header) and assembled files (c_nntp_file) derive from this, and the
// ordering/equality defined here is what ltfb uses to key t_nntp_files.
class c_nntp_file_base {
	public:
		int req;                  // number of parts required (compared against c_nntp_file::have)
		int partoff;              // NOTE(review): presumably offset of the part number within the subject - confirm in cache.cc
		std::string author;
		std::string subject;
		t_references references;

		// Builds a base from explicit values.  A NULL author or subject is
		// treated as an empty string (constructing std::string from NULL is
		// undefined behaviour).
		c_nntp_file_base(int r, int po, const char *a, const char *s): req(r), partoff(po), author(a ? a : ""), subject(s ? s : "") {}

		// Default construction zero-initialises the integer fields so that
		// operator< / operator== never read indeterminate values.
		c_nntp_file_base(): req(0), partoff(0) {}

		// Strict weak ordering: lexicographic over
		// (req, partoff, author, subject, references).
		bool operator< (const c_nntp_file_base &fb) const {
			if (req<fb.req) return true;
			if (req>fb.req) return false;
			if (partoff<fb.partoff) return true;
			if (partoff>fb.partoff) return false;
			if (author<fb.author) return true;
			if (author>fb.author) return false;
			if (subject<fb.subject) return true;
			if (subject>fb.subject) return false;
			// last key: equal or greater both mean "not less"
			return references<fb.references;
		}

		// True when every identifying field matches.
		bool operator== (const c_nntp_file_base &fb) const {
			return (req==fb.req && partoff==fb.partoff && author==fb.author && subject==fb.subject && references==fb.references);
		}
};
79 
// A single article header as seen on one server, layered on top of the
// identifying fields of c_nntp_file_base.  Instances are filled in via set()
// and handed to c_nntp_cache::additem(), c_nntp_file(c_nntp_header*) and
// c_nntp_part(c_nntp_header*).
class c_nntp_header : public c_nntp_file_base {
	private:
		// NOTE(review): presumably parses a part number out of str starting at soff - defined in cache.cc, confirm there.
		int parsepnum(const char *str,const char *soff);
	public:
		ulong serverid;           // id of the server this header came from
		c_group_info::ptr group;  // group the article was seen in
		int partnum;              // part number of this article within its file
		int tailoff;              // NOTE(review): presumably offset of the subject text following the part counter - confirm
		ulong articlenum;         // article number on that server
		time_t date;
		ulong bytes,lines;        // size of the article as reported for this server
		string messageid;
		// Populates the header from raw field values (implemented in cache.cc).
		// The refstr buffer is modified by this call.
		void set(char *s,const char *a,ulong anum,time_t d,ulong b,ulong l,const char *mid,char *refstr);//note: modifies refstr
//		c_nntp_header(char *s,const char *a,ulong anum,time_t d,ulong b,ulong l);
};
95 
96 
// One server's copy of an article: which server/group holds it, under which
// article number, and the size that server reported.  c_nntp_part keeps a
// list of these, one per server carrying the part.
class c_nntp_server_article {
	public:
		ulong serverid;
		c_group_info::ptr group;
		ulong articlenum;
		ulong bytes,lines;
		// Defined in cache.cc; parameters mirror the members above.
		c_nntp_server_article(ulong serverid,const c_group_info::ptr &group,ulong articlenum,ulong bytes,ulong lines);
};
// Per-part list of server copies (raw pointers).
typedef vector<c_nntp_server_article*> t_nntp_server_articles;
// A server copy paired with the server object it can be fetched from.
typedef pair<c_nntp_server_article*,c_server::ptr> t_real_server_article;
// Server copies keyed by a float priority; greater<float> orders the
// multimap so the highest priority comes first.
typedef multimap<float,t_real_server_article,greater<float> > t_nntp_server_articles_prioritized;
108 class c_nntp_part {
109 	public:
110 		int partnum;
111 		t_nntp_server_articles articles;
112 		time_t date;
113 //		ulong apxbytes,apxlines;//approximate/hrmy.
114 		string messageid;
get_best_sa(void)115 		c_nntp_server_article *get_best_sa(void) const {
116 			t_nntp_server_articles::const_iterator nsai(articles.begin());
117 			c_nntp_server_article *sa;
118 			c_nntp_server_article *highest_sa=NULL;
119 			float highprio=-10000.0,f;
120 			for (;nsai!=articles.end();++nsai) {
121 				sa=(*nsai);
122 				for (t_server_list_range servers = nconfig.getservers(sa->serverid); servers.first!=servers.second; ++servers.first)
123 					if ((f=nconfig.trustsizes->getserverpriority(servers.first->second)) > highprio){
124 						highest_sa=sa;
125 						highprio=f;
126 					}
127 			}
128 			return highest_sa;
129 		}
bytes(void)130 		ulong bytes(void) const {return get_best_sa()->bytes;}
lines(void)131 		ulong lines(void) const {return get_best_sa()->lines;}
c_nntp_part(int pn,time_t d,const string & mid)132 		c_nntp_part(int pn, time_t d,const string &mid):partnum(pn),date(d),messageid(mid){};
133 		c_nntp_part(c_nntp_header *h);
134 		~c_nntp_part();
135 		void addserverarticle(c_nntp_header *h);
136 		c_nntp_server_article *getserverarticle(ulong serverid);
137 };
138 
139 
140 class c_nntp_file_parts : public vector<c_nntp_part*>
141 {
142 public:
haspart(int partnum)143 	bool haspart(int partnum) const {
144 		const_iterator i = lower_bound(partnum);
145 		return (i != end() && (*i)->partnum == partnum);
146 	}
147 
part(int partnum)148 	c_nntp_part* part(int partnum) {
149 		iterator i = lower_bound(partnum);
150 		if (i != end() && (*i)->partnum == partnum)
151 			return *i;
152 		else
153 			return NULL;
154 	}
155 
addpart(c_nntp_part * p)156 	void addpart(c_nntp_part* p) {
157 		iterator i = lower_bound(p->partnum);
158 		insert(i, p);
159 	}
160 
removepart(int partnum)161 	void removepart(int partnum) {
162 		iterator i = lower_bound(partnum);
163 		if (i != end() && (*i)->partnum == partnum)
164 			erase(i);
165 	}
166 
167 private:
168 	iterator lower_bound(int partnum);
169 
lower_bound(int partnum)170 	const_iterator lower_bound(int partnum) const
171 	{
172 		return ((c_nntp_file_parts*)this)->lower_bound(partnum);
173 	}
174 };
175 
176 //#define FILEFLAG_READ 1
177 
178 typedef map<ulong,int> t_server_have_map;
179 
180 class c_nntp_file : public c_nntp_file_base, public c_refcounted<c_nntp_file>{
181 	public:
182 		c_nntp_file_parts parts;
183 		int have;
184 //		ulong bytes,lines;
185 		ulong flags;
186 		int tailoff;
187 		time_t update;
188 		void addpart(c_nntp_part *p);
189 		void addnewpart(c_nntp_part *p);
190 		void mergefile(c_nntp_file::ptr &f);
is_a_reply(void)191 		bool is_a_reply(void) const {return (!references.empty()) || (subject.size()>=4 && tolower(subject[0])=='r' && tolower(subject[1])=='e' && subject[2]==':' && subject[3]==' ');}
maybe_a_textreply(void)192 		bool maybe_a_textreply(void) const {return (have<=1 && is_a_reply() && lines()<1000);}
maybe_a_zerofile(void)193 		bool maybe_a_zerofile(void) const {return (req==0) && (partoff>=0) && (have==1);}
maybe_a_textpost(void)194 		bool maybe_a_textpost(void) const {return maybe_a_zerofile() || maybe_a_textreply();}
iscomplete(void)195 		bool iscomplete(void) const {return (have>=req) || maybe_a_textreply();}
196 		void get_server_have_map(t_server_have_map &have_map) const;
197 //		ulong banum(void){assert(!parts.empty());return (*parts.begin()).second->articlenum;}
bamid(void)198 		string bamid(void) const {assert(!parts.empty());return (*parts.begin())->messageid;}
badate(void)199 		time_t badate(void) const {assert(!parts.empty());return (*parts.begin())->date;}
200 #define HAPPYSIZEFUNC2(T)		ulong T(void) const {\
201 			ulong b=0;\
202 			c_nntp_file_parts::const_iterator nfpi(parts.begin());\
203 			for (;nfpi!=parts.end();++nfpi){\
204 				b+=(*nfpi)->T();\
205 			}\
206 			return b;\
207 		}
208 		t_id getfileid(void) const;
209 		HAPPYSIZEFUNC2(bytes)
210 		HAPPYSIZEFUNC2(lines)
211 		c_nntp_file(c_nntp_header *h);
212 		c_nntp_file(int r,ulong f,const char *s,const char *a,int po,int to,time_t update);
213 		virtual ~c_nntp_file();
214 };
215 
216 
217 struct ltfb {
operatorltfb218 	bool operator()(const c_nntp_file_base *fb1, const c_nntp_file_base *fb2) const {
219 		return *fb1 < *fb2;
220 	}
221 };
222 typedef multimap<c_nntp_file_base*, c_nntp_file::ptr, ltfb> t_nntp_files;
223 
224 class c_nntp_file_retr : public c_refcounted<c_nntp_file_retr>{
225 	public:
226 		string path;
227 		string temppath;
228 		c_nntp_file::ptr file;
229 		bool dupecheck;
c_nntp_file_retr(const string & p,const string & tp,const c_nntp_file::ptr & f,bool dupec)230 		c_nntp_file_retr(const string &p, const string &tp, const c_nntp_file::ptr &f, bool dupec):path(p),temppath(tp),file(f),dupecheck(dupec){}
231 };
232 typedef multimap<time_t,c_nntp_file_retr::ptr> t_nntp_files_u;
233 class c_nntp_files_u {
234 	public:
235 		uint_fast64_t bytes, lines;
236 		t_nntp_files_u files;
237 		void addfile(c_nntp_file::ptr f, const string &path, const string &temppath, bool dupecheck=true) {
238 			files.insert(t_nntp_files_u::value_type(f->badate(), new c_nntp_file_retr(path,temppath,f,dupecheck)));
239 			lines+=f->lines();
240 			bytes+=f->bytes();
241 		}
c_nntp_files_u(void)242 		c_nntp_files_u(void):bytes(0),lines(0){}
243 		~c_nntp_files_u();
244 };
245 
246 
// Per-server article number bookkeeping: highest and lowest number seen and
// a count.
class c_nntp_server_info {
	public:
		ulong serverid;
		ulong high,low,num;

		// Back to the "nothing seen" state: high at the minimum, low at the
		// maximum, count zero.  serverid is left untouched.
		void reset(void){
			num=0;
			low=ULONG_MAX;
			high=0;
		}

		// Starts in the "nothing seen" state for the given server.
		c_nntp_server_info(ulong sid)
			: serverid(sid), high(0), low(ULONG_MAX), num(0)
		{
		}

		// Starts from explicit values.
		c_nntp_server_info(ulong sid,ulong hig,ulong lo,ulong nu)
			: serverid(sid), high(hig), low(lo), num(nu)
		{
		}
};
// c_nntp_server_info records looked up by a ulong key.
// NOTE(review): the key is presumably the server id - confirm in cache.cc.
typedef map<ulong,c_nntp_server_info> t_nntp_server_info;
256 
257 class c_message_state : public c_refcounted<c_message_state>{
258 	public:
259 		string messageid;
260 		time_t date_added,date_removed;
c_message_state(string mid,time_t da,time_t dr)261 		c_message_state(string mid,time_t da,time_t dr):messageid(mid),date_added(da),date_removed(dr){}
262 };
263 
// Message states indexed by message-id C string.  The key pointer is the
// c_str() of the stored state's own messageid (see c_mid_info::insert), so
// it stays valid for as long as that state object is alive and unmodified.
// A hash map is used when the platform provides a working one, otherwise an
// ordered map with a C-string comparator.
#ifdef HAVE_WORKING_HASH_MAP
typedef hash_map<const char*, c_message_state::ptr, std::hash<const char*>, eqstr> t_message_state_list;
#else
typedef map<const char*, c_message_state::ptr, ltstr> t_message_state_list;
#endif

// Sentinel values stored in c_message_state::date_removed.
// NOTE(review): defined in terms of INT_MAX, so this assumes time_t can represent INT_MAX - confirm for the target platforms.
#define TIME_T_MAX INT_MAX
// "Not removed": the value c_mid_info::insert gives to freshly added entries.
#define TIME_T_MAX1 (TIME_T_MAX-1)
// The value c_mid_info::remove writes to mark an entry.
#define TIME_T_DEAD TIME_T_MAX
274 
275 class c_mid_info {
276 	protected:
277 		string file;
278 		int changed;
279 		t_message_state_list states;
insert_full(string mid,time_t a,time_t d)280 		void insert_full(string mid, time_t a, time_t d){
281 			t_message_state_list::iterator i=states.find(mid.c_str());
282 			c_message_state::ptr s;
283 			if (i!=states.end()){
284 				s=(*i).second;
285 //				if ((*i).second->changed)return;/arrrr
286 				if (d==TIME_T_MAX1 && s->date_removed!=TIME_T_MAX1) return;//ours has been deleted but not what we merging
287 				if (s->date_removed!=TIME_T_MAX1 && s->date_removed > d) return; //both are deleted, but ours has more recent time?
288 				states.erase(i);
289 			}
290 			s=new c_message_state(mid,a,d);
291 			states.insert(t_message_state_list::value_type(s->messageid.c_str(),s));
292 		}
293 	public:
check(const string & mid)294 		int check(const string &mid) const {
295 			if (states.find(mid.c_str())!=states.end())
296 				return 1;
297 			return 0;
298 		}
insert(const string & mid)299 		void insert(const string &mid){
300 			if (check(mid))return;
301 			changed=1;
302 			c_message_state::ptr s=new c_message_state(mid,time(NULL),TIME_T_MAX1);
303 			states.insert(t_message_state_list::value_type(s->messageid.c_str(),s));
304 			return;
305 		}
remove(const string & mid)306 		void remove(const string &mid){
307 			t_message_state_list::iterator i=states.find(mid.c_str());
308 			if (i==states.end())
309 				return;
310 			(*i).second->date_removed=TIME_T_DEAD;
311 			changed=1;
312 		}
clear(void)313 		void clear(void){
314 			if (!states.empty()){
315 				states.clear();
316 				changed=1;
317 			}
318 		}
set_delete(const string & mid)319 		void set_delete(const string &mid){
320 			t_message_state_list::iterator i=states.find(mid.c_str());
321 			if (i!=states.end()){
322 				(*i).second->date_removed=time(NULL);
323 			}
324 		}
325 		void do_delete_fun(const c_mid_info &rel_mid);
326 		void load(string path,bool merge=0,bool lock=1);
327 		void save(void);
328 		c_mid_info(string path);
329 		~c_mid_info();
330 };
331 
332 typedef map<string, c_mid_info *> t_mid_info_list;
333 class meta_mid_info {
334 	protected:
335 		t_mid_info_list midinfos;
add_mid_info(const string & path,const c_group_info::ptr & group)336 		void add_mid_info(const string &path, const c_group_info::ptr &group){
337 			midinfos.insert(t_mid_info_list::value_type(group->group, new c_mid_info(path + group->group + ",midinfo")));
338 		}
339 	public:
check(const string & mid)340 		bool check(const string &mid) const {
341 			for (t_mid_info_list::const_iterator mili=midinfos.begin(); mili!=midinfos.end(); ++mili)
342 				if ((*mili).second->check(mid))
343 					return true;
344 			return false;
345 		}
insert(const c_nntp_file::ptr & f)346 		void insert(const c_nntp_file::ptr &f){
347 			const string &mid=f->bamid();
348 			c_nntp_part *p = *(f->parts.begin());
349 			for (t_nntp_server_articles::iterator sai=p->articles.begin(); sai!=p->articles.end(); ++sai)
350 				midinfos.find((*sai)->group->group)->second->insert(mid);
351 		}
remove(const string & mid)352 		void remove(const string &mid){
353 			for (t_mid_info_list::iterator mili=midinfos.begin(); mili!=midinfos.end(); ++mili)
354 				(*mili).second->remove(mid);
355 		}
set_delete(const string & mid)356 		void set_delete(const string &mid){
357 			for (t_mid_info_list::iterator mili=midinfos.begin(); mili!=midinfos.end(); ++mili)
358 				(*mili).second->set_delete(mid);
359 		}
do_delete_fun(const c_mid_info & rel_mid)360 		void do_delete_fun(const c_mid_info &rel_mid){
361 			for (t_mid_info_list::iterator mili=midinfos.begin(); mili!=midinfos.end(); ++mili)
362 				(*mili).second->do_delete_fun(rel_mid);
363 		}
364 
meta_mid_info(string path,const vector<c_group_info::ptr> & groups)365 		meta_mid_info(string path, const vector<c_group_info::ptr> &groups){
366 			for (vector<c_group_info::ptr>::const_iterator gi=groups.begin(); gi!=groups.end(); ++gi)
367 				add_mid_info(path, *gi);
368 		}
meta_mid_info(string path,const c_group_info::ptr & group)369 		meta_mid_info(string path, const c_group_info::ptr &group) {
370 			add_mid_info(path, group);
371 		}
~meta_mid_info()372 		~meta_mid_info(){
373 			for (t_mid_info_list::iterator mili=midinfos.begin(); mili!=midinfos.end(); ++mili)
374 				delete mili->second;
375 		}
376 };
377 
378 class c_xpat : public c_refcounted<c_xpat>{
379 	public:
380 		string field;
381 		string wildmat;
c_xpat(const string & fiel,const string & wildma)382 		c_xpat(const string &fiel,const string &wildma):field(fiel), wildmat(wildma){ }
383 };
384 typedef list<c_xpat::ptr> t_xpat_list;
385 
// Parameters for one retrieval request: destination paths, the predicate
// object, flags and a dupe_file_checker.  The object owns pred and deletes
// it on destruction.
class c_nntp_getinfo : public c_refcounted<c_nntp_getinfo>{
	public:
		string path;
		string temppath;
		nntp_file_pred *pred; // owned; released in the destructor
		int flags;
		dupe_file_checker flist;
		// Defined out of line.
		// NOTE(review): dupepaths presumably feeds flist - confirm in cache.cc.
		c_nntp_getinfo(const string &pat, const string &temppat, const vector<string> &dupepaths, nntp_file_pred *pre,int flag);
		~c_nntp_getinfo() { delete pred; }
};
typedef list<c_nntp_getinfo::ptr> t_nntp_getinfo_list;
397 
// Only used through pointers in this header, so a forward declaration suffices.
class ParHandler;
// The header cache for one group: the known files plus per-server article
// range information.  All member functions are defined out of line.
class c_nntp_cache : public c_refcounted<c_nntp_cache>{
	public:
		string file;
		t_nntp_files files;
		ulong totalnum;
//		ulong high,low,num;
		t_nntp_server_info server_info;
		// NOTE(review): presumably returns (creating if needed) the entry of server_info for serverid - confirm in cache.cc.
		c_nntp_server_info*getserverinfo(ulong serverid);
		c_group_info::ptr group;
		int saveit;   // NOTE(review): looks like a "needs saving" flag - confirm
		int fileread; // NOTE(review): looks like a "loaded from file" flag - confirm
		bool ismultiserver(void) const;
		//int additem(ulong an,char *s,const char * a,time_t d, ulong b, ulong l){
		// Adds one parsed header to the cache.
		int additem(c_nntp_header *h);
		ulong flush(c_nntp_server_info *servinfo, c_nrange flushrange, meta_mid_info *midinfo);
		ulong flushlow(c_nntp_server_info *servinfo, ulong newlow, meta_mid_info *midinfo);
		// Both overloads report their result through *range.
		void getxrange(c_nntp_server_info *servinfo, ulong newlow, ulong newhigh, c_nrange *range) const;
		void getxrange(c_nntp_server_info *servinfo, c_nrange *range) const;
		// Results are reported through *fc.
		void getfiles(c_nntp_files_u *fc, ParHandler *parhandler, meta_mid_info *midinfo, const t_nntp_getinfo_list &getinfos);
		c_nntp_cache(void);
		c_nntp_cache(string path,c_group_info::ptr group,meta_mid_info*midinfo);
		virtual ~c_nntp_cache();
};
422 
// Free-function counterparts of c_nntp_cache::getfiles, defined out of line:
// the first overload takes a single group, the second a list of groups.
// Results are reported through *fc and *ismultiserver.
void nntp_cache_getfiles(c_nntp_files_u *fc, ParHandler *parhandler, bool *ismultiserver, string path, c_group_info::ptr group, meta_mid_info*midinfo, const t_nntp_getinfo_list &getinfos);
void nntp_cache_getfiles(c_nntp_files_u *fc, ParHandler *parhandler, bool *ismultiserver, string path, const vector<c_group_info::ptr> &groups, meta_mid_info*midinfo, const t_nntp_getinfo_list &getinfos);
425 
426 #endif
427