1 /*
2 cache.* - nntp header cache code
3 Copyright (C) 1999-2004 Matthew Mueller <donut AT dakotacom.net>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #include "cache.h"
20 #include "strreps.h"
21 #include "log.h"
22 #include <set>
23 #include <memory>
24 #include <errno.h>
25 #include <unistd.h>
26 #include "auto_vector.h"
27 #include "nget.h"
28 #include "status.h"
29 #include "mylockfile.h"
30 #include "strtoker.h"
31 #include "path.h"
32 #include "par.h"
33
// Tell whether part number `partnum` counts toward the "have" total of a file
// whose required part count is `req`.
//  - req > 0 : parts 1..req count.
//  - req <= 0: only a part whose number equals req counts.
static inline bool count_partnum(int partnum, int req) {
	if (req <= 0)
		return partnum == req;
	return 0 < partnum && partnum <= req;
}
40
parsepnum(const char * str,const char * soff)41 int c_nntp_header::parsepnum(const char *str,const char *soff){
42 const char *p;
43 assert(str);
44 assert(soff>=str);
45 if ((p=strpbrk(soff+1,")]"))){
46 char m,m2=*p;
47 if (m2==')') m='(';
48 else m='[';
49 tailoff=p-str;
50 for(p=soff;p>str;p--)
51 if (*p==m){
52 p++;
53 char *erp;
54 partoff=p-str;
55 partnum=strtol(p,&erp,10);
56 if (*erp!='/' || erp==p) return -1;
57 int req=strtol(soff+1,&erp,10);
58 if (*erp!=m2 || erp==soff+1) return -1;
59 if (partnum>req) return -1;
60 if (partnum==0)
61 req=0;//handle 0-files seperatly from the binary they accompany
62 return req;
63 }
64 }
65 return -1;
66 }
67
getfileid(void) const68 t_id c_nntp_file::getfileid(void) const {
69 #ifdef CHECKSUM
70 t_id fileid=CHECKSUM(0L, Z_NULL, 0);
71 fileid=CHECKSUM(fileid,(Byte*)subject.data(),subject.size());
72 fileid=CHECKSUM(fileid,(Byte*)author.data(),author.size());
73 for (t_references::const_iterator ri = references.begin(); ri != references.end(); ++ri)
74 fileid=CHECKSUM(fileid,(Byte*)ri->data(),ri->size());
75 if (req<=0){
76 const string &mid=bamid();
77 fileid=CHECKSUM(fileid,(Byte*)mid.data(),mid.size());
78 }
79 #else
80 hash<char *> H;
81 t_id fileid=H(subject.c_str())+H(author.c_str());//prolly not as good as crc32, but oh well.
82 for (t_references::const_iterator ri = references.begin(); ri != references.end(); ++ri)
83 fileid+=H(ri->c_str());
84 if (req<=0)
85 fileid+=H(bamid().c_str());
86 #endif
87 return fileid;
88 }
set(char * str,const char * a,ulong anum,time_t d,ulong b,ulong l,const char * mid,char * refstr)89 void c_nntp_header::set(char * str,const char *a,ulong anum,time_t d,ulong b,ulong l,const char *mid, char *refstr){
90 assert(str);
91 assert(a);
92 author=a;articlenum=anum;date=d;bytes=b;lines=l;
93 messageid=mid;
94
95 references.clear();
96 if (refstr && *refstr) {
97 char *ref, *refstr_copy=refstr;
98 while ((ref = goodstrtok(&refstr_copy,' '))) {
99 references.push_back(ref);
100 }
101 }
102
103 const char *s=str+strlen(str)-3;//-1 for null, -2 for ), -3 for num
104 req=0;
105 for (;s>str;s--) {
106 if (*s=='/')
107 if ((req=parsepnum(str,s))>=0){
108 if (req==0) {
109 subject=str;
110 } else {
111 subject="";
112 subject.append(str,partoff);
113 subject.append("*");
114 subject.append(s);
115 }
116 return;
117 }
118 }
119 partoff=-1;tailoff=-1;
120 // partnum=0;
121 partnum=-1;
122 subject=str;
123 }
124
c_nntp_server_article(ulong _server,const c_group_info::ptr & _group,ulong _articlenum,ulong _bytes,ulong _lines)125 c_nntp_server_article::c_nntp_server_article(ulong _server,const c_group_info::ptr &_group,ulong _articlenum,ulong _bytes,ulong _lines):serverid(_server),group(_group),articlenum(_articlenum),bytes(_bytes),lines(_lines){}
126
127 //c_nntp_part::c_nntp_part(c_nntp_header *h):partnum(h->partnum),articlenum(h->articlenum),date(h->date),bytes(h->bytes),lines(h->lines){}
c_nntp_part(c_nntp_header * h)128 c_nntp_part::c_nntp_part(c_nntp_header *h):partnum(h->partnum),date(h->date),messageid(h->messageid){
129 addserverarticle(h);
130 }
131
addserverarticle(c_nntp_header * h)132 void c_nntp_part::addserverarticle(c_nntp_header *h){
133 c_nntp_server_article *sa;
134 #ifndef NDEBUG
135 if (debug>=DEBUG_MIN){
136 t_nntp_server_articles::iterator sai=articles.begin();
137 for (;sai!=articles.end();++sai){
138 if ((*sai)->serverid == h->serverid)
139 {
140 sa=(*sai);
141 printf("adding server_article we already have %lu %lu %lu %lu(%lu %lu %lu %lu)\n",h->serverid,h->articlenum,h->bytes,h->lines,sa->serverid,sa->articlenum,sa->bytes,sa->lines);
142 // return;//could be useful, lets add it.
143 }
144 }
145 }
146 if (h->date!=date)
147 printf("adding server_article with different date, date=%li h->date=%li mid=%s\n",date,h->date,h->messageid.c_str());
148 #endif
149 sa=new c_nntp_server_article(h->serverid,h->group,h->articlenum,h->bytes,h->lines);
150 articles.push_back(sa);
151 }
152
~c_nntp_part()153 c_nntp_part::~c_nntp_part(){
154 t_nntp_server_articles::iterator i;
155 for(i = articles.begin();i!=articles.end();++i){
156 assert(*i);
157 delete (*i);
158 }
159 }
160
lower_bound(int partnum)161 c_nntp_file_parts::iterator c_nntp_file_parts::lower_bound(int partnum)
162 {
163 iterator first = begin();
164 ptrdiff_t len = distance(begin(), end());
165 ptrdiff_t half;
166 iterator middle;
167
168 while (len > 0) {
169 half = len >> 1;
170 middle = first;
171 advance(middle, half);
172
173 if ((*middle)->partnum < partnum) {
174 first = middle;
175 ++first;
176 len = len - half - 1;
177 }
178 else
179 len = half;
180 }
181 return first;
182 }
183
184
addpart(c_nntp_part * p)185 void c_nntp_file::addpart(c_nntp_part *p){
186 assert(p);
187 //assert((req==-1 && p->partnum<=0) || (p->partnum<=req));//#### req==-1 hack for old version that set non-multipart messages partnum to 0 instead of -1
188 // parts[p->partnum]=p;
189 #ifndef NDEBUG
190 assert(!parts.haspart(p->partnum));
191 #endif
192 parts.addpart(p);
193 if (count_partnum(p->partnum, req)) have++;
194 // bytes+=p->apxbytes;lines+=p->apxlines;
195 }
addnewpart(c_nntp_part * p)196 void c_nntp_file::addnewpart(c_nntp_part *p){
197 time(&update);
198 addpart(p);
199 }
200
mergefile(c_nntp_file::ptr & f)201 void c_nntp_file::mergefile(c_nntp_file::ptr &f){
202 if (f->update>update)
203 update=f->update;
204 c_nntp_file_parts::iterator fpi=f->parts.begin();
205 while (fpi!=f->parts.end()){
206 const c_nntp_part *p = (*fpi);
207 c_nntp_part* mp = parts.part(p->partnum);
208 if (mp == NULL) {
209 addpart(new c_nntp_part(p->partnum, p->date, p->messageid));
210 mp=parts.part(p->partnum);
211 }else{
212 if (mp->messageid!=p->messageid){
213 PDEBUG(DEBUG_MED,"%s was gonna merge, but already have this part(sub=%s part=%i omid=%s)?\n",p->messageid.c_str(),f->subject.c_str(),p->partnum,mp->messageid.c_str());
214 ++fpi;
215 continue;
216 }
217 }
218 for (t_nntp_server_articles::const_iterator fsai=p->articles.begin(); fsai!=p->articles.end(); ++fsai){
219
220 c_nntp_server_article *nsa = new c_nntp_server_article(**fsai);
221 mp->articles.push_back(nsa);
222 }
223 delete (*fpi);
224 fpi = f->parts.erase(fpi);
225 }
226 }
227
228 //fill a mapping of how many parts of the file each server has
get_server_have_map(t_server_have_map & have_map) const229 void c_nntp_file::get_server_have_map(t_server_have_map &have_map) const{
230 c_nntp_file_parts::const_iterator pi(parts.begin());
231 for (;pi!=parts.end();++pi){
232 t_nntp_server_articles::const_iterator nsai((*pi)->articles.begin());
233 ulong serverid;
234 int partnum=(*pi)->partnum;
235 set<ulong> servers_already_found;
236
237 for (;nsai!=(*pi)->articles.end();++nsai) {
238 serverid=(*nsai)->serverid;
239 //don't increment count twice if a server has multiple server_articles for a single part
240 if (servers_already_found.insert(serverid).second){
241 t_server_have_map::iterator hmi(have_map.insert(t_server_have_map::value_type(serverid, 0)).first);
242 if (count_partnum(partnum, req))
243 ++hmi->second;
244 }
245 }
246 }
247 }
248
c_nntp_file(int r,ulong f,const char * s,const char * a,int po,int to,time_t ud)249 c_nntp_file::c_nntp_file(int r,ulong f,const char *s,const char *a,int po,int to,time_t ud):c_nntp_file_base(r, po, a, s),have(0),flags(f),tailoff(to),update(ud){
250 // printf("aoeu1.1\n");
251 }
c_nntp_file(c_nntp_header * h)252 c_nntp_file::c_nntp_file(c_nntp_header *h):c_nntp_file_base(*h),have(0),flags(0),tailoff(h->tailoff),update(0){
253 // printf("aoeu1\n");
254 }
255
~c_nntp_file()256 c_nntp_file::~c_nntp_file(){
257 c_nntp_file_parts::iterator i;
258 for(i = parts.begin();i!=parts.end();++i){
259 assert(*i);
260 delete (*i);
261 }
262 }
263
264
c_nntp_getinfo(const string & pat,const string & temppat,const vector<string> & dupepaths,nntp_file_pred * pre,int flag)265 c_nntp_getinfo::c_nntp_getinfo(const string &pat, const string &temppat, const vector<string> &dupepaths, nntp_file_pred *pre,int flag):path(pat), temppath(temppat), pred(pre), flags(flag) {
266 if (!(flags&GETFILES_NODUPEFILECHECK)) {
267 for (vector<string>::const_iterator si=dupepaths.begin(); si!=dupepaths.end(); ++si)
268 flist.addfrompath(*si);
269 flist.addfrompath(path);
270 }
271 }
272
nntp_cache_getfile(c_nntp_files_u * fc,ParHandler * parhandler,meta_mid_info * midinfo,const t_nntp_getinfo_list & getinfos,const c_nntp_file::ptr & f)273 static void nntp_cache_getfile(c_nntp_files_u *fc, ParHandler *parhandler, meta_mid_info *midinfo, const t_nntp_getinfo_list &getinfos, const c_nntp_file::ptr &f) {
274 pair<t_nntp_files_u::const_iterator,t_nntp_files_u::const_iterator> firange;
275 t_nntp_getinfo_list::const_iterator gii, giibegin=getinfos.begin(), giiend=getinfos.end();
276 c_nntp_getinfo::ptr info;
277 for (gii=giibegin; gii!=giiend; ++gii) {
278 info = *gii;
279 if ( (!(info->flags&GETFILES_AUTOPAR_DISABLING_FLAGS) || info->flags&GETFILES_GETINCOMPLETE || f->iscomplete()) // --autopar or -i or file_is_complete
280 && (info->flags&GETFILES_NODUPEIDCHECK || !(midinfo->check(f->bamid()))) // -dI or file_not_in_midinfo
281 && (*info->pred)(f.gimmethepointer()) // matches user spec
282 ){
283 if (!(info->flags&GETFILES_AUTOPAR_DISABLING_FLAGS)) {
284 if (parhandler->maybe_add_parfile(f, info->path, info->temppath, info->flags&GETFILES_GETINCOMPLETE))
285 continue;
286 if (!(info->flags&GETFILES_GETINCOMPLETE || f->iscomplete())) // autopar_didnt_want_it and -I and file_incomplete
287 continue;
288 }
289 firange=fc->files.equal_range(f->badate());
290 for (;firange.first!=firange.second;++firange.first){
291 if ((*firange.first).second->file->bamid()==f->bamid())
292 return;
293 }
294
295 if (!(info->flags&GETFILES_NODUPEFILECHECK) && info->flist.checkhavefile(f->subject.c_str(),f->bamid(),f->bytes())){
296 if (info->flags&GETFILES_DUPEFILEMARK)
297 midinfo->insert(f);
298 continue;
299 }
300 fc->addfile(f,info->path,info->temppath);
301 return;
302 }
303 }
304 }
305
getfiles(c_nntp_files_u * fc,ParHandler * parhandler,meta_mid_info * midinfo,const t_nntp_getinfo_list & getinfos)306 void c_nntp_cache::getfiles(c_nntp_files_u *fc, ParHandler *parhandler, meta_mid_info *midinfo, const t_nntp_getinfo_list &getinfos) {
307 t_nntp_files::const_iterator fi;
308 for(fi = files.begin();fi!=files.end();++fi){
309 nntp_cache_getfile(fc, parhandler, midinfo, getinfos, (*fi).second);
310 }
311 }
312
cache_ismultiserver(const t_nntp_server_info & server_info)313 static bool cache_ismultiserver(const t_nntp_server_info &server_info) {
314 int num=0;
315 for (t_nntp_server_info::const_iterator sii=server_info.begin(); sii!=server_info.end(); ++sii)
316 if (sii->second.num > 0)
317 num++;
318 return num > 1;
319 }
ismultiserver(void) const320 bool c_nntp_cache::ismultiserver(void) const {
321 return cache_ismultiserver(server_info);
322 }
323
getserverinfo(ulong serverid)324 c_nntp_server_info* c_nntp_cache::getserverinfo(ulong serverid){
325 t_nntp_server_info::iterator i = server_info.find(serverid);
326 if (i != server_info.end())
327 return &i->second;
328 return &server_info.insert(t_nntp_server_info::value_type(serverid, serverid)).first->second;
329 }
additem(c_nntp_header * h)330 int c_nntp_cache::additem(c_nntp_header *h){
331 assert(h);
332 c_nntp_file::ptr f;
333 t_nntp_files::iterator i;
334 pair<t_nntp_files::iterator, t_nntp_files::iterator> irange = files.equal_range(h);
335 // t_nntp_files::const_iterator i;
336 // pair<t_nntp_files::const_iterator, t_nntp_files::const_iterator> irange = files.equal_range(h->mid);
337
338 c_nntp_server_info *servinfo=getserverinfo(h->serverid);
339 if (h->articlenum > servinfo->high)
340 servinfo->high = h->articlenum;
341 if (h->articlenum < servinfo->low)
342 servinfo->low = h->articlenum;
343 servinfo->num++;
344
345 saveit=1;
346 // printf("%lu %s..",h->articlenum,h->subject.c_str());
347 for (i=irange.first;i!=irange.second;++i){
348 f=(*i).second;
349 assert(!f.isnull());
350 c_nntp_part* matchpart = f->parts.part(h->partnum);
351 if (matchpart != NULL) {
352 if (matchpart->messageid==h->messageid){
353 matchpart->addserverarticle(h);
354 return 0;
355 }
356 PDEBUG(DEBUG_MED,"%s was gonna add, but already have this part(sub=%s part=%i omid=%s)?\n",h->messageid.c_str(),f->subject.c_str(),h->partnum,matchpart->messageid.c_str());
357 continue;
358 }
359 // printf("adding\n");
360 c_nntp_part *p=new c_nntp_part(h);
361 f->addnewpart(p);
362 totalnum++;
363 return 0;
364 }
365 // printf("new\n");
366 f=new c_nntp_file(h);
367 c_nntp_part *p=new c_nntp_part(h);
368 f->addnewpart(p);
369 totalnum++;
370 //files[f->subject.c_str()]=f;
371 files.insert(t_nntp_files::value_type(f.gimmethepointer(),f));
372 return 1;
373 }
374
getxrange(c_nntp_server_info * servinfo,ulong newlow,ulong newhigh,c_nrange * range) const375 void c_nntp_cache::getxrange(c_nntp_server_info *servinfo,ulong newlow,ulong newhigh, c_nrange *range) const {
376 range->clear();
377 range->insert(newlow<servinfo->low?newlow:servinfo->low,newhigh);
378 getxrange(servinfo, range);
379 }
getxrange(c_nntp_server_info * servinfo,c_nrange * range) const380 void c_nntp_cache::getxrange(c_nntp_server_info *servinfo, c_nrange *range) const {
381 t_nntp_files::const_iterator i;
382 c_nntp_file::ptr nf;
383 c_nntp_file_parts::const_iterator pi;
384 c_nntp_part *np;
385 pair<t_nntp_server_articles::const_iterator,t_nntp_server_articles::const_iterator> sarange;
386 for(i = files.begin();i!=files.end();++i){
387 nf=(*i).second;
388 assert(!nf.isnull());
389 assert(!nf->parts.empty());
390 for(pi = nf->parts.begin();pi!=nf->parts.end();++pi){
391 np=(*pi);
392 assert(np);
393 t_nntp_server_articles::const_iterator sai = np->articles.begin();
394 for (; sai != np->articles.end(); ++sai){
395 if ((*sai)->serverid == servinfo->serverid)
396 {
397 range->remove((*sai)->articlenum);
398 }
399 }
400 }
401 }
402 }
flushlow(c_nntp_server_info * servinfo,ulong newlow,meta_mid_info * midinfo)403 ulong c_nntp_cache::flushlow(c_nntp_server_info *servinfo, ulong newlow, meta_mid_info *midinfo){
404 assert(newlow>0);
405 c_nrange flushrange;
406 flushrange.insert(0, newlow-1);
407 ulong r = flush(servinfo, flushrange, midinfo);
408 servinfo->low=newlow;
409 return r;
410 }
flush(c_nntp_server_info * servinfo,c_nrange flushrange,meta_mid_info * midinfo)411 ulong c_nntp_cache::flush(c_nntp_server_info *servinfo, c_nrange flushrange, meta_mid_info *midinfo){
412 ulong count=0,countp=0,countf=0;
413 t_nntp_files::iterator i,in;
414 c_nntp_file::ptr nf;
415 c_nntp_file_parts::iterator pi;
416 c_nntp_part *np;
417 t_nntp_server_articles::iterator sai;
418 c_nntp_server_article *sa;
419 c_mid_info rel_midinfo("");
420 //restrict the message to the range of headers we actually have, since showing 0-4294967295 or something isn't too useful ;0
421 flushrange.remove(0,servinfo->low-1);
422 flushrange.remove(servinfo->high+1,ULONG_MAX);
423 if (flushrange.empty())
424 return 0;
425 if (quiet<2) {printf("Flushing headers %lu-%lu(%lu):", flushrange.low(), flushrange.high(), flushrange.get_total());fflush(stdout);}
426 for(in = files.begin();in!=files.end();){
427 i=in;
428 ++in;
429 nf=(*i).second;
430 assert(!nf.isnull());
431 assert(!nf->parts.empty());
432 for(pi = nf->parts.end();pi!=nf->parts.begin();){
433 --pi;
434 np=(*pi);
435 assert(np);
436 for (sai = np->articles.end(); sai != np->articles.begin();) {
437 --sai;
438 sa=*sai;
439 assert(sa);
440 if ((sa->serverid == servinfo->serverid) && flushrange.check(sa->articlenum)){
441 delete sa;
442 sai = np->articles.erase(sai);
443 count++;
444 }
445 }
446 if (np->articles.empty()){
447 if (count_partnum(np->partnum,nf->req)) nf->have--;
448 midinfo->set_delete(np->messageid);
449 delete np;
450 pi = nf->parts.erase(pi);
451 countp++;
452 }
453 else if (midinfo->check(np->messageid))
454 {
455 rel_midinfo.insert(np->messageid);
456 }
457 }
458 if (nf->parts.empty()){
459 // nf->dec_rcount();
460 // delete nf;
461 files.erase(i);
462 countf++;
463 //#ifdef HAVE_HASH_MAP_H
464 // in=files.begin();//not needed, apparantly.
465 //#endif
466 }
467 }
468 servinfo->num-=count;
469 totalnum-=countp;
470 #ifndef NDEBUG
471 for(in = files.begin();in!=files.end();++in){
472 nf=(*in).second;
473 assert(!nf.isnull());
474 assert(!nf->parts.empty());
475 for(pi = nf->parts.begin();pi!=nf->parts.end();++pi){
476 np=(*pi);
477 assert(np);
478 for (sai=np->articles.begin(); sai != np->articles.end(); ++sai){
479 sa=*sai;
480 if (sa->serverid == servinfo->serverid){
481 assert(!flushrange.check(sa->articlenum));
482 }
483 }
484 }
485 }
486 #endif
487 if (quiet<2){printf(" %lu (%lu,%lu)\n",count,countp,countf);}
488 if (count)saveit=1;
489
490 midinfo->do_delete_fun(rel_midinfo);
491
492 return count;
493 }
setfilenamegz(string & file,int gz=-2)494 void setfilenamegz(string &file, int gz=-2){
495 #ifndef HAVE_LIBZ
496 gz=0;
497 #endif
498 if (gz==-2)
499 gz=nconfig.usegz;
500 if (gz)
501 file.append(".gz");
502 }
dofileopen(string file,string mode,int gz=-2)503 c_file *dofileopen(string file, string mode, int gz=-2){
504 c_file *f=NULL;
505 #ifndef HAVE_LIBZ
506 gz=0;
507 #endif
508 if (gz==-2)
509 gz=nconfig.usegz;
510 #ifdef HAVE_LIBZ
511 if (gz){
512 if (gz>0){
513 char blah[10];
514 sprintf(blah,"%i",gz);
515 mode.append(blah);
516 }
517 f=new c_file_gz(file.c_str(),mode.c_str());
518 }
519 #endif
520 if (!gz){
521 f=new c_file_fd(file.c_str(),mode.c_str());
522 }
523 if (mode[0]=='r' || mode.find('+')>=0)
524 f->initrbuf();
525 return f;
526 }
527
// Parser states for the cache file format, listed in numeric order.
enum {
	FILE_MODE=0,           // expecting a file line
	PART_MODE=1,           // expecting a part line, or "." ending the file
	START_MODE=2,          // first line of the cache (version / totals)
	SERVER_ARTICLE_MODE=3, // expecting a server article line, or "." ending the part
	SERVERINFO_MODE=4,     // expecting a server info line, or "." ending the section
	REFERENCES_MODE=5,     // expecting a reference line, or "." ending the references
};
536
537 class c_nntp_cache_reader {
538 protected:
539 c_file *f;
540 meta_mid_info *midinfo;
541 c_group_info::ptr group;
542 public:
543 int cache_sortver;
544 ulong count,counta,curline,countdeada,totalnum;
545 c_nntp_cache_reader(c_file *cf, meta_mid_info*mi, t_nntp_server_info &server_infoi, const c_group_info::ptr &grou);
546 c_nntp_file::ptr read_file(void);
filename(void) const547 const char *filename(void) const {return f->name();}
548 void check_counts(void);
549 };
550
c_nntp_cache_reader(c_file * cf,meta_mid_info * mi,t_nntp_server_info & server_info,const c_group_info::ptr & grou)551 c_nntp_cache_reader::c_nntp_cache_reader(c_file *cf, meta_mid_info *mi, t_nntp_server_info &server_info, const c_group_info::ptr &grou):f(cf), midinfo(mi), group(grou){
552 count=0;counta=0;curline=0;countdeada=0;totalnum=0;cache_sortver=-1;
553
554 char *t[5];
555 int i;
556 if (f->beof())
557 throw CacheEx(Ex_INIT, "unexpected EOF on cache file line %lu",curline);
558 curline++;
559 //(mode==START_MODE)
560 i = f->btoks('\t',t,2);
561 if (i==2 && (strcmp(t[0],CACHE_VERSION)==0)){
562 totalnum=atoul(t[1]);
563 char *subvercp=strchr(t[1], ' ');
564 if (subvercp)
565 cache_sortver = atoi(subvercp);
566 }else{
567 if (i>0 && strncmp(t[0],"NGET",4)==0)
568 throw CacheEx(Ex_INIT,"cache is from a different version of nget");
569 else
570 throw CacheEx(Ex_INIT,"cache does not seem to be an nget cache file");
571 }
572
573 while (1) {
574 if (f->beof())
575 throw CacheEx(Ex_INIT, "unexpected EOF on cache file line %lu",curline);
576 curline++;
577 //(mode==SERVERINFO_MODE)
578 if (f->bpeek()=='.') {
579 if (f->bgetsp()[1]!=0) {
580 printf("warning: stuff after . line %lu mode %i\n",curline,SERVERINFO_MODE);
581 set_cache_warn_status();
582 }
583 //mode=FILE_MODE;//start new file mode
584 return;
585 }
586 i = f->btoks('\t',t,4);
587 if (i==4){
588 ulong serverid=atoul(t[0]);
589 if (nconfig.hasserver(serverid)) {
590 server_info.insert(t_nntp_server_info::value_type(serverid, c_nntp_server_info(serverid, atoul(t[1]), atoul(t[2]), atoul(t[3]))));
591 }else{
592 printf("warning: serverid %lu not found in server list\n",serverid);
593 set_cache_warn_status();
594 }
595 }else{
596 printf("invalid line %lu mode %i (%i toks)\n",curline,SERVERINFO_MODE,i);//mode);
597 set_cache_warn_status();
598 }
599 }
600
601 }
602
read_file(void)603 c_nntp_file::ptr c_nntp_cache_reader::read_file(void) {
604 int mode=FILE_MODE;
605 //c_nntp_file *nf=NULL;
606 c_nntp_file::ptr nf=NULL;
607 c_nntp_part *np=NULL;
608 c_nntp_server_article *sa;
609 char *t[8];
610 int i;
611 while (!f->beof()){
612 curline++;
613 if (mode==SERVER_ARTICLE_MODE && np){//new server_article mode
614 if (f->bpeek()=='.'){
615 if (f->bgetsp()[1]!=0) {
616 printf("warning: stuff after . line %lu mode %i\n",curline,mode);
617 set_cache_warn_status();
618 }
619 mode=PART_MODE;//go back to new part mode
620 continue;
621 }else{
622 i = f->btoks('\t',t,4);
623 if (i==4){
624 ulong serverid=atoul(t[0]);
625 if (nconfig.hasserver(serverid)) {
626 sa=new c_nntp_server_article(serverid,group,atoul(t[1]),atoul(t[2]),atoul(t[3]));
627 //np->addserverarticle(sa);
628 np->articles.push_back(sa);
629 counta++;
630 }else
631 countdeada++;
632 }else{
633 printf("invalid line %lu mode %i (%i toks)\n",curline,mode,i);
634 set_cache_warn_status();
635 }
636 }
637 }
638 else if (mode==PART_MODE && nf){//new part mode
639 if (np && np->articles.empty()) {
640 midinfo->set_delete(np->messageid);
641 nf->parts.removepart(np->partnum);
642 delete np;
643 np=NULL;
644 count--;
645 }
646 if (f->bpeek()=='.'){
647 if (f->bgetsp()[1]!=0) {
648 printf("warning: stuff after . line %lu mode %i\n",curline,mode);
649 set_cache_warn_status();
650 }
651 if (nf->parts.empty()){
652 set_cache_warn_status();
653 printf("empty nntp_file finished at line %lu mode %i\n",curline,mode);
654 nf=NULL;
655 np=NULL;
656 mode=FILE_MODE;//go back to new file mode
657 }else
658 return nf;
659 }else{
660 i = f->btoks('\t',t,3);
661 if (i==3){
662 np=new c_nntp_part(atoi(t[0]),atoul(t[1]),t[2]);
663 nf->addpart(np);//add at '.' section (above) ... o r not.
664 count++;
665 mode=SERVER_ARTICLE_MODE;//start adding server_articles
666 }else{
667 printf("invalid line %lu mode %i (%i toks)\n",curline,mode,i);
668 set_cache_warn_status();
669 }
670 }
671 }
672 else if (mode==FILE_MODE){//new file mode
673 i = f->btoks('\t',t,7);
674 if (i==7){
675 nf=new c_nntp_file(atoi(t[0]),atoul(t[1]),t[2],t[3],atoi(t[4]),atoi(t[5]),atoul(t[6]));
676 mode=REFERENCES_MODE;
677 }else{
678 printf("invalid line %lu mode %i (%i toks)\n",curline,mode,i);
679 set_cache_warn_status();
680 }
681 }
682 else if (mode==REFERENCES_MODE && nf){//adding references on new file
683 char *buf=f->bgetsp();
684 if (buf[0]=='.' && buf[1]==0){
685 mode=PART_MODE;
686 np=NULL;
687 continue;
688 }else{
689 if (buf[0]=='.') buf++;//unescape any invalid references that started with .
690 nf->references.push_back(buf);
691 }
692 }else{
693 assert(0);//should never get here
694 }
695 }
696 if (nf)
697 throw CacheEx(Ex_INIT, "unexpected EOF on cache file line %lu",curline);
698 return NULL;
699 }
check_counts(void)700 void c_nntp_cache_reader::check_counts(void) {
701 if (countdeada){
702 printf("warning: read (and ignored) %lu articles with bad serverids\n",countdeada);
703 set_cache_warn_status();
704 }
705 if (count!=totalnum){
706 printf("warning: read %lu parts from cache, expecting %lu\n",count,totalnum);
707 totalnum=count;
708 set_cache_warn_status();
709 }
710 }
711
c_nntp_cache(void)712 c_nntp_cache::c_nntp_cache(void):totalnum(0), saveit(0){
713 fileread=-1;
714 }
c_nntp_cache(string path,c_group_info::ptr group_,meta_mid_info * midinfo)715 c_nntp_cache::c_nntp_cache(string path,c_group_info::ptr group_,meta_mid_info *midinfo):totalnum(0),group(group_){
716 saveit=0;
717 //file=nid;
718 c_file *f;
719 file=path_join(path,group->group + ",cache");
720 setfilenamegz(file,group->usegz);
721 fileread=0;
722 try {
723 f=dofileopen(file.c_str(),"rb",group->usegz);
724 }catch(FileNOENTEx &e){
725 return;
726 }
727 auto_ptr<c_file> fcloser(f);
728 try{
729 c_nntp_cache_reader reader(f, midinfo, server_info, group_);
730 c_nntp_file::ptr nf;
731 while ((nf=reader.read_file()))
732 files.insert(t_nntp_files::value_type(nf.gimmethepointer(),nf));
733 fileread=1;
734 if (reader.cache_sortver!=CACHE_SORTVER)
735 saveit=1; //if the cache is from a version with different sorting, force saving it with new sorting even if nothing is changed otherwise.
736 PDEBUG(DEBUG_MIN,"read %lu parts (%lu sa) %lu files",reader.count,reader.counta,(ulong)files.size());
737 reader.check_counts();
738 totalnum = reader.totalnum;
739 } catch (CacheEx &e) {
740 set_cache_warn_status();
741 printf("%s: %s\n", file.c_str(), e.getExStr());
742 }
743 f->close();
744 }
~c_nntp_cache()745 c_nntp_cache::~c_nntp_cache(){
746 t_nntp_files::iterator i;
747 if (fileread!=-1 && saveit && (fileread || !files.empty())){
748 string tmpfn;
749 tmpfn=file+".tmp";
750 try {
751 c_file *f=dofileopen(tmpfn,"wb",group->usegz);
752 ulong count=0,counta=0;
753 try {
754 auto_ptr<c_file> fcloser(f);
755 if (quiet<2){printf("saving cache: %lu parts, %lu files..",totalnum,(ulong)files.size());fflush(stdout);}
756 c_nntp_file::ptr nf;
757 t_references::iterator ri;
758 c_nntp_file_parts::iterator pi;
759 t_nntp_server_articles::iterator sai;
760 c_nntp_server_article *sa;
761 c_nntp_part *np;
762 f->putf(CACHE_VERSION"\t%lu %i\n",totalnum,CACHE_SORTVER);//START_MODE
763 //vv SERVERINFO_MODE
764 for (t_nntp_server_info::const_iterator sii=server_info.begin(); sii!=server_info.end(); ++sii) {
765 const c_nntp_server_info &si = sii->second;
766 f->putf("%lu\t%lu\t%lu\t%lu\n",si.serverid,si.high,si.low,si.num);//mode 4
767 }
768 f->putf(".\n");
769 //end SERVERINFO_MODE
770 //vv FILE_MODE
771 for(i = files.begin();i!=files.end();++i){
772 nf=(*i).second;
773 assert(!nf.isnull());
774 assert(!nf->parts.empty());
775 f->putf("%i\t%lu\t%s\t%s\t%i\t%i\t%lu\n",nf->req,nf->flags,nf->subject.c_str(),nf->author.c_str(),nf->partoff,nf->tailoff,nf->update);//FILE_MODE
776 for(ri = nf->references.begin();ri!=nf->references.end();++ri){
777 if ((*ri)[0]=='.') f->putf("."); //escape possible invalid references that might start with .
778 f->putf("%s\n",ri->c_str());//REFERENCES_MODE
779 }
780 f->putf(".\n");//end REFERENCES_MODE
781 for(pi = nf->parts.begin();pi!=nf->parts.end();++pi){
782 np=(*pi);
783 assert(np);
784 f->putf("%i\t%lu\t%s\n",np->partnum,np->date,np->messageid.c_str());//PART_MODE
785 for (sai = np->articles.begin(); sai != np->articles.end(); ++sai){
786 sa=(*sai);
787 assert(sa);
788 f->putf("%lu\t%lu\t%lu\t%lu\n",sa->serverid,sa->articlenum,sa->bytes,sa->lines);//SERVER_ARTICLE_MODE
789 counta++;
790 }
791 f->putf(".\n");//end SERVER_ARTICLE_MODE
792 count++;
793 }
794 f->putf(".\n");//end PART_MODE
795 (*i).second=NULL; //free cache as we go along instead of at the end, so we don't swap more with low-mem.
796 //nf->storef(f);
797 //delete nf;
798 //nf->dec_rcount();
799 }
800 f->close();
801 }catch(FileEx &e){
802 printCaughtEx(e);
803 if (unlink(tmpfn.c_str()))
804 perror("unlink:");
805 fatal_exit();
806 }
807 if (quiet<2) printf(" done. (%lu sa)\n",counta);
808 if (count!=totalnum){
809 printf("warning: wrote %lu parts from cache, expecting %lu\n",count,totalnum);
810 set_cache_warn_status();
811 }
812 xxrename(tmpfn.c_str(), file.c_str());
813 return;
814 }catch (FileEx &e){
815 printCaughtEx(e);
816 fatal_exit();
817 }
818 }
819
820 if (quiet<2){printf("freeing cache: %lu parts, %lu files..\n",totalnum,(ulong)files.size());}//fflush(stdout);}
821
822 // for(i = files.begin();i!=files.end();++i){
823 //delete (*i).second;
824 // (*i).second->dec_rcount();
825 // }
826 // if (!quiet) printf(" done.\n");
827 }
~c_nntp_files_u()828 c_nntp_files_u::~c_nntp_files_u(){
829 // t_nntp_files_u::iterator i;
830 // for(i = files.begin();i!=files.end();++i){
831 // (*i).second->dec_rcount();
832 // }
833 }
834
ltfp(const c_nntp_file::ptr & f1,const c_nntp_file::ptr & f2)835 static inline bool ltfp(const c_nntp_file::ptr &f1, const c_nntp_file::ptr &f2) {
836 return *f1 < *f2;
837 }
838
nntp_cache_getfiles(c_nntp_files_u * fc,ParHandler * parhandler,bool * ismultiserver,string path,const vector<c_group_info::ptr> & groups,meta_mid_info * midinfo,const t_nntp_getinfo_list & getinfos)839 void nntp_cache_getfiles(c_nntp_files_u *fc, ParHandler *parhandler, bool *ismultiserver, string path, const vector<c_group_info::ptr> &groups, meta_mid_info*midinfo, const t_nntp_getinfo_list &getinfos){
840 set<ulong> usedservers;
841 auto_vector<c_file> cachefiles;
842 vector<t_nntp_server_info> server_infos;
843 vector<c_nntp_cache_reader> readers;
844 vector<c_nntp_file::ptr> nfiles;
845 ulong mergedcount=0, numfiles=0, mergedfiles=0, count=0, counta=0;
846 c_nntp_file::ptr nf, mergef;
847 for (vector<c_group_info::ptr>::const_iterator gi=groups.begin(); gi!=groups.end(); ++gi) {
848 const c_group_info::ptr &group = *gi;
849 string file=path_join(path,group->group + ",cache");
850 setfilenamegz(file,group->usegz);
851 c_file *f=NULL;
852 try {
853 f=dofileopen(file.c_str(),"rb",group->usegz);
854 }catch(FileNOENTEx &e){
855 //pass
856 }
857
858 if (f) {
859 cachefiles.push_back(f);
860 try{
861 t_nntp_server_info server_info;
862 c_nntp_cache_reader reader(f, midinfo, server_info, group);
863
864 if (reader.cache_sortver!=CACHE_SORTVER)
865 throw CacheEx(Ex_INIT, "cache file must be updated with this version of nget before it can be used with metagrouping");
866
867 for (t_nntp_server_info::const_iterator sii=server_info.begin(); sii!=server_info.end(); ++sii)
868 if (sii->second.num > 0)
869 usedservers.insert(sii->first);
870
871 if ((nf=reader.read_file())) {
872 //printf("initial file %i\n", nfiles.size());
873 nfiles.push_back(nf);//initialize with first nntp_file.
874 readers.push_back(reader);
875 numfiles++;
876 }
877
878 } catch (CacheEx &e) {
879 set_cache_warn_status();
880 printf("%s: %s\n", file.c_str(), e.getExStr());
881 }
882 }
883 }
884
885 *ismultiserver = usedservers.size() > 1;
886
887 vector<c_nntp_file::ptr>::iterator nfi_m;
888
889 while (!nfiles.empty()) {
890 nfi_m = min_element(nfiles.begin(), nfiles.end(), ltfp);
891 mergef = *nfi_m;
892 //printf("pre-loop. nfiles.size=%u, merged=%lu, numfiles=%lu\n", nfiles.size(), mergedfiles, numfiles);
893 for (unsigned i = 0; i<nfiles.size();) {
894 //printf("loop start. i=%u nfiles.size=%u, merged=%lu, numfiles=%lu\n",i, nfiles.size(), mergedfiles, numfiles);
895 if (nfiles[i]==mergef || *nfiles[i]==*mergef) {
896 //printf("file %u equals min\n", i);
897 if (nfiles[i]!=mergef){
898 //printf("file %u merging\n", i);
899 mergef->mergefile(nfiles[i]);
900 }
901 if (nfiles[i]==mergef || nfiles[i]->parts.empty()) {
902 try{
903 //printf("reading file %u\n", i);
904 nf=readers[i].read_file();
905 } catch (CacheEx &e) {
906 nf=NULL;
907 set_cache_warn_status();
908 printf("%s: %s\n", readers[i].filename(), e.getExStr());
909 }
910 //printf("file %u = %p\n", i, nf.gimmethepointer());
911 if (nf) {
912 assert(!(*nf < *nfiles[i]));
913 numfiles++;
914 nfiles[i]=nf;
915 } else {
916 nfiles.erase(nfiles.begin()+i);
917 count+=readers[i].count;
918 counta+=readers[i].counta;
919 readers[i].check_counts();
920 readers.erase(readers.begin()+i);
921 continue;
922 }
923 }
924 }
925 ++i;
926 }
927 //printf("post-loop. nfiles.size=%u, merged=%lu, numfiles=%lu\n", nfiles.size(), mergedfiles, numfiles);
928 nntp_cache_getfile(fc, parhandler, midinfo, getinfos, mergef);
929 mergedfiles++;
930 mergedcount+=mergef->parts.size();
931 }
932
933 PDEBUG(DEBUG_MIN,"scanned %lu parts %lu files (total: %lu parts (%lu sa) %lu files)",mergedcount,mergedfiles,count,counta,numfiles);
934
935 for (vector<c_nntp_cache_reader>::iterator cri=readers.begin(); cri!=readers.end(); ++cri)
936 cri->check_counts();
937
938 for (auto_vector<c_file>::iterator cfi=cachefiles.begin(); cfi!=cachefiles.end(); ++cfi)
939 (*cfi)->close();
940 }
941
nntp_cache_getfiles(c_nntp_files_u * fc,ParHandler * parhandler,bool * ismultiserver,string path,c_group_info::ptr group,meta_mid_info * midinfo,const t_nntp_getinfo_list & getinfos)942 void nntp_cache_getfiles(c_nntp_files_u *fc, ParHandler *parhandler, bool *ismultiserver, string path, c_group_info::ptr group, meta_mid_info*midinfo, const t_nntp_getinfo_list &getinfos){
943
944 string file=path_join(path,group->group + ",cache");
945 setfilenamegz(file,group->usegz);
946 c_file *f;
947 try {
948 f=dofileopen(file.c_str(),"rb",group->usegz);
949 }catch(FileNOENTEx &e){
950 return;
951 }
952 auto_ptr<c_file> fcloser(f);
953 try{
954 t_nntp_server_info server_info;
955 ulong numfiles=0;
956 c_nntp_cache_reader reader(f, midinfo, server_info, group);
957 *ismultiserver = cache_ismultiserver(server_info);
958 c_nntp_file::ptr nf;
959
960 while ((nf=reader.read_file())) {
961 nntp_cache_getfile(fc, parhandler, midinfo, getinfos, nf);
962 numfiles++;
963 }
964
965 PDEBUG(DEBUG_MIN,"scanned %lu parts (%lu sa) %lu files",reader.count,reader.counta,numfiles);
966 reader.check_counts();
967 } catch (CacheEx &e) {
968 set_cache_warn_status();
969 printf("%s: %s\n", file.c_str(), e.getExStr());
970 }
971 f->close();
972 }
973
974
975
976 #define MID_INFO_MIN_KEEP (14*24*60*60)
977 #define MID_INFO_MIN_KEEP_DEL (7*24*60*60)
do_delete_fun(const c_mid_info & rel_mid)978 void c_mid_info::do_delete_fun(const c_mid_info &rel_mid){
979 t_message_state_list::iterator i=states.begin();
980 c_message_state::ptr s;
981 int deld=0;
982 time_t curtime=time(NULL);
983 for (;i!=states.end();++i){
984 s=(*i).second;
985 if (rel_mid.check(s->messageid))
986 continue;
987 if ((s->date_removed==TIME_T_MAX1 && s->date_added+MID_INFO_MIN_KEEP<curtime) || (s->date_added+MID_INFO_MIN_KEEP<curtime && s->date_removed+MID_INFO_MIN_KEEP_DEL<curtime)){
988 // delete s;
989 // states.erase(i);
990 // i=states.begin();//urgh.
991 s->date_removed=TIME_T_DEAD;//let em just not get saved.
992 changed=1;deld++;
993 }
994 }
995 PDEBUG(DEBUG_MIN,"c_mid_info::do_delete_fun: %i killed",deld);
996 }
c_mid_info(string path)997 c_mid_info::c_mid_info(string path){
998 load(path);
999 }
load(string path,bool merge,bool lock)1000 void c_mid_info::load(string path,bool merge,bool lock){
1001 if (!merge){
1002 clear();
1003 changed=0;
1004 }
1005 if (path.empty())
1006 return;
1007 c_file *f=NULL;
1008 if (!merge)
1009 setfilenamegz(path);//ugh, hack.
1010 file=path;
1011 int line=0;
1012 //c_lockfile locker(path,WANT_SH_LOCK);
1013 auto_ptr<c_lockfile> locker;
1014 if (lock)
1015 locker.reset(new c_lockfile(path,WANT_SH_LOCK));
1016 // c_regex_r midre("^(.+) ([0-9]+) ([0-9]+)$");
1017 char *t[3];
1018 int i;
1019 try {
1020 f=dofileopen(path.c_str(),"rb");
1021 }catch(FileNOENTEx &e){
1022 return;
1023 }
1024 auto_ptr<c_file> fcloser(f);
1025 while (!f->beof()){
1026 line++;
1027 i = f->btoks(' ',t,3);
1028 if (i==3)
1029 insert_full(t[0],atol(t[1]),atol(t[2]));//TODO: shouldn't set changed flag if no new ones are actually merged.
1030 else {
1031 printf("c_mid_info::load: invalid line %i (%i toks)\n",line,i);
1032 set_cache_warn_status();
1033 }
1034 }
1035 f->close();
1036 PDEBUG(DEBUG_MIN,"c_mid_info::load read %i lines",line);
1037 if (!merge)
1038 changed=0;
1039 return;
1040 }
~c_mid_info()1041 c_mid_info::~c_mid_info(){
1042 try {
1043 save();
1044 } catch (FileEx &e) {
1045 printCaughtEx(e);
1046 fatal_exit();
1047 }
1048 clear();
1049 }
save(void)1050 void c_mid_info::save(void){
1051 if (!changed)
1052 return;
1053 if (file.empty())
1054 return;
1055 c_file *f=NULL;
1056 c_lockfile locker(file,WANT_EX_LOCK);//lock before we read, so that multiple copies trying to save at once don't lose changes.
1057 {
1058 unsigned long count1=states.size();
1059 load(file,1,0);//merge any changes that might have happened
1060 if (count1!=states.size()){
1061 if (debug){printf("saving mid_info: merged something...(%lu)\n",(ulong)states.size()-count1);}
1062 }
1063 }
1064 int nums=0;
1065 string tmpfn=file+".tmp";
1066 f=dofileopen(tmpfn,"wb");
1067 try {
1068 auto_ptr<c_file> fcloser(f);
1069 if (debug){printf("saving mid_info: %lu infos..",(ulong)states.size());fflush(stdout);}
1070 t_message_state_list::iterator sli;
1071 c_message_state::ptr ms;
1072 for (sli=states.begin(); sli!=states.end(); ++sli){
1073 ms=(*sli).second;
1074 if (ms->date_removed==TIME_T_DEAD)
1075 continue;
1076 f->putf("%s %li %li\n",ms->messageid.c_str(),ms->date_added,ms->date_removed);
1077 nums++;
1078 }
1079 if (debug) printf(" (%i) done.\n",nums);
1080 f->close();
1081 }catch(FileEx &e){
1082 if (unlink(tmpfn.c_str()))
1083 perror("unlink:");
1084 throw;
1085 }
1086 xxrename(tmpfn.c_str(), file.c_str());
1087 return;
1088 }
1089
1090