1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3  * Pan - A Newsreader for Gtk+
4  * Copyright (C) 2002-2006  Charles Kerr <charles@rebelbase.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include <config.h>
21 #include <cerrno>
22 #include <fstream>
23 #include <map>
24 #include <vector>
25 #include <string>
26 #include <cmath>
27 #include <glib.h>
28 extern "C" {
29   #include <sys/types.h> // for chmod
30   #include <sys/stat.h> // for chmod
31   #include <glib/gi18n.h>
32 }
33 #include <pan/general/debug.h>
34 #include <pan/general/log.h>
35 #include <pan/general/macros.h>
36 #include <pan/general/messages.h>
37 #include <pan/general/quark.h>
38 #include <pan/general/time-elapsed.h>
39 #include <pan/usenet-utils/filter-info.h>
40 #include <pan/data/article.h>
41 #include "article-filter.h"
42 #include "data-impl.h"
43 
44 using namespace pan;
45 
GroupHeaders()46 DataImpl :: GroupHeaders :: GroupHeaders ():
47   _ref (0),
48   _dirty (false)
49 {
50 
51 }
52 
~GroupHeaders()53 DataImpl :: GroupHeaders :: ~GroupHeaders ()
54 {
55 
56 }
57 
58 DataImpl :: ArticleNode*
find_node(const Quark & mid)59 DataImpl :: GroupHeaders :: find_node (const Quark& mid)
60 {
61   ArticleNode * node (0);
62   nodes_t::iterator it (_nodes.find (mid));
63   if (it != _nodes.end())
64     node = it->second;
65   return node;
66 }
67 
68 const DataImpl :: ArticleNode*
find_node(const Quark & mid) const69 DataImpl :: GroupHeaders :: find_node (const Quark& mid) const
70 {
71   const ArticleNode * node (0);
72   nodes_t::const_iterator it (_nodes.find (mid));
73   if (it != _nodes.end())
74     node = it->second;
75   return node;
76 }
77 
78 const Quark&
find_parent_message_id(const Quark & mid) const79 DataImpl :: GroupHeaders :: find_parent_message_id (const Quark& mid) const
80 {
81   const ArticleNode * node (find_node (mid));
82   if (node && node->_parent)
83     return node->_parent->_mid;
84 
85   static const Quark empty_quark;
86   return empty_quark;
87 }
88 
89 const Article*
find_article(const Quark & message_id) const90 DataImpl :: GroupHeaders :: find_article (const Quark& message_id) const
91 {
92   Article *a (0);
93 
94   const ArticleNode * node (find_node (message_id));
95   if (node)
96     a = node->_article;
97 
98   return a;
99 }
100 
101 Article*
find_article(const Quark & message_id)102 DataImpl :: GroupHeaders :: find_article (const Quark& message_id)
103 {
104   Article *a(0);
105 
106   const ArticleNode * node (find_node (message_id));
107   if (node)
108     a = node->_article;
109 
110   return a;
111 }
112 
113 void
remove_articles(const quarks_t & mids)114 DataImpl :: GroupHeaders :: remove_articles (const quarks_t& mids)
115 {
116   nodes_v nodes;
117   find_nodes (mids, _nodes, nodes);
118   foreach (nodes_v, nodes, it)
119     (*it)->_article = 0;
120   _dirty = true;
121 }
122 
123 const DataImpl :: GroupHeaders*
get_group_headers(const Quark & group) const124 DataImpl :: get_group_headers (const Quark& group) const
125 {
126    group_to_headers_t::const_iterator it (_group_to_headers.find(group));
127    return it==_group_to_headers.end() ? 0 : it->second;
128 }
129 
130 DataImpl :: GroupHeaders*
get_group_headers(const Quark & group)131 DataImpl :: get_group_headers (const Quark& group)
132 {
133    group_to_headers_t::iterator it (_group_to_headers.find(group));
134    return it==_group_to_headers.end() ? 0 : it->second;
135 }
136 
137 void
build_references_header(const Article * article,std::string & setme) const138 DataImpl :: GroupHeaders :: build_references_header (const Article* article, std::string& setme) const
139 {
140   setme.clear ();
141   const Quark& message_id (article->message_id);
142   const ArticleNode * node (find_node (message_id));
143   while (node && node->_parent) {
144     node = node->_parent;
145     const StringView& ancestor_mid = node->_mid.to_view ();
146     setme.insert (0, ancestor_mid.str, ancestor_mid.len);
147     if (node->_parent)
148       setme.insert (0, 1, ' ');
149   }
150 }
151 
152 void
get_article_references(const Quark & group,const Article * article,std::string & setme) const153 DataImpl :: get_article_references (const Quark& group, const Article* article, std::string& setme) const
154 {
155   const GroupHeaders * h (get_group_headers (group));
156   if (!h)
157     setme.clear ();
158   else
159     h->build_references_header (article, setme);
160 }
161 
162 void
free_group_headers_memory(const Quark & group)163 DataImpl :: free_group_headers_memory (const Quark& group)
164 {
165   group_to_headers_t::iterator it (_group_to_headers.find (group));
166   if (it != _group_to_headers.end()) {
167     delete it->second;
168     _group_to_headers.erase (it);
169   }
170 }
171 
172 void
ref_group(const Quark & group)173 DataImpl :: ref_group (const Quark& group)
174 {
175   GroupHeaders * h (get_group_headers (group));
176   if (!h)
177   {
178     h = _group_to_headers[group] = new GroupHeaders ();
179     load_headers (*_data_io, group);
180   }
181   ++h->_ref;
182 //  std::cerr << LINE_ID << " group " << group << " refcount up to " << h->_ref << std::endl;
183 }
184 
185 void
unref_group(const Quark & group)186 DataImpl :: unref_group   (const Quark& group)
187 {
188   GroupHeaders * h (get_group_headers (group));
189   pan_return_if_fail (h != 0);
190 
191   --h->_ref;
192 //  std::cerr << LINE_ID << " group " << group << " refcount down to " << h->_ref << std::endl;
193   if (h->_ref == 0)
194   {
195 //    if (h->_dirty )
196       save_headers (*_data_io, group);
197     h->_dirty = false;
198     free_group_headers_memory (group);
199   }
200 }
201 
202 void
fire_article_flag_changed(articles_t & a,const Quark & group)203 DataImpl :: fire_article_flag_changed (articles_t& a, const Quark& group)
204 {
205   GroupHeaders * h (get_group_headers (group));
206   h->_dirty = true;
207   Data::fire_article_flag_changed(a,group);
208 }
209 
210 void
find_nodes(const quarks_t & mids,nodes_t & nodes,nodes_v & setme)211 DataImpl :: find_nodes (const quarks_t           & mids,
212                         nodes_t                  & nodes,
213                         nodes_v                  & setme)
214 {
215   NodeWeakOrdering o;
216   nodes_t tmp;
217   std::set_intersection (nodes.begin(), nodes.end(),
218                          mids.begin(), mids.end(),
219                          std::inserter (tmp, tmp.begin()), o);
220 
221   setme.reserve (tmp.size());
222   foreach_const (nodes_t, tmp, it)
223     setme.push_back (it->second);
224 }
225 
226 void
find_nodes(const quarks_t & mids,const nodes_t & nodes,const_nodes_v & setme)227 DataImpl :: find_nodes (const quarks_t           & mids,
228                         const nodes_t            & nodes,
229                         const_nodes_v            & setme)
230 {
231   NodeWeakOrdering o;
232   nodes_t tmp;
233   std::set_intersection (nodes.begin(), nodes.end(),
234                          mids.begin(), mids.end(),
235                          std::inserter (tmp, tmp.begin()), o);
236 
237   setme.reserve (tmp.size());
238   foreach_const (nodes_t, tmp, it)
239     setme.push_back (it->second);
240 }
241 
242 /*******
243 ********
244 *******/
245 
// Attaches 'article' to the thread tree of 'group' and links it to the
// ancestors named in 'references', creating placeholder nodes (nodes
// without an Article) for ancestors that have not been seen yet.
//
// 'article' must have been instantiated by
// GroupHeaders::alloc_new_article()!!
//
// 'references' is a space-separated list of message-ids; it is consumed
// from its last token backwards.
//
// Returns early (via pan_return_if_fail) when the group has no headers.
void
DataImpl :: load_article (const Quark       & group,
                          Article           * article,
                          const StringView  & references)

{
#if 0
  std::cerr << LINE_ID << " adding article "
            << " subject [" << article->subject << ']'
            << " mid [" << article->message_id <<  ']'
            << " references [" << references << ']'
            << std::endl;
#endif

  GroupHeaders * h (get_group_headers (group));
  pan_return_if_fail (h!=0);

  // populate the current node
  // (the node may already exist as a placeholder created while threading
  // one of this article's descendants)
  const Quark& mid (article->message_id);
  ArticleNode * node (h->_nodes[mid]);
  if (!node) {
    static const ArticleNode blank_node;
    h->_node_chunk.push_back (blank_node);
    node = h->_nodes[mid] = &h->_node_chunk.back();
    node->_mid = mid;
  }
  // !!INFO!! : this is bypassed for now, as it causes an abort on local cache corruptions
  //assert (!node->_article);
  node->_article = article;
  ArticleNode * article_node = node; // remembered for the sanity check at the end

  // build nodes for each of the references
  // 'node' climbs one level per iteration; 'tok' is the message-id that the
  // References header claims is the parent of the current 'node'.
  StringView tok, refs(references);
  //std::cerr << LINE_ID << " references [" << refs << ']' << std::endl;
  while (refs.pop_last_token (tok, ' '))
  {
    tok.trim ();
    if (tok.empty())
      break;

    ArticleNode * old_parent_node (node->_parent);
    const Quark old_parent_mid (old_parent_node ? old_parent_node->_mid : Quark());
    const Quark new_parent_mid (tok);
    //std::cerr << LINE_ID << " now we're working on " << new_parent_mid << std::endl;

    // case 1: the existing tree already has this parent
    if (new_parent_mid == old_parent_mid)
    {
      //std::cerr << LINE_ID << " our tree agrees with the References header here..." << std::endl;
      node = node->_parent;
      continue;
    }

    // case 2: the current node has no parent yet; link it to the one named
    // by the header, creating a placeholder node if necessary
    if (!old_parent_node)
    {
      //std::cerr << LINE_ID << " haven't mapped " << new_parent_mid << " before..." << std::endl;
      ArticleNode * new_parent_node (h->_nodes[new_parent_mid]);
      const bool found (new_parent_node != 0);
      if (!found) {
        //std::cerr << LINE_ID << " didn't find it; adding new node for " << new_parent_mid << std::endl;
        static const ArticleNode blank_node;
        h->_node_chunk.push_back (blank_node);
        new_parent_node = h->_nodes[new_parent_mid] = &h->_node_chunk.back();
        new_parent_node->_mid = new_parent_mid;
      }
      node->_parent = new_parent_node;
      // NOTE(review): find_ancestor() is defined elsewhere; here it is used
      // as a loop check -- if the new parent reaches itself the link is undone.
      if (find_ancestor (new_parent_node, new_parent_mid)) {
        node->_parent = 0;
        //std::cerr << LINE_ID << " someone's been munging References headers to cause trouble!" << std::endl;
        break;
      }
      new_parent_node->_children.push_front (node);
      node = new_parent_node;
      continue;
    }

    // case 3: the header skips ancestors that our tree already has
    ArticleNode * tmp;
    if ((tmp = find_ancestor (node, new_parent_mid)))
    {
      //std::cerr << LINE_ID << " this References header has a hole... jumping to " << tmp->_mid << std::endl;
      node = tmp;
      continue;
    }

    // case 4: the old parent shows up in the part of the header that has not
    // been consumed yet, so the header names an ancestor that belongs between
    // 'node' and its old parent; relink 'node' under the new parent
    const char * cpch;
    if ((cpch = refs.strstr (old_parent_mid.to_view())))
    {
      //std::cerr << LINE_ID << " this References header fills a hole of ours ... " << new_parent_mid << std::endl;

      // unlink from old parent
      old_parent_node->_children.remove (node);
      node->_parent = 0;

      // link to new parent
      ArticleNode * new_parent_node (h->_nodes[new_parent_mid]);
      const bool found (new_parent_node != 0);
      if (!found) {
        //std::cerr << LINE_ID << " didn't find it; adding new node for " << new_parent_mid << std::endl;
        static const ArticleNode blank_node;
        h->_node_chunk.push_back (blank_node);
        new_parent_node = h->_nodes[new_parent_mid] = &h->_node_chunk.back();
        new_parent_node->_mid = new_parent_mid;
      }
      node->_parent = new_parent_node;
      if (find_ancestor (new_parent_node, new_parent_mid) != 0) {
        node->_parent = 0;
        //std::cerr << LINE_ID << " someone's been munging References headers to cause trouble!" << std::endl;
        break;
      }
      new_parent_node->_children.push_front (node);
      node = new_parent_node;
      continue;
    }

    // none of the cases matched: this token is ignored and the loop moves on
    // to the next (earlier) reference with 'node' unchanged
  }

  // recursion?
  // the article must not have ended up as its own ancestor
  assert (find_ancestor(article_node, article->message_id) == 0);
}
365 
// Disabled implementation, compiled out by the `#if 0' below.
// Note that it refers to node->parent, whereas the live code in this file
// uses ArticleNode::_parent.
#if 0
std::string
DataImpl :: get_references (const Quark& group, const Article& a) const
{
  std::string s;

  GroupHeaders * h (get_group_headers (group));
  pan_return_if_fail (h!=0);

  const Quark& mid (a.message_id);
  ArticleNode * node (h->_nodes[mid]);
  node = node->parent;
  while (node) {
    if (!s.empty())
      s.insert (0, 1, ' ');
    const StringView v (node->_mid.to_view());
    s.insert (0, v.str, v.len);
    node = node->parent;
  }
//  std::cerr << "article " << a.message_id << " references " << s << std::endl;
  return s;
}
#endif
389 
390 void
load_part(const Quark & group,const Quark & mid,int number,const StringView & part_mid,unsigned long lines,unsigned long bytes)391 DataImpl :: load_part (const Quark          & group,
392                        const Quark          & mid,
393                        int                    number,
394                        const StringView     & part_mid,
395                        unsigned long          lines,
396                        unsigned long          bytes)
397 {
398    GroupHeaders * h = get_group_headers (group);
399    Article * a (h->find_article (mid));
400    pan_return_if_fail (a != 0);
401 
402    if (a->add_part (number, part_mid, bytes))
403      a->lines += lines;
404 }
405 
406 namespace
407 {
view_to_ul(const StringView & view)408   unsigned long view_to_ul (const StringView& view)
409   {
410     unsigned long val (0);
411     if (!view.empty()) {
412       errno = 0;
413       val = strtoul (view.str, 0, 10);
414       if (errno) val = 0ul;
415     }
416     return val;
417   }
418 }
419 
420 void
load_headers(const DataIO & data_io,const Quark & group)421 DataImpl :: load_headers (const DataIO   & data_io,
422                           const Quark    & group)
423 {
424   TimeElapsed timer;
425 
426   GroupHeaders * h (get_group_headers (group));
427   assert (h != 0);
428 
429   unsigned long article_count (0);
430   unsigned long unread_count (0);
431   StringView line;
432   bool success (false);
433   quarks_t servers;
434 
435   ArticleFilter::sections_t score_sections;
436   _scorefile.get_matching_sections (StringView(group), score_sections);
437 
438   const char * groupname (group.c_str());
439   LineReader * in (data_io.read_group_headers (group));
440   if (in && !in->fail())
441   {
442     do { // skip past the comments at the top
443       in->getline (line);
444       line.trim ();
445     } while (!line.empty() && *line.str=='#');
446 
447     const int version (atoi (line.str));
448     if (version==1 || version==2 || version == 3)
449     {
450       // build the symbolic server / group lookup table
451       in->getline (line);
452       int symbol_qty (atoi (line.str));
453       Quark xref_lookup[CHAR_MAX];
454       for (int i=0; i<symbol_qty; ++i) {
455         StringView key;
456         in->getline (line);
457         line.trim();
458         if (line.pop_token(key,'\t') && key.len==1)
459           xref_lookup[(int)*key.str] = line;
460       }
461 
462       // build the author lookup table
463       in->getline (line);
464       symbol_qty = atoi (line.str);
465       Quark author_lookup[CHAR_MAX];
466       for (int i=0; i<symbol_qty; ++i) {
467         StringView key;
468         in->getline (line);
469         line.trim ();
470         if (line.pop_token(key,'\t') && key.len==1) {
471           author_lookup[(int)*key.str] = line;
472         }
473       }
474 
475       Xref::targets_t targets;
476       std::vector<Xref::Target>& targets_v (targets.get_container());
477 
478       // each article in this group...
479       unsigned int expire_count (0);
480       in->getline (line);
481       //const unsigned long article_qty = view_to_ul (line); /* unused */
482       const time_t now (time (0));
483       PartBatch part_batch;
484       for (;;)
485       {
486         // look for the beginning of an Article record.
487         // it'll be a message-id line with no leading whitespace.
488         StringView s;
489         if (!in->getline (s)) // end of file
490           break;
491 
492         Article& a (h->alloc_new_article());
493 
494 
495         // flag line
496         a.flag = false;
497         if (version == 3)
498         {
499           a.flag = atoi(s.str) == 1 ? true : false;
500           in->getline(s);
501         }
502 
503         if (s.empty() || *s.str!='<') // not a message-id...
504           continue;
505 
506         //message id
507         s.ltrim(); a.message_id = s;
508 
509         // subject line
510 
511         in->getline (s); s.ltrim(); a.subject = s;
512 
513         // author line
514         in->getline (s); s.ltrim(); a.author = s.len==1 ? author_lookup[(int)*s.str] : Quark(s);
515 
516         // optional references line
517         std::string references;
518         in->getline (s); s.ltrim();
519         if (!s.empty() && *s.str=='<') {
520           references = s;
521           in->getline (s); s.ltrim();
522         }
523 
524         // date-posted line
525         a.time_posted = view_to_ul (s);
526         const int days_old ((now - a.time_posted) / (24*60*60));
527 
528         // xref line
529         in->getline (s); s.ltrim();
530         const size_t max_targets (std::count (s.begin(), s.end(), ' ') + 1);
531         targets_v.resize (max_targets);
532         Xref::Target * target_it (&targets_v.front());
533         StringView tok, server_tok, group_tok;
534         while (s.pop_token (tok)) {
535           if (tok.pop_token(server_tok,':') && tok.pop_token(group_tok,':')) {
536             target_it->server = server_tok;
537             target_it->group = group_tok.len==1 ? xref_lookup[(int)*group_tok.str] : Quark(group_tok);
538             target_it->number = g_ascii_strtoull (tok.str, NULL, 10);
539             const Server * server (find_server (target_it->server));
540             if (server && ((!server->article_expiration_age) || (days_old <= server->article_expiration_age)))
541               ++target_it;
542           }
543         }
544         targets_v.resize (target_it - &targets_v.front());
545         targets.sort();
546         bool expired (targets.empty());
547         a.xref.swap (targets);
548 
549         // is_binary [total_part_count found_part_count]
550         int total_part_count (1);
551         int found_part_count (1);
552         in->getline (s);
553         s.ltrim();
554         s.pop_token (tok); a.is_binary = !tok.empty() && tok.str[0]=='t';
555         if (a.is_binary) {
556           s.ltrim(); s.pop_token (tok); total_part_count = atoi(tok.str);
557           s.ltrim(); s.pop_token (tok); found_part_count = atoi(tok.str);
558         }
559         s.ltrim(); if (s.pop_token (tok)) a.lines = atoi (tok.str); // this field was added in 0.115
560 
561         // found parts...
562         part_batch.init (a.message_id, total_part_count, found_part_count);
563 //        std::cerr<<"article "<<a.message_id<<" "<<total_part_count<<" "<<found_part_count<<std::endl;
564         for (int i(0), count(found_part_count); i<count; ++i)
565         {
566           const bool gotline (in->getline (s));
567 
568           if (gotline && !expired)
569           {
570             StringView tok;
571             s.ltrim ();
572             s.pop_token (tok);
573             const int number (atoi (tok.str));
574             if (number > total_part_count) { // corrupted entry
575               expired = true;
576               break;
577             }
578             StringView part_mid;
579             unsigned long part_bytes (0);
580             s.ltrim ();
581             s.pop_token (part_mid);
582             if (part_mid.len==1 && *part_mid.str=='"')
583               part_mid = a.message_id.to_view ();
584             s.pop_token(tok); part_bytes = view_to_ul (tok);
585             part_batch.add_part (number, part_mid, part_bytes);
586 
587             if (s.pop_token(tok)) a.lines += atoi (tok.str); // this field was removed in 0.115
588           }
589         }
590         if (!expired)
591           a.set_parts (part_batch);
592 
593         // add the article to the group if it hasn't all expired
594         if (expired)
595           ++expire_count;
596         else {
597           load_article (group, &a, references);
598           a.score = _article_filter.score_article (*this, score_sections, group, a); // score _after_ threading, so References: works
599           ++article_count;
600           if (!is_read(&a))
601             ++unread_count;
602         }
603       }
604 
605       if (expire_count)
606         Log::add_info_va (_("Expired %lu old articles from “%s”"), expire_count, group.c_str());
607 
608       success = !in->fail();
609     }
610     else
611     {
612       Log::add_urgent_va (
613         _("Unsupported data version for %s headers: %d.\nAre you running an old version of Pan by accident?"),
614         groupname, version);
615     }
616   }
617   delete in;
618 
619   // update the group's article count...
620   ReadGroup& g (_read_groups[group]);
621   g._unread_count = unread_count;
622   g._article_count = article_count;
623   fire_group_counts (group, unread_count, article_count);
624 
625   if (success) {
626     const double seconds = timer.get_seconds_elapsed ();
627     Log::add_info_va (
628       _("Loaded %lu articles for “%s” in %.1f seconds (%.0f per second)"),
629       article_count, group.c_str(), seconds,
630       article_count/(fabs(seconds)<0.001?0.001:seconds));
631   }
632 }
633 
634 namespace
635 {
636   const char * lookup_symbols ("abcdefghijklmnopqrstuvwxyz"
637                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
638                                "1234567890!@#$%^&*()");
639 
640   struct QuarkToSymbol
641   {
642     char buf[2];
QuarkToSymbol__anon33913e9d0211::QuarkToSymbol643     QuarkToSymbol() { buf[1] = '\0'; }
644 
645     typedef Loki::AssocVector < pan::Quark, char > quark_to_symbol_t;
646     quark_to_symbol_t _map;
647 
operator ()__anon33913e9d0211::QuarkToSymbol648     const char* operator() (const Quark& quark)
649     {
650       quark_to_symbol_t::const_iterator it (_map.find (quark));
651       if (it == _map.end())
652         return quark.c_str();
653 
654       buf[0] = it->second;
655       return buf;
656     }
657 
write__anon33913e9d0211::QuarkToSymbol658     void write (std::ostream& out, const StringView& comment) const
659     {
660       Quark quarks[UCHAR_MAX];
661       foreach_const (quark_to_symbol_t, _map, it)
662         quarks[(int)it->second] = it->first;
663 
664       const size_t len (_map.size());
665       out << len;
666       if (!comment.empty())
667         out << "\t # " << comment << '\n';
668       for (size_t i(0); i!=len; ++i) {
669         const char ch (lookup_symbols[i]);
670         out << '\t' << ch << '\t' << quarks[(int)ch] << '\n';
671       }
672     }
673   };
674 
675   typedef Loki::AssocVector<Quark,unsigned long> frequency_t;
676 
build_qts(frequency_t & freq,QuarkToSymbol & setme)677   void build_qts (frequency_t& freq, QuarkToSymbol& setme)
678   {
679     setme._map.clear ();
680 
681     typedef std::multimap<unsigned long,Quark> counts_t;
682     counts_t counts;
683     foreach_const (frequency_t, freq, it)
684       counts.insert (std::pair<unsigned long,Quark>(it->second,it->first));
685 
686     counts_t::const_reverse_iterator it=counts.rbegin(), end=counts.rend();
687     for (const char * pch=lookup_symbols; *pch && it!=end; ++pch, ++it)
688       setme._map[it->second] = *pch;
689     freq.clear ();
690   }
691 }
692 
693 
694 bool
save_headers(DataIO & data_io,const Quark & group,const std::vector<Article * > & articles,unsigned long & part_count,unsigned long & article_count) const695 DataImpl :: save_headers (DataIO                       & data_io,
696                           const Quark                  & group,
697                           const std::vector<Article*>  & articles,
698                           unsigned long                & part_count,
699                           unsigned long                & article_count) const
700 {
701   const char endl ('\n');
702   const GroupHeaders * h (get_group_headers (group));
703   assert (h != 0);
704 
705   part_count = 0;
706   article_count = 0;
707 
708   bool success;
709   if (_unit_test)
710   {
711     Log::add_info_va ("Not saving %s's headers because we're in unit test mode",
712                       group.c_str());
713     success = true;
714   }
715   else
716   {
717     std::ostream& out (*data_io.write_group_headers(group));
718 
719     out << "#\n"
720            "# This file has three sections.\n"
721            "#\n"
722            "# A. A shorthand table for the most frequent groups in the xrefs.\n"
723            "#    The first line tells the number of elements to follow,\n"
724            "#    then one line per entry with a one-character shorthand and full name.\n"
725            "#\n"
726            "# B. A shorthand table for the most freqent author names.\n"
727            "#    This is formatted just like the other shorthand table.\n"
728            "#    (sorted by post count, so it's also a most-frequent-posters list...)\n"
729            "#\n"
730            "# C. The group's headers section.\n"
731            "#    The first line tells the number of articles to follow,\n"
732            "#    then articles which each have the following lines:\n"
733            "#    1. message-id\n"
734            "#    2. subject\n"
735            "#    3. author\n"
736            "#    4. references. This line is omitted if the Article has an empty References header.\n"
737            "#    5. time-posted. This is a time_t (see http://en.wikipedia.org/wiki/Unix_time)\n"
738            "#    6. xref line, server1:group1:number1 server2:group2:number2 ...\n"
739            "#    7. has-attachments [parts-total-count parts-found-count] line-count\n"
740            "#       If has-attachments isn't 't' (for true), fields 2 and 3 are omitted.\n"
741            "#       If fields 2 and 3 are equal, the article is `complete'.\n"
742            "#    8. One line per parts-found-count: part-index message-id byte-count\n"
743            "#\n"
744            "#\n";
745 
746     // lines moved from line 8 to line 7 in 0.115, causing version 2
747     // flag added, version 3 (12/2011, imhotep)
748     out << "3\t # file format version number\n";
749 
750     // xref lookup section
751     frequency_t frequency;
752     foreach_const (std::vector<Article*>, articles, ait)
753       foreach_const (Xref, (*ait)->xref, xit)
754         ++frequency[xit->group];
755     QuarkToSymbol xref_qts;
756     build_qts (frequency, xref_qts);
757     xref_qts.write (out, "xref shorthand count");
758 
759     // author lookup section
760     frequency.clear ();
761     foreach_const (std::vector<Article*>, articles, ait)
762       ++frequency[(*ait)->author];
763     QuarkToSymbol author_qts;
764     build_qts (frequency, author_qts);
765     author_qts.write (out, "author shorthand count");
766 
767     // header section
768     out << articles.size() << endl;
769     std::string references;
770     foreach_const (std::vector<Article*>, articles, ait)
771     {
772       ++article_count;
773 
774       const Article * a (*ait);
775       const Quark& message_id (a->message_id);
776       h->build_references_header (a, references);
777 
778       // flag, message-id, subject, author
779       out <<a->flag<<"\n"
780           << message_id << "\n\t"
781           << a->subject << "\n\t"
782           << author_qts(a->author) << "\n\t";
783       // references line *IF* the article has a References header
784       if (!references.empty())
785         out << references << "\n\t";
786 
787       // date
788       out << a->time_posted << "\n\t";
789 
790       // xref
791       foreach_const (Xref, a->xref, xit) {
792         out << xref_qts(xit->server);
793         out.put(':');
794         out << xref_qts(xit->group);
795         out.put(':');
796         out << xit->number;
797         out.put(' ');
798       }
799       out << "\n\t";
800 
801       // is_binary [total_part_count found_part_count]
802       out.put (a->is_binary ? 't' : 'f');
803       if (a->is_binary) {
804         out.put(' ');
805         out << a->get_total_part_count();
806         out.put(' ');
807         out << a->get_found_part_count();
808       }
809       out.put(' ');
810       out << a->lines;
811       out.put('\n');
812 
813       // one line per foundPartCount (part-index message-id bytes lines)
814       for (Article::part_iterator pit(a->pbegin()), end(a->pend()); pit!=end; ++pit) {
815         out.put('\t'); out << pit.number();
816         out.put (' ');
817         out << pit.mid();
818         out.put (' ');
819         out << pit.bytes();
820         out.put ('\n');
821         ++part_count;
822       }
823     }
824 
825     success = !out.fail ();
826     data_io.write_done (&out);
827     save_group_xovers (data_io);
828   }
829 
830   return success;
831 }
832 
833 void
save_headers(DataIO & data_io,const Quark & group) const834 DataImpl :: save_headers (DataIO& data_io, const Quark& group) const
835 {
836   if (_unit_test)
837     return;
838 
839    pan_return_if_fail (!group.empty());
840 
841    TimeElapsed timer;
842 
843    // get a list of the articles
844    const GroupHeaders * h (get_group_headers (group));
845    std::vector<Article*> articles;
846    foreach_const (nodes_t, h->_nodes, it)
847      if (it->second->_article)
848        articles.push_back (it->second->_article);
849 
850    unsigned long part_count (0ul);
851    unsigned long article_count (0ul);
852    const bool success (
853      save_headers(data_io, group, articles, part_count, article_count));
854    const double time_elapsed (timer.get_seconds_elapsed());
855    if (success)
856       Log::add_info_va (
857    _("Saved %lu parts, %lu articles in “%s” in %.1f seconds (%.0f articles/sec)"),
858          part_count,
859          article_count,
860          group.c_str(),
861          time_elapsed,
862          article_count/(fabs(time_elapsed)<0.001?0.001:time_elapsed));
863 }
864 
865 
866 
867 /*******
868 ********
869 *******/
870 namespace
871 {
872   /* autosave newsrc files */
nrc_as_cb(gpointer ptr)873   gboolean nrc_as_cb(gpointer ptr)
874   {
875     DataImpl *data = static_cast<DataImpl*>(ptr);
876     data->save_newsrc_files();
877 
878     return FALSE;
879   }
880 }
881 
882 void
mark_read(const Article & a,bool read)883 DataImpl :: mark_read (const Article & a, bool read)
884 {
885   const Article * aptr (&a);
886   mark_read (&aptr, 1, read);
887 }
888 
889 void
mark_read(const Article ** articles,unsigned long article_count,bool read)890 DataImpl :: mark_read (const Article  ** articles,
891                        unsigned long     article_count,
892                        bool              read)
893 {
894   typedef std::map<Quark,quarks_t> group_to_changed_mids_t;
895   group_to_changed_mids_t group_to_changed_mids;
896 
897   // set them to `read'...
898   for (const Article **it(articles), **end(articles+article_count); it!=end; ++it) {
899     const Article * article (*it);
900     foreach_const (Xref, article->xref, xit) {
901       const bool old_state (_read_groups[xit->group][xit->server]._read.mark_one (xit->number, read));
902       if (!old_state != !read)
903         group_to_changed_mids[xit->group].insert (article->message_id);
904     }
905   }
906 
907   // update the affected groups' unread counts...
908   foreach_const (group_to_changed_mids_t, group_to_changed_mids, it) {
909     const Quark& group (it->first);
910     ReadGroup& g (_read_groups[group]);
911     const size_t n (it->second.size());
912     if (read)
913       g.decrement_unread (n);
914     else
915       g._unread_count += n;
916     fire_group_counts (group, g._unread_count, g._article_count);
917     on_articles_changed (group, it->second, false);
918   }
919 
920   if( !newsrc_autosave_id && newsrc_autosave_timeout )
921     newsrc_autosave_id = g_timeout_add_seconds( newsrc_autosave_timeout * 60, nrc_as_cb, this);
922 }
923 
924 
925 bool
is_read(const Article * a) const926 DataImpl :: is_read (const Article* a) const
927 {
928   // if it's read on any server, the whole thing is read.
929   if (a != 0)  {
930     foreach_const (Xref, a->xref, xit) {
931       const ReadGroup::Server * rgs (find_read_group_server (xit->group, xit->server));
932       if (rgs && rgs->_read.is_marked (xit->number))
933         return true;
934     }
935   }
936 
937   return false;
938 }
939 
940 void
get_article_scores(const Quark & group,const Article & article,Scorefile::items_t & setme) const941 DataImpl :: get_article_scores (const Quark         & group,
942                                 const Article       & article,
943                                 Scorefile::items_t  & setme) const
944 {
945   ArticleFilter :: sections_t sections;
946   _scorefile.get_matching_sections (StringView(group), sections);
947   _article_filter.get_article_scores (*this, sections, group, article, setme);
948 }
949 
950 void
rescore_articles(const Quark & group,const quarks_t mids)951 DataImpl :: rescore_articles (const Quark& group, const quarks_t mids)
952 {
953 
954   GroupHeaders * gh (get_group_headers (group));
955   if (!gh) // group isn't loaded
956     return;
957 
958   ArticleFilter::sections_t sections;
959   _scorefile.get_matching_sections (group.to_view(), sections);
960   nodes_v nodes;
961   find_nodes (mids, gh->_nodes, nodes);
962   foreach (nodes_v, nodes, it) {
963     if ((*it)->_article) {
964       Article& a (*(*it)->_article);
965       a.score = _article_filter.score_article (*this, sections, group, a);
966     }
967   }
968 }
969 
970 void
rescore_group_articles(const Quark & group)971 DataImpl :: rescore_group_articles (const Quark& group)
972 {
973 
974   GroupHeaders * gh (get_group_headers (group));
975   if (!gh) // group isn't loaded
976     return;
977 
978   ArticleFilter::sections_t sections;
979   _scorefile.get_matching_sections (group.to_view(), sections);
980   foreach (nodes_t, gh->_nodes, it) {
981     if (it->second->_article) {
982       Article& a (*(it->second->_article));
983       a.score = _article_filter.score_article (*this, sections, group, a);
984     }
985   }
986 }
987 
988 void
rescore()989 DataImpl :: rescore ()
990 {
991   //std::cerr << LINE_ID << " rescoring... " << std::endl;
992   const std::string filename (_data_io->get_scorefile_name());
993 
994   // reload the scorefile...
995   _scorefile.clear ();
996   _scorefile.parse_file (filename);
997 
998   // enumerate the groups that need rescoring...
999   quarks_t groups;
1000   foreach (std::set<MyTree*>, _trees, it)
1001     groups.insert ((*it)->_group);
1002 
1003   // "on_articles_changed" rescores the articles...
1004   foreach_const (quarks_t, groups, git) {
1005     quarks_t mids;
1006     const Quark& group (*git);
1007     const GroupHeaders * h (get_group_headers (group));
1008     foreach_const (nodes_t, h->_nodes, nit)
1009       //only insert mids for nodes with articles
1010       if (nit->second->_article)
1011         mids.insert (mids.end(), nit->first);
1012     if (!mids.empty())
1013       on_articles_changed (group, mids, true);
1014   }
1015 }
1016 
1017 void
add_score(const StringView & section_wildmat,int score_value,bool score_assign_flag,int lifespan_days,bool all_items_must_be_true,const Scorefile::AddItem * items,size_t item_count,bool do_rescore)1018 DataImpl :: add_score (const StringView           & section_wildmat,
1019                        int                          score_value,
1020                        bool                         score_assign_flag,
1021                        int                          lifespan_days,
1022                        bool                         all_items_must_be_true,
1023                        const Scorefile::AddItem   * items,
1024                        size_t                       item_count,
1025                        bool                         do_rescore)
1026 {
1027   const std::string filename (_data_io->get_scorefile_name());
1028 
1029   if (item_count && items)
1030   {
1031     // append to the file...
1032     const std::string str (_scorefile.build_score_string (
1033       section_wildmat, score_value, score_assign_flag, lifespan_days,
1034       all_items_must_be_true, items, item_count));
1035     std::ofstream o (filename.c_str(), std::ofstream::app|std::ofstream::out);
1036     o << '\n' << str << '\n';
1037     o.close ();
1038     chmod (filename.c_str(), 0600);
1039   }
1040 
1041   if (do_rescore)
1042     rescore ();
1043 }
1044 
1045 void
comment_out_scorefile_line(const StringView & filename,size_t begin_line,size_t end_line,bool do_rescore)1046 DataImpl :: comment_out_scorefile_line (const StringView    & filename,
1047                                         size_t                begin_line,
1048                                         size_t                end_line,
1049                                         bool                  do_rescore)
1050 {
1051   std::string buf;
1052 
1053   // read the file in...
1054   std::string line;
1055   std::ifstream in (filename.to_string().c_str());
1056   size_t line_number (0);
1057   while (std::getline (in, line)) {
1058     ++line_number;
1059     if (begin_line<=line_number && line_number<=end_line)
1060       buf += '#';
1061     buf += line;
1062     buf += '\n';
1063   }
1064   in.close ();
1065 
1066   // ..and back out again
1067   const std::string f (filename.str, filename.len);
1068   std::ofstream o (f.c_str(), std::ofstream::trunc|std::ofstream::out);
1069   o << buf;
1070   o.close ();
1071   chmod (f.c_str(), 0600);
1072 
1073   // rescore
1074   if (do_rescore)
1075     rescore ();
1076 }
1077 
1078 
1079 /***************************************************************************
1080 ****************************************************************************
1081 ***************************************************************************/
1082 
1083 namespace
1084 {
1085   /** used by delete_articles */
1086   struct PerGroup {
1087     quarks_t mids;
1088     int unread;
1089     int count;
PerGroup__anon33913e9d0411::PerGroup1090     PerGroup(): unread(0), count(0) {}
1091   };
1092 }
1093 
1094 void
group_clear_articles(const Quark & group)1095 DataImpl :: group_clear_articles (const Quark& group)
1096 {
1097   // if they're in memory, remove them from there too...
1098   GroupHeaders* headers (get_group_headers (group));
1099   if (headers) {
1100     unique_articles_t all;
1101     foreach (nodes_t, headers->_nodes, it)
1102       if (it->second->_article)
1103         all.insert (it->second->_article);
1104     delete_articles (all);
1105   }
1106 
1107   // reset GroupHeaders' memory...
1108 //  headers->_nodes.clear ();
1109 //  headers->_node_chunk.clear ();
1110 //  headers->_art_chunk.clear ();
1111 
1112   // remove 'em from disk too.
1113   _data_io->clear_group_headers (group);
1114 
1115   // fire a 'count changed' event.
1116   ReadGroup& g (_read_groups[group]);
1117   g._article_count = 0;
1118   g._unread_count = 0;
1119   fire_group_counts (group, g._unread_count, g._article_count);
1120 }
1121 
1122 void
delete_articles(const unique_articles_t & articles)1123 DataImpl :: delete_articles (const unique_articles_t& articles)
1124 {
1125 
1126   quarks_t all_mids;
1127 
1128   // info we need to batch these deletions per group...
1129   typedef std::map<Quark,PerGroup> per_groups_t;
1130   per_groups_t per_groups;
1131 
1132   // populate the per_groups map
1133   foreach_const (unique_articles_t, articles, it) {
1134     const Article * article (*it);
1135     quarks_t groups;
1136     foreach_const (Xref, article->xref, xit)
1137       groups.insert (xit->group);
1138     const bool was_read (is_read (article));
1139     foreach_const (quarks_t, groups, git) {
1140       PerGroup& per (per_groups[*git]);
1141       ++per.count;
1142       if (!was_read) ++per.unread;
1143       per.mids.insert (article->message_id);
1144       all_mids.insert (article->message_id);
1145     }
1146   }
1147 
1148   // process each group
1149   foreach (per_groups_t, per_groups, it)
1150   {
1151     // update the group's read/unread count...
1152     const Quark& group (it->first);
1153     ReadGroup& g (_read_groups[group]);
1154     g.decrement_unread (it->second.unread);
1155     g.decrement_count (it->second.count);
1156     fire_group_counts (group, g._unread_count, g._article_count);
1157 
1158     // remove the articles from our lookup table...
1159     GroupHeaders * h (get_group_headers (group));
1160     if (h)
1161       h->remove_articles (it->second.mids);
1162   }
1163 
1164   on_articles_removed (all_mids);
1165 }
1166 
1167 void
on_articles_removed(const quarks_t & mids) const1168 DataImpl :: on_articles_removed (const quarks_t& mids) const
1169 {
1170   foreach (std::set<MyTree*>, _trees, it)
1171     (*it)->remove_articles (mids);
1172 }
1173 
1174 void
on_articles_changed(const Quark & group,const quarks_t & mids,bool do_refilter)1175 DataImpl :: on_articles_changed (const Quark& group, const quarks_t& mids, bool do_refilter)
1176 {
1177   rescore_articles (group, mids);
1178 
1179   // notify the trees that the articles have changed...
1180   foreach (std::set<MyTree*>, _trees, it)
1181     (*it)->articles_changed (mids, do_refilter);
1182 }
1183 
1184 void
on_articles_added(const Quark & group,const quarks_t & mids)1185 DataImpl :: on_articles_added (const Quark& group, const quarks_t& mids)
1186 {
1187 
1188   if (!mids.empty())
1189   {
1190     Log::add_info_va (_("Added %lu articles to %s."),
1191                       mids.size(), group.c_str());
1192 
1193     rescore_articles (group, mids);
1194 
1195     foreach (std::set<MyTree*>, _trees, it) {
1196       debug ("This tree has a group " << (*it)->_group);
1197       if ((*it)->_group == group) {
1198         debug ("trying to add the articles to tree " << *it);
1199         (*it)->add_articles (mids);
1200       }
1201     }
1202 
1203     ReadGroup& g (_read_groups[group]);
1204     g._article_count += mids.size ();
1205     g._unread_count += mids.size ();
1206     fire_group_counts (group, g._unread_count, g._article_count);
1207   }
1208 }
1209 
1210 
1211 DataImpl::ArticleNode*
find_ancestor(ArticleNode * node,const Quark & ancestor_mid)1212 DataImpl :: find_ancestor (ArticleNode * node,
1213                            const Quark & ancestor_mid)
1214 {
1215   ArticleNode * parent_node (node->_parent);
1216   while (parent_node && (parent_node->_mid != ancestor_mid))
1217     parent_node = parent_node->_parent;
1218   return parent_node;
1219 }
1220 
1221 DataImpl::ArticleNode*
find_closest_ancestor(ArticleNode * node,const unique_sorted_quarks_t & mid_pool)1222 DataImpl :: find_closest_ancestor (ArticleNode                  * node,
1223                                    const unique_sorted_quarks_t & mid_pool)
1224 {
1225   ArticleNode * parent_node (node->_parent);
1226   while (parent_node && !mid_pool.count(parent_node->_mid))
1227     parent_node = parent_node->_parent;
1228   return parent_node;
1229 }
1230 
1231 const DataImpl::ArticleNode*
find_closest_ancestor(const ArticleNode * node,const unique_sorted_quarks_t & mid_pool)1232 DataImpl :: find_closest_ancestor (const ArticleNode             * node,
1233                                    const unique_sorted_quarks_t  & mid_pool)
1234 {
1235   const ArticleNode * parent_node (node->_parent);
1236   while (parent_node && !mid_pool.count(parent_node->_mid))
1237     parent_node = parent_node->_parent;
1238   return parent_node;
1239 }
1240 
1241 Data::ArticleTree*
group_get_articles(const Quark & group,const Quark & save_path,const ShowType show_type,const FilterInfo * filter,const RulesInfo * rules) const1242 DataImpl :: group_get_articles (const Quark       & group,
1243                                 const Quark       & save_path,
1244                                 const ShowType      show_type,
1245                                 const FilterInfo  * filter,
1246                                 const RulesInfo   * rules) const
1247 {
1248   // cast const away for group_ref()... consider _groups mutable
1249   return new MyTree (*const_cast<DataImpl*>(this), group, save_path, show_type, filter, rules);
1250 }
1251