1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3 * Pan - A Newsreader for Gtk+
4 * Copyright (C) 2002-2006 Charles Kerr <charles@rebelbase.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include <config.h>
21 #include <cerrno>
22 #include <fstream>
23 #include <map>
24 #include <vector>
25 #include <string>
26 #include <cmath>
27 #include <glib.h>
28 extern "C" {
29 #include <sys/types.h> // for chmod
30 #include <sys/stat.h> // for chmod
31 #include <glib/gi18n.h>
32 }
33 #include <pan/general/debug.h>
34 #include <pan/general/log.h>
35 #include <pan/general/macros.h>
36 #include <pan/general/messages.h>
37 #include <pan/general/quark.h>
38 #include <pan/general/time-elapsed.h>
39 #include <pan/usenet-utils/filter-info.h>
40 #include <pan/data/article.h>
41 #include "article-filter.h"
42 #include "data-impl.h"
43
44 using namespace pan;
45
GroupHeaders()46 DataImpl :: GroupHeaders :: GroupHeaders ():
47 _ref (0),
48 _dirty (false)
49 {
50
51 }
52
~GroupHeaders()53 DataImpl :: GroupHeaders :: ~GroupHeaders ()
54 {
55
56 }
57
58 DataImpl :: ArticleNode*
find_node(const Quark & mid)59 DataImpl :: GroupHeaders :: find_node (const Quark& mid)
60 {
61 ArticleNode * node (0);
62 nodes_t::iterator it (_nodes.find (mid));
63 if (it != _nodes.end())
64 node = it->second;
65 return node;
66 }
67
68 const DataImpl :: ArticleNode*
find_node(const Quark & mid) const69 DataImpl :: GroupHeaders :: find_node (const Quark& mid) const
70 {
71 const ArticleNode * node (0);
72 nodes_t::const_iterator it (_nodes.find (mid));
73 if (it != _nodes.end())
74 node = it->second;
75 return node;
76 }
77
78 const Quark&
find_parent_message_id(const Quark & mid) const79 DataImpl :: GroupHeaders :: find_parent_message_id (const Quark& mid) const
80 {
81 const ArticleNode * node (find_node (mid));
82 if (node && node->_parent)
83 return node->_parent->_mid;
84
85 static const Quark empty_quark;
86 return empty_quark;
87 }
88
89 const Article*
find_article(const Quark & message_id) const90 DataImpl :: GroupHeaders :: find_article (const Quark& message_id) const
91 {
92 Article *a (0);
93
94 const ArticleNode * node (find_node (message_id));
95 if (node)
96 a = node->_article;
97
98 return a;
99 }
100
101 Article*
find_article(const Quark & message_id)102 DataImpl :: GroupHeaders :: find_article (const Quark& message_id)
103 {
104 Article *a(0);
105
106 const ArticleNode * node (find_node (message_id));
107 if (node)
108 a = node->_article;
109
110 return a;
111 }
112
113 void
remove_articles(const quarks_t & mids)114 DataImpl :: GroupHeaders :: remove_articles (const quarks_t& mids)
115 {
116 nodes_v nodes;
117 find_nodes (mids, _nodes, nodes);
118 foreach (nodes_v, nodes, it)
119 (*it)->_article = 0;
120 _dirty = true;
121 }
122
123 const DataImpl :: GroupHeaders*
get_group_headers(const Quark & group) const124 DataImpl :: get_group_headers (const Quark& group) const
125 {
126 group_to_headers_t::const_iterator it (_group_to_headers.find(group));
127 return it==_group_to_headers.end() ? 0 : it->second;
128 }
129
130 DataImpl :: GroupHeaders*
get_group_headers(const Quark & group)131 DataImpl :: get_group_headers (const Quark& group)
132 {
133 group_to_headers_t::iterator it (_group_to_headers.find(group));
134 return it==_group_to_headers.end() ? 0 : it->second;
135 }
136
137 void
build_references_header(const Article * article,std::string & setme) const138 DataImpl :: GroupHeaders :: build_references_header (const Article* article, std::string& setme) const
139 {
140 setme.clear ();
141 const Quark& message_id (article->message_id);
142 const ArticleNode * node (find_node (message_id));
143 while (node && node->_parent) {
144 node = node->_parent;
145 const StringView& ancestor_mid = node->_mid.to_view ();
146 setme.insert (0, ancestor_mid.str, ancestor_mid.len);
147 if (node->_parent)
148 setme.insert (0, 1, ' ');
149 }
150 }
151
152 void
get_article_references(const Quark & group,const Article * article,std::string & setme) const153 DataImpl :: get_article_references (const Quark& group, const Article* article, std::string& setme) const
154 {
155 const GroupHeaders * h (get_group_headers (group));
156 if (!h)
157 setme.clear ();
158 else
159 h->build_references_header (article, setme);
160 }
161
162 void
free_group_headers_memory(const Quark & group)163 DataImpl :: free_group_headers_memory (const Quark& group)
164 {
165 group_to_headers_t::iterator it (_group_to_headers.find (group));
166 if (it != _group_to_headers.end()) {
167 delete it->second;
168 _group_to_headers.erase (it);
169 }
170 }
171
172 void
ref_group(const Quark & group)173 DataImpl :: ref_group (const Quark& group)
174 {
175 GroupHeaders * h (get_group_headers (group));
176 if (!h)
177 {
178 h = _group_to_headers[group] = new GroupHeaders ();
179 load_headers (*_data_io, group);
180 }
181 ++h->_ref;
182 // std::cerr << LINE_ID << " group " << group << " refcount up to " << h->_ref << std::endl;
183 }
184
185 void
unref_group(const Quark & group)186 DataImpl :: unref_group (const Quark& group)
187 {
188 GroupHeaders * h (get_group_headers (group));
189 pan_return_if_fail (h != 0);
190
191 --h->_ref;
192 // std::cerr << LINE_ID << " group " << group << " refcount down to " << h->_ref << std::endl;
193 if (h->_ref == 0)
194 {
195 // if (h->_dirty )
196 save_headers (*_data_io, group);
197 h->_dirty = false;
198 free_group_headers_memory (group);
199 }
200 }
201
202 void
fire_article_flag_changed(articles_t & a,const Quark & group)203 DataImpl :: fire_article_flag_changed (articles_t& a, const Quark& group)
204 {
205 GroupHeaders * h (get_group_headers (group));
206 h->_dirty = true;
207 Data::fire_article_flag_changed(a,group);
208 }
209
210 void
find_nodes(const quarks_t & mids,nodes_t & nodes,nodes_v & setme)211 DataImpl :: find_nodes (const quarks_t & mids,
212 nodes_t & nodes,
213 nodes_v & setme)
214 {
215 NodeWeakOrdering o;
216 nodes_t tmp;
217 std::set_intersection (nodes.begin(), nodes.end(),
218 mids.begin(), mids.end(),
219 std::inserter (tmp, tmp.begin()), o);
220
221 setme.reserve (tmp.size());
222 foreach_const (nodes_t, tmp, it)
223 setme.push_back (it->second);
224 }
225
226 void
find_nodes(const quarks_t & mids,const nodes_t & nodes,const_nodes_v & setme)227 DataImpl :: find_nodes (const quarks_t & mids,
228 const nodes_t & nodes,
229 const_nodes_v & setme)
230 {
231 NodeWeakOrdering o;
232 nodes_t tmp;
233 std::set_intersection (nodes.begin(), nodes.end(),
234 mids.begin(), mids.end(),
235 std::inserter (tmp, tmp.begin()), o);
236
237 setme.reserve (tmp.size());
238 foreach_const (nodes_t, tmp, it)
239 setme.push_back (it->second);
240 }
241
242 /*******
243 ********
244 *******/
245
246 // 'article' must have been instantiated by
247 // GroupHeaders::alloc_new_article()!!
248 void
load_article(const Quark & group,Article * article,const StringView & references)249 DataImpl :: load_article (const Quark & group,
250 Article * article,
251 const StringView & references)
252
253 {
254 #if 0
255 std::cerr << LINE_ID << " adding article "
256 << " subject [" << article->subject << ']'
257 << " mid [" << article->message_id << ']'
258 << " references [" << references << ']'
259 << std::endl;
260 #endif
261
262 GroupHeaders * h (get_group_headers (group));
263 pan_return_if_fail (h!=0);
264
265 // populate the current node
266 const Quark& mid (article->message_id);
267 ArticleNode * node (h->_nodes[mid]);
268 if (!node) {
269 static const ArticleNode blank_node;
270 h->_node_chunk.push_back (blank_node);
271 node = h->_nodes[mid] = &h->_node_chunk.back();
272 node->_mid = mid;
273 }
274 // !!INFO!! : this is bypassed for now, as it causes an abort on local cache corruptions
275 //assert (!node->_article);
276 node->_article = article;
277 ArticleNode * article_node = node;
278
279 // build nodes for each of the references
280 StringView tok, refs(references);
281 //std::cerr << LINE_ID << " references [" << refs << ']' << std::endl;
282 while (refs.pop_last_token (tok, ' '))
283 {
284 tok.trim ();
285 if (tok.empty())
286 break;
287
288 ArticleNode * old_parent_node (node->_parent);
289 const Quark old_parent_mid (old_parent_node ? old_parent_node->_mid : Quark());
290 const Quark new_parent_mid (tok);
291 //std::cerr << LINE_ID << " now we're working on " << new_parent_mid << std::endl;
292
293 if (new_parent_mid == old_parent_mid)
294 {
295 //std::cerr << LINE_ID << " our tree agrees with the References header here..." << std::endl;
296 node = node->_parent;
297 continue;
298 }
299
300 if (!old_parent_node)
301 {
302 //std::cerr << LINE_ID << " haven't mapped " << new_parent_mid << " before..." << std::endl;
303 ArticleNode * new_parent_node (h->_nodes[new_parent_mid]);
304 const bool found (new_parent_node != 0);
305 if (!found) {
306 //std::cerr << LINE_ID << " didn't find it; adding new node for " << new_parent_mid << std::endl;
307 static const ArticleNode blank_node;
308 h->_node_chunk.push_back (blank_node);
309 new_parent_node = h->_nodes[new_parent_mid] = &h->_node_chunk.back();
310 new_parent_node->_mid = new_parent_mid;
311 }
312 node->_parent = new_parent_node;
313 if (find_ancestor (new_parent_node, new_parent_mid)) {
314 node->_parent = 0;
315 //std::cerr << LINE_ID << " someone's been munging References headers to cause trouble!" << std::endl;
316 break;
317 }
318 new_parent_node->_children.push_front (node);
319 node = new_parent_node;
320 continue;
321 }
322
323 ArticleNode * tmp;
324 if ((tmp = find_ancestor (node, new_parent_mid)))
325 {
326 //std::cerr << LINE_ID << " this References header has a hole... jumping to " << tmp->_mid << std::endl;
327 node = tmp;
328 continue;
329 }
330
331 const char * cpch;
332 if ((cpch = refs.strstr (old_parent_mid.to_view())))
333 {
334 //std::cerr << LINE_ID << " this References header fills a hole of ours ... " << new_parent_mid << std::endl;
335
336 // unlink from old parent
337 old_parent_node->_children.remove (node);
338 node->_parent = 0;
339
340 // link to new parent
341 ArticleNode * new_parent_node (h->_nodes[new_parent_mid]);
342 const bool found (new_parent_node != 0);
343 if (!found) {
344 //std::cerr << LINE_ID << " didn't find it; adding new node for " << new_parent_mid << std::endl;
345 static const ArticleNode blank_node;
346 h->_node_chunk.push_back (blank_node);
347 new_parent_node = h->_nodes[new_parent_mid] = &h->_node_chunk.back();
348 new_parent_node->_mid = new_parent_mid;
349 }
350 node->_parent = new_parent_node;
351 if (find_ancestor (new_parent_node, new_parent_mid) != 0) {
352 node->_parent = 0;
353 //std::cerr << LINE_ID << " someone's been munging References headers to cause trouble!" << std::endl;
354 break;
355 }
356 new_parent_node->_children.push_front (node);
357 node = new_parent_node;
358 continue;
359 }
360 }
361
362 // recursion?
363 assert (find_ancestor(article_node, article->message_id) == 0);
364 }
365
// NOTE: everything between the `#if 0` and `#endif` below is compiled out.
// It sketches building a space-separated string of ancestor message-ids by
// walking up from the article's node and prepending each id in turn.
366 #if 0
367 std::string
368 DataImpl :: get_references (const Quark& group, const Article& a) const
369 {
370 std::string s;
371
372 GroupHeaders * h (get_group_headers (group));
373 pan_return_if_fail (h!=0);
374
375 const Quark& mid (a.message_id);
376 ArticleNode * node (h->_nodes[mid]);
377 node = node->parent;
378 while (node) {
379 if (!s.empty())
380 s.insert (0, 1, ' ');
381 const StringView v (node->_mid.to_view());
382 s.insert (0, v.str, v.len);
383 node = node->parent;
384 }
385 // std::cerr << "article " << a.message_id << " references " << s << std::endl;
386 return s;
387 }
388 #endif
389
390 void
load_part(const Quark & group,const Quark & mid,int number,const StringView & part_mid,unsigned long lines,unsigned long bytes)391 DataImpl :: load_part (const Quark & group,
392 const Quark & mid,
393 int number,
394 const StringView & part_mid,
395 unsigned long lines,
396 unsigned long bytes)
397 {
398 GroupHeaders * h = get_group_headers (group);
399 Article * a (h->find_article (mid));
400 pan_return_if_fail (a != 0);
401
402 if (a->add_part (number, part_mid, bytes))
403 a->lines += lines;
404 }
405
406 namespace
407 {
view_to_ul(const StringView & view)408 unsigned long view_to_ul (const StringView& view)
409 {
410 unsigned long val (0);
411 if (!view.empty()) {
412 errno = 0;
413 val = strtoul (view.str, 0, 10);
414 if (errno) val = 0ul;
415 }
416 return val;
417 }
418 }
419
420 void
load_headers(const DataIO & data_io,const Quark & group)421 DataImpl :: load_headers (const DataIO & data_io,
422 const Quark & group)
423 {
424 TimeElapsed timer;
425
426 GroupHeaders * h (get_group_headers (group));
427 assert (h != 0);
428
429 unsigned long article_count (0);
430 unsigned long unread_count (0);
431 StringView line;
432 bool success (false);
433 quarks_t servers;
434
435 ArticleFilter::sections_t score_sections;
436 _scorefile.get_matching_sections (StringView(group), score_sections);
437
438 const char * groupname (group.c_str());
439 LineReader * in (data_io.read_group_headers (group));
440 if (in && !in->fail())
441 {
442 do { // skip past the comments at the top
443 in->getline (line);
444 line.trim ();
445 } while (!line.empty() && *line.str=='#');
446
447 const int version (atoi (line.str));
448 if (version==1 || version==2 || version == 3)
449 {
450 // build the symbolic server / group lookup table
451 in->getline (line);
452 int symbol_qty (atoi (line.str));
453 Quark xref_lookup[CHAR_MAX];
454 for (int i=0; i<symbol_qty; ++i) {
455 StringView key;
456 in->getline (line);
457 line.trim();
458 if (line.pop_token(key,'\t') && key.len==1)
459 xref_lookup[(int)*key.str] = line;
460 }
461
462 // build the author lookup table
463 in->getline (line);
464 symbol_qty = atoi (line.str);
465 Quark author_lookup[CHAR_MAX];
466 for (int i=0; i<symbol_qty; ++i) {
467 StringView key;
468 in->getline (line);
469 line.trim ();
470 if (line.pop_token(key,'\t') && key.len==1) {
471 author_lookup[(int)*key.str] = line;
472 }
473 }
474
475 Xref::targets_t targets;
476 std::vector<Xref::Target>& targets_v (targets.get_container());
477
478 // each article in this group...
479 unsigned int expire_count (0);
480 in->getline (line);
481 //const unsigned long article_qty = view_to_ul (line); /* unused */
482 const time_t now (time (0));
483 PartBatch part_batch;
484 for (;;)
485 {
486 // look for the beginning of an Article record.
487 // it'll be a message-id line with no leading whitespace.
488 StringView s;
489 if (!in->getline (s)) // end of file
490 break;
491
492 Article& a (h->alloc_new_article());
493
494
495 // flag line
496 a.flag = false;
497 if (version == 3)
498 {
499 a.flag = atoi(s.str) == 1 ? true : false;
500 in->getline(s);
501 }
502
503 if (s.empty() || *s.str!='<') // not a message-id...
504 continue;
505
506 //message id
507 s.ltrim(); a.message_id = s;
508
509 // subject line
510
511 in->getline (s); s.ltrim(); a.subject = s;
512
513 // author line
514 in->getline (s); s.ltrim(); a.author = s.len==1 ? author_lookup[(int)*s.str] : Quark(s);
515
516 // optional references line
517 std::string references;
518 in->getline (s); s.ltrim();
519 if (!s.empty() && *s.str=='<') {
520 references = s;
521 in->getline (s); s.ltrim();
522 }
523
524 // date-posted line
525 a.time_posted = view_to_ul (s);
526 const int days_old ((now - a.time_posted) / (24*60*60));
527
528 // xref line
529 in->getline (s); s.ltrim();
530 const size_t max_targets (std::count (s.begin(), s.end(), ' ') + 1);
531 targets_v.resize (max_targets);
532 Xref::Target * target_it (&targets_v.front());
533 StringView tok, server_tok, group_tok;
534 while (s.pop_token (tok)) {
535 if (tok.pop_token(server_tok,':') && tok.pop_token(group_tok,':')) {
536 target_it->server = server_tok;
537 target_it->group = group_tok.len==1 ? xref_lookup[(int)*group_tok.str] : Quark(group_tok);
538 target_it->number = g_ascii_strtoull (tok.str, NULL, 10);
539 const Server * server (find_server (target_it->server));
540 if (server && ((!server->article_expiration_age) || (days_old <= server->article_expiration_age)))
541 ++target_it;
542 }
543 }
544 targets_v.resize (target_it - &targets_v.front());
545 targets.sort();
546 bool expired (targets.empty());
547 a.xref.swap (targets);
548
549 // is_binary [total_part_count found_part_count]
550 int total_part_count (1);
551 int found_part_count (1);
552 in->getline (s);
553 s.ltrim();
554 s.pop_token (tok); a.is_binary = !tok.empty() && tok.str[0]=='t';
555 if (a.is_binary) {
556 s.ltrim(); s.pop_token (tok); total_part_count = atoi(tok.str);
557 s.ltrim(); s.pop_token (tok); found_part_count = atoi(tok.str);
558 }
559 s.ltrim(); if (s.pop_token (tok)) a.lines = atoi (tok.str); // this field was added in 0.115
560
561 // found parts...
562 part_batch.init (a.message_id, total_part_count, found_part_count);
563 // std::cerr<<"article "<<a.message_id<<" "<<total_part_count<<" "<<found_part_count<<std::endl;
564 for (int i(0), count(found_part_count); i<count; ++i)
565 {
566 const bool gotline (in->getline (s));
567
568 if (gotline && !expired)
569 {
570 StringView tok;
571 s.ltrim ();
572 s.pop_token (tok);
573 const int number (atoi (tok.str));
574 if (number > total_part_count) { // corrupted entry
575 expired = true;
576 break;
577 }
578 StringView part_mid;
579 unsigned long part_bytes (0);
580 s.ltrim ();
581 s.pop_token (part_mid);
582 if (part_mid.len==1 && *part_mid.str=='"')
583 part_mid = a.message_id.to_view ();
584 s.pop_token(tok); part_bytes = view_to_ul (tok);
585 part_batch.add_part (number, part_mid, part_bytes);
586
587 if (s.pop_token(tok)) a.lines += atoi (tok.str); // this field was removed in 0.115
588 }
589 }
590 if (!expired)
591 a.set_parts (part_batch);
592
593 // add the article to the group if it hasn't all expired
594 if (expired)
595 ++expire_count;
596 else {
597 load_article (group, &a, references);
598 a.score = _article_filter.score_article (*this, score_sections, group, a); // score _after_ threading, so References: works
599 ++article_count;
600 if (!is_read(&a))
601 ++unread_count;
602 }
603 }
604
605 if (expire_count)
606 Log::add_info_va (_("Expired %lu old articles from “%s”"), expire_count, group.c_str());
607
608 success = !in->fail();
609 }
610 else
611 {
612 Log::add_urgent_va (
613 _("Unsupported data version for %s headers: %d.\nAre you running an old version of Pan by accident?"),
614 groupname, version);
615 }
616 }
617 delete in;
618
619 // update the group's article count...
620 ReadGroup& g (_read_groups[group]);
621 g._unread_count = unread_count;
622 g._article_count = article_count;
623 fire_group_counts (group, unread_count, article_count);
624
625 if (success) {
626 const double seconds = timer.get_seconds_elapsed ();
627 Log::add_info_va (
628 _("Loaded %lu articles for “%s” in %.1f seconds (%.0f per second)"),
629 article_count, group.c_str(), seconds,
630 article_count/(fabs(seconds)<0.001?0.001:seconds));
631 }
632 }
633
634 namespace
635 {
636 const char * lookup_symbols ("abcdefghijklmnopqrstuvwxyz"
637 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
638 "1234567890!@#$%^&*()");
639
640 struct QuarkToSymbol
641 {
642 char buf[2];
QuarkToSymbol__anon33913e9d0211::QuarkToSymbol643 QuarkToSymbol() { buf[1] = '\0'; }
644
645 typedef Loki::AssocVector < pan::Quark, char > quark_to_symbol_t;
646 quark_to_symbol_t _map;
647
operator ()__anon33913e9d0211::QuarkToSymbol648 const char* operator() (const Quark& quark)
649 {
650 quark_to_symbol_t::const_iterator it (_map.find (quark));
651 if (it == _map.end())
652 return quark.c_str();
653
654 buf[0] = it->second;
655 return buf;
656 }
657
write__anon33913e9d0211::QuarkToSymbol658 void write (std::ostream& out, const StringView& comment) const
659 {
660 Quark quarks[UCHAR_MAX];
661 foreach_const (quark_to_symbol_t, _map, it)
662 quarks[(int)it->second] = it->first;
663
664 const size_t len (_map.size());
665 out << len;
666 if (!comment.empty())
667 out << "\t # " << comment << '\n';
668 for (size_t i(0); i!=len; ++i) {
669 const char ch (lookup_symbols[i]);
670 out << '\t' << ch << '\t' << quarks[(int)ch] << '\n';
671 }
672 }
673 };
674
675 typedef Loki::AssocVector<Quark,unsigned long> frequency_t;
676
build_qts(frequency_t & freq,QuarkToSymbol & setme)677 void build_qts (frequency_t& freq, QuarkToSymbol& setme)
678 {
679 setme._map.clear ();
680
681 typedef std::multimap<unsigned long,Quark> counts_t;
682 counts_t counts;
683 foreach_const (frequency_t, freq, it)
684 counts.insert (std::pair<unsigned long,Quark>(it->second,it->first));
685
686 counts_t::const_reverse_iterator it=counts.rbegin(), end=counts.rend();
687 for (const char * pch=lookup_symbols; *pch && it!=end; ++pch, ++it)
688 setme._map[it->second] = *pch;
689 freq.clear ();
690 }
691 }
692
693
694 bool
save_headers(DataIO & data_io,const Quark & group,const std::vector<Article * > & articles,unsigned long & part_count,unsigned long & article_count) const695 DataImpl :: save_headers (DataIO & data_io,
696 const Quark & group,
697 const std::vector<Article*> & articles,
698 unsigned long & part_count,
699 unsigned long & article_count) const
700 {
701 const char endl ('\n');
702 const GroupHeaders * h (get_group_headers (group));
703 assert (h != 0);
704
705 part_count = 0;
706 article_count = 0;
707
708 bool success;
709 if (_unit_test)
710 {
711 Log::add_info_va ("Not saving %s's headers because we're in unit test mode",
712 group.c_str());
713 success = true;
714 }
715 else
716 {
717 std::ostream& out (*data_io.write_group_headers(group));
718
719 out << "#\n"
720 "# This file has three sections.\n"
721 "#\n"
722 "# A. A shorthand table for the most frequent groups in the xrefs.\n"
723 "# The first line tells the number of elements to follow,\n"
724 "# then one line per entry with a one-character shorthand and full name.\n"
725 "#\n"
726 "# B. A shorthand table for the most freqent author names.\n"
727 "# This is formatted just like the other shorthand table.\n"
728 "# (sorted by post count, so it's also a most-frequent-posters list...)\n"
729 "#\n"
730 "# C. The group's headers section.\n"
731 "# The first line tells the number of articles to follow,\n"
732 "# then articles which each have the following lines:\n"
733 "# 1. message-id\n"
734 "# 2. subject\n"
735 "# 3. author\n"
736 "# 4. references. This line is omitted if the Article has an empty References header.\n"
737 "# 5. time-posted. This is a time_t (see http://en.wikipedia.org/wiki/Unix_time)\n"
738 "# 6. xref line, server1:group1:number1 server2:group2:number2 ...\n"
739 "# 7. has-attachments [parts-total-count parts-found-count] line-count\n"
740 "# If has-attachments isn't 't' (for true), fields 2 and 3 are omitted.\n"
741 "# If fields 2 and 3 are equal, the article is `complete'.\n"
742 "# 8. One line per parts-found-count: part-index message-id byte-count\n"
743 "#\n"
744 "#\n";
745
746 // lines moved from line 8 to line 7 in 0.115, causing version 2
747 // flag added, version 3 (12/2011, imhotep)
748 out << "3\t # file format version number\n";
749
750 // xref lookup section
751 frequency_t frequency;
752 foreach_const (std::vector<Article*>, articles, ait)
753 foreach_const (Xref, (*ait)->xref, xit)
754 ++frequency[xit->group];
755 QuarkToSymbol xref_qts;
756 build_qts (frequency, xref_qts);
757 xref_qts.write (out, "xref shorthand count");
758
759 // author lookup section
760 frequency.clear ();
761 foreach_const (std::vector<Article*>, articles, ait)
762 ++frequency[(*ait)->author];
763 QuarkToSymbol author_qts;
764 build_qts (frequency, author_qts);
765 author_qts.write (out, "author shorthand count");
766
767 // header section
768 out << articles.size() << endl;
769 std::string references;
770 foreach_const (std::vector<Article*>, articles, ait)
771 {
772 ++article_count;
773
774 const Article * a (*ait);
775 const Quark& message_id (a->message_id);
776 h->build_references_header (a, references);
777
778 // flag, message-id, subject, author
779 out <<a->flag<<"\n"
780 << message_id << "\n\t"
781 << a->subject << "\n\t"
782 << author_qts(a->author) << "\n\t";
783 // references line *IF* the article has a References header
784 if (!references.empty())
785 out << references << "\n\t";
786
787 // date
788 out << a->time_posted << "\n\t";
789
790 // xref
791 foreach_const (Xref, a->xref, xit) {
792 out << xref_qts(xit->server);
793 out.put(':');
794 out << xref_qts(xit->group);
795 out.put(':');
796 out << xit->number;
797 out.put(' ');
798 }
799 out << "\n\t";
800
801 // is_binary [total_part_count found_part_count]
802 out.put (a->is_binary ? 't' : 'f');
803 if (a->is_binary) {
804 out.put(' ');
805 out << a->get_total_part_count();
806 out.put(' ');
807 out << a->get_found_part_count();
808 }
809 out.put(' ');
810 out << a->lines;
811 out.put('\n');
812
813 // one line per foundPartCount (part-index message-id bytes lines)
814 for (Article::part_iterator pit(a->pbegin()), end(a->pend()); pit!=end; ++pit) {
815 out.put('\t'); out << pit.number();
816 out.put (' ');
817 out << pit.mid();
818 out.put (' ');
819 out << pit.bytes();
820 out.put ('\n');
821 ++part_count;
822 }
823 }
824
825 success = !out.fail ();
826 data_io.write_done (&out);
827 save_group_xovers (data_io);
828 }
829
830 return success;
831 }
832
833 void
save_headers(DataIO & data_io,const Quark & group) const834 DataImpl :: save_headers (DataIO& data_io, const Quark& group) const
835 {
836 if (_unit_test)
837 return;
838
839 pan_return_if_fail (!group.empty());
840
841 TimeElapsed timer;
842
843 // get a list of the articles
844 const GroupHeaders * h (get_group_headers (group));
845 std::vector<Article*> articles;
846 foreach_const (nodes_t, h->_nodes, it)
847 if (it->second->_article)
848 articles.push_back (it->second->_article);
849
850 unsigned long part_count (0ul);
851 unsigned long article_count (0ul);
852 const bool success (
853 save_headers(data_io, group, articles, part_count, article_count));
854 const double time_elapsed (timer.get_seconds_elapsed());
855 if (success)
856 Log::add_info_va (
857 _("Saved %lu parts, %lu articles in “%s” in %.1f seconds (%.0f articles/sec)"),
858 part_count,
859 article_count,
860 group.c_str(),
861 time_elapsed,
862 article_count/(fabs(time_elapsed)<0.001?0.001:time_elapsed));
863 }
864
865
866
867 /*******
868 ********
869 *******/
870 namespace
871 {
872 /* autosave newsrc files */
nrc_as_cb(gpointer ptr)873 gboolean nrc_as_cb(gpointer ptr)
874 {
875 DataImpl *data = static_cast<DataImpl*>(ptr);
876 data->save_newsrc_files();
877
878 return FALSE;
879 }
880 }
881
882 void
mark_read(const Article & a,bool read)883 DataImpl :: mark_read (const Article & a, bool read)
884 {
885 const Article * aptr (&a);
886 mark_read (&aptr, 1, read);
887 }
888
889 void
mark_read(const Article ** articles,unsigned long article_count,bool read)890 DataImpl :: mark_read (const Article ** articles,
891 unsigned long article_count,
892 bool read)
893 {
894 typedef std::map<Quark,quarks_t> group_to_changed_mids_t;
895 group_to_changed_mids_t group_to_changed_mids;
896
897 // set them to `read'...
898 for (const Article **it(articles), **end(articles+article_count); it!=end; ++it) {
899 const Article * article (*it);
900 foreach_const (Xref, article->xref, xit) {
901 const bool old_state (_read_groups[xit->group][xit->server]._read.mark_one (xit->number, read));
902 if (!old_state != !read)
903 group_to_changed_mids[xit->group].insert (article->message_id);
904 }
905 }
906
907 // update the affected groups' unread counts...
908 foreach_const (group_to_changed_mids_t, group_to_changed_mids, it) {
909 const Quark& group (it->first);
910 ReadGroup& g (_read_groups[group]);
911 const size_t n (it->second.size());
912 if (read)
913 g.decrement_unread (n);
914 else
915 g._unread_count += n;
916 fire_group_counts (group, g._unread_count, g._article_count);
917 on_articles_changed (group, it->second, false);
918 }
919
920 if( !newsrc_autosave_id && newsrc_autosave_timeout )
921 newsrc_autosave_id = g_timeout_add_seconds( newsrc_autosave_timeout * 60, nrc_as_cb, this);
922 }
923
924
925 bool
is_read(const Article * a) const926 DataImpl :: is_read (const Article* a) const
927 {
928 // if it's read on any server, the whole thing is read.
929 if (a != 0) {
930 foreach_const (Xref, a->xref, xit) {
931 const ReadGroup::Server * rgs (find_read_group_server (xit->group, xit->server));
932 if (rgs && rgs->_read.is_marked (xit->number))
933 return true;
934 }
935 }
936
937 return false;
938 }
939
940 void
get_article_scores(const Quark & group,const Article & article,Scorefile::items_t & setme) const941 DataImpl :: get_article_scores (const Quark & group,
942 const Article & article,
943 Scorefile::items_t & setme) const
944 {
945 ArticleFilter :: sections_t sections;
946 _scorefile.get_matching_sections (StringView(group), sections);
947 _article_filter.get_article_scores (*this, sections, group, article, setme);
948 }
949
950 void
rescore_articles(const Quark & group,const quarks_t mids)951 DataImpl :: rescore_articles (const Quark& group, const quarks_t mids)
952 {
953
954 GroupHeaders * gh (get_group_headers (group));
955 if (!gh) // group isn't loaded
956 return;
957
958 ArticleFilter::sections_t sections;
959 _scorefile.get_matching_sections (group.to_view(), sections);
960 nodes_v nodes;
961 find_nodes (mids, gh->_nodes, nodes);
962 foreach (nodes_v, nodes, it) {
963 if ((*it)->_article) {
964 Article& a (*(*it)->_article);
965 a.score = _article_filter.score_article (*this, sections, group, a);
966 }
967 }
968 }
969
970 void
rescore_group_articles(const Quark & group)971 DataImpl :: rescore_group_articles (const Quark& group)
972 {
973
974 GroupHeaders * gh (get_group_headers (group));
975 if (!gh) // group isn't loaded
976 return;
977
978 ArticleFilter::sections_t sections;
979 _scorefile.get_matching_sections (group.to_view(), sections);
980 foreach (nodes_t, gh->_nodes, it) {
981 if (it->second->_article) {
982 Article& a (*(it->second->_article));
983 a.score = _article_filter.score_article (*this, sections, group, a);
984 }
985 }
986 }
987
988 void
rescore()989 DataImpl :: rescore ()
990 {
991 //std::cerr << LINE_ID << " rescoring... " << std::endl;
992 const std::string filename (_data_io->get_scorefile_name());
993
994 // reload the scorefile...
995 _scorefile.clear ();
996 _scorefile.parse_file (filename);
997
998 // enumerate the groups that need rescoring...
999 quarks_t groups;
1000 foreach (std::set<MyTree*>, _trees, it)
1001 groups.insert ((*it)->_group);
1002
1003 // "on_articles_changed" rescores the articles...
1004 foreach_const (quarks_t, groups, git) {
1005 quarks_t mids;
1006 const Quark& group (*git);
1007 const GroupHeaders * h (get_group_headers (group));
1008 foreach_const (nodes_t, h->_nodes, nit)
1009 //only insert mids for nodes with articles
1010 if (nit->second->_article)
1011 mids.insert (mids.end(), nit->first);
1012 if (!mids.empty())
1013 on_articles_changed (group, mids, true);
1014 }
1015 }
1016
1017 void
add_score(const StringView & section_wildmat,int score_value,bool score_assign_flag,int lifespan_days,bool all_items_must_be_true,const Scorefile::AddItem * items,size_t item_count,bool do_rescore)1018 DataImpl :: add_score (const StringView & section_wildmat,
1019 int score_value,
1020 bool score_assign_flag,
1021 int lifespan_days,
1022 bool all_items_must_be_true,
1023 const Scorefile::AddItem * items,
1024 size_t item_count,
1025 bool do_rescore)
1026 {
1027 const std::string filename (_data_io->get_scorefile_name());
1028
1029 if (item_count && items)
1030 {
1031 // append to the file...
1032 const std::string str (_scorefile.build_score_string (
1033 section_wildmat, score_value, score_assign_flag, lifespan_days,
1034 all_items_must_be_true, items, item_count));
1035 std::ofstream o (filename.c_str(), std::ofstream::app|std::ofstream::out);
1036 o << '\n' << str << '\n';
1037 o.close ();
1038 chmod (filename.c_str(), 0600);
1039 }
1040
1041 if (do_rescore)
1042 rescore ();
1043 }
1044
1045 void
comment_out_scorefile_line(const StringView & filename,size_t begin_line,size_t end_line,bool do_rescore)1046 DataImpl :: comment_out_scorefile_line (const StringView & filename,
1047 size_t begin_line,
1048 size_t end_line,
1049 bool do_rescore)
1050 {
1051 std::string buf;
1052
1053 // read the file in...
1054 std::string line;
1055 std::ifstream in (filename.to_string().c_str());
1056 size_t line_number (0);
1057 while (std::getline (in, line)) {
1058 ++line_number;
1059 if (begin_line<=line_number && line_number<=end_line)
1060 buf += '#';
1061 buf += line;
1062 buf += '\n';
1063 }
1064 in.close ();
1065
1066 // ..and back out again
1067 const std::string f (filename.str, filename.len);
1068 std::ofstream o (f.c_str(), std::ofstream::trunc|std::ofstream::out);
1069 o << buf;
1070 o.close ();
1071 chmod (f.c_str(), 0600);
1072
1073 // rescore
1074 if (do_rescore)
1075 rescore ();
1076 }
1077
1078
1079 /***************************************************************************
1080 ****************************************************************************
1081 ***************************************************************************/
1082
1083 namespace
1084 {
1085 /** used by delete_articles */
1086 struct PerGroup {
1087 quarks_t mids;
1088 int unread;
1089 int count;
PerGroup__anon33913e9d0411::PerGroup1090 PerGroup(): unread(0), count(0) {}
1091 };
1092 }
1093
1094 void
group_clear_articles(const Quark & group)1095 DataImpl :: group_clear_articles (const Quark& group)
1096 {
1097 // if they're in memory, remove them from there too...
1098 GroupHeaders* headers (get_group_headers (group));
1099 if (headers) {
1100 unique_articles_t all;
1101 foreach (nodes_t, headers->_nodes, it)
1102 if (it->second->_article)
1103 all.insert (it->second->_article);
1104 delete_articles (all);
1105 }
1106
1107 // reset GroupHeaders' memory...
1108 // headers->_nodes.clear ();
1109 // headers->_node_chunk.clear ();
1110 // headers->_art_chunk.clear ();
1111
1112 // remove 'em from disk too.
1113 _data_io->clear_group_headers (group);
1114
1115 // fire a 'count changed' event.
1116 ReadGroup& g (_read_groups[group]);
1117 g._article_count = 0;
1118 g._unread_count = 0;
1119 fire_group_counts (group, g._unread_count, g._article_count);
1120 }
1121
1122 void
delete_articles(const unique_articles_t & articles)1123 DataImpl :: delete_articles (const unique_articles_t& articles)
1124 {
1125
1126 quarks_t all_mids;
1127
1128 // info we need to batch these deletions per group...
1129 typedef std::map<Quark,PerGroup> per_groups_t;
1130 per_groups_t per_groups;
1131
1132 // populate the per_groups map
1133 foreach_const (unique_articles_t, articles, it) {
1134 const Article * article (*it);
1135 quarks_t groups;
1136 foreach_const (Xref, article->xref, xit)
1137 groups.insert (xit->group);
1138 const bool was_read (is_read (article));
1139 foreach_const (quarks_t, groups, git) {
1140 PerGroup& per (per_groups[*git]);
1141 ++per.count;
1142 if (!was_read) ++per.unread;
1143 per.mids.insert (article->message_id);
1144 all_mids.insert (article->message_id);
1145 }
1146 }
1147
1148 // process each group
1149 foreach (per_groups_t, per_groups, it)
1150 {
1151 // update the group's read/unread count...
1152 const Quark& group (it->first);
1153 ReadGroup& g (_read_groups[group]);
1154 g.decrement_unread (it->second.unread);
1155 g.decrement_count (it->second.count);
1156 fire_group_counts (group, g._unread_count, g._article_count);
1157
1158 // remove the articles from our lookup table...
1159 GroupHeaders * h (get_group_headers (group));
1160 if (h)
1161 h->remove_articles (it->second.mids);
1162 }
1163
1164 on_articles_removed (all_mids);
1165 }
1166
1167 void
on_articles_removed(const quarks_t & mids) const1168 DataImpl :: on_articles_removed (const quarks_t& mids) const
1169 {
1170 foreach (std::set<MyTree*>, _trees, it)
1171 (*it)->remove_articles (mids);
1172 }
1173
1174 void
on_articles_changed(const Quark & group,const quarks_t & mids,bool do_refilter)1175 DataImpl :: on_articles_changed (const Quark& group, const quarks_t& mids, bool do_refilter)
1176 {
1177 rescore_articles (group, mids);
1178
1179 // notify the trees that the articles have changed...
1180 foreach (std::set<MyTree*>, _trees, it)
1181 (*it)->articles_changed (mids, do_refilter);
1182 }
1183
1184 void
on_articles_added(const Quark & group,const quarks_t & mids)1185 DataImpl :: on_articles_added (const Quark& group, const quarks_t& mids)
1186 {
1187
1188 if (!mids.empty())
1189 {
1190 Log::add_info_va (_("Added %lu articles to %s."),
1191 mids.size(), group.c_str());
1192
1193 rescore_articles (group, mids);
1194
1195 foreach (std::set<MyTree*>, _trees, it) {
1196 debug ("This tree has a group " << (*it)->_group);
1197 if ((*it)->_group == group) {
1198 debug ("trying to add the articles to tree " << *it);
1199 (*it)->add_articles (mids);
1200 }
1201 }
1202
1203 ReadGroup& g (_read_groups[group]);
1204 g._article_count += mids.size ();
1205 g._unread_count += mids.size ();
1206 fire_group_counts (group, g._unread_count, g._article_count);
1207 }
1208 }
1209
1210
1211 DataImpl::ArticleNode*
find_ancestor(ArticleNode * node,const Quark & ancestor_mid)1212 DataImpl :: find_ancestor (ArticleNode * node,
1213 const Quark & ancestor_mid)
1214 {
1215 ArticleNode * parent_node (node->_parent);
1216 while (parent_node && (parent_node->_mid != ancestor_mid))
1217 parent_node = parent_node->_parent;
1218 return parent_node;
1219 }
1220
1221 DataImpl::ArticleNode*
find_closest_ancestor(ArticleNode * node,const unique_sorted_quarks_t & mid_pool)1222 DataImpl :: find_closest_ancestor (ArticleNode * node,
1223 const unique_sorted_quarks_t & mid_pool)
1224 {
1225 ArticleNode * parent_node (node->_parent);
1226 while (parent_node && !mid_pool.count(parent_node->_mid))
1227 parent_node = parent_node->_parent;
1228 return parent_node;
1229 }
1230
1231 const DataImpl::ArticleNode*
find_closest_ancestor(const ArticleNode * node,const unique_sorted_quarks_t & mid_pool)1232 DataImpl :: find_closest_ancestor (const ArticleNode * node,
1233 const unique_sorted_quarks_t & mid_pool)
1234 {
1235 const ArticleNode * parent_node (node->_parent);
1236 while (parent_node && !mid_pool.count(parent_node->_mid))
1237 parent_node = parent_node->_parent;
1238 return parent_node;
1239 }
1240
1241 Data::ArticleTree*
group_get_articles(const Quark & group,const Quark & save_path,const ShowType show_type,const FilterInfo * filter,const RulesInfo * rules) const1242 DataImpl :: group_get_articles (const Quark & group,
1243 const Quark & save_path,
1244 const ShowType show_type,
1245 const FilterInfo * filter,
1246 const RulesInfo * rules) const
1247 {
1248 // cast const away for group_ref()... consider _groups mutable
1249 return new MyTree (*const_cast<DataImpl*>(this), group, save_path, show_type, filter, rules);
1250 }
1251