1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3  * Pan - A Newsreader for Gtk+
4  * Copyright (C) 2002-2006  Charles Kerr <charles@rebelbase.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, see <http://www.gnu.org/licenses/>.
17  *
18  */
19 
20 #include <config.h>
21 #include <algorithm> // std::replace
22 #include <cstdlib> // atoi, strtoul
23 #include <iostream>
24 #include <sstream>
25 #include <string>
26 extern "C" {
27   #include <glib/gi18n.h>
28 }
29 #include <pan/general/debug.h>
30 #include <pan/general/log.h>
31 #include <pan/general/macros.h>
32 #include "filter-info.h"
33 #include "scorefile.h"
34 
35 using namespace pan;
36 
37 /**
38 ***  Age
39 **/
40 namespace
41 {
get_today()42   unsigned long get_today ()
43   {
44     const time_t now (time (0));
45     struct tm t (*localtime (&now));
46     return (t.tm_year*10000) + (t.tm_mon*100) + t.tm_mday;
47   }
48 
49   /**
50    * 0 if it has not expired
51    * 1 if it has expired
52    * -1 if an error occurred while parsing
53    */
54   int
has_score_expired(const StringView & v,unsigned long today)55   has_score_expired (const StringView& v, unsigned long today)
56   {
57     if (v.empty())
58       return 0;
59 
60     const std::string tmp (v.str, v.len); // ensure zero termination for sscanf
61 
62     unsigned long mm, dd, yyyy;
63     if (((3 != sscanf (tmp.c_str(), "%lu/%lu/%lu", &mm, &dd, &yyyy))
64       && (3 != sscanf (tmp.c_str(), "%lu-%lu-%lu", &dd, &mm, &yyyy)))
65       || (dd > 31)
66       || (mm > 12)
67       || (yyyy < 1900))
68       return -1;
69 
70     unsigned long score_time = (yyyy - 1900) * 10000 + (mm - 1) * 100 + dd;
71     return score_time <= today ? 1 : 0;
72   }
73 }
74 
75 /****
76 *****
77 *****  Parsing the scorefile
78 *****
79 ****/
80 
81 /**
82  * private Scorefile class used when reading scorefiles from disk.
83  */
84 struct pan::Scorefile::ParseContext
85 {
86   /** The current Section object, or NULL if none. */
87   Scorefile::Section * current_section;
88 
89   /** For get_current_test() */
90   std::vector<int> test_offsets;
91 
get_current_itempan::Scorefile::ParseContext92   Scorefile::Item * get_current_item () {
93     Scorefile::Item * ret (0);
94     if (current_section!=0 && !current_section->items.empty())
95       ret = &current_section->items.back();
96     return ret;
97   }
98 
get_current_testpan::Scorefile::ParseContext99   FilterInfo * get_current_test () {
100     FilterInfo * test (0);
101     Scorefile::Item * item (get_current_item());
102     if (item)
103       test = &item->test;
104     if (test)
105       foreach_const (std::vector<int>, test_offsets, it)
106         test = test->_aggregates[*it];
107     return test;
108   }
109 
update_item_end_linepan::Scorefile::ParseContext110   void update_item_end_line (size_t line_number) {
111     Scorefile::Item * item (get_current_item());
112     if (item)
113       item->end_line = line_number;
114   }
115 
116   unsigned long today;
117 
ParseContextpan::Scorefile::ParseContext118   ParseContext (): current_section(0), today(get_today()) {}
119 };
120 
121 
122 namespace
123 {
slrn_fix_regexp(const StringView & in)124   std::string slrn_fix_regexp (const StringView& in)
125   {
126     std::string s;
127     s.reserve (in.len + 10); // 10 is a guess on how many extra chars we need
128     s += '^';
129     for (const char *pch(in.begin()), *end(in.end()); pch!=end; ++pch) {
130       if (*pch=='.' || *pch=='+')
131         s += '\\';
132       else if (*pch=='*')
133         s += '.';
134       s += *pch;
135     }
136     if (s[s.size()-1]!='$')
137       s += '$';
138     return s;
139   }
140 }
141 
142 Scorefile :: Section*
get_section(const StringView & name)143 Scorefile :: get_section (const StringView& name)
144 {
145   if (name.empty())
146     return 0;
147 
148   // look for a section that already matches the name
149   foreach (sections_t, _sections, it)
150     if (name == it->name)
151       return &*it;
152 
153   // make a new section
154   _sections.resize (_sections.size()+1);
155   Section& s (_sections.back());
156   s.name = name;
157   s.negate = *name.str=='~';
158 
159   // break the name into group tokens
160   typedef std::vector<StringView> tokens_t;
161   std::string tmp (name.str, name.len);
162   std::replace (tmp.begin(), tmp.end(), ',', ' ');
163   tokens_t tokens;
164   StringView n (tmp);
165   if (s.negate) { ++n.str; --n.len; } // walk past the negate tilde
166   for (const char *pch(n.begin()), *e(n.end()); pch!=e; ++pch) {
167     while (pch!=e && ::isspace(*pch)) ++pch;
168     if (pch==e) break;
169     const char * tok_begin = pch;
170     while (pch!=e && !::isspace(*pch)) ++pch;
171     if (pch!=tok_begin)
172       tokens.push_back (StringView (tok_begin, pch-tok_begin));
173     if (pch==e)
174       break;
175   }
176 
177   foreach_const (tokens_t, tokens, it) {
178     const std::string groupname (slrn_fix_regexp (*it));
179     if (!TextMatch::validate_regex (groupname.c_str()))
180       continue;
181     TextMatch tm;
182     tm.set  (groupname, TextMatch::REGEX, true/*case*/, false/*negate*/);
183     s.groups.push_back (tm);
184   }
185 
186   return &s;
187 }
188 
189 int
parse_file(ParseContext & context,const StringView & filename)190 Scorefile :: parse_file (ParseContext& context, const StringView& filename)
191 {
192   int retval (0);
193 
194   LineReader * in ((*_filename_to_reader)(filename));
195   if (!in)
196     return -1;
197 
198   size_t line_number (0);
199   StringView line;
200   while (in->getline (line))
201   {
202     ++line_number;
203     //std::cerr << LINE_ID << " line " << line_number << " [" << line << ']' << std::endl;
204 
205     line.trim ();
206 
207     // skip comments & blank lines
208     if (line.empty() || *line.str=='%' || *line.str=='#')
209       continue;
210 
211     // new section
212     if (*line.str=='[')
213     {
214       StringView name (line.substr (line.str+1, line.strchr(']')));
215       name.trim ();
216 
217       context.current_section = get_section (name);
218       context.test_offsets.clear ();
219     }
220 
221     // new Item
222     else if (context.current_section!=0 && !line.strncasecmp("Score:",6))
223     {
224       line.eat_chars (6);
225       const bool all_tests_must_pass (line.len>=2 && !memcmp(line.str,"::",2));
226       while (!line.empty() && *line.str==':') line.eat_chars (1);
227       while (!line.empty() && ::isspace(*line.str)) line.eat_chars (1);
228       const bool value_assign_flag = (!line.empty() && *line.str=='=');
229       if (value_assign_flag) line.eat_chars(1); // skip past the '='
230       while (!line.empty() && ::isspace(*line.str)) line.eat_chars (1);
231       const int value (line.empty() ? 0 : atoi(line.str));
232       StringView name;
233       const char * hash = line.strchr ('#');
234       if (hash)
235         name = line.substr (hash+1, 0);
236       name = name.substr (0, name.strchr('%')); // skip trailing comments
237       name.trim ();
238 
239       std::deque<Item>& items (context.current_section->items);
240       items.resize (items.size() + 1);
241       Item& item (items.back());
242 
243       item.name.assign (name.str, name.len);
244       item.filename = filename;
245       item.begin_line = line_number;
246       item.value_assign_flag = value_assign_flag;
247       item.value = value;
248       if (all_tests_must_pass)
249         item.test.set_type_aggregate_and ();
250       else
251         item.test.set_type_aggregate_or ();
252     }
253 
254     // begin nested condition
255     else if (line.len>=2 && line.str[0]=='{' && line.str[1]==':' && context.get_current_test()!=0)
256     {
257       context.update_item_end_line (line_number);
258 
259       line.eat_chars (1); // skip past the '{'
260       const bool only_one_test_must_pass (line.len>=2 && !memcmp(line.str,"::",2));
261       FilterInfo *test = new FilterInfo;
262       if (only_one_test_must_pass)
263         test->set_type_aggregate_or ();
264       else
265         test->set_type_aggregate_and ();
266 
267       FilterInfo * parent (context.get_current_test ());
268       context.test_offsets.push_back (parent->_aggregates.size());
269       parent->_aggregates.push_back (test);
270     }
271 
272     // end nested conditions
273     else if (line.len>=1 && *line.str=='}' && context.get_current_test()!=0)
274     {
275       context.update_item_end_line (line_number);
276       context.test_offsets.resize (context.test_offsets.size()-1);
277     }
278 
279     // include another file
280     else if (!line.strncasecmp ("include ", 8))
281     {
282       context.update_item_end_line (line_number);
283 
284       StringView new_filename (line);
285       new_filename.eat_chars (8);
286       new_filename.trim();
287       const int status (parse_file (context, new_filename));
288       if (status != 0) {
289         retval = status;
290         break;
291       }
292     }
293 
294     // include another file
295     else if (!line.strncasecmp("Expires:", 6) && context.get_current_test()!=0)
296     {
297       context.update_item_end_line (line_number);
298 
299       // get the date
300       line.eat_chars (8);
301       line.trim ();
302       const int has_expired (has_score_expired (line, context.today));
303       if (has_expired < 0)
304         Log::add_err_va (_("Error reading score in %*.*s, line %d: expected “Expires: MM/DD/YYYY” or “Expires: DD-MM-YYYY”."),
305           filename.len, filename.len, filename.str, line_number);
306       else if (has_expired) {
307         Log::add_info_va (_("Expired old score from %*.*s, line %d"),
308           filename.len, filename.len, filename.str, line_number);
309         Item * item = context.get_current_item ();
310         if (item)
311           item->expired = true;
312       }
313     }
314 
315     // new filter
316     else if (line.strpbrk (":=") && context.get_current_item()!=0)
317     {
318       context.update_item_end_line (line_number);
319 
320       // follow XNews' idiom for specifying case sensitivity:
321       // '=' as the delimiter instead of ':'
322       const char * delimiter = line.strpbrk (":=");
323       const bool case_sensitive (*delimiter=='=');
324 
325       line.trim ();
326       bool negate (*line.str=='~');
327       if (negate) line.eat_chars (1);
328 
329       StringView key (line.substr (0, delimiter));
330       key.trim ();
331       StringView val (line.substr (delimiter+1, 0));
332       val.trim ();
333 
334       FilterInfo::aggregatesp_t& aggregates (context.get_current_test()->_aggregates);
335       aggregates.push_back (new FilterInfo);
336       FilterInfo& test (*aggregates.back());
337 
338       if (!key.strncasecmp ("Lines", 5))
339       {
340         // "Lines: 5"  matches articles with > 5 lines.
341         // "~Lines: 5" matches articles with <= 5 lines.
342         const unsigned long gt = strtoul (val.str, NULL, 10);
343         const unsigned long ge = gt + 1;
344         test.set_type_line_count_ge (ge);
345       }
346       else if (!key.strncasecmp("Bytes", 5))
347       {
348         // bytes works the same way as lines.
349         const unsigned long gt = strtoul (val.str, NULL, 10);
350         const unsigned long ge = gt + 1;
351         test.set_type_byte_count_ge (ge);
352       }
353       else if (!key.strncasecmp ("Age", 3))
354       {
355         // age works differently from Lines and Bytes:
356         // "Age: 7" matches articles <= 7 days old.
357         const unsigned long le = strtoul (val.str, NULL, 10);
358         test.set_type_days_old_le (le);
359         negate = !negate; // double negative: le is a negate state
360       }
361       else if (!key.strncasecmp ("Has-Body", 8))
362       {
363         test.set_type_cached ();
364         if (val == "0")
365           negate = !negate;
366       }
367       else
368       {
369         TextMatch::Description d;
370         d.type = TextMatch::REGEX;
371         d.case_sensitive = case_sensitive;
372         d.text.assign (val.str, val.len);
373         test.set_type_text (key, d);
374       }
375       test._negate = negate;
376     }
377 
378     // error
379     else {
380       Log::add_err_va (_("Error reading score in %*.*s, line %d: unexpected line."),
381         filename.len, filename.len, filename.str, line_number);
382       retval = -1;
383       break;
384     }
385   }
386 
387   delete in;
388   return retval;
389 }
390 
391 namespace
392 {
normalize_test(FilterInfo * test)393   void normalize_test (FilterInfo *test)
394   {
395     if ((test->_type!=test->AGGREGATE_AND) && (test->_type!=test->AGGREGATE_OR))
396       return;
397 
398     if (test->_aggregates.size() == 1) {
399       *test = *test->_aggregates[0];
400       normalize_test (test);
401     } else foreach (FilterInfo::aggregatesp_t, test->_aggregates, it)
402       normalize_test (*it);
403   }
404 }
405 
406 void
clear()407 Scorefile :: clear ()
408 {
409   _sections.clear ();
410 }
411 
412 int
parse_file(const StringView & filename)413 Scorefile :: parse_file (const StringView& filename)
414 {
415   ParseContext context;
416   const int err (parse_file (context, filename));
417   if (err)
418     return err;
419 
420   foreach (sections_t, _sections, sit)
421     foreach (items_t, sit->items, it)
422       normalize_test (&it->test);
423 
424   size_t item_count (0);
425   foreach (sections_t, _sections, sit)
426     item_count += sit->items.size ();
427 
428   if (!_sections.empty())
429     Log::add_info_va (_("Read %lu scoring rules in %lu sections from “%s”"),
430       item_count, _sections.size(), filename.to_string().c_str());
431   return 0;
432 }
433 
434 void
get_matching_sections(const StringView & groupname,std::vector<const Section * > & setme) const435 Scorefile :: get_matching_sections (const StringView& groupname, std::vector<const Section*>& setme) const
436 {
437   foreach_const (sections_t, _sections, sit)
438   {
439     bool match (false);
440     foreach_const (std::deque<TextMatch>, sit->groups, git) {
441       match = git->test (groupname);
442       if (sit->negate) match = !match;
443       if (match) break;
444     }
445     if (match)
446       setme.push_back (&*sit);
447   }
448 }
449 
450 std::string
build_score_string(const StringView & section_wildmat,int score_value,bool score_assign_flag,int lifespan_days,bool all_items_must_be_true,const AddItem * items,size_t item_count)451 Scorefile :: build_score_string (const StringView    & section_wildmat,
452                                  int                   score_value,
453                                  bool                  score_assign_flag,
454                                  int                   lifespan_days,
455                                  bool                  all_items_must_be_true,
456                                  const AddItem       * items,
457                                  size_t                item_count)
458 {
459   const time_t now (time (0));
460   std::ostringstream out;
461   out << "%BOS" << std::endl
462       << "%Score created by Pan on " << ctime(&now)
463       << "[" << (section_wildmat.empty() ? "*" : section_wildmat) << ']' << std::endl
464       << "Score" << (all_items_must_be_true ? ":" : "::") << " " << (score_assign_flag?"=":"") << score_value << std::endl;
465   if (lifespan_days > 0) {
466     time_t expire_time_t = now + lifespan_days * 24 * 3600;
467     struct tm expire_tm (*localtime (&expire_time_t));
468     int dd = expire_tm.tm_mday;
469     int mm = expire_tm.tm_mon + 1;
470     int yyyy = expire_tm.tm_year + 1900;
471     out << "Expires: " << mm << '/' << dd << '/' << yyyy << std::endl;
472   }
473   for (size_t i(0); i!=item_count; ++i) {
474     const Scorefile::AddItem& item (items[i]);
475     if (!item.value.empty())
476       out << (item.on ? "" : "%") << (item.negate ? "~" : "") << item.key << ": " << item.value << std::endl;
477   }
478   out << "%EOS";
479   return out.str ();
480 }
481