1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3 * Pan - A Newsreader for Gtk+
4 * Copyright (C) 2002-2006 Charles Kerr <charles@rebelbase.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #include <config.h>
21 #include <algorithm> // std::replace
22 #include <cstdlib> // atoi, strtoul
23 #include <iostream>
24 #include <sstream>
25 #include <string>
26 extern "C" {
27 #include <glib/gi18n.h>
28 }
29 #include <pan/general/debug.h>
30 #include <pan/general/log.h>
31 #include <pan/general/macros.h>
32 #include "filter-info.h"
33 #include "scorefile.h"
34
35 using namespace pan;
36
37 /**
38 *** Age
39 **/
40 namespace
41 {
/**
 * Returns today's local date packed into one number:
 * tm_year*10000 + tm_mon*100 + tm_mday, using the raw struct tm fields
 * (so the year is counted from 1900 and the month is zero-based).
 */
unsigned long get_today ()
{
  const time_t current_time = time (0);
  const struct tm * const local = localtime (&current_time);

  const int year_part  = local->tm_year * 10000;
  const int month_part = local->tm_mon * 100;
  return year_part + month_part + local->tm_mday;
}
48
49 /**
50 * 0 if it has not expired
51 * 1 if it has expired
52 * -1 if an error occurred while parsing
53 */
54 int
has_score_expired(const StringView & v,unsigned long today)55 has_score_expired (const StringView& v, unsigned long today)
56 {
57 if (v.empty())
58 return 0;
59
60 const std::string tmp (v.str, v.len); // ensure zero termination for sscanf
61
62 unsigned long mm, dd, yyyy;
63 if (((3 != sscanf (tmp.c_str(), "%lu/%lu/%lu", &mm, &dd, &yyyy))
64 && (3 != sscanf (tmp.c_str(), "%lu-%lu-%lu", &dd, &mm, &yyyy)))
65 || (dd > 31)
66 || (mm > 12)
67 || (yyyy < 1900))
68 return -1;
69
70 unsigned long score_time = (yyyy - 1900) * 10000 + (mm - 1) * 100 + dd;
71 return score_time <= today ? 1 : 0;
72 }
73 }
74
75 /****
76 *****
77 ***** Parsing the scorefile
78 *****
79 ****/
80
81 /**
82 * private Scorefile class used when reading scorefiles from disk.
83 */
84 struct pan::Scorefile::ParseContext
85 {
86 /** The current Section object, or NULL if none. */
87 Scorefile::Section * current_section;
88
89 /** For get_current_test() */
90 std::vector<int> test_offsets;
91
get_current_itempan::Scorefile::ParseContext92 Scorefile::Item * get_current_item () {
93 Scorefile::Item * ret (0);
94 if (current_section!=0 && !current_section->items.empty())
95 ret = ¤t_section->items.back();
96 return ret;
97 }
98
get_current_testpan::Scorefile::ParseContext99 FilterInfo * get_current_test () {
100 FilterInfo * test (0);
101 Scorefile::Item * item (get_current_item());
102 if (item)
103 test = &item->test;
104 if (test)
105 foreach_const (std::vector<int>, test_offsets, it)
106 test = test->_aggregates[*it];
107 return test;
108 }
109
update_item_end_linepan::Scorefile::ParseContext110 void update_item_end_line (size_t line_number) {
111 Scorefile::Item * item (get_current_item());
112 if (item)
113 item->end_line = line_number;
114 }
115
116 unsigned long today;
117
ParseContextpan::Scorefile::ParseContext118 ParseContext (): current_section(0), today(get_today()) {}
119 };
120
121
122 namespace
123 {
slrn_fix_regexp(const StringView & in)124 std::string slrn_fix_regexp (const StringView& in)
125 {
126 std::string s;
127 s.reserve (in.len + 10); // 10 is a guess on how many extra chars we need
128 s += '^';
129 for (const char *pch(in.begin()), *end(in.end()); pch!=end; ++pch) {
130 if (*pch=='.' || *pch=='+')
131 s += '\\';
132 else if (*pch=='*')
133 s += '.';
134 s += *pch;
135 }
136 if (s[s.size()-1]!='$')
137 s += '$';
138 return s;
139 }
140 }
141
142 Scorefile :: Section*
get_section(const StringView & name)143 Scorefile :: get_section (const StringView& name)
144 {
145 if (name.empty())
146 return 0;
147
148 // look for a section that already matches the name
149 foreach (sections_t, _sections, it)
150 if (name == it->name)
151 return &*it;
152
153 // make a new section
154 _sections.resize (_sections.size()+1);
155 Section& s (_sections.back());
156 s.name = name;
157 s.negate = *name.str=='~';
158
159 // break the name into group tokens
160 typedef std::vector<StringView> tokens_t;
161 std::string tmp (name.str, name.len);
162 std::replace (tmp.begin(), tmp.end(), ',', ' ');
163 tokens_t tokens;
164 StringView n (tmp);
165 if (s.negate) { ++n.str; --n.len; } // walk past the negate tilde
166 for (const char *pch(n.begin()), *e(n.end()); pch!=e; ++pch) {
167 while (pch!=e && ::isspace(*pch)) ++pch;
168 if (pch==e) break;
169 const char * tok_begin = pch;
170 while (pch!=e && !::isspace(*pch)) ++pch;
171 if (pch!=tok_begin)
172 tokens.push_back (StringView (tok_begin, pch-tok_begin));
173 if (pch==e)
174 break;
175 }
176
177 foreach_const (tokens_t, tokens, it) {
178 const std::string groupname (slrn_fix_regexp (*it));
179 if (!TextMatch::validate_regex (groupname.c_str()))
180 continue;
181 TextMatch tm;
182 tm.set (groupname, TextMatch::REGEX, true/*case*/, false/*negate*/);
183 s.groups.push_back (tm);
184 }
185
186 return &s;
187 }
188
189 int
parse_file(ParseContext & context,const StringView & filename)190 Scorefile :: parse_file (ParseContext& context, const StringView& filename)
191 {
192 int retval (0);
193
194 LineReader * in ((*_filename_to_reader)(filename));
195 if (!in)
196 return -1;
197
198 size_t line_number (0);
199 StringView line;
200 while (in->getline (line))
201 {
202 ++line_number;
203 //std::cerr << LINE_ID << " line " << line_number << " [" << line << ']' << std::endl;
204
205 line.trim ();
206
207 // skip comments & blank lines
208 if (line.empty() || *line.str=='%' || *line.str=='#')
209 continue;
210
211 // new section
212 if (*line.str=='[')
213 {
214 StringView name (line.substr (line.str+1, line.strchr(']')));
215 name.trim ();
216
217 context.current_section = get_section (name);
218 context.test_offsets.clear ();
219 }
220
221 // new Item
222 else if (context.current_section!=0 && !line.strncasecmp("Score:",6))
223 {
224 line.eat_chars (6);
225 const bool all_tests_must_pass (line.len>=2 && !memcmp(line.str,"::",2));
226 while (!line.empty() && *line.str==':') line.eat_chars (1);
227 while (!line.empty() && ::isspace(*line.str)) line.eat_chars (1);
228 const bool value_assign_flag = (!line.empty() && *line.str=='=');
229 if (value_assign_flag) line.eat_chars(1); // skip past the '='
230 while (!line.empty() && ::isspace(*line.str)) line.eat_chars (1);
231 const int value (line.empty() ? 0 : atoi(line.str));
232 StringView name;
233 const char * hash = line.strchr ('#');
234 if (hash)
235 name = line.substr (hash+1, 0);
236 name = name.substr (0, name.strchr('%')); // skip trailing comments
237 name.trim ();
238
239 std::deque<Item>& items (context.current_section->items);
240 items.resize (items.size() + 1);
241 Item& item (items.back());
242
243 item.name.assign (name.str, name.len);
244 item.filename = filename;
245 item.begin_line = line_number;
246 item.value_assign_flag = value_assign_flag;
247 item.value = value;
248 if (all_tests_must_pass)
249 item.test.set_type_aggregate_and ();
250 else
251 item.test.set_type_aggregate_or ();
252 }
253
254 // begin nested condition
255 else if (line.len>=2 && line.str[0]=='{' && line.str[1]==':' && context.get_current_test()!=0)
256 {
257 context.update_item_end_line (line_number);
258
259 line.eat_chars (1); // skip past the '{'
260 const bool only_one_test_must_pass (line.len>=2 && !memcmp(line.str,"::",2));
261 FilterInfo *test = new FilterInfo;
262 if (only_one_test_must_pass)
263 test->set_type_aggregate_or ();
264 else
265 test->set_type_aggregate_and ();
266
267 FilterInfo * parent (context.get_current_test ());
268 context.test_offsets.push_back (parent->_aggregates.size());
269 parent->_aggregates.push_back (test);
270 }
271
272 // end nested conditions
273 else if (line.len>=1 && *line.str=='}' && context.get_current_test()!=0)
274 {
275 context.update_item_end_line (line_number);
276 context.test_offsets.resize (context.test_offsets.size()-1);
277 }
278
279 // include another file
280 else if (!line.strncasecmp ("include ", 8))
281 {
282 context.update_item_end_line (line_number);
283
284 StringView new_filename (line);
285 new_filename.eat_chars (8);
286 new_filename.trim();
287 const int status (parse_file (context, new_filename));
288 if (status != 0) {
289 retval = status;
290 break;
291 }
292 }
293
294 // include another file
295 else if (!line.strncasecmp("Expires:", 6) && context.get_current_test()!=0)
296 {
297 context.update_item_end_line (line_number);
298
299 // get the date
300 line.eat_chars (8);
301 line.trim ();
302 const int has_expired (has_score_expired (line, context.today));
303 if (has_expired < 0)
304 Log::add_err_va (_("Error reading score in %*.*s, line %d: expected “Expires: MM/DD/YYYY” or “Expires: DD-MM-YYYY”."),
305 filename.len, filename.len, filename.str, line_number);
306 else if (has_expired) {
307 Log::add_info_va (_("Expired old score from %*.*s, line %d"),
308 filename.len, filename.len, filename.str, line_number);
309 Item * item = context.get_current_item ();
310 if (item)
311 item->expired = true;
312 }
313 }
314
315 // new filter
316 else if (line.strpbrk (":=") && context.get_current_item()!=0)
317 {
318 context.update_item_end_line (line_number);
319
320 // follow XNews' idiom for specifying case sensitivity:
321 // '=' as the delimiter instead of ':'
322 const char * delimiter = line.strpbrk (":=");
323 const bool case_sensitive (*delimiter=='=');
324
325 line.trim ();
326 bool negate (*line.str=='~');
327 if (negate) line.eat_chars (1);
328
329 StringView key (line.substr (0, delimiter));
330 key.trim ();
331 StringView val (line.substr (delimiter+1, 0));
332 val.trim ();
333
334 FilterInfo::aggregatesp_t& aggregates (context.get_current_test()->_aggregates);
335 aggregates.push_back (new FilterInfo);
336 FilterInfo& test (*aggregates.back());
337
338 if (!key.strncasecmp ("Lines", 5))
339 {
340 // "Lines: 5" matches articles with > 5 lines.
341 // "~Lines: 5" matches articles with <= 5 lines.
342 const unsigned long gt = strtoul (val.str, NULL, 10);
343 const unsigned long ge = gt + 1;
344 test.set_type_line_count_ge (ge);
345 }
346 else if (!key.strncasecmp("Bytes", 5))
347 {
348 // bytes works the same way as lines.
349 const unsigned long gt = strtoul (val.str, NULL, 10);
350 const unsigned long ge = gt + 1;
351 test.set_type_byte_count_ge (ge);
352 }
353 else if (!key.strncasecmp ("Age", 3))
354 {
355 // age works differently from Lines and Bytes:
356 // "Age: 7" matches articles <= 7 days old.
357 const unsigned long le = strtoul (val.str, NULL, 10);
358 test.set_type_days_old_le (le);
359 negate = !negate; // double negative: le is a negate state
360 }
361 else if (!key.strncasecmp ("Has-Body", 8))
362 {
363 test.set_type_cached ();
364 if (val == "0")
365 negate = !negate;
366 }
367 else
368 {
369 TextMatch::Description d;
370 d.type = TextMatch::REGEX;
371 d.case_sensitive = case_sensitive;
372 d.text.assign (val.str, val.len);
373 test.set_type_text (key, d);
374 }
375 test._negate = negate;
376 }
377
378 // error
379 else {
380 Log::add_err_va (_("Error reading score in %*.*s, line %d: unexpected line."),
381 filename.len, filename.len, filename.str, line_number);
382 retval = -1;
383 break;
384 }
385 }
386
387 delete in;
388 return retval;
389 }
390
391 namespace
392 {
normalize_test(FilterInfo * test)393 void normalize_test (FilterInfo *test)
394 {
395 if ((test->_type!=test->AGGREGATE_AND) && (test->_type!=test->AGGREGATE_OR))
396 return;
397
398 if (test->_aggregates.size() == 1) {
399 *test = *test->_aggregates[0];
400 normalize_test (test);
401 } else foreach (FilterInfo::aggregatesp_t, test->_aggregates, it)
402 normalize_test (*it);
403 }
404 }
405
406 void
clear()407 Scorefile :: clear ()
408 {
409 _sections.clear ();
410 }
411
412 int
parse_file(const StringView & filename)413 Scorefile :: parse_file (const StringView& filename)
414 {
415 ParseContext context;
416 const int err (parse_file (context, filename));
417 if (err)
418 return err;
419
420 foreach (sections_t, _sections, sit)
421 foreach (items_t, sit->items, it)
422 normalize_test (&it->test);
423
424 size_t item_count (0);
425 foreach (sections_t, _sections, sit)
426 item_count += sit->items.size ();
427
428 if (!_sections.empty())
429 Log::add_info_va (_("Read %lu scoring rules in %lu sections from “%s”"),
430 item_count, _sections.size(), filename.to_string().c_str());
431 return 0;
432 }
433
434 void
get_matching_sections(const StringView & groupname,std::vector<const Section * > & setme) const435 Scorefile :: get_matching_sections (const StringView& groupname, std::vector<const Section*>& setme) const
436 {
437 foreach_const (sections_t, _sections, sit)
438 {
439 bool match (false);
440 foreach_const (std::deque<TextMatch>, sit->groups, git) {
441 match = git->test (groupname);
442 if (sit->negate) match = !match;
443 if (match) break;
444 }
445 if (match)
446 setme.push_back (&*sit);
447 }
448 }
449
450 std::string
build_score_string(const StringView & section_wildmat,int score_value,bool score_assign_flag,int lifespan_days,bool all_items_must_be_true,const AddItem * items,size_t item_count)451 Scorefile :: build_score_string (const StringView & section_wildmat,
452 int score_value,
453 bool score_assign_flag,
454 int lifespan_days,
455 bool all_items_must_be_true,
456 const AddItem * items,
457 size_t item_count)
458 {
459 const time_t now (time (0));
460 std::ostringstream out;
461 out << "%BOS" << std::endl
462 << "%Score created by Pan on " << ctime(&now)
463 << "[" << (section_wildmat.empty() ? "*" : section_wildmat) << ']' << std::endl
464 << "Score" << (all_items_must_be_true ? ":" : "::") << " " << (score_assign_flag?"=":"") << score_value << std::endl;
465 if (lifespan_days > 0) {
466 time_t expire_time_t = now + lifespan_days * 24 * 3600;
467 struct tm expire_tm (*localtime (&expire_time_t));
468 int dd = expire_tm.tm_mday;
469 int mm = expire_tm.tm_mon + 1;
470 int yyyy = expire_tm.tm_year + 1900;
471 out << "Expires: " << mm << '/' << dd << '/' << yyyy << std::endl;
472 }
473 for (size_t i(0); i!=item_count; ++i) {
474 const Scorefile::AddItem& item (items[i]);
475 if (!item.value.empty())
476 out << (item.on ? "" : "%") << (item.negate ? "~" : "") << item.key << ": " << item.value << std::endl;
477 }
478 out << "%EOS";
479 return out.str ();
480 }
481