1 // This file is part of The New Aspell
2 // Copyright (C) 2001 by Kevin Atkinson under the GNU LGPL license
3 // version 2.0 or 2.1.  You should have received a copy of the LGPL
4 // license along with this library if you did not you can find
5 // it at http://www.gnu.org/.
6 
7 #include "settings.h"
8 #include "indiv_filter.hpp"
9 #include "key_info.hpp"
10 
11 namespace {
12   using namespace acommon;
13 
14   class UrlFilter : public IndividualFilter {
15   public:
16     PosibErr<bool> setup(Config *);
reset()17     void reset() {}
18     void process(FilterChar * &, FilterChar * &);
19   };
20 
setup(Config *)21   PosibErr<bool> UrlFilter::setup(Config *)
22   {
23     name_ = "url-filter";
24     order_num_ = 0.95;
25     return true;
26   }
27 
url_char(char c)28   static bool url_char(char c)
29   {
30     return c != '"' && c != ' ' && c != '\n' && c != '\t';
31   }
32 
process(FilterChar * & str,FilterChar * & end)33   void UrlFilter::process(FilterChar * & str, FilterChar * & end)
34   {
35     for (FilterChar * cur = str; cur < end; ++cur)
36     {
37       if (!url_char(*cur)) continue;
38       FilterChar * cur0 = cur;
39       bool blank_out = false;
40       int point_chars = 0;
41       // only consider special url deciding characters if they are in
42       // the middle of a word
43       for (++cur; cur + 1 < end && url_char(cur[1]); ++cur) {
44         if (blank_out) continue;
45         if ((cur[0] == '/' && (point_chars > 0 || cur[1] == '/'))
46             || cur[0] == '@') {
47           blank_out = true;
48         } else if (cur[0] == '.' && cur[1] != '.') {
49           // count multiple '.' as one
50           if (point_chars < 1) ++point_chars;
51           else                 blank_out = true;
52         }
53       }
54       ++cur;
55       if (blank_out) {
56 	for (; cur0 != cur; ++cur0) *cur0 = ' ';
57       }
58     }
59   }
60 }
61 
62 C_EXPORT
new_aspell_url_filter()63 IndividualFilter * new_aspell_url_filter() {
64   return new UrlFilter;
65 }
66 
67