1 /* Copyright (C) 2006 J.F.Dockes
2  *   This program is free software; you can redistribute it and/or modify
3  *   it under the terms of the GNU General Public License as published by
4  *   the Free Software Foundation; either version 2 of the License, or
5  *   (at your option) any later version.
6  *
7  *   This program is distributed in the hope that it will be useful,
8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  *   GNU General Public License for more details.
11  *
12  *   You should have received a copy of the GNU General Public License
13  *   along with this program; if not, write to the
14  *   Free Software Foundation, Inc.,
15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16  */
17 #ifndef _STOPLIST_H_INCLUDED_
18 #define _STOPLIST_H_INCLUDED_
19 
20 #include <set>
21 #include <string>
22 
23 #ifndef NO_NAMESPACES
24 using std::set;
25 using std::string;
26 namespace Rcl
27 {
28 #endif
29 
/**
 * A StopList is a set of stop terms read from a file.
 *
 * Some of the strings may contain whitespace (that's for experimentation
 * with stop n-grams), so double quotes are taken care of while reading the
 * file. Terms are also lowercased and stripped of accents. The source file
 * should be UTF-8.
 *
 * Standard library names are written with an explicit std:: prefix so that
 * the class does not depend on using-declarations being in effect at the
 * point where it is compiled.
 */
class StopList {
public:
    /** Build an empty list: hasStops() returns false on it. */
    StopList() {}

    /**
     * Build a list by handing @p filename to setFile().
     *
     * The value returned by setFile() is not checked here.
     * NOTE(review): not declared explicit, so a std::string converts
     * implicitly to a StopList; kept as is to preserve the interface.
     *
     * @param filename name of the stop terms file.
     */
    StopList(const std::string &filename) {setFile(filename);}

    virtual ~StopList() {}

    /**
     * Read stop terms from a file. Defined out of line.
     *
     * @param filename name of the stop terms file.
     * @return status flag; presumably true on success -- confirm against
     *         the implementation.
     */
    bool setFile(const std::string &filename);

    /**
     * Test a term against the list. Defined out of line.
     *
     * @param term the term to look up.
     * @return presumably true if @p term is one of the stored stop terms
     *         -- confirm against the implementation.
     */
    bool isStop(const std::string &term) const;

    /** @return true if at least one stop term is currently stored. */
    bool hasStops() const {return !m_stops.empty();}

private:
    /** The stored stop terms. */
    std::set<std::string> m_stops;
};
50 
51 #ifndef NO_NAMESPACES
52 }
53 #endif
54 
55 #endif /* _STOPLIST_H_INCLUDED_ */
56