/* Copyright (C) 2006 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifndef _STOPLIST_H_INCLUDED_
#define _STOPLIST_H_INCLUDED_

#include <set>
#include <string>

#ifndef NO_NAMESPACES
// NOTE(review): these using-declarations sit at header scope, so they are
// visible in every file that includes this header. They are kept as-is
// because includers may rely on them.
using std::set;
using std::string;
namespace Rcl
{
#endif

/**
 * A StopList is just a bunch of strings read from a file.
 *
 * Some of the strings may contain whitespace (that's for experimentation with
 * stop n-grams), so we take care of dquotes while reading the file. We also
 * lowercase and remove accents. The source file should be utf-8.
 */
class StopList {
public:
    /** Build an empty list: hasStops() is false until setFile() adds terms. */
    StopList() {}

    /**
     * Build a list and immediately load it through setFile().
     *
     * The boolean returned by setFile() is discarded here, so a caller
     * cannot obtain that result through this constructor.
     *
     * @param filename passed unchanged to setFile().
     */
    StopList(const string &filename) {setFile(filename);}

    virtual ~StopList() {}

    /**
     * Load the stop terms from a file (defined outside this header).
     *
     * @param filename the stop-word file to read.
     * @return presumably true on success -- TODO confirm against the
     *         implementation.
     */
    bool setFile(const string &filename);

    /**
     * Query the list for a term (defined outside this header).
     *
     * @param term the term to look up.
     * @return presumably true when term is one of the stored stop terms --
     *         TODO confirm against the implementation, including whether
     *         term is expected to be already lowercased/unaccented.
     */
    bool isStop(const string &term) const;

    /** @return true when at least one stop term is stored. */
    bool hasStops() const {return !m_stops.empty();}

private:
    /** The stored stop terms. */
    set<string> m_stops;
};

#ifndef NO_NAMESPACES
}
#endif

#endif /* _STOPLIST_H_INCLUDED_ */