1 /*------------------------------------------------------------------------------ 2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team 3 * 4 * Distributable under the terms of either the Apache License (Version 2.0) or 5 * the GNU Lesser General Public License, as specified in the COPYING file. 6 ------------------------------------------------------------------------------*/ 7 #include "CLucene/StdHeader.h" 8 #include "StandardFilter.h" 9 10 #include "../AnalysisHeader.h" 11 #include "../Analyzers.h" 12 #include "StandardTokenizerConstants.h" 13 #include "CLucene/util/StringBuffer.h" 14 15 CL_NS_USE(analysis) CL_NS_USE(util)16CL_NS_USE(util) 17 CL_NS_DEF2(analysis,standard) 18 19 StandardFilter::StandardFilter(TokenStream* in, bool deleteTokenStream): 20 TokenFilter(in, deleteTokenStream) 21 { 22 } 23 ~StandardFilter()24 StandardFilter::~StandardFilter(){ 25 } 26 next(Token * t)27 bool StandardFilter::next(Token* t) { 28 if (!input->next(t)) 29 return false; 30 31 TCHAR* text = t->_termText; 32 const int32_t textLength = t->termTextLength(); 33 const TCHAR* type = t->type(); 34 35 if ( type == tokenImage[APOSTROPHE] && //we can compare the type directy since the type should always come from the tokenImage 36 ( textLength >= 2 && _tcsicmp(text+textLength-2, _T("'s"))==0 ) ) 37 { 38 // remove 's 39 text[textLength-2]=0; 40 t->resetTermTextLen(); 41 42 return true; 43 44 } else if ( type == tokenImage[ACRONYM] ) { // remove dots 45 int32_t j = 0; 46 for ( int32_t i=0;i<textLength;i++ ){ 47 if ( text[i] != '.' ) 48 text[j++]=text[i]; 49 } 50 text[j]=0; 51 return true; 52 53 } else { 54 return true; 55 } 56 } 57 58 CL_NS_END2 59