1 /*------------------------------------------------------------------------------
2 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3 *
4 * Distributable under the terms of either the Apache License (Version 2.0) or
5 * the GNU Lesser General Public License, as specified in the COPYING file.
6 ------------------------------------------------------------------------------*/
7 #include "CLucene/StdHeader.h"
8 #include "StandardFilter.h"
9 
10 #include "../AnalysisHeader.h"
11 #include "../Analyzers.h"
12 #include "StandardTokenizerConstants.h"
13 #include "CLucene/util/StringBuffer.h"
14 
15 CL_NS_USE(analysis)
CL_NS_USE(util)16 CL_NS_USE(util)
17 CL_NS_DEF2(analysis,standard)
18 
19   StandardFilter::StandardFilter(TokenStream* in, bool deleteTokenStream):
20     TokenFilter(in, deleteTokenStream)
21   {
22   }
23 
~StandardFilter()24   StandardFilter::~StandardFilter(){
25   }
26 
next(Token * t)27   bool StandardFilter::next(Token* t) {
28     if (!input->next(t))
29       return false;
30 
31     TCHAR* text = t->_termText;
32     const int32_t textLength = t->termTextLength();
33     const TCHAR* type = t->type();
34 
35     if ( type == tokenImage[APOSTROPHE] && //we can compare the type directy since the type should always come from the tokenImage
36 		( textLength >= 2 && _tcsicmp(text+textLength-2, _T("'s"))==0  ) )
37     {
38       // remove 's
39       text[textLength-2]=0;
40 	  t->resetTermTextLen();
41 
42       return true;
43 
44     } else if ( type == tokenImage[ACRONYM] ) {		  // remove dots
45 		int32_t j = 0;
46 		for ( int32_t i=0;i<textLength;i++ ){
47 			if ( text[i] != '.' )
48 				text[j++]=text[i];
49 		}
50 		text[j]=0;
51       return true;
52 
53     } else {
54       return true;
55     }
56   }
57 
58 CL_NS_END2
59