1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package stop implements a TokenFilter removing tokens found in 16// a TokenMap. 17// 18// It constructor takes the following arguments: 19// 20// "stop_token_map" (string): the name of the token map identifying tokens to 21// remove. 22package stop 23 24import ( 25 "fmt" 26 27 "github.com/blevesearch/bleve/analysis" 28 "github.com/blevesearch/bleve/registry" 29) 30 31const Name = "stop_tokens" 32 33type StopTokensFilter struct { 34 stopTokens analysis.TokenMap 35} 36 37func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter { 38 return &StopTokensFilter{ 39 stopTokens: stopTokens, 40 } 41} 42 43func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 44 j := 0 45 for _, token := range input { 46 _, isStopToken := f.stopTokens[string(token.Term)] 47 if !isStopToken { 48 input[j] = token 49 j++ 50 } 51 } 52 53 return input[:j] 54} 55 56func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 57 stopTokenMapName, ok := config["stop_token_map"].(string) 58 if !ok { 59 return nil, fmt.Errorf("must specify stop_token_map") 60 } 61 stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName) 62 if err != nil { 63 return nil, fmt.Errorf("error building stop words filter: %v", err) 64 } 65 return NewStopTokensFilter(stopTokenMap), nil 66} 67 68func init() { 69 registry.RegisterTokenFilter(Name, StopTokensFilterConstructor) 70} 71