1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package stop 16 17import ( 18 "reflect" 19 "testing" 20 21 "github.com/blevesearch/bleve/analysis" 22 "github.com/blevesearch/bleve/analysis/tokenmap" 23 "github.com/blevesearch/bleve/registry" 24) 25 26func TestStopWordsFilter(t *testing.T) { 27 28 inputTokenStream := analysis.TokenStream{ 29 &analysis.Token{ 30 Term: []byte("a"), 31 }, 32 &analysis.Token{ 33 Term: []byte("walk"), 34 }, 35 &analysis.Token{ 36 Term: []byte("in"), 37 }, 38 &analysis.Token{ 39 Term: []byte("the"), 40 }, 41 &analysis.Token{ 42 Term: []byte("park"), 43 }, 44 } 45 46 expectedTokenStream := analysis.TokenStream{ 47 &analysis.Token{ 48 Term: []byte("walk"), 49 }, 50 &analysis.Token{ 51 Term: []byte("park"), 52 }, 53 } 54 55 cache := registry.NewCache() 56 stopListConfig := map[string]interface{}{ 57 "type": tokenmap.Name, 58 "tokens": []interface{}{"a", "in", "the"}, 59 } 60 _, err := cache.DefineTokenMap("stop_test", stopListConfig) 61 if err != nil { 62 t.Fatal(err) 63 } 64 65 stopConfig := map[string]interface{}{ 66 "type": "stop_tokens", 67 "stop_token_map": "stop_test", 68 } 69 stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig) 70 if err != nil { 71 t.Fatal(err) 72 } 73 74 ouputTokenStream := stopFilter.Filter(inputTokenStream) 75 if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) { 76 t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream) 77 } 78} 79 80func BenchmarkStopWordsFilter(b *testing.B) { 81 82 inputTokenStream := analysis.TokenStream{ 83 &analysis.Token{ 84 Term: []byte("a"), 85 }, 86 &analysis.Token{ 87 Term: []byte("walk"), 88 }, 89 &analysis.Token{ 90 Term: []byte("in"), 91 }, 92 &analysis.Token{ 93 Term: []byte("the"), 94 }, 95 &analysis.Token{ 96 Term: []byte("park"), 97 }, 98 } 99 100 cache := registry.NewCache() 101 stopListConfig := map[string]interface{}{ 102 "type": tokenmap.Name, 103 "tokens": []interface{}{"a", "in", "the"}, 104 } 105 _, err := cache.DefineTokenMap("stop_test", stopListConfig) 106 if err != nil { 107 b.Fatal(err) 108 } 109 110 stopConfig := map[string]interface{}{ 111 "type": "stop_tokens", 112 "stop_token_map": "stop_test", 113 } 114 stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig) 115 if err != nil { 116 b.Fatal(err) 117 } 118 b.ResetTimer() 119 120 for i := 0; i < b.N; i++ { 121 stopFilter.Filter(inputTokenStream) 122 } 123 124} 125