1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package stop
16
17import (
18	"reflect"
19	"testing"
20
21	"github.com/blevesearch/bleve/analysis"
22	"github.com/blevesearch/bleve/analysis/tokenmap"
23	"github.com/blevesearch/bleve/registry"
24)
25
26func TestStopWordsFilter(t *testing.T) {
27
28	inputTokenStream := analysis.TokenStream{
29		&analysis.Token{
30			Term: []byte("a"),
31		},
32		&analysis.Token{
33			Term: []byte("walk"),
34		},
35		&analysis.Token{
36			Term: []byte("in"),
37		},
38		&analysis.Token{
39			Term: []byte("the"),
40		},
41		&analysis.Token{
42			Term: []byte("park"),
43		},
44	}
45
46	expectedTokenStream := analysis.TokenStream{
47		&analysis.Token{
48			Term: []byte("walk"),
49		},
50		&analysis.Token{
51			Term: []byte("park"),
52		},
53	}
54
55	cache := registry.NewCache()
56	stopListConfig := map[string]interface{}{
57		"type":   tokenmap.Name,
58		"tokens": []interface{}{"a", "in", "the"},
59	}
60	_, err := cache.DefineTokenMap("stop_test", stopListConfig)
61	if err != nil {
62		t.Fatal(err)
63	}
64
65	stopConfig := map[string]interface{}{
66		"type":           "stop_tokens",
67		"stop_token_map": "stop_test",
68	}
69	stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
70	if err != nil {
71		t.Fatal(err)
72	}
73
74	ouputTokenStream := stopFilter.Filter(inputTokenStream)
75	if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
76		t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
77	}
78}
79
80func BenchmarkStopWordsFilter(b *testing.B) {
81
82	inputTokenStream := analysis.TokenStream{
83		&analysis.Token{
84			Term: []byte("a"),
85		},
86		&analysis.Token{
87			Term: []byte("walk"),
88		},
89		&analysis.Token{
90			Term: []byte("in"),
91		},
92		&analysis.Token{
93			Term: []byte("the"),
94		},
95		&analysis.Token{
96			Term: []byte("park"),
97		},
98	}
99
100	cache := registry.NewCache()
101	stopListConfig := map[string]interface{}{
102		"type":   tokenmap.Name,
103		"tokens": []interface{}{"a", "in", "the"},
104	}
105	_, err := cache.DefineTokenMap("stop_test", stopListConfig)
106	if err != nil {
107		b.Fatal(err)
108	}
109
110	stopConfig := map[string]interface{}{
111		"type":           "stop_tokens",
112		"stop_token_map": "stop_test",
113	}
114	stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
115	if err != nil {
116		b.Fatal(err)
117	}
118	b.ResetTimer()
119
120	for i := 0; i < b.N; i++ {
121		stopFilter.Filter(inputTokenStream)
122	}
123
124}
125