1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package ar
16
17import (
18	"github.com/blevesearch/bleve/analysis"
19	"github.com/blevesearch/bleve/registry"
20
21	"github.com/blevesearch/bleve/analysis/token/lowercase"
22	"github.com/blevesearch/bleve/analysis/token/unicodenorm"
23	"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
24)
25
// AnalyzerName is the key under which this package registers its analyzer
// constructor with the registry.
const AnalyzerName = "ar"
27
28func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
29	tokenizer, err := cache.TokenizerNamed(unicode.Name)
30	if err != nil {
31		return nil, err
32	}
33	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
34	if err != nil {
35		return nil, err
36	}
37	normalizeFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKC)
38	stopArFilter, err := cache.TokenFilterNamed(StopName)
39	if err != nil {
40		return nil, err
41	}
42	normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName)
43	if err != nil {
44		return nil, err
45	}
46	stemmerArFilter, err := cache.TokenFilterNamed(StemmerName)
47	if err != nil {
48		return nil, err
49	}
50	rv := analysis.Analyzer{
51		Tokenizer: tokenizer,
52		TokenFilters: []analysis.TokenFilter{
53			toLowerFilter,
54			normalizeFilter,
55			stopArFilter,
56			normalizeArFilter,
57			stemmerArFilter,
58		},
59	}
60	return &rv, nil
61}
62
63func init() {
64	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
65}
66