1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package ar 16 17import ( 18 "github.com/blevesearch/bleve/analysis" 19 "github.com/blevesearch/bleve/registry" 20 21 "github.com/blevesearch/bleve/analysis/token/lowercase" 22 "github.com/blevesearch/bleve/analysis/token/unicodenorm" 23 "github.com/blevesearch/bleve/analysis/tokenizer/unicode" 24) 25 26const AnalyzerName = "ar" 27 28func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { 29 tokenizer, err := cache.TokenizerNamed(unicode.Name) 30 if err != nil { 31 return nil, err 32 } 33 toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) 34 if err != nil { 35 return nil, err 36 } 37 normalizeFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKC) 38 stopArFilter, err := cache.TokenFilterNamed(StopName) 39 if err != nil { 40 return nil, err 41 } 42 normalizeArFilter, err := cache.TokenFilterNamed(NormalizeName) 43 if err != nil { 44 return nil, err 45 } 46 stemmerArFilter, err := cache.TokenFilterNamed(StemmerName) 47 if err != nil { 48 return nil, err 49 } 50 rv := analysis.Analyzer{ 51 Tokenizer: tokenizer, 52 TokenFilters: []analysis.TokenFilter{ 53 toLowerFilter, 54 normalizeFilter, 55 stopArFilter, 56 normalizeArFilter, 57 stemmerArFilter, 58 }, 59 } 60 return &rv, nil 61} 62 63func init() { 64 registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) 65} 66