1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package custom
16
17import (
18	"fmt"
19
20	"github.com/blevesearch/bleve/analysis"
21	"github.com/blevesearch/bleve/registry"
22)
23
// Name is the identifier under which this package's analyzer constructor is
// registered (see init).
const Name = "custom"
25
26func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
27
28	var err error
29	var charFilters []analysis.CharFilter
30	charFiltersValue, ok := config["char_filters"]
31	if ok {
32		switch charFiltersValue := charFiltersValue.(type) {
33		case []string:
34			charFilters, err = getCharFilters(charFiltersValue, cache)
35			if err != nil {
36				return nil, err
37			}
38		case []interface{}:
39			charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
40			if err != nil {
41				return nil, err
42			}
43			charFilters, err = getCharFilters(charFiltersNames, cache)
44			if err != nil {
45				return nil, err
46			}
47		default:
48			return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
49		}
50	}
51
52	var tokenizerName string
53	tokenizerValue, ok := config["tokenizer"]
54	if ok {
55		tokenizerName, ok = tokenizerValue.(string)
56		if !ok {
57			return nil, fmt.Errorf("must specify tokenizer as string")
58		}
59	} else {
60		return nil, fmt.Errorf("must specify tokenizer")
61	}
62
63	tokenizer, err := cache.TokenizerNamed(tokenizerName)
64	if err != nil {
65		return nil, err
66	}
67
68	var tokenFilters []analysis.TokenFilter
69	tokenFiltersValue, ok := config["token_filters"]
70	if ok {
71		switch tokenFiltersValue := tokenFiltersValue.(type) {
72		case []string:
73			tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
74			if err != nil {
75				return nil, err
76			}
77		case []interface{}:
78			tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
79			if err != nil {
80				return nil, err
81			}
82			tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
83			if err != nil {
84				return nil, err
85			}
86		default:
87			return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
88		}
89	}
90
91	rv := analysis.Analyzer{
92		Tokenizer: tokenizer,
93	}
94	if charFilters != nil {
95		rv.CharFilters = charFilters
96	}
97	if tokenFilters != nil {
98		rv.TokenFilters = tokenFilters
99	}
100	return &rv, nil
101}
102
// init registers AnalyzerConstructor with the registry package under Name
// when this package is loaded.
func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
106
107func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
108	charFilters := make([]analysis.CharFilter, len(charFilterNames))
109	for i, charFilterName := range charFilterNames {
110		charFilter, err := cache.CharFilterNamed(charFilterName)
111		if err != nil {
112			return nil, err
113		}
114		charFilters[i] = charFilter
115	}
116
117	return charFilters, nil
118}
119
120func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
121	tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
122	for i, tokenFilterName := range tokenFilterNames {
123		tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
124		if err != nil {
125			return nil, err
126		}
127		tokenFilters[i] = tokenFilter
128	}
129
130	return tokenFilters, nil
131}
132
// convertInterfaceSliceToStringSlice converts a slice of arbitrary values into
// a slice of strings of the same length and order.
//
// Every element of interfaceSlice must hold a string; objType is a
// human-readable label (for example "char filter") used only to build the
// error message. If any element is not a string, a nil slice and an error of
// the form "<objType> name must be a string" are returned.
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	stringSlice := make([]string, len(interfaceSlice))
	for i, interfaceObj := range interfaceSlice {
		stringObj, ok := interfaceObj.(string)
		if !ok {
			// objType is passed as an argument rather than concatenated into
			// the format string, so a '%' in the label is reported verbatim
			// instead of being interpreted as a formatting verb.
			return nil, fmt.Errorf("%s name must be a string", objType)
		}
		stringSlice[i] = stringObj
	}

	return stringSlice, nil
}
146