1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package mapping
16
import (
	"encoding/json"
	"fmt"
	"sort"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/analyzer/standard"
	"github.com/blevesearch/bleve/analysis/datetime/optional"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/registry"
)
27
28var MappingJSONStrict = false
29
30const defaultTypeField = "_type"
31const defaultType = "_default"
32const defaultField = "_all"
33const defaultAnalyzer = standard.Name
34const defaultDateTimeParser = optional.Name
35
36// An IndexMappingImpl controls how objects are placed
37// into an index.
38// First the type of the object is determined.
39// Once the type is know, the appropriate
40// DocumentMapping is selected by the type.
41// If no mapping was determined for that type,
42// a DefaultMapping will be used.
43type IndexMappingImpl struct {
44	TypeMapping           map[string]*DocumentMapping `json:"types,omitempty"`
45	DefaultMapping        *DocumentMapping            `json:"default_mapping"`
46	TypeField             string                      `json:"type_field"`
47	DefaultType           string                      `json:"default_type"`
48	DefaultAnalyzer       string                      `json:"default_analyzer"`
49	DefaultDateTimeParser string                      `json:"default_datetime_parser"`
50	DefaultField          string                      `json:"default_field"`
51	StoreDynamic          bool                        `json:"store_dynamic"`
52	IndexDynamic          bool                        `json:"index_dynamic"`
53	DocValuesDynamic      bool                        `json:"docvalues_dynamic,omitempty"`
54	CustomAnalysis        *customAnalysis             `json:"analysis,omitempty"`
55	cache                 *registry.Cache
56}
57
58// AddCustomCharFilter defines a custom char filter for use in this mapping
59func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error {
60	_, err := im.cache.DefineCharFilter(name, config)
61	if err != nil {
62		return err
63	}
64	im.CustomAnalysis.CharFilters[name] = config
65	return nil
66}
67
68// AddCustomTokenizer defines a custom tokenizer for use in this mapping
69func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error {
70	_, err := im.cache.DefineTokenizer(name, config)
71	if err != nil {
72		return err
73	}
74	im.CustomAnalysis.Tokenizers[name] = config
75	return nil
76}
77
78// AddCustomTokenMap defines a custom token map for use in this mapping
79func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error {
80	_, err := im.cache.DefineTokenMap(name, config)
81	if err != nil {
82		return err
83	}
84	im.CustomAnalysis.TokenMaps[name] = config
85	return nil
86}
87
88// AddCustomTokenFilter defines a custom token filter for use in this mapping
89func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error {
90	_, err := im.cache.DefineTokenFilter(name, config)
91	if err != nil {
92		return err
93	}
94	im.CustomAnalysis.TokenFilters[name] = config
95	return nil
96}
97
98// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
99// config map must have a "type" string entry to resolve the analyzer
100// constructor. The constructor is invoked with the remaining entries and
101// returned analyzer is registered in the IndexMapping.
102//
103// bleve comes with predefined analyzers, like
104// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are
105// available only if their package is imported by client code. To achieve this,
106// use their metadata to fill configuration entries:
107//
108//   import (
109//       "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
110//       "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter"
111//       "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
112//       "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
113//   )
114//
115//   m := bleve.NewIndexMapping()
116//   err := m.AddCustomAnalyzer("html", map[string]interface{}{
117//       "type": custom_analyzer.Name,
118//       "char_filters": []string{
119//           html_char_filter.Name,
120//       },
121//       "tokenizer":     unicode.Name,
122//       "token_filters": []string{
123//           lower_case_filter.Name,
124//           ...
125//       },
126//   })
127func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error {
128	_, err := im.cache.DefineAnalyzer(name, config)
129	if err != nil {
130		return err
131	}
132	im.CustomAnalysis.Analyzers[name] = config
133	return nil
134}
135
136// AddCustomDateTimeParser defines a custom date time parser for use in this mapping
137func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error {
138	_, err := im.cache.DefineDateTimeParser(name, config)
139	if err != nil {
140		return err
141	}
142	im.CustomAnalysis.DateTimeParsers[name] = config
143	return nil
144}
145
146// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
147func NewIndexMapping() *IndexMappingImpl {
148	return &IndexMappingImpl{
149		TypeMapping:           make(map[string]*DocumentMapping),
150		DefaultMapping:        NewDocumentMapping(),
151		TypeField:             defaultTypeField,
152		DefaultType:           defaultType,
153		DefaultAnalyzer:       defaultAnalyzer,
154		DefaultDateTimeParser: defaultDateTimeParser,
155		DefaultField:          defaultField,
156		IndexDynamic:          IndexDynamic,
157		StoreDynamic:          StoreDynamic,
158		DocValuesDynamic:      DocValuesDynamic,
159		CustomAnalysis:        newCustomAnalysis(),
160		cache:                 registry.NewCache(),
161	}
162}
163
164// Validate will walk the entire structure ensuring the following
165// explicitly named and default analyzers can be built
166func (im *IndexMappingImpl) Validate() error {
167	_, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer)
168	if err != nil {
169		return err
170	}
171	_, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser)
172	if err != nil {
173		return err
174	}
175	err = im.DefaultMapping.Validate(im.cache)
176	if err != nil {
177		return err
178	}
179	for _, docMapping := range im.TypeMapping {
180		err = docMapping.Validate(im.cache)
181		if err != nil {
182			return err
183		}
184	}
185	return nil
186}
187
188// AddDocumentMapping sets a custom document mapping for the specified type
189func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
190	im.TypeMapping[doctype] = dm
191}
192
193func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping {
194	docMapping := im.TypeMapping[docType]
195	if docMapping == nil {
196		docMapping = im.DefaultMapping
197	}
198	return docMapping
199}
200
201// UnmarshalJSON offers custom unmarshaling with optional strict validation
202func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
203
204	var tmp map[string]json.RawMessage
205	err := json.Unmarshal(data, &tmp)
206	if err != nil {
207		return err
208	}
209
210	// set defaults for fields which might have been omitted
211	im.cache = registry.NewCache()
212	im.CustomAnalysis = newCustomAnalysis()
213	im.TypeField = defaultTypeField
214	im.DefaultType = defaultType
215	im.DefaultAnalyzer = defaultAnalyzer
216	im.DefaultDateTimeParser = defaultDateTimeParser
217	im.DefaultField = defaultField
218	im.DefaultMapping = NewDocumentMapping()
219	im.TypeMapping = make(map[string]*DocumentMapping)
220	im.StoreDynamic = StoreDynamic
221	im.IndexDynamic = IndexDynamic
222	im.DocValuesDynamic = DocValuesDynamic
223
224	var invalidKeys []string
225	for k, v := range tmp {
226		switch k {
227		case "analysis":
228			err := json.Unmarshal(v, &im.CustomAnalysis)
229			if err != nil {
230				return err
231			}
232		case "type_field":
233			err := json.Unmarshal(v, &im.TypeField)
234			if err != nil {
235				return err
236			}
237		case "default_type":
238			err := json.Unmarshal(v, &im.DefaultType)
239			if err != nil {
240				return err
241			}
242		case "default_analyzer":
243			err := json.Unmarshal(v, &im.DefaultAnalyzer)
244			if err != nil {
245				return err
246			}
247		case "default_datetime_parser":
248			err := json.Unmarshal(v, &im.DefaultDateTimeParser)
249			if err != nil {
250				return err
251			}
252		case "default_field":
253			err := json.Unmarshal(v, &im.DefaultField)
254			if err != nil {
255				return err
256			}
257		case "default_mapping":
258			err := json.Unmarshal(v, &im.DefaultMapping)
259			if err != nil {
260				return err
261			}
262		case "types":
263			err := json.Unmarshal(v, &im.TypeMapping)
264			if err != nil {
265				return err
266			}
267		case "store_dynamic":
268			err := json.Unmarshal(v, &im.StoreDynamic)
269			if err != nil {
270				return err
271			}
272		case "index_dynamic":
273			err := json.Unmarshal(v, &im.IndexDynamic)
274			if err != nil {
275				return err
276			}
277		case "docvalues_dynamic":
278			err := json.Unmarshal(v, &im.DocValuesDynamic)
279			if err != nil {
280				return err
281			}
282		default:
283			invalidKeys = append(invalidKeys, k)
284		}
285	}
286
287	if MappingJSONStrict && len(invalidKeys) > 0 {
288		return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
289	}
290
291	err = im.CustomAnalysis.registerAll(im)
292	if err != nil {
293		return err
294	}
295
296	return nil
297}
298
299func (im *IndexMappingImpl) determineType(data interface{}) string {
300	// first see if the object implements bleveClassifier
301	bleveClassifier, ok := data.(bleveClassifier)
302	if ok {
303		return bleveClassifier.BleveType()
304	}
305	// next see if the object implements Classifier
306	classifier, ok := data.(Classifier)
307	if ok {
308		return classifier.Type()
309	}
310
311	// now see if we can find a type using the mapping
312	typ, ok := mustString(lookupPropertyPath(data, im.TypeField))
313	if ok {
314		return typ
315	}
316
317	return im.DefaultType
318}
319
320func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
321	docType := im.determineType(data)
322	docMapping := im.mappingForType(docType)
323	walkContext := im.newWalkContext(doc, docMapping)
324	if docMapping.Enabled {
325		docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
326
327		// see if the _all field was disabled
328		allMapping := docMapping.documentMappingForPath("_all")
329		if allMapping == nil || allMapping.Enabled {
330			field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
331			doc.AddField(field)
332		}
333	}
334
335	return nil
336}
337
338type walkContext struct {
339	doc             *document.Document
340	im              *IndexMappingImpl
341	dm              *DocumentMapping
342	excludedFromAll []string
343}
344
345func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
346	return &walkContext{
347		doc:             doc,
348		im:              im,
349		dm:              dm,
350		excludedFromAll: []string{"_id"},
351	}
352}
353
354// AnalyzerNameForPath attempts to find the best analyzer to use with only a
355// field name will walk all the document types, look for field mappings at the
356// provided path, if one exists and it has an explicit analyzer that is
357// returned.
358func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
359	// first we look for explicit mapping on the field
360	for _, docMapping := range im.TypeMapping {
361		analyzerName := docMapping.analyzerNameForPath(path)
362		if analyzerName != "" {
363			return analyzerName
364		}
365	}
366	// now try the default mapping
367	pathMapping := im.DefaultMapping.documentMappingForPath(path)
368	if pathMapping != nil {
369		if len(pathMapping.Fields) > 0 {
370			if pathMapping.Fields[0].Analyzer != "" {
371				return pathMapping.Fields[0].Analyzer
372			}
373		}
374	}
375
376	// next we will try default analyzers for the path
377	pathDecoded := decodePath(path)
378	for _, docMapping := range im.TypeMapping {
379		rv := docMapping.defaultAnalyzerName(pathDecoded)
380		if rv != "" {
381			return rv
382		}
383	}
384
385	return im.DefaultAnalyzer
386}
387
388func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer {
389	analyzer, err := im.cache.AnalyzerNamed(name)
390	if err != nil {
391		logger.Printf("error using analyzer named: %s", name)
392		return nil
393	}
394	return analyzer
395}
396
397func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser {
398	if name == "" {
399		name = im.DefaultDateTimeParser
400	}
401	dateTimeParser, err := im.cache.DateTimeParserNamed(name)
402	if err != nil {
403		logger.Printf("error using datetime parser named: %s", name)
404		return nil
405	}
406	return dateTimeParser
407}
408
409func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
410
411	// first we look for explicit mapping on the field
412	for _, docMapping := range im.TypeMapping {
413		pathMapping := docMapping.documentMappingForPath(path)
414		if pathMapping != nil {
415			if len(pathMapping.Fields) > 0 {
416				if pathMapping.Fields[0].Analyzer != "" {
417					return pathMapping.Fields[0].Analyzer
418				}
419			}
420		}
421	}
422
423	return im.DefaultDateTimeParser
424}
425
426func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
427	analyzer, err := im.cache.AnalyzerNamed(analyzerName)
428	if err != nil {
429		return nil, err
430	}
431	return analyzer.Analyze(text), nil
432}
433
434// FieldAnalyzer returns the name of the analyzer used on a field.
435func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
436	return im.AnalyzerNameForPath(field)
437}
438
439// wrapper to satisfy new interface
440
441func (im *IndexMappingImpl) DefaultSearchField() string {
442	return im.DefaultField
443}
444