// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mapping

import (
	"encoding/json"
	"fmt"

	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/analysis/analyzer/standard"
	"github.com/blevesearch/bleve/analysis/datetime/optional"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/registry"
)

// MappingJSONStrict, when true, makes IndexMappingImpl.UnmarshalJSON return
// an error if the JSON object contains keys it does not recognize. When
// false, unrecognized keys are silently ignored.
var MappingJSONStrict = false

// The constants below are the values NewIndexMapping and UnmarshalJSON assign
// to the corresponding IndexMappingImpl fields before any explicit
// configuration is applied.

// defaultTypeField is the default for IndexMappingImpl.TypeField.
const defaultTypeField = "_type"

// defaultType is the default for IndexMappingImpl.DefaultType.
const defaultType = "_default"

// defaultField is the default for IndexMappingImpl.DefaultField.
const defaultField = "_all"

// defaultAnalyzer is the default for IndexMappingImpl.DefaultAnalyzer.
const defaultAnalyzer = standard.Name

// defaultDateTimeParser is the default for
// IndexMappingImpl.DefaultDateTimeParser.
const defaultDateTimeParser = optional.Name

// An IndexMappingImpl controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
43type IndexMappingImpl struct { 44 TypeMapping map[string]*DocumentMapping `json:"types,omitempty"` 45 DefaultMapping *DocumentMapping `json:"default_mapping"` 46 TypeField string `json:"type_field"` 47 DefaultType string `json:"default_type"` 48 DefaultAnalyzer string `json:"default_analyzer"` 49 DefaultDateTimeParser string `json:"default_datetime_parser"` 50 DefaultField string `json:"default_field"` 51 StoreDynamic bool `json:"store_dynamic"` 52 IndexDynamic bool `json:"index_dynamic"` 53 DocValuesDynamic bool `json:"docvalues_dynamic,omitempty"` 54 CustomAnalysis *customAnalysis `json:"analysis,omitempty"` 55 cache *registry.Cache 56} 57 58// AddCustomCharFilter defines a custom char filter for use in this mapping 59func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error { 60 _, err := im.cache.DefineCharFilter(name, config) 61 if err != nil { 62 return err 63 } 64 im.CustomAnalysis.CharFilters[name] = config 65 return nil 66} 67 68// AddCustomTokenizer defines a custom tokenizer for use in this mapping 69func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error { 70 _, err := im.cache.DefineTokenizer(name, config) 71 if err != nil { 72 return err 73 } 74 im.CustomAnalysis.Tokenizers[name] = config 75 return nil 76} 77 78// AddCustomTokenMap defines a custom token map for use in this mapping 79func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error { 80 _, err := im.cache.DefineTokenMap(name, config) 81 if err != nil { 82 return err 83 } 84 im.CustomAnalysis.TokenMaps[name] = config 85 return nil 86} 87 88// AddCustomTokenFilter defines a custom token filter for use in this mapping 89func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error { 90 _, err := im.cache.DefineTokenFilter(name, config) 91 if err != nil { 92 return err 93 } 94 im.CustomAnalysis.TokenFilters[name] = config 95 return nil 96} 
97 98// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The 99// config map must have a "type" string entry to resolve the analyzer 100// constructor. The constructor is invoked with the remaining entries and 101// returned analyzer is registered in the IndexMapping. 102// 103// bleve comes with predefined analyzers, like 104// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are 105// available only if their package is imported by client code. To achieve this, 106// use their metadata to fill configuration entries: 107// 108// import ( 109// "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer" 110// "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter" 111// "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" 112// "github.com/blevesearch/bleve/analysis/tokenizers/unicode" 113// ) 114// 115// m := bleve.NewIndexMapping() 116// err := m.AddCustomAnalyzer("html", map[string]interface{}{ 117// "type": custom_analyzer.Name, 118// "char_filters": []string{ 119// html_char_filter.Name, 120// }, 121// "tokenizer": unicode.Name, 122// "token_filters": []string{ 123// lower_case_filter.Name, 124// ... 
125// }, 126// }) 127func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error { 128 _, err := im.cache.DefineAnalyzer(name, config) 129 if err != nil { 130 return err 131 } 132 im.CustomAnalysis.Analyzers[name] = config 133 return nil 134} 135 136// AddCustomDateTimeParser defines a custom date time parser for use in this mapping 137func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error { 138 _, err := im.cache.DefineDateTimeParser(name, config) 139 if err != nil { 140 return err 141 } 142 im.CustomAnalysis.DateTimeParsers[name] = config 143 return nil 144} 145 146// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules 147func NewIndexMapping() *IndexMappingImpl { 148 return &IndexMappingImpl{ 149 TypeMapping: make(map[string]*DocumentMapping), 150 DefaultMapping: NewDocumentMapping(), 151 TypeField: defaultTypeField, 152 DefaultType: defaultType, 153 DefaultAnalyzer: defaultAnalyzer, 154 DefaultDateTimeParser: defaultDateTimeParser, 155 DefaultField: defaultField, 156 IndexDynamic: IndexDynamic, 157 StoreDynamic: StoreDynamic, 158 DocValuesDynamic: DocValuesDynamic, 159 CustomAnalysis: newCustomAnalysis(), 160 cache: registry.NewCache(), 161 } 162} 163 164// Validate will walk the entire structure ensuring the following 165// explicitly named and default analyzers can be built 166func (im *IndexMappingImpl) Validate() error { 167 _, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer) 168 if err != nil { 169 return err 170 } 171 _, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser) 172 if err != nil { 173 return err 174 } 175 err = im.DefaultMapping.Validate(im.cache) 176 if err != nil { 177 return err 178 } 179 for _, docMapping := range im.TypeMapping { 180 err = docMapping.Validate(im.cache) 181 if err != nil { 182 return err 183 } 184 } 185 return nil 186} 187 188// AddDocumentMapping sets a custom document mapping for the specified 
type 189func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) { 190 im.TypeMapping[doctype] = dm 191} 192 193func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping { 194 docMapping := im.TypeMapping[docType] 195 if docMapping == nil { 196 docMapping = im.DefaultMapping 197 } 198 return docMapping 199} 200 201// UnmarshalJSON offers custom unmarshaling with optional strict validation 202func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { 203 204 var tmp map[string]json.RawMessage 205 err := json.Unmarshal(data, &tmp) 206 if err != nil { 207 return err 208 } 209 210 // set defaults for fields which might have been omitted 211 im.cache = registry.NewCache() 212 im.CustomAnalysis = newCustomAnalysis() 213 im.TypeField = defaultTypeField 214 im.DefaultType = defaultType 215 im.DefaultAnalyzer = defaultAnalyzer 216 im.DefaultDateTimeParser = defaultDateTimeParser 217 im.DefaultField = defaultField 218 im.DefaultMapping = NewDocumentMapping() 219 im.TypeMapping = make(map[string]*DocumentMapping) 220 im.StoreDynamic = StoreDynamic 221 im.IndexDynamic = IndexDynamic 222 im.DocValuesDynamic = DocValuesDynamic 223 224 var invalidKeys []string 225 for k, v := range tmp { 226 switch k { 227 case "analysis": 228 err := json.Unmarshal(v, &im.CustomAnalysis) 229 if err != nil { 230 return err 231 } 232 case "type_field": 233 err := json.Unmarshal(v, &im.TypeField) 234 if err != nil { 235 return err 236 } 237 case "default_type": 238 err := json.Unmarshal(v, &im.DefaultType) 239 if err != nil { 240 return err 241 } 242 case "default_analyzer": 243 err := json.Unmarshal(v, &im.DefaultAnalyzer) 244 if err != nil { 245 return err 246 } 247 case "default_datetime_parser": 248 err := json.Unmarshal(v, &im.DefaultDateTimeParser) 249 if err != nil { 250 return err 251 } 252 case "default_field": 253 err := json.Unmarshal(v, &im.DefaultField) 254 if err != nil { 255 return err 256 } 257 case "default_mapping": 258 err := 
json.Unmarshal(v, &im.DefaultMapping) 259 if err != nil { 260 return err 261 } 262 case "types": 263 err := json.Unmarshal(v, &im.TypeMapping) 264 if err != nil { 265 return err 266 } 267 case "store_dynamic": 268 err := json.Unmarshal(v, &im.StoreDynamic) 269 if err != nil { 270 return err 271 } 272 case "index_dynamic": 273 err := json.Unmarshal(v, &im.IndexDynamic) 274 if err != nil { 275 return err 276 } 277 case "docvalues_dynamic": 278 err := json.Unmarshal(v, &im.DocValuesDynamic) 279 if err != nil { 280 return err 281 } 282 default: 283 invalidKeys = append(invalidKeys, k) 284 } 285 } 286 287 if MappingJSONStrict && len(invalidKeys) > 0 { 288 return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys) 289 } 290 291 err = im.CustomAnalysis.registerAll(im) 292 if err != nil { 293 return err 294 } 295 296 return nil 297} 298 299func (im *IndexMappingImpl) determineType(data interface{}) string { 300 // first see if the object implements bleveClassifier 301 bleveClassifier, ok := data.(bleveClassifier) 302 if ok { 303 return bleveClassifier.BleveType() 304 } 305 // next see if the object implements Classifier 306 classifier, ok := data.(Classifier) 307 if ok { 308 return classifier.Type() 309 } 310 311 // now see if we can find a type using the mapping 312 typ, ok := mustString(lookupPropertyPath(data, im.TypeField)) 313 if ok { 314 return typ 315 } 316 317 return im.DefaultType 318} 319 320func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { 321 docType := im.determineType(data) 322 docMapping := im.mappingForType(docType) 323 walkContext := im.newWalkContext(doc, docMapping) 324 if docMapping.Enabled { 325 docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) 326 327 // see if the _all field was disabled 328 allMapping := docMapping.documentMappingForPath("_all") 329 if allMapping == nil || allMapping.Enabled { 330 field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, 
walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors) 331 doc.AddField(field) 332 } 333 } 334 335 return nil 336} 337 338type walkContext struct { 339 doc *document.Document 340 im *IndexMappingImpl 341 dm *DocumentMapping 342 excludedFromAll []string 343} 344 345func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext { 346 return &walkContext{ 347 doc: doc, 348 im: im, 349 dm: dm, 350 excludedFromAll: []string{"_id"}, 351 } 352} 353 354// AnalyzerNameForPath attempts to find the best analyzer to use with only a 355// field name will walk all the document types, look for field mappings at the 356// provided path, if one exists and it has an explicit analyzer that is 357// returned. 358func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { 359 // first we look for explicit mapping on the field 360 for _, docMapping := range im.TypeMapping { 361 analyzerName := docMapping.analyzerNameForPath(path) 362 if analyzerName != "" { 363 return analyzerName 364 } 365 } 366 // now try the default mapping 367 pathMapping := im.DefaultMapping.documentMappingForPath(path) 368 if pathMapping != nil { 369 if len(pathMapping.Fields) > 0 { 370 if pathMapping.Fields[0].Analyzer != "" { 371 return pathMapping.Fields[0].Analyzer 372 } 373 } 374 } 375 376 // next we will try default analyzers for the path 377 pathDecoded := decodePath(path) 378 for _, docMapping := range im.TypeMapping { 379 rv := docMapping.defaultAnalyzerName(pathDecoded) 380 if rv != "" { 381 return rv 382 } 383 } 384 385 return im.DefaultAnalyzer 386} 387 388func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer { 389 analyzer, err := im.cache.AnalyzerNamed(name) 390 if err != nil { 391 logger.Printf("error using analyzer named: %s", name) 392 return nil 393 } 394 return analyzer 395} 396 397func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser { 398 if name == "" { 399 name = 
im.DefaultDateTimeParser 400 } 401 dateTimeParser, err := im.cache.DateTimeParserNamed(name) 402 if err != nil { 403 logger.Printf("error using datetime parser named: %s", name) 404 return nil 405 } 406 return dateTimeParser 407} 408 409func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string { 410 411 // first we look for explicit mapping on the field 412 for _, docMapping := range im.TypeMapping { 413 pathMapping := docMapping.documentMappingForPath(path) 414 if pathMapping != nil { 415 if len(pathMapping.Fields) > 0 { 416 if pathMapping.Fields[0].Analyzer != "" { 417 return pathMapping.Fields[0].Analyzer 418 } 419 } 420 } 421 } 422 423 return im.DefaultDateTimeParser 424} 425 426func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) { 427 analyzer, err := im.cache.AnalyzerNamed(analyzerName) 428 if err != nil { 429 return nil, err 430 } 431 return analyzer.Analyze(text), nil 432} 433 434// FieldAnalyzer returns the name of the analyzer used on a field. 435func (im *IndexMappingImpl) FieldAnalyzer(field string) string { 436 return im.AnalyzerNameForPath(field) 437} 438 439// wrapper to satisfy new interface 440 441func (im *IndexMappingImpl) DefaultSearchField() string { 442 return im.DefaultField 443} 444