1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bleve
16
17import (
18	"encoding/json"
19	"fmt"
20	"reflect"
21	"sort"
22	"time"
23
24	"github.com/blevesearch/bleve/analysis"
25	"github.com/blevesearch/bleve/analysis/datetime/optional"
26	"github.com/blevesearch/bleve/document"
27	"github.com/blevesearch/bleve/registry"
28	"github.com/blevesearch/bleve/search"
29	"github.com/blevesearch/bleve/search/collector"
30	"github.com/blevesearch/bleve/search/query"
31	"github.com/blevesearch/bleve/size"
32)
33
34var reflectStaticSizeSearchResult int
35var reflectStaticSizeSearchStatus int
36
37func init() {
38	var sr SearchResult
39	reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
40	var ss SearchStatus
41	reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
42}
43
44var cache = registry.NewCache()
45
46const defaultDateTimeParser = optional.Name
47
48type numericRange struct {
49	Name string   `json:"name,omitempty"`
50	Min  *float64 `json:"min,omitempty"`
51	Max  *float64 `json:"max,omitempty"`
52}
53
54type dateTimeRange struct {
55	Name        string    `json:"name,omitempty"`
56	Start       time.Time `json:"start,omitempty"`
57	End         time.Time `json:"end,omitempty"`
58	startString *string
59	endString   *string
60}
61
62func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time) {
63	start = dr.Start
64	if dr.Start.IsZero() && dr.startString != nil {
65		s, err := dateTimeParser.ParseDateTime(*dr.startString)
66		if err == nil {
67			start = s
68		}
69	}
70	end = dr.End
71	if dr.End.IsZero() && dr.endString != nil {
72		e, err := dateTimeParser.ParseDateTime(*dr.endString)
73		if err == nil {
74			end = e
75		}
76	}
77	return start, end
78}
79
80func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
81	var temp struct {
82		Name  string  `json:"name,omitempty"`
83		Start *string `json:"start,omitempty"`
84		End   *string `json:"end,omitempty"`
85	}
86
87	err := json.Unmarshal(input, &temp)
88	if err != nil {
89		return err
90	}
91
92	dr.Name = temp.Name
93	if temp.Start != nil {
94		dr.startString = temp.Start
95	}
96	if temp.End != nil {
97		dr.endString = temp.End
98	}
99
100	return nil
101}
102
103func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
104	rv := map[string]interface{}{
105		"name":  dr.Name,
106		"start": dr.Start,
107		"end":   dr.End,
108	}
109	if dr.Start.IsZero() && dr.startString != nil {
110		rv["start"] = dr.startString
111	}
112	if dr.End.IsZero() && dr.endString != nil {
113		rv["end"] = dr.endString
114	}
115	return json.Marshal(rv)
116}
117
118// A FacetRequest describes a facet or aggregation
119// of the result document set you would like to be
120// built.
121type FacetRequest struct {
122	Size           int              `json:"size"`
123	Field          string           `json:"field"`
124	NumericRanges  []*numericRange  `json:"numeric_ranges,omitempty"`
125	DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
126}
127
128func (fr *FacetRequest) Validate() error {
129	nrCount := len(fr.NumericRanges)
130	drCount := len(fr.DateTimeRanges)
131	if nrCount > 0 && drCount > 0 {
132		return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
133	}
134
135	if nrCount > 0 {
136		nrNames := map[string]interface{}{}
137		for _, nr := range fr.NumericRanges {
138			if _, ok := nrNames[nr.Name]; ok {
139				return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
140			}
141			nrNames[nr.Name] = struct{}{}
142			if nr.Min == nil && nr.Max == nil {
143				return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
144			}
145		}
146
147	} else {
148		dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
149		if err != nil {
150			return err
151		}
152		drNames := map[string]interface{}{}
153		for _, dr := range fr.DateTimeRanges {
154			if _, ok := drNames[dr.Name]; ok {
155				return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
156			}
157			drNames[dr.Name] = struct{}{}
158			start, end := dr.ParseDates(dateTimeParser)
159			if start.IsZero() && end.IsZero() {
160				return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
161			}
162		}
163	}
164	return nil
165}
166
167// NewFacetRequest creates a facet on the specified
168// field that limits the number of entries to the
169// specified size.
170func NewFacetRequest(field string, size int) *FacetRequest {
171	return &FacetRequest{
172		Field: field,
173		Size:  size,
174	}
175}
176
177// AddDateTimeRange adds a bucket to a field
178// containing date values.  Documents with a
179// date value falling into this range are tabulated
180// as part of this bucket/range.
181func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
182	if fr.DateTimeRanges == nil {
183		fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
184	}
185	fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
186}
187
188// AddDateTimeRangeString adds a bucket to a field
189// containing date values.
190func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
191	if fr.DateTimeRanges == nil {
192		fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
193	}
194	fr.DateTimeRanges = append(fr.DateTimeRanges,
195		&dateTimeRange{Name: name, startString: start, endString: end})
196}
197
198// AddNumericRange adds a bucket to a field
199// containing numeric values.  Documents with a
200// numeric value falling into this range are
201// tabulated as part of this bucket/range.
202func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
203	if fr.NumericRanges == nil {
204		fr.NumericRanges = make([]*numericRange, 0, 1)
205	}
206	fr.NumericRanges = append(fr.NumericRanges, &numericRange{Name: name, Min: min, Max: max})
207}
208
209// FacetsRequest groups together all the
210// FacetRequest objects for a single query.
211type FacetsRequest map[string]*FacetRequest
212
213func (fr FacetsRequest) Validate() error {
214	for _, v := range fr {
215		err := v.Validate()
216		if err != nil {
217			return err
218		}
219	}
220	return nil
221}
222
223// HighlightRequest describes how field matches
224// should be highlighted.
225type HighlightRequest struct {
226	Style  *string  `json:"style"`
227	Fields []string `json:"fields"`
228}
229
230// NewHighlight creates a default
231// HighlightRequest.
232func NewHighlight() *HighlightRequest {
233	return &HighlightRequest{}
234}
235
236// NewHighlightWithStyle creates a HighlightRequest
237// with an alternate style.
238func NewHighlightWithStyle(style string) *HighlightRequest {
239	return &HighlightRequest{
240		Style: &style,
241	}
242}
243
244func (h *HighlightRequest) AddField(field string) {
245	if h.Fields == nil {
246		h.Fields = make([]string, 0, 1)
247	}
248	h.Fields = append(h.Fields, field)
249}
250
251// A SearchRequest describes all the parameters
252// needed to search the index.
253// Query is required.
254// Size/From describe how much and which part of the
255// result set to return.
256// Highlight describes optional search result
257// highlighting.
258// Fields describes a list of field values which
259// should be retrieved for result documents, provided they
260// were stored while indexing.
261// Facets describe the set of facets to be computed.
262// Explain triggers inclusion of additional search
263// result score explanations.
264// Sort describes the desired order for the results to be returned.
265// Score controls the kind of scoring performed
266// SearchAfter supports deep paging by providing a minimum sort key
267// SearchBefore supports deep paging by providing a maximum sort key
268// sortFunc specifies the sort implementation to use for sorting results.
269//
270// A special field named "*" can be used to return all fields.
271type SearchRequest struct {
272	Query            query.Query       `json:"query"`
273	Size             int               `json:"size"`
274	From             int               `json:"from"`
275	Highlight        *HighlightRequest `json:"highlight"`
276	Fields           []string          `json:"fields"`
277	Facets           FacetsRequest     `json:"facets"`
278	Explain          bool              `json:"explain"`
279	Sort             search.SortOrder  `json:"sort"`
280	IncludeLocations bool              `json:"includeLocations"`
281	Score            string            `json:"score,omitempty"`
282	SearchAfter      []string          `json:"search_after"`
283	SearchBefore     []string          `json:"search_before"`
284
285	sortFunc func(sort.Interface)
286}
287
288func (r *SearchRequest) Validate() error {
289	if srq, ok := r.Query.(query.ValidatableQuery); ok {
290		err := srq.Validate()
291		if err != nil {
292			return err
293		}
294	}
295
296	if r.SearchAfter != nil && r.SearchBefore != nil {
297		return fmt.Errorf("cannot use search after and search before together")
298	}
299
300	if r.SearchAfter != nil {
301		if r.From != 0 {
302			return fmt.Errorf("cannot use search after with from !=0")
303		}
304		if len(r.SearchAfter) != len(r.Sort) {
305			return fmt.Errorf("search after must have same size as sort order")
306		}
307	}
308	if r.SearchBefore != nil {
309		if r.From != 0 {
310			return fmt.Errorf("cannot use search before with from !=0")
311		}
312		if len(r.SearchBefore) != len(r.Sort) {
313			return fmt.Errorf("search before must have same size as sort order")
314		}
315	}
316
317	return r.Facets.Validate()
318}
319
320// AddFacet adds a FacetRequest to this SearchRequest
321func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
322	if r.Facets == nil {
323		r.Facets = make(FacetsRequest, 1)
324	}
325	r.Facets[facetName] = f
326}
327
328// SortBy changes the request to use the requested sort order
329// this form uses the simplified syntax with an array of strings
330// each string can either be a field name
331// or the magic value _id and _score which refer to the doc id and search score
332// any of these values can optionally be prefixed with - to reverse the order
333func (r *SearchRequest) SortBy(order []string) {
334	so := search.ParseSortOrderStrings(order)
335	r.Sort = so
336}
337
338// SortByCustom changes the request to use the requested sort order
339func (r *SearchRequest) SortByCustom(order search.SortOrder) {
340	r.Sort = order
341}
342
343// SetSearchAfter sets the request to skip over hits with a sort
344// value less than the provided sort after key
345func (r *SearchRequest) SetSearchAfter(after []string) {
346	r.SearchAfter = after
347}
348
349// SetSearchBefore sets the request to skip over hits with a sort
350// value greater than the provided sort before key
351func (r *SearchRequest) SetSearchBefore(before []string) {
352	r.SearchBefore = before
353}
354
355// UnmarshalJSON deserializes a JSON representation of
356// a SearchRequest
357func (r *SearchRequest) UnmarshalJSON(input []byte) error {
358	var temp struct {
359		Q                json.RawMessage   `json:"query"`
360		Size             *int              `json:"size"`
361		From             int               `json:"from"`
362		Highlight        *HighlightRequest `json:"highlight"`
363		Fields           []string          `json:"fields"`
364		Facets           FacetsRequest     `json:"facets"`
365		Explain          bool              `json:"explain"`
366		Sort             []json.RawMessage `json:"sort"`
367		IncludeLocations bool              `json:"includeLocations"`
368		Score            string            `json:"score"`
369		SearchAfter      []string          `json:"search_after"`
370		SearchBefore     []string          `json:"search_before"`
371	}
372
373	err := json.Unmarshal(input, &temp)
374	if err != nil {
375		return err
376	}
377
378	if temp.Size == nil {
379		r.Size = 10
380	} else {
381		r.Size = *temp.Size
382	}
383	if temp.Sort == nil {
384		r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
385	} else {
386		r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
387		if err != nil {
388			return err
389		}
390	}
391	r.From = temp.From
392	r.Explain = temp.Explain
393	r.Highlight = temp.Highlight
394	r.Fields = temp.Fields
395	r.Facets = temp.Facets
396	r.IncludeLocations = temp.IncludeLocations
397	r.Score = temp.Score
398	r.SearchAfter = temp.SearchAfter
399	r.SearchBefore = temp.SearchBefore
400	r.Query, err = query.ParseQuery(temp.Q)
401	if err != nil {
402		return err
403	}
404
405	if r.Size < 0 {
406		r.Size = 10
407	}
408	if r.From < 0 {
409		r.From = 0
410	}
411
412	return nil
413
414}
415
416// NewSearchRequest creates a new SearchRequest
417// for the Query, using default values for all
418// other search parameters.
419func NewSearchRequest(q query.Query) *SearchRequest {
420	return NewSearchRequestOptions(q, 10, 0, false)
421}
422
423// NewSearchRequestOptions creates a new SearchRequest
424// for the Query, with the requested size, from
425// and explanation search parameters.
426// By default results are ordered by score, descending.
427func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
428	return &SearchRequest{
429		Query:   q,
430		Size:    size,
431		From:    from,
432		Explain: explain,
433		Sort:    search.SortOrder{&search.SortScore{Desc: true}},
434	}
435}
436
437// IndexErrMap tracks errors with the name of the index where it occurred
438type IndexErrMap map[string]error
439
440// MarshalJSON seralizes the error into a string for JSON consumption
441func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
442	tmp := make(map[string]string, len(iem))
443	for k, v := range iem {
444		tmp[k] = v.Error()
445	}
446	return json.Marshal(tmp)
447}
448
449func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
450	var tmp map[string]string
451	err := json.Unmarshal(data, &tmp)
452	if err != nil {
453		return err
454	}
455	for k, v := range tmp {
456		iem[k] = fmt.Errorf("%s", v)
457	}
458	return nil
459}
460
461// SearchStatus is a secion in the SearchResult reporting how many
462// underlying indexes were queried, how many were successful/failed
463// and a map of any errors that were encountered
464type SearchStatus struct {
465	Total      int         `json:"total"`
466	Failed     int         `json:"failed"`
467	Successful int         `json:"successful"`
468	Errors     IndexErrMap `json:"errors,omitempty"`
469}
470
471// Merge will merge together multiple SearchStatuses during a MultiSearch
472func (ss *SearchStatus) Merge(other *SearchStatus) {
473	ss.Total += other.Total
474	ss.Failed += other.Failed
475	ss.Successful += other.Successful
476	if len(other.Errors) > 0 {
477		if ss.Errors == nil {
478			ss.Errors = make(map[string]error)
479		}
480		for otherIndex, otherError := range other.Errors {
481			ss.Errors[otherIndex] = otherError
482		}
483	}
484}
485
486// A SearchResult describes the results of executing
487// a SearchRequest.
488type SearchResult struct {
489	Status   *SearchStatus                  `json:"status"`
490	Request  *SearchRequest                 `json:"request"`
491	Hits     search.DocumentMatchCollection `json:"hits"`
492	Total    uint64                         `json:"total_hits"`
493	MaxScore float64                        `json:"max_score"`
494	Took     time.Duration                  `json:"took"`
495	Facets   search.FacetResults            `json:"facets"`
496}
497
498func (sr *SearchResult) Size() int {
499	sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
500		reflectStaticSizeSearchStatus
501
502	for _, entry := range sr.Hits {
503		if entry != nil {
504			sizeInBytes += entry.Size()
505		}
506	}
507
508	for k, v := range sr.Facets {
509		sizeInBytes += size.SizeOfString + len(k) +
510			v.Size()
511	}
512
513	return sizeInBytes
514}
515
516func (sr *SearchResult) String() string {
517	rv := ""
518	if sr.Total > 0 {
519		if sr.Request.Size > 0 {
520			rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
521			for i, hit := range sr.Hits {
522				rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
523				for fragmentField, fragments := range hit.Fragments {
524					rv += fmt.Sprintf("\t%s\n", fragmentField)
525					for _, fragment := range fragments {
526						rv += fmt.Sprintf("\t\t%s\n", fragment)
527					}
528				}
529				for otherFieldName, otherFieldValue := range hit.Fields {
530					if _, ok := hit.Fragments[otherFieldName]; !ok {
531						rv += fmt.Sprintf("\t%s\n", otherFieldName)
532						rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
533					}
534				}
535			}
536		} else {
537			rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
538		}
539	} else {
540		rv = "No matches"
541	}
542	if len(sr.Facets) > 0 {
543		rv += fmt.Sprintf("Facets:\n")
544		for fn, f := range sr.Facets {
545			rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
546			for _, t := range f.Terms {
547				rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
548			}
549			if f.Other != 0 {
550				rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
551			}
552		}
553	}
554	return rv
555}
556
557// Merge will merge together multiple SearchResults during a MultiSearch
558func (sr *SearchResult) Merge(other *SearchResult) {
559	sr.Status.Merge(other.Status)
560	sr.Hits = append(sr.Hits, other.Hits...)
561	sr.Total += other.Total
562	if other.MaxScore > sr.MaxScore {
563		sr.MaxScore = other.MaxScore
564	}
565	if sr.Facets == nil && len(other.Facets) != 0 {
566		sr.Facets = other.Facets
567		return
568	}
569
570	sr.Facets.Merge(other.Facets)
571}
572
573// MemoryNeededForSearchResult is an exported helper function to determine the RAM
574// needed to accommodate the results for a given search request.
575func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
576	if req == nil {
577		return 0
578	}
579
580	numDocMatches := req.Size + req.From
581	if req.Size+req.From > collector.PreAllocSizeSkipCap {
582		numDocMatches = collector.PreAllocSizeSkipCap
583	}
584
585	estimate := 0
586
587	// overhead from the SearchResult structure
588	var sr SearchResult
589	estimate += sr.Size()
590
591	var dm search.DocumentMatch
592	sizeOfDocumentMatch := dm.Size()
593
594	// overhead from results
595	estimate += numDocMatches * sizeOfDocumentMatch
596
597	// overhead from facet results
598	if req.Facets != nil {
599		var fr search.FacetResult
600		estimate += len(req.Facets) * fr.Size()
601	}
602
603	// highlighting, store
604	var d document.Document
605	if len(req.Fields) > 0 || req.Highlight != nil {
606		for i := 0; i < (req.Size + req.From); i++ {
607			estimate += (req.Size + req.From) * d.Size()
608		}
609	}
610
611	return uint64(estimate)
612}
613
614// SetSortFunc sets the sort implementation to use when sorting hits.
615//
616// SearchRequests can specify a custom sort implementation to meet
617// their needs. For instance, by specifying a parallel sort
618// that uses all available cores.
619func (r *SearchRequest) SetSortFunc(s func(sort.Interface)) {
620	r.sortFunc = s
621}
622
623// SortFunc returns the sort implementation to use when sorting hits.
624// Defaults to sort.Sort.
625func (r *SearchRequest) SortFunc() func(data sort.Interface) {
626	if r.sortFunc != nil {
627		return r.sortFunc
628	}
629
630	return sort.Sort
631}
632