1// Copyright 2012-present Oliver Eilhard. All rights reserved.
2// Use of this source code is governed by a MIT-license.
3// See http://olivere.mit-license.org/license.txt for details.
4
5package elastic
6
7import "errors"
8
9// MoreLikeThis query (MLT Query) finds documents that are "like" a given
10// set of documents. In order to do so, MLT selects a set of representative
11// terms of these input documents, forms a query using these terms, executes
12// the query and returns the results. The user controls the input documents,
13// how the terms should be selected and how the query is formed.
14//
15// For more details, see
16// https://www.elastic.co/guide/en/elasticsearch/reference/6.8/query-dsl-mlt-query.html
17type MoreLikeThisQuery struct {
18	fields                 []string
19	docs                   []*MoreLikeThisQueryItem
20	unlikeDocs             []*MoreLikeThisQueryItem
21	include                *bool
22	minimumShouldMatch     string
23	minTermFreq            *int
24	maxQueryTerms          *int
25	stopWords              []string
26	minDocFreq             *int
27	maxDocFreq             *int
28	minWordLength          *int
29	maxWordLength          *int
30	boostTerms             *float64
31	boost                  *float64
32	analyzer               string
33	failOnUnsupportedField *bool
34	queryName              string
35}
36
37// NewMoreLikeThisQuery creates and initializes a new MoreLikeThisQuery.
38func NewMoreLikeThisQuery() *MoreLikeThisQuery {
39	return &MoreLikeThisQuery{
40		fields:     make([]string, 0),
41		stopWords:  make([]string, 0),
42		docs:       make([]*MoreLikeThisQueryItem, 0),
43		unlikeDocs: make([]*MoreLikeThisQueryItem, 0),
44	}
45}
46
47// Field adds one or more field names to the query.
48func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery {
49	q.fields = append(q.fields, fields...)
50	return q
51}
52
53// StopWord sets the stopwords. Any word in this set is considered
54// "uninteresting" and ignored. Even if your Analyzer allows stopwords,
55// you might want to tell the MoreLikeThis code to ignore them, as for
56// the purposes of document similarity it seems reasonable to assume that
57// "a stop word is never interesting".
58func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery {
59	q.stopWords = append(q.stopWords, stopWords...)
60	return q
61}
62
63// LikeText sets the text to use in order to find documents that are "like" this.
64func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery {
65	for _, s := range likeTexts {
66		item := NewMoreLikeThisQueryItem().LikeText(s)
67		q.docs = append(q.docs, item)
68	}
69	return q
70}
71
72// LikeItems sets the documents to use in order to find documents that are "like" this.
73func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
74	q.docs = append(q.docs, docs...)
75	return q
76}
77
78// IgnoreLikeText sets the text from which the terms should not be selected from.
79func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery {
80	for _, s := range ignoreLikeText {
81		item := NewMoreLikeThisQueryItem().LikeText(s)
82		q.unlikeDocs = append(q.unlikeDocs, item)
83	}
84	return q
85}
86
87// IgnoreLikeItems sets the documents from which the terms should not be selected from.
88func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery {
89	q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...)
90	return q
91}
92
93// Ids sets the document ids to use in order to find documents that are "like" this.
94func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery {
95	for _, id := range ids {
96		item := NewMoreLikeThisQueryItem().Id(id)
97		q.docs = append(q.docs, item)
98	}
99	return q
100}
101
102// Include specifies whether the input documents should also be included
103// in the results returned. Defaults to false.
104func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery {
105	q.include = &include
106	return q
107}
108
109// MinimumShouldMatch sets the number of terms that must match the generated
110// query expressed in the common syntax for minimum should match.
111// The default value is "30%".
112//
113// This used to be "PercentTermsToMatch" in Elasticsearch versions before 2.0.
114func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery {
115	q.minimumShouldMatch = minimumShouldMatch
116	return q
117}
118
119// MinTermFreq is the frequency below which terms will be ignored in the
120// source doc. The default frequency is 2.
121func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery {
122	q.minTermFreq = &minTermFreq
123	return q
124}
125
126// MaxQueryTerms sets the maximum number of query terms that will be included
127// in any generated query. It defaults to 25.
128func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery {
129	q.maxQueryTerms = &maxQueryTerms
130	return q
131}
132
133// MinDocFreq sets the frequency at which words will be ignored which do
134// not occur in at least this many docs. The default is 5.
135func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery {
136	q.minDocFreq = &minDocFreq
137	return q
138}
139
140// MaxDocFreq sets the maximum frequency for which words may still appear.
141// Words that appear in more than this many docs will be ignored.
142// It defaults to unbounded.
143func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery {
144	q.maxDocFreq = &maxDocFreq
145	return q
146}
147
148// MinWordLength sets the minimum word length below which words will be
149// ignored. It defaults to 0.
150func (q *MoreLikeThisQuery) MinWordLength(minWordLength int) *MoreLikeThisQuery {
151	q.minWordLength = &minWordLength
152	return q
153}
154
155// MaxWordLength sets the maximum word length above which words will be ignored.
156// Defaults to unbounded (0).
157func (q *MoreLikeThisQuery) MaxWordLength(maxWordLength int) *MoreLikeThisQuery {
158	q.maxWordLength = &maxWordLength
159	return q
160}
161
162// BoostTerms sets the boost factor to use when boosting terms.
163// It defaults to 1.
164func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery {
165	q.boostTerms = &boostTerms
166	return q
167}
168
169// Analyzer specifies the analyzer that will be use to analyze the text.
170// Defaults to the analyzer associated with the field.
171func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery {
172	q.analyzer = analyzer
173	return q
174}
175
176// Boost sets the boost for this query.
177func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery {
178	q.boost = &boost
179	return q
180}
181
182// FailOnUnsupportedField indicates whether to fail or return no result
183// when this query is run against a field which is not supported such as
184// a binary/numeric field.
185func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery {
186	q.failOnUnsupportedField = &fail
187	return q
188}
189
190// QueryName sets the query name for the filter that can be used when
191// searching for matched_filters per hit.
192func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery {
193	q.queryName = queryName
194	return q
195}
196
197// Source creates the source for the MLT query.
198// It may return an error if the caller forgot to specify any documents to
199// be "liked" in the MoreLikeThisQuery.
200func (q *MoreLikeThisQuery) Source() (interface{}, error) {
201	// {
202	//   "match_all" : { ... }
203	// }
204	if len(q.docs) == 0 {
205		return nil, errors.New(`more_like_this requires some documents to be "liked"`)
206	}
207
208	source := make(map[string]interface{})
209
210	params := make(map[string]interface{})
211	source["more_like_this"] = params
212
213	if len(q.fields) > 0 {
214		params["fields"] = q.fields
215	}
216
217	var likes []interface{}
218	for _, doc := range q.docs {
219		src, err := doc.Source()
220		if err != nil {
221			return nil, err
222		}
223		likes = append(likes, src)
224	}
225	params["like"] = likes
226
227	if len(q.unlikeDocs) > 0 {
228		var dontLikes []interface{}
229		for _, doc := range q.unlikeDocs {
230			src, err := doc.Source()
231			if err != nil {
232				return nil, err
233			}
234			dontLikes = append(dontLikes, src)
235		}
236		params["unlike"] = dontLikes
237	}
238
239	if q.minimumShouldMatch != "" {
240		params["minimum_should_match"] = q.minimumShouldMatch
241	}
242	if q.minTermFreq != nil {
243		params["min_term_freq"] = *q.minTermFreq
244	}
245	if q.maxQueryTerms != nil {
246		params["max_query_terms"] = *q.maxQueryTerms
247	}
248	if len(q.stopWords) > 0 {
249		params["stop_words"] = q.stopWords
250	}
251	if q.minDocFreq != nil {
252		params["min_doc_freq"] = *q.minDocFreq
253	}
254	if q.maxDocFreq != nil {
255		params["max_doc_freq"] = *q.maxDocFreq
256	}
257	if q.minWordLength != nil {
258		params["min_word_length"] = *q.minWordLength
259	}
260	if q.maxWordLength != nil {
261		params["max_word_length"] = *q.maxWordLength
262	}
263	if q.boostTerms != nil {
264		params["boost_terms"] = *q.boostTerms
265	}
266	if q.boost != nil {
267		params["boost"] = *q.boost
268	}
269	if q.analyzer != "" {
270		params["analyzer"] = q.analyzer
271	}
272	if q.failOnUnsupportedField != nil {
273		params["fail_on_unsupported_field"] = *q.failOnUnsupportedField
274	}
275	if q.queryName != "" {
276		params["_name"] = q.queryName
277	}
278	if q.include != nil {
279		params["include"] = *q.include
280	}
281
282	return source, nil
283}
284
285// -- MoreLikeThisQueryItem --
286
287// MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery
288// to be "liked" or "unliked".
289type MoreLikeThisQueryItem struct {
290	likeText string
291
292	index       string
293	typ         string
294	id          string
295	doc         interface{}
296	fields      []string
297	routing     string
298	fsc         *FetchSourceContext
299	version     int64
300	versionType string
301}
302
303// NewMoreLikeThisQueryItem creates and initializes a MoreLikeThisQueryItem.
304func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem {
305	return &MoreLikeThisQueryItem{
306		version: -1,
307	}
308}
309
310// LikeText represents a text to be "liked".
311func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem {
312	item.likeText = likeText
313	return item
314}
315
316// Index represents the index of the item.
317func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem {
318	item.index = index
319	return item
320}
321
322// Type represents the document type of the item.
323func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem {
324	item.typ = typ
325	return item
326}
327
328// Id represents the document id of the item.
329func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem {
330	item.id = id
331	return item
332}
333
334// Doc represents a raw document template for the item.
335func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem {
336	item.doc = doc
337	return item
338}
339
340// Fields represents the list of fields of the item.
341func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem {
342	item.fields = append(item.fields, fields...)
343	return item
344}
345
346// Routing sets the routing associated with the item.
347func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem {
348	item.routing = routing
349	return item
350}
351
352// FetchSourceContext represents the fetch source of the item which controls
353// if and how _source should be returned.
354func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem {
355	item.fsc = fsc
356	return item
357}
358
359// Version specifies the version of the item.
360func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem {
361	item.version = version
362	return item
363}
364
365// VersionType represents the version type of the item.
366func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem {
367	item.versionType = versionType
368	return item
369}
370
371// Source returns the JSON-serializable fragment of the entity.
372func (item *MoreLikeThisQueryItem) Source() (interface{}, error) {
373	if item.likeText != "" {
374		return item.likeText, nil
375	}
376
377	source := make(map[string]interface{})
378
379	if item.index != "" {
380		source["_index"] = item.index
381	}
382	if item.typ != "" {
383		source["_type"] = item.typ
384	}
385	if item.id != "" {
386		source["_id"] = item.id
387	}
388	if item.doc != nil {
389		source["doc"] = item.doc
390	}
391	if len(item.fields) > 0 {
392		source["fields"] = item.fields
393	}
394	if item.routing != "" {
395		source["_routing"] = item.routing
396	}
397	if item.fsc != nil {
398		src, err := item.fsc.Source()
399		if err != nil {
400			return nil, err
401		}
402		source["_source"] = src
403	}
404	if item.version >= 0 {
405		source["_version"] = item.version
406	}
407	if item.versionType != "" {
408		source["_version_type"] = item.versionType
409	}
410
411	return source, nil
412}
413