1// Copyright 2012-present Oliver Eilhard. All rights reserved. 2// Use of this source code is governed by a MIT-license. 3// See http://olivere.mit-license.org/license.txt for details. 4 5package elastic 6 7import "errors" 8 9// MoreLikeThis query (MLT Query) finds documents that are "like" a given 10// set of documents. In order to do so, MLT selects a set of representative 11// terms of these input documents, forms a query using these terms, executes 12// the query and returns the results. The user controls the input documents, 13// how the terms should be selected and how the query is formed. 14// 15// For more details, see 16// https://www.elastic.co/guide/en/elasticsearch/reference/6.8/query-dsl-mlt-query.html 17type MoreLikeThisQuery struct { 18 fields []string 19 docs []*MoreLikeThisQueryItem 20 unlikeDocs []*MoreLikeThisQueryItem 21 include *bool 22 minimumShouldMatch string 23 minTermFreq *int 24 maxQueryTerms *int 25 stopWords []string 26 minDocFreq *int 27 maxDocFreq *int 28 minWordLength *int 29 maxWordLength *int 30 boostTerms *float64 31 boost *float64 32 analyzer string 33 failOnUnsupportedField *bool 34 queryName string 35} 36 37// NewMoreLikeThisQuery creates and initializes a new MoreLikeThisQuery. 38func NewMoreLikeThisQuery() *MoreLikeThisQuery { 39 return &MoreLikeThisQuery{ 40 fields: make([]string, 0), 41 stopWords: make([]string, 0), 42 docs: make([]*MoreLikeThisQueryItem, 0), 43 unlikeDocs: make([]*MoreLikeThisQueryItem, 0), 44 } 45} 46 47// Field adds one or more field names to the query. 48func (q *MoreLikeThisQuery) Field(fields ...string) *MoreLikeThisQuery { 49 q.fields = append(q.fields, fields...) 50 return q 51} 52 53// StopWord sets the stopwords. Any word in this set is considered 54// "uninteresting" and ignored. Even if your Analyzer allows stopwords, 55// you might want to tell the MoreLikeThis code to ignore them, as for 56// the purposes of document similarity it seems reasonable to assume that 57// "a stop word is never interesting". 58func (q *MoreLikeThisQuery) StopWord(stopWords ...string) *MoreLikeThisQuery { 59 q.stopWords = append(q.stopWords, stopWords...) 60 return q 61} 62 63// LikeText sets the text to use in order to find documents that are "like" this. 64func (q *MoreLikeThisQuery) LikeText(likeTexts ...string) *MoreLikeThisQuery { 65 for _, s := range likeTexts { 66 item := NewMoreLikeThisQueryItem().LikeText(s) 67 q.docs = append(q.docs, item) 68 } 69 return q 70} 71 72// LikeItems sets the documents to use in order to find documents that are "like" this. 73func (q *MoreLikeThisQuery) LikeItems(docs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery { 74 q.docs = append(q.docs, docs...) 75 return q 76} 77 78// IgnoreLikeText sets the text from which the terms should not be selected from. 79func (q *MoreLikeThisQuery) IgnoreLikeText(ignoreLikeText ...string) *MoreLikeThisQuery { 80 for _, s := range ignoreLikeText { 81 item := NewMoreLikeThisQueryItem().LikeText(s) 82 q.unlikeDocs = append(q.unlikeDocs, item) 83 } 84 return q 85} 86 87// IgnoreLikeItems sets the documents from which the terms should not be selected from. 88func (q *MoreLikeThisQuery) IgnoreLikeItems(ignoreDocs ...*MoreLikeThisQueryItem) *MoreLikeThisQuery { 89 q.unlikeDocs = append(q.unlikeDocs, ignoreDocs...) 90 return q 91} 92 93// Ids sets the document ids to use in order to find documents that are "like" this. 94func (q *MoreLikeThisQuery) Ids(ids ...string) *MoreLikeThisQuery { 95 for _, id := range ids { 96 item := NewMoreLikeThisQueryItem().Id(id) 97 q.docs = append(q.docs, item) 98 } 99 return q 100} 101 102// Include specifies whether the input documents should also be included 103// in the results returned. Defaults to false. 104func (q *MoreLikeThisQuery) Include(include bool) *MoreLikeThisQuery { 105 q.include = &include 106 return q 107} 108 109// MinimumShouldMatch sets the number of terms that must match the generated 110// query expressed in the common syntax for minimum should match. 111// The default value is "30%". 112// 113// This used to be "PercentTermsToMatch" in Elasticsearch versions before 2.0. 114func (q *MoreLikeThisQuery) MinimumShouldMatch(minimumShouldMatch string) *MoreLikeThisQuery { 115 q.minimumShouldMatch = minimumShouldMatch 116 return q 117} 118 119// MinTermFreq is the frequency below which terms will be ignored in the 120// source doc. The default frequency is 2. 121func (q *MoreLikeThisQuery) MinTermFreq(minTermFreq int) *MoreLikeThisQuery { 122 q.minTermFreq = &minTermFreq 123 return q 124} 125 126// MaxQueryTerms sets the maximum number of query terms that will be included 127// in any generated query. It defaults to 25. 128func (q *MoreLikeThisQuery) MaxQueryTerms(maxQueryTerms int) *MoreLikeThisQuery { 129 q.maxQueryTerms = &maxQueryTerms 130 return q 131} 132 133// MinDocFreq sets the frequency at which words will be ignored which do 134// not occur in at least this many docs. The default is 5. 135func (q *MoreLikeThisQuery) MinDocFreq(minDocFreq int) *MoreLikeThisQuery { 136 q.minDocFreq = &minDocFreq 137 return q 138} 139 140// MaxDocFreq sets the maximum frequency for which words may still appear. 141// Words that appear in more than this many docs will be ignored. 142// It defaults to unbounded. 143func (q *MoreLikeThisQuery) MaxDocFreq(maxDocFreq int) *MoreLikeThisQuery { 144 q.maxDocFreq = &maxDocFreq 145 return q 146} 147 148// MinWordLength sets the minimum word length below which words will be 149// ignored. It defaults to 0. 150func (q *MoreLikeThisQuery) MinWordLength(minWordLength int) *MoreLikeThisQuery { 151 q.minWordLength = &minWordLength 152 return q 153} 154 155// MaxWordLength sets the maximum word length above which words will be ignored. 156// Defaults to unbounded (0). 157func (q *MoreLikeThisQuery) MaxWordLength(maxWordLength int) *MoreLikeThisQuery { 158 q.maxWordLength = &maxWordLength 159 return q 160} 161 162// BoostTerms sets the boost factor to use when boosting terms. 163// It defaults to 1. 164func (q *MoreLikeThisQuery) BoostTerms(boostTerms float64) *MoreLikeThisQuery { 165 q.boostTerms = &boostTerms 166 return q 167} 168 169// Analyzer specifies the analyzer that will be use to analyze the text. 170// Defaults to the analyzer associated with the field. 171func (q *MoreLikeThisQuery) Analyzer(analyzer string) *MoreLikeThisQuery { 172 q.analyzer = analyzer 173 return q 174} 175 176// Boost sets the boost for this query. 177func (q *MoreLikeThisQuery) Boost(boost float64) *MoreLikeThisQuery { 178 q.boost = &boost 179 return q 180} 181 182// FailOnUnsupportedField indicates whether to fail or return no result 183// when this query is run against a field which is not supported such as 184// a binary/numeric field. 185func (q *MoreLikeThisQuery) FailOnUnsupportedField(fail bool) *MoreLikeThisQuery { 186 q.failOnUnsupportedField = &fail 187 return q 188} 189 190// QueryName sets the query name for the filter that can be used when 191// searching for matched_filters per hit. 192func (q *MoreLikeThisQuery) QueryName(queryName string) *MoreLikeThisQuery { 193 q.queryName = queryName 194 return q 195} 196 197// Source creates the source for the MLT query. 198// It may return an error if the caller forgot to specify any documents to 199// be "liked" in the MoreLikeThisQuery. 200func (q *MoreLikeThisQuery) Source() (interface{}, error) { 201 // { 202 // "match_all" : { ... } 203 // } 204 if len(q.docs) == 0 { 205 return nil, errors.New(`more_like_this requires some documents to be "liked"`) 206 } 207 208 source := make(map[string]interface{}) 209 210 params := make(map[string]interface{}) 211 source["more_like_this"] = params 212 213 if len(q.fields) > 0 { 214 params["fields"] = q.fields 215 } 216 217 var likes []interface{} 218 for _, doc := range q.docs { 219 src, err := doc.Source() 220 if err != nil { 221 return nil, err 222 } 223 likes = append(likes, src) 224 } 225 params["like"] = likes 226 227 if len(q.unlikeDocs) > 0 { 228 var dontLikes []interface{} 229 for _, doc := range q.unlikeDocs { 230 src, err := doc.Source() 231 if err != nil { 232 return nil, err 233 } 234 dontLikes = append(dontLikes, src) 235 } 236 params["unlike"] = dontLikes 237 } 238 239 if q.minimumShouldMatch != "" { 240 params["minimum_should_match"] = q.minimumShouldMatch 241 } 242 if q.minTermFreq != nil { 243 params["min_term_freq"] = *q.minTermFreq 244 } 245 if q.maxQueryTerms != nil { 246 params["max_query_terms"] = *q.maxQueryTerms 247 } 248 if len(q.stopWords) > 0 { 249 params["stop_words"] = q.stopWords 250 } 251 if q.minDocFreq != nil { 252 params["min_doc_freq"] = *q.minDocFreq 253 } 254 if q.maxDocFreq != nil { 255 params["max_doc_freq"] = *q.maxDocFreq 256 } 257 if q.minWordLength != nil { 258 params["min_word_length"] = *q.minWordLength 259 } 260 if q.maxWordLength != nil { 261 params["max_word_length"] = *q.maxWordLength 262 } 263 if q.boostTerms != nil { 264 params["boost_terms"] = *q.boostTerms 265 } 266 if q.boost != nil { 267 params["boost"] = *q.boost 268 } 269 if q.analyzer != "" { 270 params["analyzer"] = q.analyzer 271 } 272 if q.failOnUnsupportedField != nil { 273 params["fail_on_unsupported_field"] = *q.failOnUnsupportedField 274 } 275 if q.queryName != "" { 276 params["_name"] = q.queryName 277 } 278 if q.include != nil { 279 params["include"] = *q.include 280 } 281 282 return source, nil 283} 284 285// -- MoreLikeThisQueryItem -- 286 287// MoreLikeThisQueryItem represents a single item of a MoreLikeThisQuery 288// to be "liked" or "unliked". 289type MoreLikeThisQueryItem struct { 290 likeText string 291 292 index string 293 typ string 294 id string 295 doc interface{} 296 fields []string 297 routing string 298 fsc *FetchSourceContext 299 version int64 300 versionType string 301} 302 303// NewMoreLikeThisQueryItem creates and initializes a MoreLikeThisQueryItem. 304func NewMoreLikeThisQueryItem() *MoreLikeThisQueryItem { 305 return &MoreLikeThisQueryItem{ 306 version: -1, 307 } 308} 309 310// LikeText represents a text to be "liked". 311func (item *MoreLikeThisQueryItem) LikeText(likeText string) *MoreLikeThisQueryItem { 312 item.likeText = likeText 313 return item 314} 315 316// Index represents the index of the item. 317func (item *MoreLikeThisQueryItem) Index(index string) *MoreLikeThisQueryItem { 318 item.index = index 319 return item 320} 321 322// Type represents the document type of the item. 323func (item *MoreLikeThisQueryItem) Type(typ string) *MoreLikeThisQueryItem { 324 item.typ = typ 325 return item 326} 327 328// Id represents the document id of the item. 329func (item *MoreLikeThisQueryItem) Id(id string) *MoreLikeThisQueryItem { 330 item.id = id 331 return item 332} 333 334// Doc represents a raw document template for the item. 335func (item *MoreLikeThisQueryItem) Doc(doc interface{}) *MoreLikeThisQueryItem { 336 item.doc = doc 337 return item 338} 339 340// Fields represents the list of fields of the item. 341func (item *MoreLikeThisQueryItem) Fields(fields ...string) *MoreLikeThisQueryItem { 342 item.fields = append(item.fields, fields...) 343 return item 344} 345 346// Routing sets the routing associated with the item. 347func (item *MoreLikeThisQueryItem) Routing(routing string) *MoreLikeThisQueryItem { 348 item.routing = routing 349 return item 350} 351 352// FetchSourceContext represents the fetch source of the item which controls 353// if and how _source should be returned. 354func (item *MoreLikeThisQueryItem) FetchSourceContext(fsc *FetchSourceContext) *MoreLikeThisQueryItem { 355 item.fsc = fsc 356 return item 357} 358 359// Version specifies the version of the item. 360func (item *MoreLikeThisQueryItem) Version(version int64) *MoreLikeThisQueryItem { 361 item.version = version 362 return item 363} 364 365// VersionType represents the version type of the item. 366func (item *MoreLikeThisQueryItem) VersionType(versionType string) *MoreLikeThisQueryItem { 367 item.versionType = versionType 368 return item 369} 370 371// Source returns the JSON-serializable fragment of the entity. 372func (item *MoreLikeThisQueryItem) Source() (interface{}, error) { 373 if item.likeText != "" { 374 return item.likeText, nil 375 } 376 377 source := make(map[string]interface{}) 378 379 if item.index != "" { 380 source["_index"] = item.index 381 } 382 if item.typ != "" { 383 source["_type"] = item.typ 384 } 385 if item.id != "" { 386 source["_id"] = item.id 387 } 388 if item.doc != nil { 389 source["doc"] = item.doc 390 } 391 if len(item.fields) > 0 { 392 source["fields"] = item.fields 393 } 394 if item.routing != "" { 395 source["_routing"] = item.routing 396 } 397 if item.fsc != nil { 398 src, err := item.fsc.Source() 399 if err != nil { 400 return nil, err 401 } 402 source["_source"] = src 403 } 404 if item.version >= 0 { 405 source["_version"] = item.version 406 } 407 if item.versionType != "" { 408 source["_version_type"] = item.versionType 409 } 410 411 return source, nil 412} 413