1 // fts_matcher.cpp
2 
3 
4 /**
5  *    Copyright (C) 2018-present MongoDB, Inc.
6  *
7  *    This program is free software: you can redistribute it and/or modify
8  *    it under the terms of the Server Side Public License, version 1,
9  *    as published by MongoDB, Inc.
10  *
11  *    This program is distributed in the hope that it will be useful,
12  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *    Server Side Public License for more details.
15  *
16  *    You should have received a copy of the Server Side Public License
17  *    along with this program. If not, see
18  *    <http://www.mongodb.com/licensing/server-side-public-license>.
19  *
20  *    As a special exception, the copyright holders give permission to link the
21  *    code of portions of this program with the OpenSSL library under certain
22  *    conditions as described in each individual source file and distribute
23  *    linked combinations including the program with the OpenSSL library. You
24  *    must comply with the Server Side Public License in all respects for
25  *    all of the code used other than as permitted herein. If you modify file(s)
26  *    with this exception, you may extend this exception to your version of the
27  *    file(s), but you are not obligated to do so. If you do not wish to do so,
28  *    delete this exception statement from your version. If you delete this
29  *    exception statement from all source files in the program, then also delete
30  *    it in the license file.
31  */
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/fts/fts_element_iterator.h"
36 #include "mongo/db/fts/fts_matcher.h"
37 #include "mongo/db/fts/fts_phrase_matcher.h"
38 #include "mongo/db/fts/fts_tokenizer.h"
39 
40 namespace mongo {
41 
42 namespace fts {
43 
44 using std::string;
45 
FTSMatcher(const FTSQueryImpl & query,const FTSSpec & spec)46 FTSMatcher::FTSMatcher(const FTSQueryImpl& query, const FTSSpec& spec)
47     : _query(query), _spec(spec) {}
48 
matches(const BSONObj & obj) const49 bool FTSMatcher::matches(const BSONObj& obj) const {
50     if (canSkipPositiveTermCheck()) {
51         // We can assume that 'obj' has at least one positive term, and dassert as a sanity
52         // check.
53         dassert(hasPositiveTerm(obj));
54     } else {
55         if (!hasPositiveTerm(obj)) {
56             return false;
57         }
58     }
59 
60     if (hasNegativeTerm(obj)) {
61         return false;
62     }
63 
64     if (!positivePhrasesMatch(obj)) {
65         return false;
66     }
67 
68     return negativePhrasesMatch(obj);
69 }
70 
hasPositiveTerm(const BSONObj & obj) const71 bool FTSMatcher::hasPositiveTerm(const BSONObj& obj) const {
72     FTSElementIterator it(_spec, obj);
73 
74     while (it.more()) {
75         FTSIteratorValue val = it.next();
76         if (_hasPositiveTerm_string(val._language, val._text)) {
77             return true;
78         }
79     }
80 
81     return false;
82 }
83 
_hasPositiveTerm_string(const FTSLanguage * language,const string & raw) const84 bool FTSMatcher::_hasPositiveTerm_string(const FTSLanguage* language, const string& raw) const {
85     std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());
86     tokenizer->reset(raw.c_str(), _getTokenizerOptions());
87 
88     while (tokenizer->moveNext()) {
89         string word = tokenizer->get().toString();
90         if (_query.getPositiveTerms().count(word) > 0) {
91             return true;
92         }
93     }
94     return false;
95 }
96 
hasNegativeTerm(const BSONObj & obj) const97 bool FTSMatcher::hasNegativeTerm(const BSONObj& obj) const {
98     if (_query.getNegatedTerms().size() == 0) {
99         return false;
100     }
101 
102     FTSElementIterator it(_spec, obj);
103 
104     while (it.more()) {
105         FTSIteratorValue val = it.next();
106         if (_hasNegativeTerm_string(val._language, val._text)) {
107             return true;
108         }
109     }
110 
111     return false;
112 }
113 
_hasNegativeTerm_string(const FTSLanguage * language,const string & raw) const114 bool FTSMatcher::_hasNegativeTerm_string(const FTSLanguage* language, const string& raw) const {
115     std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());
116     tokenizer->reset(raw.c_str(), _getTokenizerOptions());
117 
118     while (tokenizer->moveNext()) {
119         string word = tokenizer->get().toString();
120         if (_query.getNegatedTerms().count(word) > 0) {
121             return true;
122         }
123     }
124     return false;
125 }
126 
positivePhrasesMatch(const BSONObj & obj) const127 bool FTSMatcher::positivePhrasesMatch(const BSONObj& obj) const {
128     for (size_t i = 0; i < _query.getPositivePhr().size(); i++) {
129         if (!_phraseMatch(_query.getPositivePhr()[i], obj)) {
130             return false;
131         }
132     }
133 
134     return true;
135 }
136 
negativePhrasesMatch(const BSONObj & obj) const137 bool FTSMatcher::negativePhrasesMatch(const BSONObj& obj) const {
138     for (size_t i = 0; i < _query.getNegatedPhr().size(); i++) {
139         if (_phraseMatch(_query.getNegatedPhr()[i], obj)) {
140             return false;
141         }
142     }
143 
144     return true;
145 }
146 
_phraseMatch(const string & phrase,const BSONObj & obj) const147 bool FTSMatcher::_phraseMatch(const string& phrase, const BSONObj& obj) const {
148     FTSElementIterator it(_spec, obj);
149 
150     while (it.more()) {
151         FTSIteratorValue val = it.next();
152 
153         FTSPhraseMatcher::Options matcherOptions = FTSPhraseMatcher::kNone;
154 
155         if (_query.getCaseSensitive()) {
156             matcherOptions |= FTSPhraseMatcher::kCaseSensitive;
157         }
158         if (_query.getDiacriticSensitive()) {
159             matcherOptions |= FTSPhraseMatcher::kDiacriticSensitive;
160         }
161 
162         if (val._language->getPhraseMatcher().phraseMatches(phrase, val._text, matcherOptions)) {
163             return true;
164         }
165     }
166 
167     return false;
168 }
169 
_getTokenizerOptions() const170 FTSTokenizer::Options FTSMatcher::_getTokenizerOptions() const {
171     FTSTokenizer::Options tokenizerOptions = FTSTokenizer::kNone;
172 
173     if (_query.getCaseSensitive()) {
174         tokenizerOptions |= FTSTokenizer::kGenerateCaseSensitiveTokens;
175     }
176     if (_query.getDiacriticSensitive()) {
177         tokenizerOptions |= FTSTokenizer::kGenerateDiacriticSensitiveTokens;
178     }
179 
180     return tokenizerOptions;
181 }
182 }
183 }
184