1 // fts_matcher.cpp
2
3
4 /**
5 * Copyright (C) 2018-present MongoDB, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the Server Side Public License, version 1,
9 * as published by MongoDB, Inc.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * Server Side Public License for more details.
15 *
16 * You should have received a copy of the Server Side Public License
17 * along with this program. If not, see
18 * <http://www.mongodb.com/licensing/server-side-public-license>.
19 *
20 * As a special exception, the copyright holders give permission to link the
21 * code of portions of this program with the OpenSSL library under certain
22 * conditions as described in each individual source file and distribute
23 * linked combinations including the program with the OpenSSL library. You
24 * must comply with the Server Side Public License in all respects for
25 * all of the code used other than as permitted herein. If you modify file(s)
26 * with this exception, you may extend this exception to your version of the
27 * file(s), but you are not obligated to do so. If you do not wish to do so,
28 * delete this exception statement from your version. If you delete this
29 * exception statement from all source files in the program, then also delete
30 * it in the license file.
31 */
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/db/fts/fts_element_iterator.h"
36 #include "mongo/db/fts/fts_matcher.h"
37 #include "mongo/db/fts/fts_phrase_matcher.h"
38 #include "mongo/db/fts/fts_tokenizer.h"
39
40 namespace mongo {
41
42 namespace fts {
43
44 using std::string;
45
FTSMatcher(const FTSQueryImpl & query,const FTSSpec & spec)46 FTSMatcher::FTSMatcher(const FTSQueryImpl& query, const FTSSpec& spec)
47 : _query(query), _spec(spec) {}
48
matches(const BSONObj & obj) const49 bool FTSMatcher::matches(const BSONObj& obj) const {
50 if (canSkipPositiveTermCheck()) {
51 // We can assume that 'obj' has at least one positive term, and dassert as a sanity
52 // check.
53 dassert(hasPositiveTerm(obj));
54 } else {
55 if (!hasPositiveTerm(obj)) {
56 return false;
57 }
58 }
59
60 if (hasNegativeTerm(obj)) {
61 return false;
62 }
63
64 if (!positivePhrasesMatch(obj)) {
65 return false;
66 }
67
68 return negativePhrasesMatch(obj);
69 }
70
hasPositiveTerm(const BSONObj & obj) const71 bool FTSMatcher::hasPositiveTerm(const BSONObj& obj) const {
72 FTSElementIterator it(_spec, obj);
73
74 while (it.more()) {
75 FTSIteratorValue val = it.next();
76 if (_hasPositiveTerm_string(val._language, val._text)) {
77 return true;
78 }
79 }
80
81 return false;
82 }
83
_hasPositiveTerm_string(const FTSLanguage * language,const string & raw) const84 bool FTSMatcher::_hasPositiveTerm_string(const FTSLanguage* language, const string& raw) const {
85 std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());
86 tokenizer->reset(raw.c_str(), _getTokenizerOptions());
87
88 while (tokenizer->moveNext()) {
89 string word = tokenizer->get().toString();
90 if (_query.getPositiveTerms().count(word) > 0) {
91 return true;
92 }
93 }
94 return false;
95 }
96
hasNegativeTerm(const BSONObj & obj) const97 bool FTSMatcher::hasNegativeTerm(const BSONObj& obj) const {
98 if (_query.getNegatedTerms().size() == 0) {
99 return false;
100 }
101
102 FTSElementIterator it(_spec, obj);
103
104 while (it.more()) {
105 FTSIteratorValue val = it.next();
106 if (_hasNegativeTerm_string(val._language, val._text)) {
107 return true;
108 }
109 }
110
111 return false;
112 }
113
_hasNegativeTerm_string(const FTSLanguage * language,const string & raw) const114 bool FTSMatcher::_hasNegativeTerm_string(const FTSLanguage* language, const string& raw) const {
115 std::unique_ptr<FTSTokenizer> tokenizer(language->createTokenizer());
116 tokenizer->reset(raw.c_str(), _getTokenizerOptions());
117
118 while (tokenizer->moveNext()) {
119 string word = tokenizer->get().toString();
120 if (_query.getNegatedTerms().count(word) > 0) {
121 return true;
122 }
123 }
124 return false;
125 }
126
positivePhrasesMatch(const BSONObj & obj) const127 bool FTSMatcher::positivePhrasesMatch(const BSONObj& obj) const {
128 for (size_t i = 0; i < _query.getPositivePhr().size(); i++) {
129 if (!_phraseMatch(_query.getPositivePhr()[i], obj)) {
130 return false;
131 }
132 }
133
134 return true;
135 }
136
negativePhrasesMatch(const BSONObj & obj) const137 bool FTSMatcher::negativePhrasesMatch(const BSONObj& obj) const {
138 for (size_t i = 0; i < _query.getNegatedPhr().size(); i++) {
139 if (_phraseMatch(_query.getNegatedPhr()[i], obj)) {
140 return false;
141 }
142 }
143
144 return true;
145 }
146
_phraseMatch(const string & phrase,const BSONObj & obj) const147 bool FTSMatcher::_phraseMatch(const string& phrase, const BSONObj& obj) const {
148 FTSElementIterator it(_spec, obj);
149
150 while (it.more()) {
151 FTSIteratorValue val = it.next();
152
153 FTSPhraseMatcher::Options matcherOptions = FTSPhraseMatcher::kNone;
154
155 if (_query.getCaseSensitive()) {
156 matcherOptions |= FTSPhraseMatcher::kCaseSensitive;
157 }
158 if (_query.getDiacriticSensitive()) {
159 matcherOptions |= FTSPhraseMatcher::kDiacriticSensitive;
160 }
161
162 if (val._language->getPhraseMatcher().phraseMatches(phrase, val._text, matcherOptions)) {
163 return true;
164 }
165 }
166
167 return false;
168 }
169
_getTokenizerOptions() const170 FTSTokenizer::Options FTSMatcher::_getTokenizerOptions() const {
171 FTSTokenizer::Options tokenizerOptions = FTSTokenizer::kNone;
172
173 if (_query.getCaseSensitive()) {
174 tokenizerOptions |= FTSTokenizer::kGenerateCaseSensitiveTokens;
175 }
176 if (_query.getDiacriticSensitive()) {
177 tokenizerOptions |= FTSTokenizer::kGenerateDiacriticSensitiveTokens;
178 }
179
180 return tokenizerOptions;
181 }
182 }
183 }
184