1 // expression_parser.cpp
2
3
4 /**
5 * Copyright (C) 2018-present MongoDB, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the Server Side Public License, version 1,
9 * as published by MongoDB, Inc.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * Server Side Public License for more details.
15 *
16 * You should have received a copy of the Server Side Public License
17 * along with this program. If not, see
18 * <http://www.mongodb.com/licensing/server-side-public-license>.
19 *
20 * As a special exception, the copyright holders give permission to link the
21 * code of portions of this program with the OpenSSL library under certain
22 * conditions as described in each individual source file and distribute
23 * linked combinations including the program with the OpenSSL library. You
24 * must comply with the Server Side Public License in all respects for
25 * all of the code used other than as permitted herein. If you modify file(s)
26 * with this exception, you may extend this exception to your version of the
27 * file(s), but you are not obligated to do so. If you do not wish to do so,
28 * delete this exception statement from your version. If you delete this
29 * exception statement from all source files in the program, then also delete
30 * it in the license file.
31 */
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/db/matcher/expression_parser.h"
36
37 #include <boost/container/flat_set.hpp>
38 #include <pcrecpp.h>
39
40 #include "mongo/base/init.h"
41 #include "mongo/bson/bsonmisc.h"
42 #include "mongo/bson/bsonobj.h"
43 #include "mongo/bson/bsonobjbuilder.h"
44 #include "mongo/db/matcher/expression_always_boolean.h"
45 #include "mongo/db/matcher/expression_array.h"
46 #include "mongo/db/matcher/expression_expr.h"
47 #include "mongo/db/matcher/expression_geo.h"
48 #include "mongo/db/matcher/expression_internal_expr_eq.h"
49 #include "mongo/db/matcher/expression_leaf.h"
50 #include "mongo/db/matcher/expression_tree.h"
51 #include "mongo/db/matcher/expression_type.h"
52 #include "mongo/db/matcher/expression_with_placeholder.h"
53 #include "mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h"
54 #include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h"
55 #include "mongo/db/matcher/schema/expression_internal_schema_cond.h"
56 #include "mongo/db/matcher/schema/expression_internal_schema_eq.h"
57 #include "mongo/db/matcher/schema/expression_internal_schema_fmod.h"
58 #include "mongo/db/matcher/schema/expression_internal_schema_match_array_index.h"
59 #include "mongo/db/matcher/schema/expression_internal_schema_max_items.h"
60 #include "mongo/db/matcher/schema/expression_internal_schema_max_length.h"
61 #include "mongo/db/matcher/schema/expression_internal_schema_max_properties.h"
62 #include "mongo/db/matcher/schema/expression_internal_schema_min_items.h"
63 #include "mongo/db/matcher/schema/expression_internal_schema_min_length.h"
64 #include "mongo/db/matcher/schema/expression_internal_schema_min_properties.h"
65 #include "mongo/db/matcher/schema/expression_internal_schema_object_match.h"
66 #include "mongo/db/matcher/schema/expression_internal_schema_root_doc_eq.h"
67 #include "mongo/db/matcher/schema/expression_internal_schema_unique_items.h"
68 #include "mongo/db/matcher/schema/expression_internal_schema_xor.h"
69 #include "mongo/db/matcher/schema/json_schema_parser.h"
70 #include "mongo/db/namespace_string.h"
71 #include "mongo/db/query/query_knobs.h"
72 #include "mongo/stdx/memory.h"
73 #include "mongo/util/mongoutils/str.h"
74 #include "mongo/util/string_map.h"
75
76 namespace {
77
78 using namespace mongo;
79
80 /**
81 * Returns true if subtree contains MatchExpression 'type'.
82 */
hasNode(const MatchExpression * root,MatchExpression::MatchType type)83 bool hasNode(const MatchExpression* root, MatchExpression::MatchType type) {
84 if (type == root->matchType()) {
85 return true;
86 }
87 for (size_t i = 0; i < root->numChildren(); ++i) {
88 if (hasNode(root->getChild(i), type)) {
89 return true;
90 }
91 }
92 return false;
93 }
94
95 } // namespace
96
97 namespace mongo {
98
// Namespace-scope definitions for the 'kName' constants of these match expression classes.
// NOTE(review): presumably these exist so the constexpr static members can be ODR-used under
// pre-C++17 rules -- confirm against the class declarations.
constexpr StringData AlwaysFalseMatchExpression::kName;
constexpr StringData AlwaysTrueMatchExpression::kName;
constexpr StringData OrMatchExpression::kName;
constexpr StringData AndMatchExpression::kName;
constexpr StringData NorMatchExpression::kName;

// 2^N as a double, where N is the number of value bits of 'long long' (63 when 'long long' is
// 64 bits wide). This is one more than the largest representable 'long long', which lets range
// checks on doubles use '>=' against an exactly representable bound.
const double MatchExpressionParser::kLongLongMaxPlusOneAsDouble =
    scalbn(1, std::numeric_limits<long long>::digits);
107
/**
 * 'DocumentParseLevel' refers to the current position of the parser as it descends a
 * MatchExpression tree.
 *
 * The parsers in this file use it to reject operators that are only legal at certain depths
 * (for example, $text and $expr are refused at 'kUserSubDocument').
 */
enum class DocumentParseLevel {
    // Indicates that the parser is looking at the root level of the BSON object containing the
    // user's query predicate.
    kPredicateTopLevel,
    // Indicates that match expression nodes in this position will match against the complete
    // user document, as opposed to matching against a nested document or a subdocument inside
    // an array.
    kUserDocumentTopLevel,
    // Indicates that match expression nodes in this position will match against a nested
    // document or a subdocument inside an array.
    kUserSubDocument,
};
124
parseIntegerElementToNonNegativeLong(BSONElement elem)125 StatusWith<long long> MatchExpressionParser::parseIntegerElementToNonNegativeLong(
126 BSONElement elem) {
127 auto number = parseIntegerElementToLong(elem);
128 if (!number.isOK()) {
129 return number;
130 }
131
132 if (number.getValue() < 0) {
133 return Status(ErrorCodes::FailedToParse,
134 str::stream() << "Expected a positive number in: " << elem);
135 }
136
137 return number;
138 }
139
parseIntegerElementToLong(BSONElement elem)140 StatusWith<long long> MatchExpressionParser::parseIntegerElementToLong(BSONElement elem) {
141 if (!elem.isNumber()) {
142 return Status(ErrorCodes::FailedToParse, str::stream() << "Expected a number in: " << elem);
143 }
144
145 long long number = 0;
146 if (elem.type() == BSONType::NumberDouble) {
147 auto eDouble = elem.numberDouble();
148
149 // NaN doubles are rejected.
150 if (std::isnan(eDouble)) {
151 return Status(ErrorCodes::FailedToParse,
152 str::stream() << "Expected an integer, but found NaN in: " << elem);
153 }
154
155 // No integral doubles that are too large to be represented as a 64 bit signed integer.
156 // We use 'kLongLongMaxAsDouble' because if we just did eDouble > 2^63-1, it would be
157 // compared against 2^63. eDouble=2^63 would not get caught that way.
158 if (eDouble >= MatchExpressionParser::kLongLongMaxPlusOneAsDouble ||
159 eDouble < std::numeric_limits<long long>::min()) {
160 return Status(ErrorCodes::FailedToParse,
161 str::stream() << "Cannot represent as a 64-bit integer: " << elem);
162 }
163
164 // This checks if elem is an integral double.
165 if (eDouble != static_cast<double>(static_cast<long long>(eDouble))) {
166 return Status(ErrorCodes::FailedToParse,
167 str::stream() << "Expected an integer: " << elem);
168 }
169
170 number = elem.numberLong();
171 } else if (elem.type() == BSONType::NumberDecimal) {
172 uint32_t signalingFlags = Decimal128::kNoFlag;
173 number = elem.numberDecimal().toLongExact(&signalingFlags);
174 if (signalingFlags != Decimal128::kNoFlag) {
175 return Status(ErrorCodes::FailedToParse,
176 str::stream() << "Cannot represent as a 64-bit integer: " << elem);
177 }
178 } else {
179 number = elem.numberLong();
180 }
181
182 return number;
183 }
184
parseIntegerElementToInt(BSONElement elem)185 StatusWith<int> MatchExpressionParser::parseIntegerElementToInt(BSONElement elem) {
186 auto parsedLong = MatchExpressionParser::parseIntegerElementToLong(elem);
187 if (!parsedLong.isOK()) {
188 return parsedLong.getStatus();
189 }
190
191 auto valueLong = parsedLong.getValue();
192 if (valueLong < std::numeric_limits<int>::min() ||
193 valueLong > std::numeric_limits<int>::max()) {
194 return {ErrorCodes::FailedToParse,
195 str::stream() << "Cannot represent " << elem << " in an int"};
196 }
197 return static_cast<int>(valueLong);
198 }
199
200 namespace {
201
// Forward declarations.

// Defined later in this file. parse() calls it for a field whose value is an expression
// document (e.g. {a: {$lt: 5}}), passing the field name, the operator document, and the AND
// node that collects the parsed predicates.
// NOTE(review): the exact contract lives with the definition, which is outside this excerpt.
Status parseSub(StringData name,
                const BSONObj& sub,
                AndMatchExpression* root,
                const boost::intrusive_ptr<ExpressionContext>& expCtx,
                const ExtensionsCallback* extensionsCallback,
                MatchExpressionParser::AllowedFeatureSet allowedFeatures,
                DocumentParseLevel currentLevel);

// Defined later in this file. parse() calls it with a top-level operator name stripped of its
// leading '$' and treats an empty (falsy) function as "unknown top level operator".
stdx::function<StatusWithMatchExpression(StringData,
                                         BSONElement,
                                         const boost::intrusive_ptr<ExpressionContext>&,
                                         const ExtensionsCallback*,
                                         MatchExpressionParser::AllowedFeatureSet,
                                         DocumentParseLevel)>
retrievePathlessParser(StringData name);
219
parseRegexElement(StringData name,BSONElement e)220 StatusWithMatchExpression parseRegexElement(StringData name, BSONElement e) {
221 if (e.type() != BSONType::RegEx)
222 return {Status(ErrorCodes::BadValue, "not a regex")};
223
224 auto temp = stdx::make_unique<RegexMatchExpression>();
225 auto s = temp->init(name, e.regex(), e.regexFlags());
226 if (!s.isOK())
227 return s;
228 return {std::move(temp)};
229 }
230
parseComparison(StringData name,ComparisonMatchExpression * cmp,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,MatchExpressionParser::AllowedFeatureSet allowedFeatures)231 StatusWithMatchExpression parseComparison(
232 StringData name,
233 ComparisonMatchExpression* cmp,
234 BSONElement e,
235 const boost::intrusive_ptr<ExpressionContext>& expCtx,
236 MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
237 std::unique_ptr<ComparisonMatchExpression> temp(cmp);
238
239 // Non-equality comparison match expressions cannot have a regular expression as the argument.
240 // (e.g. {a: {$gt: /b/}} is illegal).
241 if (MatchExpression::EQ != cmp->matchType() && BSONType::RegEx == e.type()) {
242 return {Status(ErrorCodes::BadValue,
243 str::stream() << "Can't have RegEx as arg to predicate over field '" << name
244 << "'.")};
245 }
246
247 auto s = temp->init(name, e);
248 if (!s.isOK()) {
249 return s;
250 }
251
252 temp->setCollator(expCtx->getCollator());
253
254 return {std::move(temp)};
255 }
256
257 /**
258 * DBRef fields are ordered in the collection. In the query, we consider an embedded object a query
259 * on a DBRef as long as it contains $ref and $id.
260 * Required fields: $ref and $id (if incomplete DBRefs are not allowed).
261 *
262 * If incomplete DBRefs are allowed, we accept the BSON object as long as it contains $ref, $id or
263 * $db.
264 *
265 * Field names are checked but not field types.
266 *
267 * { $ref: "s", $id: "x" } = true
268 * { $ref : "s" } = true (if incomplete DBRef is allowed)
269 * { $id : "x" } = true (if incomplete DBRef is allowed)
270 * { $db : "x" } = true (if incomplete DBRef is allowed)
271 */
isDBRefDocument(const BSONObj & obj,bool allowIncompleteDBRef)272 bool isDBRefDocument(const BSONObj& obj, bool allowIncompleteDBRef) {
273 bool hasRef = false;
274 bool hasID = false;
275 bool hasDB = false;
276
277 BSONObjIterator i(obj);
278 while (i.more() && !(hasRef && hasID)) {
279 auto element = i.next();
280 auto fieldName = element.fieldNameStringData();
281 // $ref
282 if (!hasRef && "$ref"_sd == fieldName) {
283 hasRef = true;
284 }
285 // $id
286 else if (!hasID && "$id"_sd == fieldName) {
287 hasID = true;
288 }
289 // $db
290 else if (!hasDB && "$db"_sd == fieldName) {
291 hasDB = true;
292 }
293 }
294
295 if (allowIncompleteDBRef) {
296 return hasRef || hasID || hasDB;
297 }
298
299 return hasRef && hasID;
300 }
301
302 /**
303 * 5 = false
304 * { a : 5 } = false
305 * { $lt : 5 } = true
306 * { $ref: "s", $id: "x" } = false
307 * { $ref: "s", $id: "x", $db: "mydb" } = false
308 * { $ref : "s" } = false (if incomplete DBRef is allowed)
309 * { $id : "x" } = false (if incomplete DBRef is allowed)
310 * { $db : "mydb" } = false (if incomplete DBRef is allowed)
311 */
isExpressionDocument(BSONElement e,bool allowIncompleteDBRef)312 bool isExpressionDocument(BSONElement e, bool allowIncompleteDBRef) {
313 if (e.type() != BSONType::Object)
314 return false;
315
316 auto o = e.Obj();
317 if (o.isEmpty())
318 return false;
319
320 auto name = o.firstElement().fieldNameStringData();
321 if (name[0] != '$')
322 return false;
323
324 if (isDBRefDocument(o, allowIncompleteDBRef)) {
325 return false;
326 }
327
328 return true;
329 }
330
331 /**
332 * Parse 'obj' and return either a MatchExpression or an error.
333 */
parse(const BSONObj & obj,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)334 StatusWithMatchExpression parse(const BSONObj& obj,
335 const boost::intrusive_ptr<ExpressionContext>& expCtx,
336 const ExtensionsCallback* extensionsCallback,
337 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
338 DocumentParseLevel currentLevel) {
339 auto root = stdx::make_unique<AndMatchExpression>();
340
341 const DocumentParseLevel nextLevel = (currentLevel == DocumentParseLevel::kPredicateTopLevel)
342 ? DocumentParseLevel::kUserDocumentTopLevel
343 : currentLevel;
344
345 for (auto e : obj) {
346 if (e.fieldName()[0] == '$') {
347 auto name = e.fieldNameStringData().substr(1);
348 auto parseExpressionMatchFunction = retrievePathlessParser(name);
349
350 if (!parseExpressionMatchFunction) {
351 return {Status(ErrorCodes::BadValue,
352 str::stream() << "unknown top level operator: "
353 << e.fieldNameStringData())};
354 }
355
356 auto parsedExpression = parseExpressionMatchFunction(
357 name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
358
359 if (!parsedExpression.isOK()) {
360 return parsedExpression;
361 }
362
363 // A nullptr for 'parsedExpression' indicates that the particular operator should not
364 // be added to 'root', because it is handled outside of the MatchExpressionParser
365 // library. The following operators currently follow this convention:
366 // - $atomic is explicitly handled in CanonicalQuery::init()
367 // - $comment has no action associated with the operator.
368 // - $isolated is explicitly handled in CanoncialQuery::init()
369 if (parsedExpression.getValue().get()) {
370 root->add(parsedExpression.getValue().release());
371 }
372
373 continue;
374 }
375
376 if (isExpressionDocument(e, false)) {
377 auto s = parseSub(e.fieldNameStringData(),
378 e.Obj(),
379 root.get(),
380 expCtx,
381 extensionsCallback,
382 allowedFeatures,
383 nextLevel);
384 if (!s.isOK())
385 return s;
386 continue;
387 }
388
389 if (e.type() == BSONType::RegEx) {
390 auto result = parseRegexElement(e.fieldNameStringData(), e);
391 if (!result.isOK())
392 return result;
393 root->add(result.getValue().release());
394 continue;
395 }
396
397 auto eq = parseComparison(
398 e.fieldNameStringData(), new EqualityMatchExpression(), e, expCtx, allowedFeatures);
399 if (!eq.isOK())
400 return eq;
401
402 root->add(eq.getValue().release());
403 }
404
405 if (root->numChildren() == 1) {
406 std::unique_ptr<MatchExpression> real(root->getChild(0));
407 root->clearAndRelease();
408 return {std::move(real)};
409 }
410
411 return {std::move(root)};
412 }
413
parseAtomicOrIsolated(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)414 StatusWithMatchExpression parseAtomicOrIsolated(
415 StringData name,
416 BSONElement elem,
417 const boost::intrusive_ptr<ExpressionContext>& expCtx,
418 const ExtensionsCallback* extensionsCallback,
419 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
420 DocumentParseLevel currentLevel) {
421 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kIsolated) == 0u) {
422 return {Status(ErrorCodes::QueryFeatureNotAllowed,
423 "$isolated ($atomic) is not allowed in this context")};
424 }
425 if (currentLevel != DocumentParseLevel::kPredicateTopLevel) {
426 return {
427 Status(ErrorCodes::FailedToParse, "$isolated ($atomic) has to be at the top level")};
428 }
429 return {nullptr};
430 }
431
parseComment(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)432 StatusWithMatchExpression parseComment(StringData name,
433 BSONElement elem,
434 const boost::intrusive_ptr<ExpressionContext>& expCtx,
435 const ExtensionsCallback* extensionsCallback,
436 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
437 DocumentParseLevel currentLevel) {
438 return {nullptr};
439 }
440
parseWhere(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)441 StatusWithMatchExpression parseWhere(StringData name,
442 BSONElement elem,
443 const boost::intrusive_ptr<ExpressionContext>& expCtx,
444 const ExtensionsCallback* extensionsCallback,
445 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
446 DocumentParseLevel currentLevel) {
447 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kJavascript) == 0u) {
448 return {Status(ErrorCodes::BadValue, "$where is not allowed in this context")};
449 }
450
451 return extensionsCallback->parseWhere(elem);
452 }
453
parseText(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)454 StatusWithMatchExpression parseText(StringData name,
455 BSONElement elem,
456 const boost::intrusive_ptr<ExpressionContext>& expCtx,
457 const ExtensionsCallback* extensionsCallback,
458 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
459 DocumentParseLevel currentLevel) {
460 if (currentLevel == DocumentParseLevel::kUserSubDocument) {
461 return {
462 Status(ErrorCodes::BadValue, "$text can only be applied to the top-level document")};
463 }
464
465 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kText) == 0u) {
466 return {Status(ErrorCodes::BadValue, "$text is not allowed in this context")};
467 }
468
469 return extensionsCallback->parseText(elem);
470 }
471
parseDBRef(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)472 StatusWithMatchExpression parseDBRef(StringData name,
473 BSONElement elem,
474 const boost::intrusive_ptr<ExpressionContext>& expCtx,
475 const ExtensionsCallback* extensionsCallback,
476 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
477 DocumentParseLevel currentLevel) {
478 auto eq = stdx::make_unique<EqualityMatchExpression>();
479 auto s = eq->init(elem.fieldName(), elem);
480 if (!s.isOK()) {
481 return s;
482 }
483 // 'id' is collation-aware. 'ref' and 'db' are compared using binary comparison.
484 eq->setCollator("id"_sd == name ? expCtx->getCollator() : nullptr);
485
486 return {std::move(eq)};
487 }
488
parseJSONSchema(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)489 StatusWithMatchExpression parseJSONSchema(StringData name,
490 BSONElement elem,
491 const boost::intrusive_ptr<ExpressionContext>& expCtx,
492 const ExtensionsCallback* extensionsCallback,
493 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
494 DocumentParseLevel currentLevel) {
495 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kJSONSchema) == 0u) {
496 return Status(ErrorCodes::QueryFeatureNotAllowed,
497 "$jsonSchema is not allowed in this context");
498 }
499
500 if (elem.type() != BSONType::Object) {
501 return {Status(ErrorCodes::TypeMismatch, "$jsonSchema must be an object")};
502 }
503
504 return JSONSchemaParser::parse(elem.Obj(), internalQueryIgnoreUnknownJSONSchemaKeywords.load());
505 }
506
507 template <class T>
parseAlwaysBoolean(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)508 StatusWithMatchExpression parseAlwaysBoolean(
509 StringData name,
510 BSONElement elem,
511 const boost::intrusive_ptr<ExpressionContext>& expCtx,
512 const ExtensionsCallback* extensionsCallback,
513 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
514 DocumentParseLevel currentLevel) {
515 auto statusWithLong = MatchExpressionParser::parseIntegerElementToLong(elem);
516 if (!statusWithLong.isOK()) {
517 return statusWithLong.getStatus();
518 }
519
520 if (statusWithLong.getValue() != 1) {
521 return {Status(ErrorCodes::FailedToParse,
522 str::stream() << T::kName << " must be an integer value of 1")};
523 }
524
525 return {stdx::make_unique<T>()};
526 }
527
parseExpr(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)528 StatusWithMatchExpression parseExpr(StringData name,
529 BSONElement elem,
530 const boost::intrusive_ptr<ExpressionContext>& expCtx,
531 const ExtensionsCallback* extensionsCallback,
532 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
533 DocumentParseLevel currentLevel) {
534 if (currentLevel == DocumentParseLevel::kUserSubDocument) {
535 return {
536 Status(ErrorCodes::BadValue, "$expr can only be applied to the top-level document")};
537 }
538
539 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kExpr) == 0u) {
540 return {Status(ErrorCodes::QueryFeatureNotAllowed, "$expr is not allowed in this context")};
541 }
542
543 return {stdx::make_unique<ExprMatchExpression>(std::move(elem), expCtx)};
544 }
545
parseMOD(StringData name,BSONElement e)546 StatusWithMatchExpression parseMOD(StringData name, BSONElement e) {
547 if (e.type() != BSONType::Array)
548 return {Status(ErrorCodes::BadValue, "malformed mod, needs to be an array")};
549
550 BSONObjIterator i(e.Obj());
551
552 if (!i.more())
553 return {Status(ErrorCodes::BadValue, "malformed mod, not enough elements")};
554 auto d = i.next();
555 if (!d.isNumber())
556 return {Status(ErrorCodes::BadValue, "malformed mod, divisor not a number")};
557
558 if (!i.more())
559 return {Status(ErrorCodes::BadValue, "malformed mod, not enough elements")};
560 auto r = i.next();
561 if (!d.isNumber())
562 return {Status(ErrorCodes::BadValue, "malformed mod, remainder not a number")};
563
564 if (i.more())
565 return {Status(ErrorCodes::BadValue, "malformed mod, too many elements")};
566
567 auto temp = stdx::make_unique<ModMatchExpression>();
568 auto s = temp->init(
569 name, ModMatchExpression::truncateToLong(d), ModMatchExpression::truncateToLong(r));
570 if (!s.isOK())
571 return s;
572 return {std::move(temp)};
573 }
574
parseRegexDocument(StringData name,const BSONObj & doc)575 StatusWithMatchExpression parseRegexDocument(StringData name, const BSONObj& doc) {
576 StringData regex;
577 StringData regexOptions;
578
579 for (auto e : doc) {
580 auto matchType = MatchExpressionParser::parsePathAcceptingKeyword(e);
581 if (!matchType) {
582 continue;
583 }
584
585 switch (*matchType) {
586 case PathAcceptingKeyword::REGEX:
587 if (e.type() == BSONType::String) {
588 regex = e.valueStringData();
589 } else if (e.type() == BSONType::RegEx) {
590 regex = e.regex();
591 regexOptions = e.regexFlags();
592 } else {
593 return {Status(ErrorCodes::BadValue, "$regex has to be a string")};
594 }
595
596 break;
597 case PathAcceptingKeyword::OPTIONS:
598 if (e.type() != BSONType::String)
599 return {Status(ErrorCodes::BadValue, "$options has to be a string")};
600 regexOptions = e.valueStringData();
601 break;
602 default:
603 break;
604 }
605 }
606
607 auto temp = stdx::make_unique<RegexMatchExpression>();
608 auto s = temp->init(name, regex, regexOptions);
609 if (!s.isOK())
610 return s;
611 return {std::move(temp)};
612 }
613
parseInExpression(InMatchExpression * inExpression,const BSONObj & theArray,const boost::intrusive_ptr<ExpressionContext> & expCtx)614 Status parseInExpression(InMatchExpression* inExpression,
615 const BSONObj& theArray,
616 const boost::intrusive_ptr<ExpressionContext>& expCtx) {
617 inExpression->setCollator(expCtx->getCollator());
618 std::vector<BSONElement> equalities;
619 for (auto e : theArray) {
620 // Allow DBRefs, but reject all fields with names starting with $.
621 if (isExpressionDocument(e, false)) {
622 return Status(ErrorCodes::BadValue, "cannot nest $ under $in");
623 }
624
625 if (e.type() == BSONType::RegEx) {
626 auto r = stdx::make_unique<RegexMatchExpression>();
627 auto s = r->init("", e);
628 if (!s.isOK())
629 return s;
630 s = inExpression->addRegex(std::move(r));
631 if (!s.isOK())
632 return s;
633 } else {
634 equalities.push_back(e);
635 }
636 }
637 return inExpression->setEqualities(std::move(equalities));
638 }
639
640 template <class T>
parseType(StringData name,BSONElement elt)641 StatusWithMatchExpression parseType(StringData name, BSONElement elt) {
642 auto typeSet = MatcherTypeSet::parse(elt, MatcherTypeSet::kTypeAliasMap);
643 if (!typeSet.isOK()) {
644 return typeSet.getStatus();
645 }
646
647 auto typeExpr = stdx::make_unique<T>();
648
649 if (typeSet.getValue().isEmpty()) {
650 return {Status(ErrorCodes::FailedToParse,
651 str::stream() << typeExpr->name() << " must match at least one type")};
652 }
653
654 auto status = typeExpr->init(name, std::move(typeSet.getValue()));
655 if (!status.isOK()) {
656 return status;
657 }
658
659 return {std::move(typeExpr)};
660 }
661
662 /**
663 * Converts 'theArray', a BSONArray of integers, into a std::vector of integers.
664 */
parseBitPositionsArray(const BSONObj & theArray)665 StatusWith<std::vector<uint32_t>> parseBitPositionsArray(const BSONObj& theArray) {
666 std::vector<uint32_t> bitPositions;
667
668 // Fill temporary bit position array with integers read from the BSON array.
669 for (auto e : theArray) {
670 if (!e.isNumber()) {
671 return Status(ErrorCodes::BadValue,
672 str::stream() << "bit positions must be an integer but got: " << e);
673 }
674
675 if (e.type() == BSONType::NumberDouble) {
676 auto eDouble = e.numberDouble();
677
678 // NaN doubles are rejected.
679 if (std::isnan(eDouble)) {
680 return Status(ErrorCodes::BadValue,
681 str::stream() << "bit positions cannot take a NaN: " << e);
682 }
683
684 // This makes sure e does not overflow a 32-bit integer container.
685 if (eDouble > std::numeric_limits<int>::max() ||
686 eDouble < std::numeric_limits<int>::min()) {
687 return Status(
688 ErrorCodes::BadValue,
689 str::stream()
690 << "bit positions cannot be represented as a 32-bit signed integer: "
691 << e);
692 }
693
694 // This checks if e is integral.
695 if (eDouble != static_cast<double>(static_cast<long long>(eDouble))) {
696 return Status(ErrorCodes::BadValue,
697 str::stream() << "bit positions must be an integer but got: " << e);
698 }
699 }
700
701 if (e.type() == BSONType::NumberLong) {
702 auto eLong = e.numberLong();
703
704 // This makes sure e does not overflow a 32-bit integer container.
705 if (eLong > std::numeric_limits<int>::max() ||
706 eLong < std::numeric_limits<int>::min()) {
707 return Status(
708 ErrorCodes::BadValue,
709 str::stream()
710 << "bit positions cannot be represented as a 32-bit signed integer: "
711 << e);
712 }
713 }
714
715 auto eValue = e.numberInt();
716
717 // No negatives.
718 if (eValue < 0) {
719 return Status(ErrorCodes::BadValue,
720 str::stream() << "bit positions must be >= 0 but got: " << e);
721 }
722
723 bitPositions.push_back(eValue);
724 }
725
726 return bitPositions;
727 }
728
729 /**
730 * Parses 'e' into a BitTestMatchExpression.
731 */
732 template <class T>
parseBitTest(StringData name,BSONElement e)733 StatusWithMatchExpression parseBitTest(StringData name, BSONElement e) {
734 auto bitTestMatchExpression = stdx::make_unique<T>();
735
736 if (e.type() == BSONType::Array) {
737 // Array of bit positions provided as value.
738 auto statusWithBitPositions = parseBitPositionsArray(e.Obj());
739 if (!statusWithBitPositions.isOK()) {
740 return statusWithBitPositions.getStatus();
741 }
742
743 std::vector<uint32_t> bitPositions = statusWithBitPositions.getValue();
744 auto s = bitTestMatchExpression->init(name, bitPositions);
745 if (!s.isOK()) {
746 return s;
747 }
748 } else if (e.isNumber()) {
749 // Integer bitmask provided as value.
750 auto bitMask = MatchExpressionParser::parseIntegerElementToNonNegativeLong(e);
751 if (!bitMask.isOK()) {
752 return bitMask.getStatus();
753 }
754
755 auto s = bitTestMatchExpression->init(name, bitMask.getValue());
756 if (!s.isOK()) {
757 return s;
758 }
759 } else if (e.type() == BSONType::BinData) {
760 // Binary bitmask provided as value.
761
762 int eBinaryLen;
763 auto eBinary = e.binData(eBinaryLen);
764
765 auto s = bitTestMatchExpression->init(name, eBinary, eBinaryLen);
766 if (!s.isOK()) {
767 return s;
768 }
769 } else {
770 return Status(
771 ErrorCodes::BadValue,
772 str::stream() << name << " takes an Array, a number, or a BinData but received: " << e);
773 }
774
775 return {std::move(bitTestMatchExpression)};
776 }
777
parseInternalSchemaFmod(StringData name,BSONElement elem)778 StatusWithMatchExpression parseInternalSchemaFmod(StringData name, BSONElement elem) {
779 auto path(name);
780 if (elem.type() != BSONType::Array)
781 return {ErrorCodes::BadValue,
782 str::stream() << path << " must be an array, but got type " << elem.type()};
783
784 BSONObjIterator i(elem.embeddedObject());
785
786 if (!i.more())
787 return {ErrorCodes::BadValue, str::stream() << path << " does not have enough elements"};
788 auto d = i.next();
789 if (!d.isNumber())
790 return {ErrorCodes::TypeMismatch,
791 str::stream() << path << " does not have a numeric divisor"};
792
793 if (!i.more())
794 return {ErrorCodes::BadValue, str::stream() << path << " does not have enough elements"};
795 auto r = i.next();
796 if (!d.isNumber())
797 return {ErrorCodes::TypeMismatch,
798 str::stream() << path << " does not have a numeric remainder"};
799
800 if (i.more())
801 return {ErrorCodes::BadValue, str::stream() << path << " has too many elements"};
802
803 auto result = stdx::make_unique<InternalSchemaFmodMatchExpression>();
804 auto s = result->init(name, d.numberDecimal(), r.numberDecimal());
805 if (!s.isOK())
806 return s;
807 return {std::move(result)};
808 }
809
parseInternalSchemaRootDocEq(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)810 StatusWithMatchExpression parseInternalSchemaRootDocEq(
811 StringData name,
812 BSONElement elem,
813 const boost::intrusive_ptr<ExpressionContext>& expCtx,
814 const ExtensionsCallback* extensionsCallback,
815 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
816 DocumentParseLevel currentLevel) {
817 if (currentLevel == DocumentParseLevel::kUserSubDocument) {
818 return {Status(ErrorCodes::FailedToParse,
819 str::stream() << InternalSchemaRootDocEqMatchExpression::kName
820 << " can only be applied to the top level document")};
821 }
822
823 if (elem.type() != BSONType::Object) {
824 return {Status(ErrorCodes::TypeMismatch,
825 str::stream() << InternalSchemaRootDocEqMatchExpression::kName
826 << " must be an object, found type "
827 << elem.type())};
828 }
829 auto rootDocEq = stdx::make_unique<InternalSchemaRootDocEqMatchExpression>();
830 rootDocEq->init(elem.embeddedObject());
831 return {std::move(rootDocEq)};
832 }
833
834 /**
835 * Parses the given BSONElement into a single integer argument and creates a MatchExpression
836 * of type 'T' that gets initialized with the resulting integer.
837 */
838 template <class T>
parseInternalSchemaSingleIntegerArgument(StringData name,BSONElement elem)839 StatusWithMatchExpression parseInternalSchemaSingleIntegerArgument(StringData name,
840 BSONElement elem) {
841 auto parsedInt = MatchExpressionParser::parseIntegerElementToNonNegativeLong(elem);
842 if (!parsedInt.isOK()) {
843 return parsedInt.getStatus();
844 }
845
846 auto matchExpression = stdx::make_unique<T>();
847 auto status = matchExpression->init(name, parsedInt.getValue());
848 if (!status.isOK()) {
849 return status;
850 }
851
852 return {std::move(matchExpression)};
853 }
854
855 /**
856 * Same as the parseInternalSchemaSingleIntegerArgument function, but for top-level
857 * operators which don't have paths.
858 */
859 template <class T>
parseTopLevelInternalSchemaSingleIntegerArgument(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)860 StatusWithMatchExpression parseTopLevelInternalSchemaSingleIntegerArgument(
861 StringData name,
862 BSONElement elem,
863 const boost::intrusive_ptr<ExpressionContext>& expCtx,
864 const ExtensionsCallback* extensionsCallback,
865 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
866 DocumentParseLevel currentLevel) {
867 auto parsedInt = MatchExpressionParser::parseIntegerElementToNonNegativeLong(elem);
868 if (!parsedInt.isOK()) {
869 return parsedInt.getStatus();
870 }
871 auto matchExpression = stdx::make_unique<T>();
872 auto status = matchExpression->init(parsedInt.getValue());
873 if (!status.isOK()) {
874 return status;
875 }
876 return {std::move(matchExpression)};
877 }
878
879 /**
880 * Looks at the field named 'namePlaceholderFieldName' within 'containingObject' and parses a name
881 * placeholder from that element. 'expressionName' is the name of the expression that requires the
882 * name placeholder and is used to generate helpful error messages.
883 */
parseNamePlaceholder(const BSONObj & containingObject,StringData namePlaceholderFieldName,StringData expressionName)884 StatusWith<StringData> parseNamePlaceholder(const BSONObj& containingObject,
885 StringData namePlaceholderFieldName,
886 StringData expressionName) {
887 auto namePlaceholderElem = containingObject[namePlaceholderFieldName];
888 if (!namePlaceholderElem) {
889 return {ErrorCodes::FailedToParse,
890 str::stream() << expressionName << " requires a '" << namePlaceholderFieldName
891 << "'"};
892 } else if (namePlaceholderElem.type() != BSONType::String) {
893 return {ErrorCodes::TypeMismatch,
894 str::stream() << expressionName << " requires '" << namePlaceholderFieldName
895 << "' to be a string, not "
896 << namePlaceholderElem.type()};
897 }
898 return {namePlaceholderElem.valueStringData()};
899 }
900
901 /**
902 * Looks at the field named 'exprWithPlaceholderFieldName' within 'containingObject' and parses an
903 * ExpressionWithPlaceholder from that element. Fails if an error occurs during parsing, or if the
904 * ExpressionWithPlaceholder has a different name placeholder than 'expectedPlaceholder'.
905 * 'expressionName' is the name of the expression that requires the ExpressionWithPlaceholder and is
906 * used to generate helpful error messages.
907 */
parseExprWithPlaceholder(const BSONObj & containingObject,StringData exprWithPlaceholderFieldName,StringData expressionName,StringData expectedPlaceholder,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)908 StatusWith<std::unique_ptr<ExpressionWithPlaceholder>> parseExprWithPlaceholder(
909 const BSONObj& containingObject,
910 StringData exprWithPlaceholderFieldName,
911 StringData expressionName,
912 StringData expectedPlaceholder,
913 const boost::intrusive_ptr<ExpressionContext>& expCtx,
914 const ExtensionsCallback* extensionsCallback,
915 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
916 DocumentParseLevel currentLevel) {
917 auto exprWithPlaceholderElem = containingObject[exprWithPlaceholderFieldName];
918 if (!exprWithPlaceholderElem) {
919 return {ErrorCodes::FailedToParse,
920 str::stream() << expressionName << " requires '" << exprWithPlaceholderFieldName
921 << "'"};
922 } else if (exprWithPlaceholderElem.type() != BSONType::Object) {
923 return {ErrorCodes::TypeMismatch,
924 str::stream() << expressionName << " found '" << exprWithPlaceholderFieldName
925 << "', which is an incompatible type: "
926 << exprWithPlaceholderElem.type()};
927 }
928
929 auto filter = parse(exprWithPlaceholderElem.embeddedObject(),
930 expCtx,
931 extensionsCallback,
932 MatchExpressionParser::kBanAllSpecialFeatures,
933 currentLevel);
934
935 if (!filter.isOK()) {
936 return filter.getStatus();
937 }
938
939 auto result = ExpressionWithPlaceholder::make(std::move(filter.getValue()));
940 if (!result.isOK()) {
941 return result.getStatus();
942 }
943
944 auto placeholder = result.getValue()->getPlaceholder();
945 if (placeholder && (*placeholder != expectedPlaceholder)) {
946 return {ErrorCodes::FailedToParse,
947 str::stream() << expressionName << " expected a name placeholder of "
948 << expectedPlaceholder
949 << ", but '"
950 << exprWithPlaceholderElem.fieldNameStringData()
951 << "' has a mismatching placeholder '"
952 << *placeholder
953 << "'"};
954 }
955 return result;
956 }
957
958 StatusWith<std::vector<InternalSchemaAllowedPropertiesMatchExpression::PatternSchema>>
parsePatternProperties(BSONElement patternPropertiesElem,StringData expectedPlaceholder,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)959 parsePatternProperties(BSONElement patternPropertiesElem,
960 StringData expectedPlaceholder,
961 const boost::intrusive_ptr<ExpressionContext>& expCtx,
962 const ExtensionsCallback* extensionsCallback,
963 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
964 DocumentParseLevel currentLevel) {
965 if (!patternPropertiesElem) {
966 return {ErrorCodes::FailedToParse,
967 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
968 << " requires 'patternProperties'"};
969 } else if (patternPropertiesElem.type() != BSONType::Array) {
970 return {ErrorCodes::TypeMismatch,
971 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
972 << " requires 'patternProperties' to be an array, not "
973 << patternPropertiesElem.type()};
974 }
975
976 std::vector<InternalSchemaAllowedPropertiesMatchExpression::PatternSchema> patternProperties;
977 for (auto constraintElem : patternPropertiesElem.embeddedObject()) {
978 if (constraintElem.type() != BSONType::Object) {
979 return {ErrorCodes::TypeMismatch,
980 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
981 << " requires 'patternProperties' to be an array of objects"};
982 }
983
984 auto constraint = constraintElem.embeddedObject();
985 if (constraint.nFields() != 2) {
986 return {ErrorCodes::FailedToParse,
987 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
988 << " requires 'patternProperties' to be an array of objects "
989 "containing exactly two fields, 'regex' and 'expression'"};
990 }
991
992 auto expressionWithPlaceholder =
993 parseExprWithPlaceholder(constraint,
994 "expression"_sd,
995 InternalSchemaAllowedPropertiesMatchExpression::kName,
996 expectedPlaceholder,
997 expCtx,
998 extensionsCallback,
999 allowedFeatures,
1000 currentLevel);
1001 if (!expressionWithPlaceholder.isOK()) {
1002 return expressionWithPlaceholder.getStatus();
1003 }
1004
1005 auto regexElem = constraint["regex"];
1006 if (!regexElem) {
1007 return {
1008 ErrorCodes::FailedToParse,
1009 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1010 << " requires each object in 'patternProperties' to have a 'regex'"};
1011 }
1012 if (regexElem.type() != BSONType::RegEx) {
1013 return {ErrorCodes::TypeMismatch,
1014 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1015 << " requires 'patternProperties' to be an array of objects, "
1016 "where 'regex' is a regular expression"};
1017 } else if (*regexElem.regexFlags() != '\0') {
1018 return {
1019 ErrorCodes::BadValue,
1020 str::stream()
1021 << InternalSchemaAllowedPropertiesMatchExpression::kName
1022 << " does not accept regex flags for pattern schemas in 'patternProperties'"};
1023 }
1024
1025 patternProperties.emplace_back(
1026 InternalSchemaAllowedPropertiesMatchExpression::Pattern(regexElem.regex()),
1027 std::move(expressionWithPlaceholder.getValue()));
1028 }
1029
1030 return std::move(patternProperties);
1031 }
1032
parseProperties(BSONElement propertiesElem)1033 StatusWith<boost::container::flat_set<StringData>> parseProperties(BSONElement propertiesElem) {
1034 if (!propertiesElem) {
1035 return {ErrorCodes::FailedToParse,
1036 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1037 << " requires 'properties' to be present"};
1038 } else if (propertiesElem.type() != BSONType::Array) {
1039 return {ErrorCodes::TypeMismatch,
1040 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1041 << " requires 'properties' to be an array, not "
1042 << propertiesElem.type()};
1043 }
1044
1045 std::vector<StringData> properties;
1046 for (auto property : propertiesElem.embeddedObject()) {
1047 if (property.type() != BSONType::String) {
1048 return {
1049 ErrorCodes::TypeMismatch,
1050 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1051 << " requires 'properties' to be an array of strings, but found a "
1052 << property.type()};
1053 }
1054 properties.push_back(property.valueStringData());
1055 }
1056
1057 return boost::container::flat_set<StringData>(properties.begin(), properties.end());
1058 }
1059
parseInternalSchemaAllowedProperties(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1060 StatusWithMatchExpression parseInternalSchemaAllowedProperties(
1061 StringData name,
1062 BSONElement elem,
1063 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1064 const ExtensionsCallback* extensionsCallback,
1065 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1066 DocumentParseLevel currentLevel) {
1067 if (elem.type() != BSONType::Object) {
1068 return {ErrorCodes::TypeMismatch,
1069 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1070 << " must be an object"};
1071 }
1072
1073 auto subobj = elem.embeddedObject();
1074 if (subobj.nFields() != 4) {
1075 return {ErrorCodes::FailedToParse,
1076 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1077 << " requires exactly four fields: 'properties', 'namePlaceholder', "
1078 "'patternProperties' and 'otherwise'"};
1079 }
1080
1081 auto namePlaceholder = parseNamePlaceholder(
1082 subobj, "namePlaceholder"_sd, InternalSchemaAllowedPropertiesMatchExpression::kName);
1083 if (!namePlaceholder.isOK()) {
1084 return namePlaceholder.getStatus();
1085 }
1086
1087 auto patternProperties = parsePatternProperties(subobj["patternProperties"],
1088 namePlaceholder.getValue(),
1089 expCtx,
1090 extensionsCallback,
1091 allowedFeatures,
1092 currentLevel);
1093 if (!patternProperties.isOK()) {
1094 return patternProperties.getStatus();
1095 }
1096
1097 auto otherwise = parseExprWithPlaceholder(subobj,
1098 "otherwise"_sd,
1099 InternalSchemaAllowedPropertiesMatchExpression::kName,
1100 namePlaceholder.getValue(),
1101 expCtx,
1102 extensionsCallback,
1103 allowedFeatures,
1104 currentLevel);
1105 if (!otherwise.isOK()) {
1106 return otherwise.getStatus();
1107 }
1108
1109 auto properties = parseProperties(subobj["properties"]);
1110 if (!properties.isOK()) {
1111 return properties.getStatus();
1112 }
1113
1114 auto allowedPropertiesExpr =
1115 stdx::make_unique<InternalSchemaAllowedPropertiesMatchExpression>();
1116 auto status = allowedPropertiesExpr->init(std::move(properties.getValue()),
1117 namePlaceholder.getValue(),
1118 std::move(patternProperties.getValue()),
1119 std::move(otherwise.getValue()));
1120 if (!status.isOK()) {
1121 return status;
1122 }
1123
1124 return {std::move(allowedPropertiesExpr)};
1125 }
1126
1127 /**
1128 * Parses 'elem' into an InternalSchemaMatchArrayIndexMatchExpression.
1129 */
parseInternalSchemaMatchArrayIndex(StringData path,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1130 StatusWithMatchExpression parseInternalSchemaMatchArrayIndex(
1131 StringData path,
1132 BSONElement elem,
1133 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1134 const ExtensionsCallback* extensionsCallback,
1135 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1136 DocumentParseLevel currentLevel) {
1137 if (elem.type() != BSONType::Object) {
1138 return {ErrorCodes::TypeMismatch,
1139 str::stream() << InternalSchemaMatchArrayIndexMatchExpression::kName
1140 << " must be an object"};
1141 }
1142
1143 auto subobj = elem.embeddedObject();
1144 if (subobj.nFields() != 3) {
1145 return {ErrorCodes::FailedToParse,
1146 str::stream() << InternalSchemaMatchArrayIndexMatchExpression::kName
1147 << " requires exactly three fields: 'index', "
1148 "'namePlaceholder' and 'expression'"};
1149 }
1150
1151 auto index = MatchExpressionParser::parseIntegerElementToNonNegativeLong(subobj["index"]);
1152 if (!index.isOK()) {
1153 return index.getStatus();
1154 }
1155
1156 auto namePlaceholder = parseNamePlaceholder(
1157 subobj, "namePlaceholder"_sd, InternalSchemaMatchArrayIndexMatchExpression::kName);
1158 if (!namePlaceholder.isOK()) {
1159 return namePlaceholder.getStatus();
1160 }
1161
1162 auto expressionWithPlaceholder =
1163 parseExprWithPlaceholder(subobj,
1164 "expression"_sd,
1165 InternalSchemaMatchArrayIndexMatchExpression::kName,
1166 namePlaceholder.getValue(),
1167 expCtx,
1168 extensionsCallback,
1169 allowedFeatures,
1170 currentLevel);
1171 if (!expressionWithPlaceholder.isOK()) {
1172 return expressionWithPlaceholder.getStatus();
1173 }
1174
1175 auto matchArrayIndexExpr = stdx::make_unique<InternalSchemaMatchArrayIndexMatchExpression>();
1176 auto initStatus = matchArrayIndexExpr->init(
1177 path, index.getValue(), std::move(expressionWithPlaceholder.getValue()));
1178 if (!initStatus.isOK()) {
1179 return initStatus;
1180 }
1181 return {std::move(matchArrayIndexExpr)};
1182 }
1183
parseGeo(StringData name,PathAcceptingKeyword type,const BSONObj & section,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1184 StatusWithMatchExpression parseGeo(StringData name,
1185 PathAcceptingKeyword type,
1186 const BSONObj& section,
1187 MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1188 if (PathAcceptingKeyword::WITHIN == type || PathAcceptingKeyword::GEO_INTERSECTS == type) {
1189 auto gq = stdx::make_unique<GeoExpression>(name.toString());
1190 auto parseStatus = gq->parseFrom(section);
1191
1192 if (!parseStatus.isOK())
1193 return StatusWithMatchExpression(parseStatus);
1194
1195 auto e = stdx::make_unique<GeoMatchExpression>();
1196
1197 auto s = e->init(name, gq.release(), section);
1198 if (!s.isOK())
1199 return StatusWithMatchExpression(s);
1200 return {std::move(e)};
1201 } else {
1202 invariant(PathAcceptingKeyword::GEO_NEAR == type);
1203
1204 if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kGeoNear) == 0u) {
1205 return {Status(ErrorCodes::BadValue,
1206 "$geoNear, $near, and $nearSphere are not allowed in this context")};
1207 }
1208
1209 auto nq = stdx::make_unique<GeoNearExpression>(name.toString());
1210 auto s = nq->parseFrom(section);
1211 if (!s.isOK()) {
1212 return StatusWithMatchExpression(s);
1213 }
1214 auto e = stdx::make_unique<GeoNearMatchExpression>();
1215 s = e->init(name, nq.release(), section);
1216 if (!s.isOK())
1217 return StatusWithMatchExpression(s);
1218 return {std::move(e)};
1219 }
1220 }
1221
1222 template <class T>
parseTreeTopLevel(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1223 StatusWithMatchExpression parseTreeTopLevel(
1224 StringData name,
1225 BSONElement elem,
1226 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1227 const ExtensionsCallback* extensionsCallback,
1228 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1229 DocumentParseLevel currentLevel) {
1230 if (elem.type() != BSONType::Array) {
1231 return {Status(ErrorCodes::BadValue, str::stream() << T::kName << " must be an array")};
1232 }
1233
1234 auto temp = stdx::make_unique<T>();
1235
1236 auto arr = elem.Obj();
1237 if (arr.isEmpty()) {
1238 return Status(ErrorCodes::BadValue, "$and/$or/$nor must be a nonempty array");
1239 }
1240
1241 for (auto e : arr) {
1242 if (e.type() != BSONType::Object)
1243 return Status(ErrorCodes::BadValue, "$or/$and/$nor entries need to be full objects");
1244
1245 auto sub = parse(e.Obj(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1246 if (!sub.isOK())
1247 return sub.getStatus();
1248
1249 temp->add(sub.getValue().release());
1250 }
1251
1252 return {std::move(temp)};
1253 }
1254
parseElemMatch(StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1255 StatusWithMatchExpression parseElemMatch(StringData name,
1256 BSONElement e,
1257 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1258 const ExtensionsCallback* extensionsCallback,
1259 MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1260 if (e.type() != BSONType::Object)
1261 return {Status(ErrorCodes::BadValue, "$elemMatch needs an Object")};
1262
1263 auto obj = e.Obj();
1264
1265 // $elemMatch value case applies when the children all
1266 // work on the field 'name'.
1267 // This is the case when:
1268 // 1) the argument is an expression document; and
1269 // 2) expression is not a AND/NOR/OR logical operator. Children of
1270 // these logical operators are initialized with field names.
1271 // 3) expression is not a WHERE operator. WHERE works on objects instead
1272 // of specific field.
1273 bool isElemMatchValue = false;
1274 if (isExpressionDocument(e, true)) {
1275 auto elt = obj.firstElement();
1276 invariant(elt);
1277
1278 isElemMatchValue = !retrievePathlessParser(elt.fieldNameStringData().substr(1));
1279 }
1280
1281 if (isElemMatchValue) {
1282 // Value case.
1283
1284 AndMatchExpression theAnd;
1285 auto s = parseSub("",
1286 obj,
1287 &theAnd,
1288 expCtx,
1289 extensionsCallback,
1290 allowedFeatures,
1291 DocumentParseLevel::kUserSubDocument);
1292 if (!s.isOK())
1293 return s;
1294
1295 auto temp = stdx::make_unique<ElemMatchValueMatchExpression>();
1296 s = temp->init(name);
1297 if (!s.isOK())
1298 return s;
1299
1300 for (size_t i = 0; i < theAnd.numChildren(); i++) {
1301 temp->add(theAnd.getChild(i));
1302 }
1303 theAnd.clearAndRelease();
1304
1305 return {std::move(temp)};
1306 }
1307
1308 // DBRef value case
1309 // A DBRef document under a $elemMatch should be treated as an object case because it may
1310 // contain non-DBRef fields in addition to $ref, $id and $db.
1311
1312 // Object case.
1313
1314 auto subRaw = parse(
1315 obj, expCtx, extensionsCallback, allowedFeatures, DocumentParseLevel::kUserSubDocument);
1316 if (!subRaw.isOK())
1317 return subRaw;
1318 auto sub = std::move(subRaw.getValue());
1319
1320 // $where is not supported under $elemMatch because $where applies to top-level document, not
1321 // array elements in a field.
1322 if (hasNode(sub.get(), MatchExpression::WHERE)) {
1323 return {Status(ErrorCodes::BadValue, "$elemMatch cannot contain $where expression")};
1324 }
1325
1326 auto temp = stdx::make_unique<ElemMatchObjectMatchExpression>();
1327 auto status = temp->init(name, sub.release());
1328 if (!status.isOK())
1329 return status;
1330
1331 return {std::move(temp)};
1332 }
1333
parseAll(StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1334 StatusWithMatchExpression parseAll(StringData name,
1335 BSONElement e,
1336 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1337 const ExtensionsCallback* extensionsCallback,
1338 MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1339 if (e.type() != BSONType::Array)
1340 return {Status(ErrorCodes::BadValue, "$all needs an array")};
1341
1342 auto arr = e.Obj();
1343 auto myAnd = stdx::make_unique<AndMatchExpression>();
1344 BSONObjIterator i(arr);
1345
1346 if (arr.firstElement().type() == BSONType::Object &&
1347 "$elemMatch"_sd == arr.firstElement().Obj().firstElement().fieldNameStringData()) {
1348 // $all : [ { $elemMatch : {} } ... ]
1349
1350 while (i.more()) {
1351 auto hopefullyElemMatchElement = i.next();
1352
1353 if (hopefullyElemMatchElement.type() != BSONType::Object) {
1354 // $all : [ { $elemMatch : ... }, 5 ]
1355 return {Status(ErrorCodes::BadValue, "$all/$elemMatch has to be consistent")};
1356 }
1357
1358 auto hopefullyElemMatchObj = hopefullyElemMatchElement.Obj();
1359 if ("$elemMatch"_sd != hopefullyElemMatchObj.firstElement().fieldNameStringData()) {
1360 // $all : [ { $elemMatch : ... }, { x : 5 } ]
1361 return {Status(ErrorCodes::BadValue, "$all/$elemMatch has to be consistent")};
1362 }
1363
1364 auto inner = parseElemMatch(name,
1365 hopefullyElemMatchObj.firstElement(),
1366 expCtx,
1367 extensionsCallback,
1368 allowedFeatures);
1369 if (!inner.isOK())
1370 return inner;
1371 myAnd->add(inner.getValue().release());
1372 }
1373
1374 return {std::move(myAnd)};
1375 }
1376
1377 while (i.more()) {
1378 auto e = i.next();
1379
1380 if (e.type() == BSONType::RegEx) {
1381 auto r = stdx::make_unique<RegexMatchExpression>();
1382 auto s = r->init(name, e);
1383 if (!s.isOK())
1384 return s;
1385 myAnd->add(r.release());
1386 } else if (e.type() == BSONType::Object &&
1387 MatchExpressionParser::parsePathAcceptingKeyword(e.Obj().firstElement())) {
1388 return {Status(ErrorCodes::BadValue, "no $ expressions in $all")};
1389 } else {
1390 auto x = stdx::make_unique<EqualityMatchExpression>();
1391 auto s = x->init(name, e);
1392 if (!s.isOK())
1393 return s;
1394 x->setCollator(expCtx->getCollator());
1395 myAnd->add(x.release());
1396 }
1397 }
1398
1399 if (myAnd->numChildren() == 0) {
1400 return {stdx::make_unique<AlwaysFalseMatchExpression>()};
1401 }
1402
1403 return {std::move(myAnd)};
1404 }
1405
1406 /**
1407 * Parses a MatchExpression which takes a fixed-size array of MatchExpressions as arguments.
1408 */
1409 template <class T>
parseInternalSchemaFixedArityArgument(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1410 StatusWithMatchExpression parseInternalSchemaFixedArityArgument(
1411 StringData name,
1412 BSONElement elem,
1413 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1414 const ExtensionsCallback* extensionsCallback,
1415 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1416 DocumentParseLevel currentLevel) {
1417 constexpr auto arity = T::arity();
1418 if (elem.type() != BSONType::Array) {
1419 return {ErrorCodes::FailedToParse,
1420 str::stream() << elem.fieldNameStringData() << " must be an array of " << arity
1421 << " MatchExpressions"};
1422 }
1423
1424 auto inputObj = elem.embeddedObject();
1425 if (static_cast<size_t>(inputObj.nFields()) != arity) {
1426 return {ErrorCodes::FailedToParse,
1427 str::stream() << elem.fieldNameStringData() << " requires exactly " << arity
1428 << " MatchExpressions, but got "
1429 << inputObj.nFields()};
1430 }
1431
1432 // Fill out 'expressions' with all of the parsed subexpressions contained in the array,
1433 // tracking our location in the array with 'position'.
1434 std::array<std::unique_ptr<MatchExpression>, arity> expressions;
1435 auto position = expressions.begin();
1436
1437 for (auto obj : inputObj) {
1438 if (obj.type() != BSONType::Object) {
1439 return {ErrorCodes::FailedToParse,
1440 str::stream() << elem.fieldNameStringData()
1441 << " must be an array of objects, but found an element of type "
1442 << obj.type()};
1443 }
1444
1445 auto subexpr =
1446 parse(obj.embeddedObject(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1447 if (!subexpr.isOK()) {
1448 return subexpr.getStatus();
1449 }
1450 *position = std::move(subexpr.getValue());
1451 ++position;
1452 }
1453
1454 auto parsedExpression = stdx::make_unique<T>();
1455 parsedExpression->init(std::move(expressions));
1456 return {std::move(parsedExpression)};
1457 }
1458
parseNot(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1459 StatusWithMatchExpression parseNot(StringData name,
1460 BSONElement elem,
1461 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1462 const ExtensionsCallback* extensionsCallback,
1463 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1464 DocumentParseLevel currentLevel) {
1465 if (elem.type() == BSONType::RegEx) {
1466 auto s = parseRegexElement(name, elem);
1467 if (!s.isOK())
1468 return s;
1469 auto n = stdx::make_unique<NotMatchExpression>();
1470 auto s2 = n->init(s.getValue().release());
1471 if (!s2.isOK())
1472 return StatusWithMatchExpression(s2);
1473 return {std::move(n)};
1474 }
1475
1476 if (elem.type() != BSONType::Object)
1477 return StatusWithMatchExpression(ErrorCodes::BadValue, "$not needs a regex or a document");
1478
1479 auto notObject = elem.Obj();
1480 if (notObject.isEmpty())
1481 return StatusWithMatchExpression(ErrorCodes::BadValue, "$not cannot be empty");
1482
1483 auto theAnd = stdx::make_unique<AndMatchExpression>();
1484 auto s = parseSub(
1485 name, notObject, theAnd.get(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1486 if (!s.isOK())
1487 return StatusWithMatchExpression(s);
1488
1489 for (size_t i = 0; i < theAnd->numChildren(); i++)
1490 if (theAnd->getChild(i)->matchType() == MatchExpression::REGEX)
1491 return StatusWithMatchExpression(ErrorCodes::BadValue, "$not cannot have a regex");
1492
1493 auto theNot = stdx::make_unique<NotMatchExpression>();
1494 s = theNot->init(theAnd.release());
1495 if (!s.isOK())
1496 return StatusWithMatchExpression(s);
1497
1498 return {std::move(theNot)};
1499 }
1500
1501 /**
1502 * Parses a single field in a sub expression.
1503 * If the query is { x : { $gt : 5, $lt : 8 } },
1504 * 'e' is $gt : 5
1505 */
parseSubField(const BSONObj & context,const AndMatchExpression * andSoFar,StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1506 StatusWithMatchExpression parseSubField(const BSONObj& context,
1507 const AndMatchExpression* andSoFar,
1508 StringData name,
1509 BSONElement e,
1510 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1511 const ExtensionsCallback* extensionsCallback,
1512 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1513 DocumentParseLevel currentLevel) {
1514 invariant(e);
1515
1516 if ("$eq"_sd == e.fieldNameStringData()) {
1517 return parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1518 }
1519
1520 if ("$not"_sd == e.fieldNameStringData()) {
1521 return parseNot(name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1522 }
1523
1524 auto parseExpMatchType = MatchExpressionParser::parsePathAcceptingKeyword(e);
1525 if (!parseExpMatchType) {
1526 // $where cannot be a sub-expression because it works on top-level documents only.
1527 if ("$where"_sd == e.fieldNameStringData()) {
1528 return {Status(ErrorCodes::BadValue, "$where cannot be applied to a field")};
1529 }
1530
1531 return {Status(ErrorCodes::BadValue,
1532 str::stream() << "unknown operator: " << e.fieldNameStringData())};
1533 }
1534
1535 switch (*parseExpMatchType) {
1536 case PathAcceptingKeyword::LESS_THAN:
1537 return parseComparison(name, new LTMatchExpression(), e, expCtx, allowedFeatures);
1538 case PathAcceptingKeyword::LESS_THAN_OR_EQUAL:
1539 return parseComparison(name, new LTEMatchExpression(), e, expCtx, allowedFeatures);
1540 case PathAcceptingKeyword::GREATER_THAN:
1541 return parseComparison(name, new GTMatchExpression(), e, expCtx, allowedFeatures);
1542 case PathAcceptingKeyword::GREATER_THAN_OR_EQUAL:
1543 return parseComparison(name, new GTEMatchExpression(), e, expCtx, allowedFeatures);
1544 case PathAcceptingKeyword::NOT_EQUAL: {
1545 if (BSONType::RegEx == e.type()) {
1546 // Just because $ne can be rewritten as the negation of an equality does not mean
1547 // that $ne of a regex is allowed. See SERVER-1705.
1548 return {Status(ErrorCodes::BadValue, "Can't have regex as arg to $ne.")};
1549 }
1550 auto s =
1551 parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1552 if (!s.isOK())
1553 return s;
1554 auto n = stdx::make_unique<NotMatchExpression>();
1555 auto s2 = n->init(s.getValue().release());
1556 if (!s2.isOK())
1557 return s2;
1558 return {std::move(n)};
1559 }
1560 case PathAcceptingKeyword::EQUALITY:
1561 return parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1562
1563 case PathAcceptingKeyword::IN_EXPR: {
1564 if (e.type() != BSONType::Array)
1565 return {Status(ErrorCodes::BadValue, "$in needs an array")};
1566 auto temp = stdx::make_unique<InMatchExpression>();
1567 auto s = temp->init(name);
1568 if (!s.isOK())
1569 return s;
1570 s = parseInExpression(temp.get(), e.Obj(), expCtx);
1571 if (!s.isOK())
1572 return s;
1573 return {std::move(temp)};
1574 }
1575
1576 case PathAcceptingKeyword::NOT_IN: {
1577 if (e.type() != BSONType::Array)
1578 return {Status(ErrorCodes::BadValue, "$nin needs an array")};
1579 auto temp = stdx::make_unique<InMatchExpression>();
1580 auto s = temp->init(name);
1581 if (!s.isOK())
1582 return s;
1583 s = parseInExpression(temp.get(), e.Obj(), expCtx);
1584 if (!s.isOK())
1585 return s;
1586
1587 auto temp2 = stdx::make_unique<NotMatchExpression>();
1588 s = temp2->init(temp.release());
1589 if (!s.isOK())
1590 return s;
1591
1592 return {std::move(temp2)};
1593 }
1594
1595 case PathAcceptingKeyword::SIZE: {
1596 int size = 0;
1597 if (e.type() == BSONType::NumberInt) {
1598 size = e.numberInt();
1599 } else if (e.type() == BSONType::NumberLong) {
1600 if (e.numberInt() == e.numberLong()) {
1601 size = e.numberInt();
1602 } else {
1603 return {Status(ErrorCodes::BadValue,
1604 "$size must be representable as a 32-bit integer")};
1605 }
1606 } else if (e.type() == BSONType::NumberDouble) {
1607 if (e.numberInt() == e.numberDouble()) {
1608 size = e.numberInt();
1609 } else {
1610 return {Status(ErrorCodes::BadValue, "$size must be a whole number")};
1611 }
1612 } else {
1613 return {Status(ErrorCodes::BadValue, "$size needs a number")};
1614 }
1615 if (size < 0) {
1616 return {Status(ErrorCodes::BadValue, "$size may not be negative")};
1617 }
1618
1619 auto temp = stdx::make_unique<SizeMatchExpression>();
1620 auto s = temp->init(name, size);
1621 if (!s.isOK())
1622 return s;
1623 return {std::move(temp)};
1624 }
1625
1626 case PathAcceptingKeyword::EXISTS: {
1627 if (!e)
1628 return {Status(ErrorCodes::BadValue, "$exists can't be eoo")};
1629 auto temp = stdx::make_unique<ExistsMatchExpression>();
1630 auto s = temp->init(name);
1631 if (!s.isOK())
1632 return s;
1633 if (e.trueValue())
1634 return {std::move(temp)};
1635 auto temp2 = stdx::make_unique<NotMatchExpression>();
1636 s = temp2->init(temp.release());
1637 if (!s.isOK())
1638 return s;
1639 return {std::move(temp2)};
1640 }
1641
1642 case PathAcceptingKeyword::TYPE:
1643 return parseType<TypeMatchExpression>(name, e);
1644
1645 case PathAcceptingKeyword::MOD:
1646 return parseMOD(name, e);
1647
1648 case PathAcceptingKeyword::OPTIONS: {
1649 // TODO: try to optimize this
1650 // we have to do this since $options can be before or after a $regex
1651 // but we validate here
1652 for (auto temp : context) {
1653 if (MatchExpressionParser::parsePathAcceptingKeyword(temp) ==
1654 PathAcceptingKeyword::REGEX)
1655 return {nullptr};
1656 }
1657
1658 return {Status(ErrorCodes::BadValue, "$options needs a $regex")};
1659 }
1660
1661 case PathAcceptingKeyword::REGEX: {
1662 return parseRegexDocument(name, context);
1663 }
1664
1665 case PathAcceptingKeyword::ELEM_MATCH:
1666 return parseElemMatch(name, e, expCtx, extensionsCallback, allowedFeatures);
1667
1668 case PathAcceptingKeyword::ALL:
1669 return parseAll(name, e, expCtx, extensionsCallback, allowedFeatures);
1670
1671 case PathAcceptingKeyword::WITHIN:
1672 case PathAcceptingKeyword::GEO_INTERSECTS:
1673 return parseGeo(name, *parseExpMatchType, context, allowedFeatures);
1674
1675 case PathAcceptingKeyword::GEO_NEAR:
1676 return {Status(ErrorCodes::BadValue,
1677 str::stream() << "near must be first in: " << context)};
1678
1679 case PathAcceptingKeyword::INTERNAL_EXPR_EQ: {
1680 if (e.type() == BSONType::Undefined || e.type() == BSONType::Array) {
1681 return {Status(ErrorCodes::BadValue,
1682 str::stream() << InternalExprEqMatchExpression::kName
1683 << " cannot be used to compare to type: "
1684 << typeName(e.type()))};
1685 }
1686
1687 auto exprEqExpr = stdx::make_unique<InternalExprEqMatchExpression>();
1688 auto status = exprEqExpr->init(name, e);
1689 if (!status.isOK()) {
1690 return status;
1691 }
1692 exprEqExpr->setCollator(expCtx->getCollator());
1693 return {std::move(exprEqExpr)};
1694 }
1695
1696 // Handles bitwise query operators.
1697 case PathAcceptingKeyword::BITS_ALL_SET: {
1698 return parseBitTest<BitsAllSetMatchExpression>(name, e);
1699 }
1700
1701 case PathAcceptingKeyword::BITS_ALL_CLEAR: {
1702 return parseBitTest<BitsAllClearMatchExpression>(name, e);
1703 }
1704
1705 case PathAcceptingKeyword::BITS_ANY_SET: {
1706 return parseBitTest<BitsAnySetMatchExpression>(name, e);
1707 }
1708
1709 case PathAcceptingKeyword::BITS_ANY_CLEAR: {
1710 return parseBitTest<BitsAnyClearMatchExpression>(name, e);
1711 }
1712
1713 case PathAcceptingKeyword::INTERNAL_SCHEMA_FMOD:
1714 return parseInternalSchemaFmod(name, e);
1715
1716 case PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_ITEMS: {
1717 return parseInternalSchemaSingleIntegerArgument<InternalSchemaMinItemsMatchExpression>(
1718 name, e);
1719 }
1720
1721 case PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_ITEMS: {
1722 return parseInternalSchemaSingleIntegerArgument<InternalSchemaMaxItemsMatchExpression>(
1723 name, e);
1724 }
1725
1726 case PathAcceptingKeyword::INTERNAL_SCHEMA_OBJECT_MATCH: {
1727 if (e.type() != BSONType::Object) {
1728 return Status(ErrorCodes::FailedToParse,
1729 str::stream() << "$_internalSchemaObjectMatch must be an object");
1730 }
1731
1732 auto parsedSubObjExpr = parse(e.Obj(),
1733 expCtx,
1734 extensionsCallback,
1735 allowedFeatures,
1736 DocumentParseLevel::kUserSubDocument);
1737 if (!parsedSubObjExpr.isOK()) {
1738 return parsedSubObjExpr;
1739 }
1740
1741 auto expr = stdx::make_unique<InternalSchemaObjectMatchExpression>();
1742 auto status = expr->init(std::move(parsedSubObjExpr.getValue()), name);
1743 if (!status.isOK()) {
1744 return status;
1745 }
1746 return {std::move(expr)};
1747 }
1748
1749 case PathAcceptingKeyword::INTERNAL_SCHEMA_UNIQUE_ITEMS: {
1750 if (!e.isBoolean() || !e.boolean()) {
1751 return {ErrorCodes::FailedToParse,
1752 str::stream() << name << " must be a boolean of value true"};
1753 }
1754
1755 auto expr = stdx::make_unique<InternalSchemaUniqueItemsMatchExpression>();
1756 auto status = expr->init(name);
1757 if (!status.isOK()) {
1758 return status;
1759 }
1760 return {std::move(expr)};
1761 }
1762
1763 case PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_LENGTH: {
1764 return parseInternalSchemaSingleIntegerArgument<InternalSchemaMinLengthMatchExpression>(
1765 name, e);
1766 }
1767
1768 case PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_LENGTH: {
1769 return parseInternalSchemaSingleIntegerArgument<InternalSchemaMaxLengthMatchExpression>(
1770 name, e);
1771 }
1772
1773 case PathAcceptingKeyword::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX: {
1774 return parseInternalSchemaMatchArrayIndex(
1775 name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1776 }
1777
1778 case PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: {
1779 if (e.type() != BSONType::Array) {
1780 return Status(ErrorCodes::FailedToParse,
1781 str::stream()
1782 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1783 << " must be an array");
1784 }
1785 auto elemMatchObj = e.embeddedObject();
1786 auto iter = BSONObjIterator(elemMatchObj);
1787 if (!iter.more()) {
1788 return Status(ErrorCodes::FailedToParse,
1789 str::stream()
1790 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1791 << " must be an array of size 2");
1792 }
1793 auto first = iter.next();
1794 auto parsedIndex = MatchExpressionParser::parseIntegerElementToNonNegativeLong(first);
1795 if (!parsedIndex.isOK()) {
1796 return Status(ErrorCodes::TypeMismatch,
1797 str::stream()
1798 << "first element of "
1799 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1800 << " must be a non-negative integer");
1801 }
1802 if (!iter.more()) {
1803 return Status(ErrorCodes::FailedToParse,
1804 str::stream()
1805 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1806 << " must be an array of size 2");
1807 }
1808 auto second = iter.next();
1809 if (iter.more()) {
1810 return Status(ErrorCodes::FailedToParse,
1811 str::stream()
1812 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1813 << " has too many elements, must be an array of size 2");
1814 }
1815 if (second.type() != BSONType::Object) {
1816 return Status(ErrorCodes::TypeMismatch,
1817 str::stream()
1818 << "second element of "
1819 << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1820 << "must be an object");
1821 }
1822
1823 auto filter = parse(second.embeddedObject(),
1824 expCtx,
1825 extensionsCallback,
1826 MatchExpressionParser::kBanAllSpecialFeatures,
1827 DocumentParseLevel::kUserSubDocument);
1828
1829 if (!filter.isOK()) {
1830 return filter.getStatus();
1831 }
1832
1833 auto exprWithPlaceholder =
1834 ExpressionWithPlaceholder::make(std::move(filter.getValue()));
1835 if (!exprWithPlaceholder.isOK()) {
1836 return exprWithPlaceholder.getStatus();
1837 }
1838
1839 auto expr = stdx::make_unique<InternalSchemaAllElemMatchFromIndexMatchExpression>();
1840 auto status =
1841 expr->init(name, parsedIndex.getValue(), std::move(exprWithPlaceholder.getValue()));
1842 if (!status.isOK()) {
1843 return status;
1844 }
1845 return {std::move(expr)};
1846 }
1847
1848 case PathAcceptingKeyword::INTERNAL_SCHEMA_TYPE: {
1849 return parseType<InternalSchemaTypeExpression>(name, e);
1850 }
1851
1852 case PathAcceptingKeyword::INTERNAL_SCHEMA_EQ: {
1853 auto eqExpr = stdx::make_unique<InternalSchemaEqMatchExpression>();
1854 auto status = eqExpr->init(name, e);
1855 if (!status.isOK()) {
1856 return status;
1857 }
1858 return {std::move(eqExpr)};
1859 }
1860 }
1861
1862 return {
1863 Status(ErrorCodes::BadValue, str::stream() << "not handled: " << e.fieldNameStringData())};
1864 }
1865
1866 /**
1867 * Parses a field in a sub expression.
1868 * If the query is { x : { $gt : 5, $lt : 8 } },
1869 * 'e' is { $gt : 5, $lt : 8 }
1870 */
parseSub(StringData name,const BSONObj & sub,AndMatchExpression * root,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1871 Status parseSub(StringData name,
1872 const BSONObj& sub,
1873 AndMatchExpression* root,
1874 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1875 const ExtensionsCallback* extensionsCallback,
1876 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1877 DocumentParseLevel currentLevel) {
1878 // The one exception to {field : {fully contained argument} } is, of course, geo. Example:
1879 // sub == { field : {$near[Sphere]: [0,0], $maxDistance: 1000, $minDistance: 10 } }
1880 // We peek inside of 'sub' to see if it's possibly a $near. If so, we can't iterate over its
1881 // subfields and parse them one at a time (there is no $maxDistance without $near), so we hand
1882 // the entire object over to the geo parsing routines.
1883
1884 // Special case parsing for geoNear. This is necessary in order to support query formats like
1885 // {$near: <coords>, $maxDistance: <distance>}. No other query operators allow $-prefixed
1886 // modifiers as sibling BSON elements.
1887 BSONObjIterator geoIt(sub);
1888 if (geoIt.more()) {
1889 auto firstElt = geoIt.next();
1890 if (firstElt.isABSONObj()) {
1891 if (MatchExpressionParser::parsePathAcceptingKeyword(firstElt) ==
1892 PathAcceptingKeyword::GEO_NEAR) {
1893 auto s = parseGeo(name, PathAcceptingKeyword::GEO_NEAR, sub, allowedFeatures);
1894 if (s.isOK()) {
1895 root->add(s.getValue().release());
1896 }
1897
1898 // Propagate geo parsing result to caller.
1899 return s.getStatus();
1900 }
1901 }
1902 }
1903
1904 for (auto deep : sub) {
1905 auto s = parseSubField(
1906 sub, root, name, deep, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1907 if (!s.isOK())
1908 return s.getStatus();
1909
1910 if (s.getValue())
1911 root->add(s.getValue().release());
1912 }
1913
1914 return Status::OK();
1915 }
1916
1917 } // namespace
1918
parse(const BSONObj & obj,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback & extensionsCallback,AllowedFeatureSet allowedFeatures)1919 StatusWithMatchExpression MatchExpressionParser::parse(
1920 const BSONObj& obj,
1921 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1922 const ExtensionsCallback& extensionsCallback,
1923 AllowedFeatureSet allowedFeatures) {
1924 invariant(expCtx.get());
1925 const DocumentParseLevel currentLevelCall = DocumentParseLevel::kPredicateTopLevel;
1926 try {
1927 return ::mongo::parse(obj, expCtx, &extensionsCallback, allowedFeatures, currentLevelCall);
1928 } catch (const DBException& ex) {
1929 return {ex.toStatus()};
1930 }
1931 }
1932
1933 namespace {
1934 // Maps from query operator string name to function.
1935 std::unique_ptr<StringMap<
1936 stdx::function<StatusWithMatchExpression(StringData,
1937 BSONElement,
1938 const boost::intrusive_ptr<ExpressionContext>&,
1939 const ExtensionsCallback*,
1940 MatchExpressionParser::AllowedFeatureSet,
1941 DocumentParseLevel)>>>
1942 pathlessOperatorMap;
1943
MONGO_INITIALIZER(PathlessOperatorMap)1944 MONGO_INITIALIZER(PathlessOperatorMap)(InitializerContext* context) {
1945 pathlessOperatorMap = stdx::make_unique<StringMap<
1946 stdx::function<StatusWithMatchExpression(StringData,
1947 BSONElement,
1948 const boost::intrusive_ptr<ExpressionContext>&,
1949 const ExtensionsCallback*,
1950 MatchExpressionParser::AllowedFeatureSet,
1951 DocumentParseLevel)>>>(
1952 StringMap<
1953 stdx::function<StatusWithMatchExpression(StringData,
1954 BSONElement,
1955 const boost::intrusive_ptr<ExpressionContext>&,
1956 const ExtensionsCallback*,
1957 MatchExpressionParser::AllowedFeatureSet,
1958 DocumentParseLevel)>>{
1959 {"_internalSchemaAllowedProperties", &parseInternalSchemaAllowedProperties},
1960 {"_internalSchemaCond",
1961 &parseInternalSchemaFixedArityArgument<InternalSchemaCondMatchExpression>},
1962 {"_internalSchemaMaxProperties",
1963 &parseTopLevelInternalSchemaSingleIntegerArgument<
1964 InternalSchemaMaxPropertiesMatchExpression>},
1965 {"_internalSchemaMinProperties",
1966 &parseTopLevelInternalSchemaSingleIntegerArgument<
1967 InternalSchemaMinPropertiesMatchExpression>},
1968 {"_internalSchemaRootDocEq", &parseInternalSchemaRootDocEq},
1969 {"_internalSchemaXor", &parseTreeTopLevel<InternalSchemaXorMatchExpression>},
1970 {"alwaysFalse", &parseAlwaysBoolean<AlwaysFalseMatchExpression>},
1971 {"alwaysTrue", &parseAlwaysBoolean<AlwaysTrueMatchExpression>},
1972 {"and", &parseTreeTopLevel<AndMatchExpression>},
1973 {"atomic", &parseAtomicOrIsolated},
1974 {"comment", &parseComment},
1975 {"db", &parseDBRef},
1976 {"expr", &parseExpr},
1977 {"id", &parseDBRef},
1978 {"isolated", &parseAtomicOrIsolated},
1979 {"jsonSchema", &parseJSONSchema},
1980 {"nor", &parseTreeTopLevel<NorMatchExpression>},
1981 {"or", &parseTreeTopLevel<OrMatchExpression>},
1982 {"ref", &parseDBRef},
1983 {"text", &parseText},
1984 {"where", &parseWhere},
1985 });
1986 return Status::OK();
1987 }
1988
1989 // Maps from query operator string name to operator PathAcceptingKeyword.
1990 std::unique_ptr<StringMap<PathAcceptingKeyword>> queryOperatorMap;
1991
MONGO_INITIALIZER(MatchExpressionParser)1992 MONGO_INITIALIZER(MatchExpressionParser)(InitializerContext* context) {
1993 queryOperatorMap =
1994 stdx::make_unique<StringMap<PathAcceptingKeyword>>(StringMap<PathAcceptingKeyword>{
1995 // TODO: SERVER-19565 Add $eq after auditing callers.
1996 {"_internalExprEq", PathAcceptingKeyword::INTERNAL_EXPR_EQ},
1997 {"_internalSchemaAllElemMatchFromIndex",
1998 PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX},
1999 {"_internalSchemaEq", PathAcceptingKeyword::INTERNAL_SCHEMA_EQ},
2000 {"_internalSchemaFmod", PathAcceptingKeyword::INTERNAL_SCHEMA_FMOD},
2001 {"_internalSchemaMatchArrayIndex",
2002 PathAcceptingKeyword::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX},
2003 {"_internalSchemaMaxItems", PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_ITEMS},
2004 {"_internalSchemaMaxLength", PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_LENGTH},
2005 {"_internalSchemaMinItems", PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_ITEMS},
2006 {"_internalSchemaMinItems", PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_ITEMS},
2007 {"_internalSchemaMinLength", PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_LENGTH},
2008 {"_internalSchemaObjectMatch", PathAcceptingKeyword::INTERNAL_SCHEMA_OBJECT_MATCH},
2009 {"_internalSchemaType", PathAcceptingKeyword::INTERNAL_SCHEMA_TYPE},
2010 {"_internalSchemaUniqueItems", PathAcceptingKeyword::INTERNAL_SCHEMA_UNIQUE_ITEMS},
2011 {"all", PathAcceptingKeyword::ALL},
2012 {"bitsAllClear", PathAcceptingKeyword::BITS_ALL_CLEAR},
2013 {"bitsAllSet", PathAcceptingKeyword::BITS_ALL_SET},
2014 {"bitsAnyClear", PathAcceptingKeyword::BITS_ANY_CLEAR},
2015 {"bitsAnySet", PathAcceptingKeyword::BITS_ANY_SET},
2016 {"elemMatch", PathAcceptingKeyword::ELEM_MATCH},
2017 {"exists", PathAcceptingKeyword::EXISTS},
2018 {"geoIntersects", PathAcceptingKeyword::GEO_INTERSECTS},
2019 {"geoNear", PathAcceptingKeyword::GEO_NEAR},
2020 {"geoWithin", PathAcceptingKeyword::WITHIN},
2021 {"gt", PathAcceptingKeyword::GREATER_THAN},
2022 {"gte", PathAcceptingKeyword::GREATER_THAN_OR_EQUAL},
2023 {"in", PathAcceptingKeyword::IN_EXPR},
2024 {"lt", PathAcceptingKeyword::LESS_THAN},
2025 {"lte", PathAcceptingKeyword::LESS_THAN_OR_EQUAL},
2026 {"mod", PathAcceptingKeyword::MOD},
2027 {"ne", PathAcceptingKeyword::NOT_EQUAL},
2028 {"near", PathAcceptingKeyword::GEO_NEAR},
2029 {"nearSphere", PathAcceptingKeyword::GEO_NEAR},
2030 {"nin", PathAcceptingKeyword::NOT_IN},
2031 {"options", PathAcceptingKeyword::OPTIONS},
2032 {"regex", PathAcceptingKeyword::REGEX},
2033 {"size", PathAcceptingKeyword::SIZE},
2034 {"type", PathAcceptingKeyword::TYPE},
2035 {"within", PathAcceptingKeyword::WITHIN},
2036 });
2037 return Status::OK();
2038 }
2039
2040 /**
2041 * Returns the proper parser for the indicated pathless operator. Returns 'null' if 'name'
2042 * doesn't represent a known type.
2043 */
2044 stdx::function<StatusWithMatchExpression(StringData,
2045 BSONElement,
2046 const boost::intrusive_ptr<ExpressionContext>&,
2047 const ExtensionsCallback*,
2048 MatchExpressionParser::AllowedFeatureSet,
2049 DocumentParseLevel)>
retrievePathlessParser(StringData name)2050 retrievePathlessParser(StringData name) {
2051 auto func = pathlessOperatorMap->find(name);
2052 if (func == pathlessOperatorMap->end()) {
2053 return nullptr;
2054 }
2055 return func->second;
2056 }
2057 } // namespace
2058
parsePathAcceptingKeyword(BSONElement typeElem,boost::optional<PathAcceptingKeyword> defaultKeyword)2059 boost::optional<PathAcceptingKeyword> MatchExpressionParser::parsePathAcceptingKeyword(
2060 BSONElement typeElem, boost::optional<PathAcceptingKeyword> defaultKeyword) {
2061 auto fieldName = typeElem.fieldNameStringData();
2062 if (fieldName[0] == '$' && fieldName[1]) {
2063 auto opName = fieldName.substr(1);
2064 auto queryOp = queryOperatorMap->find(opName);
2065
2066 if (queryOp == queryOperatorMap->end()) {
2067 return defaultKeyword;
2068 }
2069 return queryOp->second;
2070 }
2071 return defaultKeyword;
2072 }
2073 } // namespace mongo
2074