1 // expression_parser.cpp
2 
3 
4 /**
5  *    Copyright (C) 2018-present MongoDB, Inc.
6  *
7  *    This program is free software: you can redistribute it and/or modify
8  *    it under the terms of the Server Side Public License, version 1,
9  *    as published by MongoDB, Inc.
10  *
11  *    This program is distributed in the hope that it will be useful,
12  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *    Server Side Public License for more details.
15  *
16  *    You should have received a copy of the Server Side Public License
17  *    along with this program. If not, see
18  *    <http://www.mongodb.com/licensing/server-side-public-license>.
19  *
20  *    As a special exception, the copyright holders give permission to link the
21  *    code of portions of this program with the OpenSSL library under certain
22  *    conditions as described in each individual source file and distribute
23  *    linked combinations including the program with the OpenSSL library. You
24  *    must comply with the Server Side Public License in all respects for
25  *    all of the code used other than as permitted herein. If you modify file(s)
26  *    with this exception, you may extend this exception to your version of the
27  *    file(s), but you are not obligated to do so. If you do not wish to do so,
28  *    delete this exception statement from your version. If you delete this
29  *    exception statement from all source files in the program, then also delete
30  *    it in the license file.
31  */
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/matcher/expression_parser.h"
36 
37 #include <boost/container/flat_set.hpp>
38 #include <pcrecpp.h>
39 
40 #include "mongo/base/init.h"
41 #include "mongo/bson/bsonmisc.h"
42 #include "mongo/bson/bsonobj.h"
43 #include "mongo/bson/bsonobjbuilder.h"
44 #include "mongo/db/matcher/expression_always_boolean.h"
45 #include "mongo/db/matcher/expression_array.h"
46 #include "mongo/db/matcher/expression_expr.h"
47 #include "mongo/db/matcher/expression_geo.h"
48 #include "mongo/db/matcher/expression_internal_expr_eq.h"
49 #include "mongo/db/matcher/expression_leaf.h"
50 #include "mongo/db/matcher/expression_tree.h"
51 #include "mongo/db/matcher/expression_type.h"
52 #include "mongo/db/matcher/expression_with_placeholder.h"
53 #include "mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h"
54 #include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h"
55 #include "mongo/db/matcher/schema/expression_internal_schema_cond.h"
56 #include "mongo/db/matcher/schema/expression_internal_schema_eq.h"
57 #include "mongo/db/matcher/schema/expression_internal_schema_fmod.h"
58 #include "mongo/db/matcher/schema/expression_internal_schema_match_array_index.h"
59 #include "mongo/db/matcher/schema/expression_internal_schema_max_items.h"
60 #include "mongo/db/matcher/schema/expression_internal_schema_max_length.h"
61 #include "mongo/db/matcher/schema/expression_internal_schema_max_properties.h"
62 #include "mongo/db/matcher/schema/expression_internal_schema_min_items.h"
63 #include "mongo/db/matcher/schema/expression_internal_schema_min_length.h"
64 #include "mongo/db/matcher/schema/expression_internal_schema_min_properties.h"
65 #include "mongo/db/matcher/schema/expression_internal_schema_object_match.h"
66 #include "mongo/db/matcher/schema/expression_internal_schema_root_doc_eq.h"
67 #include "mongo/db/matcher/schema/expression_internal_schema_unique_items.h"
68 #include "mongo/db/matcher/schema/expression_internal_schema_xor.h"
69 #include "mongo/db/matcher/schema/json_schema_parser.h"
70 #include "mongo/db/namespace_string.h"
71 #include "mongo/db/query/query_knobs.h"
72 #include "mongo/stdx/memory.h"
73 #include "mongo/util/mongoutils/str.h"
74 #include "mongo/util/string_map.h"
75 
76 namespace {
77 
78 using namespace mongo;
79 
80 /**
81  * Returns true if subtree contains MatchExpression 'type'.
82  */
hasNode(const MatchExpression * root,MatchExpression::MatchType type)83 bool hasNode(const MatchExpression* root, MatchExpression::MatchType type) {
84     if (type == root->matchType()) {
85         return true;
86     }
87     for (size_t i = 0; i < root->numChildren(); ++i) {
88         if (hasNode(root->getChild(i), type)) {
89             return true;
90         }
91     }
92     return false;
93 }
94 
95 }  // namespace
96 
97 namespace mongo {
98 
99 constexpr StringData AlwaysFalseMatchExpression::kName;
100 constexpr StringData AlwaysTrueMatchExpression::kName;
101 constexpr StringData OrMatchExpression::kName;
102 constexpr StringData AndMatchExpression::kName;
103 constexpr StringData NorMatchExpression::kName;
104 
105 const double MatchExpressionParser::kLongLongMaxPlusOneAsDouble =
106     scalbn(1, std::numeric_limits<long long>::digits);
107 
/**
 * Describes where the parser currently is while it descends a MatchExpression tree.
 */
enum class DocumentParseLevel {
    // The parser is at the root of the BSON object that holds the user's query predicate.
    kPredicateTopLevel,
    // Match expression nodes at this position are matched against the complete user document,
    // not against a nested document or a subdocument inside an array.
    kUserDocumentTopLevel,
    // Match expression nodes at this position are matched against a nested document or a
    // subdocument inside an array.
    kUserSubDocument,
};
124 
parseIntegerElementToNonNegativeLong(BSONElement elem)125 StatusWith<long long> MatchExpressionParser::parseIntegerElementToNonNegativeLong(
126     BSONElement elem) {
127     auto number = parseIntegerElementToLong(elem);
128     if (!number.isOK()) {
129         return number;
130     }
131 
132     if (number.getValue() < 0) {
133         return Status(ErrorCodes::FailedToParse,
134                       str::stream() << "Expected a positive number in: " << elem);
135     }
136 
137     return number;
138 }
139 
parseIntegerElementToLong(BSONElement elem)140 StatusWith<long long> MatchExpressionParser::parseIntegerElementToLong(BSONElement elem) {
141     if (!elem.isNumber()) {
142         return Status(ErrorCodes::FailedToParse, str::stream() << "Expected a number in: " << elem);
143     }
144 
145     long long number = 0;
146     if (elem.type() == BSONType::NumberDouble) {
147         auto eDouble = elem.numberDouble();
148 
149         // NaN doubles are rejected.
150         if (std::isnan(eDouble)) {
151             return Status(ErrorCodes::FailedToParse,
152                           str::stream() << "Expected an integer, but found NaN in: " << elem);
153         }
154 
155         // No integral doubles that are too large to be represented as a 64 bit signed integer.
156         // We use 'kLongLongMaxAsDouble' because if we just did eDouble > 2^63-1, it would be
157         // compared against 2^63. eDouble=2^63 would not get caught that way.
158         if (eDouble >= MatchExpressionParser::kLongLongMaxPlusOneAsDouble ||
159             eDouble < std::numeric_limits<long long>::min()) {
160             return Status(ErrorCodes::FailedToParse,
161                           str::stream() << "Cannot represent as a 64-bit integer: " << elem);
162         }
163 
164         // This checks if elem is an integral double.
165         if (eDouble != static_cast<double>(static_cast<long long>(eDouble))) {
166             return Status(ErrorCodes::FailedToParse,
167                           str::stream() << "Expected an integer: " << elem);
168         }
169 
170         number = elem.numberLong();
171     } else if (elem.type() == BSONType::NumberDecimal) {
172         uint32_t signalingFlags = Decimal128::kNoFlag;
173         number = elem.numberDecimal().toLongExact(&signalingFlags);
174         if (signalingFlags != Decimal128::kNoFlag) {
175             return Status(ErrorCodes::FailedToParse,
176                           str::stream() << "Cannot represent as a 64-bit integer: " << elem);
177         }
178     } else {
179         number = elem.numberLong();
180     }
181 
182     return number;
183 }
184 
parseIntegerElementToInt(BSONElement elem)185 StatusWith<int> MatchExpressionParser::parseIntegerElementToInt(BSONElement elem) {
186     auto parsedLong = MatchExpressionParser::parseIntegerElementToLong(elem);
187     if (!parsedLong.isOK()) {
188         return parsedLong.getStatus();
189     }
190 
191     auto valueLong = parsedLong.getValue();
192     if (valueLong < std::numeric_limits<int>::min() ||
193         valueLong > std::numeric_limits<int>::max()) {
194         return {ErrorCodes::FailedToParse,
195                 str::stream() << "Cannot represent " << elem << " in an int"};
196     }
197     return static_cast<int>(valueLong);
198 }
199 
200 namespace {
201 
202 // Forward declarations.
203 
204 Status parseSub(StringData name,
205                 const BSONObj& sub,
206                 AndMatchExpression* root,
207                 const boost::intrusive_ptr<ExpressionContext>& expCtx,
208                 const ExtensionsCallback* extensionsCallback,
209                 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
210                 DocumentParseLevel currentLevel);
211 
212 stdx::function<StatusWithMatchExpression(StringData,
213                                          BSONElement,
214                                          const boost::intrusive_ptr<ExpressionContext>&,
215                                          const ExtensionsCallback*,
216                                          MatchExpressionParser::AllowedFeatureSet,
217                                          DocumentParseLevel)>
218 retrievePathlessParser(StringData name);
219 
parseRegexElement(StringData name,BSONElement e)220 StatusWithMatchExpression parseRegexElement(StringData name, BSONElement e) {
221     if (e.type() != BSONType::RegEx)
222         return {Status(ErrorCodes::BadValue, "not a regex")};
223 
224     auto temp = stdx::make_unique<RegexMatchExpression>();
225     auto s = temp->init(name, e.regex(), e.regexFlags());
226     if (!s.isOK())
227         return s;
228     return {std::move(temp)};
229 }
230 
parseComparison(StringData name,ComparisonMatchExpression * cmp,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,MatchExpressionParser::AllowedFeatureSet allowedFeatures)231 StatusWithMatchExpression parseComparison(
232     StringData name,
233     ComparisonMatchExpression* cmp,
234     BSONElement e,
235     const boost::intrusive_ptr<ExpressionContext>& expCtx,
236     MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
237     std::unique_ptr<ComparisonMatchExpression> temp(cmp);
238 
239     // Non-equality comparison match expressions cannot have a regular expression as the argument.
240     // (e.g. {a: {$gt: /b/}} is illegal).
241     if (MatchExpression::EQ != cmp->matchType() && BSONType::RegEx == e.type()) {
242         return {Status(ErrorCodes::BadValue,
243                        str::stream() << "Can't have RegEx as arg to predicate over field '" << name
244                                      << "'.")};
245     }
246 
247     auto s = temp->init(name, e);
248     if (!s.isOK()) {
249         return s;
250     }
251 
252     temp->setCollator(expCtx->getCollator());
253 
254     return {std::move(temp)};
255 }
256 
257 /**
258  * DBRef fields are ordered in the collection. In the query, we consider an embedded object a query
259  * on a DBRef as long as it contains $ref and $id.
260  * Required fields: $ref and $id (if incomplete DBRefs are not allowed).
261  *
262  * If incomplete DBRefs are allowed, we accept the BSON object as long as it contains $ref, $id or
263  * $db.
264  *
265  * Field names are checked but not field types.
266  *
267  * { $ref: "s", $id: "x" } = true
268  * { $ref : "s" } = true (if incomplete DBRef is allowed)
269  * { $id : "x" } = true (if incomplete DBRef is allowed)
270  * { $db : "x" } = true (if incomplete DBRef is allowed)
271  */
isDBRefDocument(const BSONObj & obj,bool allowIncompleteDBRef)272 bool isDBRefDocument(const BSONObj& obj, bool allowIncompleteDBRef) {
273     bool hasRef = false;
274     bool hasID = false;
275     bool hasDB = false;
276 
277     BSONObjIterator i(obj);
278     while (i.more() && !(hasRef && hasID)) {
279         auto element = i.next();
280         auto fieldName = element.fieldNameStringData();
281         // $ref
282         if (!hasRef && "$ref"_sd == fieldName) {
283             hasRef = true;
284         }
285         // $id
286         else if (!hasID && "$id"_sd == fieldName) {
287             hasID = true;
288         }
289         // $db
290         else if (!hasDB && "$db"_sd == fieldName) {
291             hasDB = true;
292         }
293     }
294 
295     if (allowIncompleteDBRef) {
296         return hasRef || hasID || hasDB;
297     }
298 
299     return hasRef && hasID;
300 }
301 
302 /**
303  * 5 = false
304  * { a : 5 } = false
305  * { $lt : 5 } = true
306  * { $ref: "s", $id: "x" } = false
307  * { $ref: "s", $id: "x", $db: "mydb" } = false
308  * { $ref : "s" } = false (if incomplete DBRef is allowed)
309  * { $id : "x" } = false (if incomplete DBRef is allowed)
310  * { $db : "mydb" } = false (if incomplete DBRef is allowed)
311  */
isExpressionDocument(BSONElement e,bool allowIncompleteDBRef)312 bool isExpressionDocument(BSONElement e, bool allowIncompleteDBRef) {
313     if (e.type() != BSONType::Object)
314         return false;
315 
316     auto o = e.Obj();
317     if (o.isEmpty())
318         return false;
319 
320     auto name = o.firstElement().fieldNameStringData();
321     if (name[0] != '$')
322         return false;
323 
324     if (isDBRefDocument(o, allowIncompleteDBRef)) {
325         return false;
326     }
327 
328     return true;
329 }
330 
331 /**
332  * Parse 'obj' and return either a MatchExpression or an error.
333  */
parse(const BSONObj & obj,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)334 StatusWithMatchExpression parse(const BSONObj& obj,
335                                 const boost::intrusive_ptr<ExpressionContext>& expCtx,
336                                 const ExtensionsCallback* extensionsCallback,
337                                 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
338                                 DocumentParseLevel currentLevel) {
339     auto root = stdx::make_unique<AndMatchExpression>();
340 
341     const DocumentParseLevel nextLevel = (currentLevel == DocumentParseLevel::kPredicateTopLevel)
342         ? DocumentParseLevel::kUserDocumentTopLevel
343         : currentLevel;
344 
345     for (auto e : obj) {
346         if (e.fieldName()[0] == '$') {
347             auto name = e.fieldNameStringData().substr(1);
348             auto parseExpressionMatchFunction = retrievePathlessParser(name);
349 
350             if (!parseExpressionMatchFunction) {
351                 return {Status(ErrorCodes::BadValue,
352                                str::stream() << "unknown top level operator: "
353                                              << e.fieldNameStringData())};
354             }
355 
356             auto parsedExpression = parseExpressionMatchFunction(
357                 name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
358 
359             if (!parsedExpression.isOK()) {
360                 return parsedExpression;
361             }
362 
363             // A nullptr for 'parsedExpression' indicates that the particular operator should not
364             // be added to 'root', because it is handled outside of the MatchExpressionParser
365             // library. The following operators currently follow this convention:
366             //    - $atomic   is explicitly handled in CanonicalQuery::init()
367             //    - $comment  has no action associated with the operator.
368             //    - $isolated is explicitly handled in CanoncialQuery::init()
369             if (parsedExpression.getValue().get()) {
370                 root->add(parsedExpression.getValue().release());
371             }
372 
373             continue;
374         }
375 
376         if (isExpressionDocument(e, false)) {
377             auto s = parseSub(e.fieldNameStringData(),
378                               e.Obj(),
379                               root.get(),
380                               expCtx,
381                               extensionsCallback,
382                               allowedFeatures,
383                               nextLevel);
384             if (!s.isOK())
385                 return s;
386             continue;
387         }
388 
389         if (e.type() == BSONType::RegEx) {
390             auto result = parseRegexElement(e.fieldNameStringData(), e);
391             if (!result.isOK())
392                 return result;
393             root->add(result.getValue().release());
394             continue;
395         }
396 
397         auto eq = parseComparison(
398             e.fieldNameStringData(), new EqualityMatchExpression(), e, expCtx, allowedFeatures);
399         if (!eq.isOK())
400             return eq;
401 
402         root->add(eq.getValue().release());
403     }
404 
405     if (root->numChildren() == 1) {
406         std::unique_ptr<MatchExpression> real(root->getChild(0));
407         root->clearAndRelease();
408         return {std::move(real)};
409     }
410 
411     return {std::move(root)};
412 }
413 
parseAtomicOrIsolated(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)414 StatusWithMatchExpression parseAtomicOrIsolated(
415     StringData name,
416     BSONElement elem,
417     const boost::intrusive_ptr<ExpressionContext>& expCtx,
418     const ExtensionsCallback* extensionsCallback,
419     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
420     DocumentParseLevel currentLevel) {
421     if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kIsolated) == 0u) {
422         return {Status(ErrorCodes::QueryFeatureNotAllowed,
423                        "$isolated ($atomic) is not allowed in this context")};
424     }
425     if (currentLevel != DocumentParseLevel::kPredicateTopLevel) {
426         return {
427             Status(ErrorCodes::FailedToParse, "$isolated ($atomic) has to be at the top level")};
428     }
429     return {nullptr};
430 }
431 
parseComment(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)432 StatusWithMatchExpression parseComment(StringData name,
433                                        BSONElement elem,
434                                        const boost::intrusive_ptr<ExpressionContext>& expCtx,
435                                        const ExtensionsCallback* extensionsCallback,
436                                        MatchExpressionParser::AllowedFeatureSet allowedFeatures,
437                                        DocumentParseLevel currentLevel) {
438     return {nullptr};
439 }
440 
parseWhere(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)441 StatusWithMatchExpression parseWhere(StringData name,
442                                      BSONElement elem,
443                                      const boost::intrusive_ptr<ExpressionContext>& expCtx,
444                                      const ExtensionsCallback* extensionsCallback,
445                                      MatchExpressionParser::AllowedFeatureSet allowedFeatures,
446                                      DocumentParseLevel currentLevel) {
447     if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kJavascript) == 0u) {
448         return {Status(ErrorCodes::BadValue, "$where is not allowed in this context")};
449     }
450 
451     return extensionsCallback->parseWhere(elem);
452 }
453 
parseText(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)454 StatusWithMatchExpression parseText(StringData name,
455                                     BSONElement elem,
456                                     const boost::intrusive_ptr<ExpressionContext>& expCtx,
457                                     const ExtensionsCallback* extensionsCallback,
458                                     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
459                                     DocumentParseLevel currentLevel) {
460     if (currentLevel == DocumentParseLevel::kUserSubDocument) {
461         return {
462             Status(ErrorCodes::BadValue, "$text can only be applied to the top-level document")};
463     }
464 
465     if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kText) == 0u) {
466         return {Status(ErrorCodes::BadValue, "$text is not allowed in this context")};
467     }
468 
469     return extensionsCallback->parseText(elem);
470 }
471 
parseDBRef(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)472 StatusWithMatchExpression parseDBRef(StringData name,
473                                      BSONElement elem,
474                                      const boost::intrusive_ptr<ExpressionContext>& expCtx,
475                                      const ExtensionsCallback* extensionsCallback,
476                                      MatchExpressionParser::AllowedFeatureSet allowedFeatures,
477                                      DocumentParseLevel currentLevel) {
478     auto eq = stdx::make_unique<EqualityMatchExpression>();
479     auto s = eq->init(elem.fieldName(), elem);
480     if (!s.isOK()) {
481         return s;
482     }
483     // 'id' is collation-aware. 'ref' and 'db' are compared using binary comparison.
484     eq->setCollator("id"_sd == name ? expCtx->getCollator() : nullptr);
485 
486     return {std::move(eq)};
487 }
488 
parseJSONSchema(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)489 StatusWithMatchExpression parseJSONSchema(StringData name,
490                                           BSONElement elem,
491                                           const boost::intrusive_ptr<ExpressionContext>& expCtx,
492                                           const ExtensionsCallback* extensionsCallback,
493                                           MatchExpressionParser::AllowedFeatureSet allowedFeatures,
494                                           DocumentParseLevel currentLevel) {
495     if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kJSONSchema) == 0u) {
496         return Status(ErrorCodes::QueryFeatureNotAllowed,
497                       "$jsonSchema is not allowed in this context");
498     }
499 
500     if (elem.type() != BSONType::Object) {
501         return {Status(ErrorCodes::TypeMismatch, "$jsonSchema must be an object")};
502     }
503 
504     return JSONSchemaParser::parse(elem.Obj(), internalQueryIgnoreUnknownJSONSchemaKeywords.load());
505 }
506 
507 template <class T>
parseAlwaysBoolean(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)508 StatusWithMatchExpression parseAlwaysBoolean(
509     StringData name,
510     BSONElement elem,
511     const boost::intrusive_ptr<ExpressionContext>& expCtx,
512     const ExtensionsCallback* extensionsCallback,
513     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
514     DocumentParseLevel currentLevel) {
515     auto statusWithLong = MatchExpressionParser::parseIntegerElementToLong(elem);
516     if (!statusWithLong.isOK()) {
517         return statusWithLong.getStatus();
518     }
519 
520     if (statusWithLong.getValue() != 1) {
521         return {Status(ErrorCodes::FailedToParse,
522                        str::stream() << T::kName << " must be an integer value of 1")};
523     }
524 
525     return {stdx::make_unique<T>()};
526 }
527 
parseExpr(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)528 StatusWithMatchExpression parseExpr(StringData name,
529                                     BSONElement elem,
530                                     const boost::intrusive_ptr<ExpressionContext>& expCtx,
531                                     const ExtensionsCallback* extensionsCallback,
532                                     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
533                                     DocumentParseLevel currentLevel) {
534     if (currentLevel == DocumentParseLevel::kUserSubDocument) {
535         return {
536             Status(ErrorCodes::BadValue, "$expr can only be applied to the top-level document")};
537     }
538 
539     if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kExpr) == 0u) {
540         return {Status(ErrorCodes::QueryFeatureNotAllowed, "$expr is not allowed in this context")};
541     }
542 
543     return {stdx::make_unique<ExprMatchExpression>(std::move(elem), expCtx)};
544 }
545 
parseMOD(StringData name,BSONElement e)546 StatusWithMatchExpression parseMOD(StringData name, BSONElement e) {
547     if (e.type() != BSONType::Array)
548         return {Status(ErrorCodes::BadValue, "malformed mod, needs to be an array")};
549 
550     BSONObjIterator i(e.Obj());
551 
552     if (!i.more())
553         return {Status(ErrorCodes::BadValue, "malformed mod, not enough elements")};
554     auto d = i.next();
555     if (!d.isNumber())
556         return {Status(ErrorCodes::BadValue, "malformed mod, divisor not a number")};
557 
558     if (!i.more())
559         return {Status(ErrorCodes::BadValue, "malformed mod, not enough elements")};
560     auto r = i.next();
561     if (!d.isNumber())
562         return {Status(ErrorCodes::BadValue, "malformed mod, remainder not a number")};
563 
564     if (i.more())
565         return {Status(ErrorCodes::BadValue, "malformed mod, too many elements")};
566 
567     auto temp = stdx::make_unique<ModMatchExpression>();
568     auto s = temp->init(
569         name, ModMatchExpression::truncateToLong(d), ModMatchExpression::truncateToLong(r));
570     if (!s.isOK())
571         return s;
572     return {std::move(temp)};
573 }
574 
parseRegexDocument(StringData name,const BSONObj & doc)575 StatusWithMatchExpression parseRegexDocument(StringData name, const BSONObj& doc) {
576     StringData regex;
577     StringData regexOptions;
578 
579     for (auto e : doc) {
580         auto matchType = MatchExpressionParser::parsePathAcceptingKeyword(e);
581         if (!matchType) {
582             continue;
583         }
584 
585         switch (*matchType) {
586             case PathAcceptingKeyword::REGEX:
587                 if (e.type() == BSONType::String) {
588                     regex = e.valueStringData();
589                 } else if (e.type() == BSONType::RegEx) {
590                     regex = e.regex();
591                     regexOptions = e.regexFlags();
592                 } else {
593                     return {Status(ErrorCodes::BadValue, "$regex has to be a string")};
594                 }
595 
596                 break;
597             case PathAcceptingKeyword::OPTIONS:
598                 if (e.type() != BSONType::String)
599                     return {Status(ErrorCodes::BadValue, "$options has to be a string")};
600                 regexOptions = e.valueStringData();
601                 break;
602             default:
603                 break;
604         }
605     }
606 
607     auto temp = stdx::make_unique<RegexMatchExpression>();
608     auto s = temp->init(name, regex, regexOptions);
609     if (!s.isOK())
610         return s;
611     return {std::move(temp)};
612 }
613 
parseInExpression(InMatchExpression * inExpression,const BSONObj & theArray,const boost::intrusive_ptr<ExpressionContext> & expCtx)614 Status parseInExpression(InMatchExpression* inExpression,
615                          const BSONObj& theArray,
616                          const boost::intrusive_ptr<ExpressionContext>& expCtx) {
617     inExpression->setCollator(expCtx->getCollator());
618     std::vector<BSONElement> equalities;
619     for (auto e : theArray) {
620         // Allow DBRefs, but reject all fields with names starting with $.
621         if (isExpressionDocument(e, false)) {
622             return Status(ErrorCodes::BadValue, "cannot nest $ under $in");
623         }
624 
625         if (e.type() == BSONType::RegEx) {
626             auto r = stdx::make_unique<RegexMatchExpression>();
627             auto s = r->init("", e);
628             if (!s.isOK())
629                 return s;
630             s = inExpression->addRegex(std::move(r));
631             if (!s.isOK())
632                 return s;
633         } else {
634             equalities.push_back(e);
635         }
636     }
637     return inExpression->setEqualities(std::move(equalities));
638 }
639 
640 template <class T>
parseType(StringData name,BSONElement elt)641 StatusWithMatchExpression parseType(StringData name, BSONElement elt) {
642     auto typeSet = MatcherTypeSet::parse(elt, MatcherTypeSet::kTypeAliasMap);
643     if (!typeSet.isOK()) {
644         return typeSet.getStatus();
645     }
646 
647     auto typeExpr = stdx::make_unique<T>();
648 
649     if (typeSet.getValue().isEmpty()) {
650         return {Status(ErrorCodes::FailedToParse,
651                        str::stream() << typeExpr->name() << " must match at least one type")};
652     }
653 
654     auto status = typeExpr->init(name, std::move(typeSet.getValue()));
655     if (!status.isOK()) {
656         return status;
657     }
658 
659     return {std::move(typeExpr)};
660 }
661 
662 /**
663  * Converts 'theArray', a BSONArray of integers, into a std::vector of integers.
664  */
parseBitPositionsArray(const BSONObj & theArray)665 StatusWith<std::vector<uint32_t>> parseBitPositionsArray(const BSONObj& theArray) {
666     std::vector<uint32_t> bitPositions;
667 
668     // Fill temporary bit position array with integers read from the BSON array.
669     for (auto e : theArray) {
670         if (!e.isNumber()) {
671             return Status(ErrorCodes::BadValue,
672                           str::stream() << "bit positions must be an integer but got: " << e);
673         }
674 
675         if (e.type() == BSONType::NumberDouble) {
676             auto eDouble = e.numberDouble();
677 
678             // NaN doubles are rejected.
679             if (std::isnan(eDouble)) {
680                 return Status(ErrorCodes::BadValue,
681                               str::stream() << "bit positions cannot take a NaN: " << e);
682             }
683 
684             // This makes sure e does not overflow a 32-bit integer container.
685             if (eDouble > std::numeric_limits<int>::max() ||
686                 eDouble < std::numeric_limits<int>::min()) {
687                 return Status(
688                     ErrorCodes::BadValue,
689                     str::stream()
690                         << "bit positions cannot be represented as a 32-bit signed integer: "
691                         << e);
692             }
693 
694             // This checks if e is integral.
695             if (eDouble != static_cast<double>(static_cast<long long>(eDouble))) {
696                 return Status(ErrorCodes::BadValue,
697                               str::stream() << "bit positions must be an integer but got: " << e);
698             }
699         }
700 
701         if (e.type() == BSONType::NumberLong) {
702             auto eLong = e.numberLong();
703 
704             // This makes sure e does not overflow a 32-bit integer container.
705             if (eLong > std::numeric_limits<int>::max() ||
706                 eLong < std::numeric_limits<int>::min()) {
707                 return Status(
708                     ErrorCodes::BadValue,
709                     str::stream()
710                         << "bit positions cannot be represented as a 32-bit signed integer: "
711                         << e);
712             }
713         }
714 
715         auto eValue = e.numberInt();
716 
717         // No negatives.
718         if (eValue < 0) {
719             return Status(ErrorCodes::BadValue,
720                           str::stream() << "bit positions must be >= 0 but got: " << e);
721         }
722 
723         bitPositions.push_back(eValue);
724     }
725 
726     return bitPositions;
727 }
728 
729 /**
730  * Parses 'e' into a BitTestMatchExpression.
731  */
732 template <class T>
parseBitTest(StringData name,BSONElement e)733 StatusWithMatchExpression parseBitTest(StringData name, BSONElement e) {
734     auto bitTestMatchExpression = stdx::make_unique<T>();
735 
736     if (e.type() == BSONType::Array) {
737         // Array of bit positions provided as value.
738         auto statusWithBitPositions = parseBitPositionsArray(e.Obj());
739         if (!statusWithBitPositions.isOK()) {
740             return statusWithBitPositions.getStatus();
741         }
742 
743         std::vector<uint32_t> bitPositions = statusWithBitPositions.getValue();
744         auto s = bitTestMatchExpression->init(name, bitPositions);
745         if (!s.isOK()) {
746             return s;
747         }
748     } else if (e.isNumber()) {
749         // Integer bitmask provided as value.
750         auto bitMask = MatchExpressionParser::parseIntegerElementToNonNegativeLong(e);
751         if (!bitMask.isOK()) {
752             return bitMask.getStatus();
753         }
754 
755         auto s = bitTestMatchExpression->init(name, bitMask.getValue());
756         if (!s.isOK()) {
757             return s;
758         }
759     } else if (e.type() == BSONType::BinData) {
760         // Binary bitmask provided as value.
761 
762         int eBinaryLen;
763         auto eBinary = e.binData(eBinaryLen);
764 
765         auto s = bitTestMatchExpression->init(name, eBinary, eBinaryLen);
766         if (!s.isOK()) {
767             return s;
768         }
769     } else {
770         return Status(
771             ErrorCodes::BadValue,
772             str::stream() << name << " takes an Array, a number, or a BinData but received: " << e);
773     }
774 
775     return {std::move(bitTestMatchExpression)};
776 }
777 
parseInternalSchemaFmod(StringData name,BSONElement elem)778 StatusWithMatchExpression parseInternalSchemaFmod(StringData name, BSONElement elem) {
779     auto path(name);
780     if (elem.type() != BSONType::Array)
781         return {ErrorCodes::BadValue,
782                 str::stream() << path << " must be an array, but got type " << elem.type()};
783 
784     BSONObjIterator i(elem.embeddedObject());
785 
786     if (!i.more())
787         return {ErrorCodes::BadValue, str::stream() << path << " does not have enough elements"};
788     auto d = i.next();
789     if (!d.isNumber())
790         return {ErrorCodes::TypeMismatch,
791                 str::stream() << path << " does not have a numeric divisor"};
792 
793     if (!i.more())
794         return {ErrorCodes::BadValue, str::stream() << path << " does not have enough elements"};
795     auto r = i.next();
796     if (!d.isNumber())
797         return {ErrorCodes::TypeMismatch,
798                 str::stream() << path << " does not have a numeric remainder"};
799 
800     if (i.more())
801         return {ErrorCodes::BadValue, str::stream() << path << " has too many elements"};
802 
803     auto result = stdx::make_unique<InternalSchemaFmodMatchExpression>();
804     auto s = result->init(name, d.numberDecimal(), r.numberDecimal());
805     if (!s.isOK())
806         return s;
807     return {std::move(result)};
808 }
809 
parseInternalSchemaRootDocEq(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)810 StatusWithMatchExpression parseInternalSchemaRootDocEq(
811     StringData name,
812     BSONElement elem,
813     const boost::intrusive_ptr<ExpressionContext>& expCtx,
814     const ExtensionsCallback* extensionsCallback,
815     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
816     DocumentParseLevel currentLevel) {
817     if (currentLevel == DocumentParseLevel::kUserSubDocument) {
818         return {Status(ErrorCodes::FailedToParse,
819                        str::stream() << InternalSchemaRootDocEqMatchExpression::kName
820                                      << " can only be applied to the top level document")};
821     }
822 
823     if (elem.type() != BSONType::Object) {
824         return {Status(ErrorCodes::TypeMismatch,
825                        str::stream() << InternalSchemaRootDocEqMatchExpression::kName
826                                      << " must be an object, found type "
827                                      << elem.type())};
828     }
829     auto rootDocEq = stdx::make_unique<InternalSchemaRootDocEqMatchExpression>();
830     rootDocEq->init(elem.embeddedObject());
831     return {std::move(rootDocEq)};
832 }
833 
834 /**
835  * Parses the given BSONElement into a single integer argument and creates a MatchExpression
836  * of type 'T' that gets initialized with the resulting integer.
837  */
838 template <class T>
parseInternalSchemaSingleIntegerArgument(StringData name,BSONElement elem)839 StatusWithMatchExpression parseInternalSchemaSingleIntegerArgument(StringData name,
840                                                                    BSONElement elem) {
841     auto parsedInt = MatchExpressionParser::parseIntegerElementToNonNegativeLong(elem);
842     if (!parsedInt.isOK()) {
843         return parsedInt.getStatus();
844     }
845 
846     auto matchExpression = stdx::make_unique<T>();
847     auto status = matchExpression->init(name, parsedInt.getValue());
848     if (!status.isOK()) {
849         return status;
850     }
851 
852     return {std::move(matchExpression)};
853 }
854 
855 /**
856  * Same as the parseInternalSchemaSingleIntegerArgument function, but for top-level
857  * operators which don't have paths.
858  */
859 template <class T>
parseTopLevelInternalSchemaSingleIntegerArgument(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)860 StatusWithMatchExpression parseTopLevelInternalSchemaSingleIntegerArgument(
861     StringData name,
862     BSONElement elem,
863     const boost::intrusive_ptr<ExpressionContext>& expCtx,
864     const ExtensionsCallback* extensionsCallback,
865     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
866     DocumentParseLevel currentLevel) {
867     auto parsedInt = MatchExpressionParser::parseIntegerElementToNonNegativeLong(elem);
868     if (!parsedInt.isOK()) {
869         return parsedInt.getStatus();
870     }
871     auto matchExpression = stdx::make_unique<T>();
872     auto status = matchExpression->init(parsedInt.getValue());
873     if (!status.isOK()) {
874         return status;
875     }
876     return {std::move(matchExpression)};
877 }
878 
879 /**
880  * Looks at the field named 'namePlaceholderFieldName' within 'containingObject' and parses a name
881  * placeholder from that element. 'expressionName' is the name of the expression that requires the
882  * name placeholder and is used to generate helpful error messages.
883  */
parseNamePlaceholder(const BSONObj & containingObject,StringData namePlaceholderFieldName,StringData expressionName)884 StatusWith<StringData> parseNamePlaceholder(const BSONObj& containingObject,
885                                             StringData namePlaceholderFieldName,
886                                             StringData expressionName) {
887     auto namePlaceholderElem = containingObject[namePlaceholderFieldName];
888     if (!namePlaceholderElem) {
889         return {ErrorCodes::FailedToParse,
890                 str::stream() << expressionName << " requires a '" << namePlaceholderFieldName
891                               << "'"};
892     } else if (namePlaceholderElem.type() != BSONType::String) {
893         return {ErrorCodes::TypeMismatch,
894                 str::stream() << expressionName << " requires '" << namePlaceholderFieldName
895                               << "' to be a string, not "
896                               << namePlaceholderElem.type()};
897     }
898     return {namePlaceholderElem.valueStringData()};
899 }
900 
901 /**
902  * Looks at the field named 'exprWithPlaceholderFieldName' within 'containingObject' and parses an
903  * ExpressionWithPlaceholder from that element. Fails if an error occurs during parsing, or if the
904  * ExpressionWithPlaceholder has a different name placeholder than 'expectedPlaceholder'.
905  * 'expressionName' is the name of the expression that requires the ExpressionWithPlaceholder and is
906  * used to generate helpful error messages.
907  */
parseExprWithPlaceholder(const BSONObj & containingObject,StringData exprWithPlaceholderFieldName,StringData expressionName,StringData expectedPlaceholder,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)908 StatusWith<std::unique_ptr<ExpressionWithPlaceholder>> parseExprWithPlaceholder(
909     const BSONObj& containingObject,
910     StringData exprWithPlaceholderFieldName,
911     StringData expressionName,
912     StringData expectedPlaceholder,
913     const boost::intrusive_ptr<ExpressionContext>& expCtx,
914     const ExtensionsCallback* extensionsCallback,
915     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
916     DocumentParseLevel currentLevel) {
917     auto exprWithPlaceholderElem = containingObject[exprWithPlaceholderFieldName];
918     if (!exprWithPlaceholderElem) {
919         return {ErrorCodes::FailedToParse,
920                 str::stream() << expressionName << " requires '" << exprWithPlaceholderFieldName
921                               << "'"};
922     } else if (exprWithPlaceholderElem.type() != BSONType::Object) {
923         return {ErrorCodes::TypeMismatch,
924                 str::stream() << expressionName << " found '" << exprWithPlaceholderFieldName
925                               << "', which is an incompatible type: "
926                               << exprWithPlaceholderElem.type()};
927     }
928 
929     auto filter = parse(exprWithPlaceholderElem.embeddedObject(),
930                         expCtx,
931                         extensionsCallback,
932                         MatchExpressionParser::kBanAllSpecialFeatures,
933                         currentLevel);
934 
935     if (!filter.isOK()) {
936         return filter.getStatus();
937     }
938 
939     auto result = ExpressionWithPlaceholder::make(std::move(filter.getValue()));
940     if (!result.isOK()) {
941         return result.getStatus();
942     }
943 
944     auto placeholder = result.getValue()->getPlaceholder();
945     if (placeholder && (*placeholder != expectedPlaceholder)) {
946         return {ErrorCodes::FailedToParse,
947                 str::stream() << expressionName << " expected a name placeholder of "
948                               << expectedPlaceholder
949                               << ", but '"
950                               << exprWithPlaceholderElem.fieldNameStringData()
951                               << "' has a mismatching placeholder '"
952                               << *placeholder
953                               << "'"};
954     }
955     return result;
956 }
957 
958 StatusWith<std::vector<InternalSchemaAllowedPropertiesMatchExpression::PatternSchema>>
parsePatternProperties(BSONElement patternPropertiesElem,StringData expectedPlaceholder,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)959 parsePatternProperties(BSONElement patternPropertiesElem,
960                        StringData expectedPlaceholder,
961                        const boost::intrusive_ptr<ExpressionContext>& expCtx,
962                        const ExtensionsCallback* extensionsCallback,
963                        MatchExpressionParser::AllowedFeatureSet allowedFeatures,
964                        DocumentParseLevel currentLevel) {
965     if (!patternPropertiesElem) {
966         return {ErrorCodes::FailedToParse,
967                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
968                               << " requires 'patternProperties'"};
969     } else if (patternPropertiesElem.type() != BSONType::Array) {
970         return {ErrorCodes::TypeMismatch,
971                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
972                               << " requires 'patternProperties' to be an array, not "
973                               << patternPropertiesElem.type()};
974     }
975 
976     std::vector<InternalSchemaAllowedPropertiesMatchExpression::PatternSchema> patternProperties;
977     for (auto constraintElem : patternPropertiesElem.embeddedObject()) {
978         if (constraintElem.type() != BSONType::Object) {
979             return {ErrorCodes::TypeMismatch,
980                     str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
981                                   << " requires 'patternProperties' to be an array of objects"};
982         }
983 
984         auto constraint = constraintElem.embeddedObject();
985         if (constraint.nFields() != 2) {
986             return {ErrorCodes::FailedToParse,
987                     str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
988                                   << " requires 'patternProperties' to be an array of objects "
989                                      "containing exactly two fields, 'regex' and 'expression'"};
990         }
991 
992         auto expressionWithPlaceholder =
993             parseExprWithPlaceholder(constraint,
994                                      "expression"_sd,
995                                      InternalSchemaAllowedPropertiesMatchExpression::kName,
996                                      expectedPlaceholder,
997                                      expCtx,
998                                      extensionsCallback,
999                                      allowedFeatures,
1000                                      currentLevel);
1001         if (!expressionWithPlaceholder.isOK()) {
1002             return expressionWithPlaceholder.getStatus();
1003         }
1004 
1005         auto regexElem = constraint["regex"];
1006         if (!regexElem) {
1007             return {
1008                 ErrorCodes::FailedToParse,
1009                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1010                               << " requires each object in 'patternProperties' to have a 'regex'"};
1011         }
1012         if (regexElem.type() != BSONType::RegEx) {
1013             return {ErrorCodes::TypeMismatch,
1014                     str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1015                                   << " requires 'patternProperties' to be an array of objects, "
1016                                      "where 'regex' is a regular expression"};
1017         } else if (*regexElem.regexFlags() != '\0') {
1018             return {
1019                 ErrorCodes::BadValue,
1020                 str::stream()
1021                     << InternalSchemaAllowedPropertiesMatchExpression::kName
1022                     << " does not accept regex flags for pattern schemas in 'patternProperties'"};
1023         }
1024 
1025         patternProperties.emplace_back(
1026             InternalSchemaAllowedPropertiesMatchExpression::Pattern(regexElem.regex()),
1027             std::move(expressionWithPlaceholder.getValue()));
1028     }
1029 
1030     return std::move(patternProperties);
1031 }
1032 
parseProperties(BSONElement propertiesElem)1033 StatusWith<boost::container::flat_set<StringData>> parseProperties(BSONElement propertiesElem) {
1034     if (!propertiesElem) {
1035         return {ErrorCodes::FailedToParse,
1036                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1037                               << " requires 'properties' to be present"};
1038     } else if (propertiesElem.type() != BSONType::Array) {
1039         return {ErrorCodes::TypeMismatch,
1040                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1041                               << " requires 'properties' to be an array, not "
1042                               << propertiesElem.type()};
1043     }
1044 
1045     std::vector<StringData> properties;
1046     for (auto property : propertiesElem.embeddedObject()) {
1047         if (property.type() != BSONType::String) {
1048             return {
1049                 ErrorCodes::TypeMismatch,
1050                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1051                               << " requires 'properties' to be an array of strings, but found a "
1052                               << property.type()};
1053         }
1054         properties.push_back(property.valueStringData());
1055     }
1056 
1057     return boost::container::flat_set<StringData>(properties.begin(), properties.end());
1058 }
1059 
parseInternalSchemaAllowedProperties(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1060 StatusWithMatchExpression parseInternalSchemaAllowedProperties(
1061     StringData name,
1062     BSONElement elem,
1063     const boost::intrusive_ptr<ExpressionContext>& expCtx,
1064     const ExtensionsCallback* extensionsCallback,
1065     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1066     DocumentParseLevel currentLevel) {
1067     if (elem.type() != BSONType::Object) {
1068         return {ErrorCodes::TypeMismatch,
1069                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1070                               << " must be an object"};
1071     }
1072 
1073     auto subobj = elem.embeddedObject();
1074     if (subobj.nFields() != 4) {
1075         return {ErrorCodes::FailedToParse,
1076                 str::stream() << InternalSchemaAllowedPropertiesMatchExpression::kName
1077                               << " requires exactly four fields: 'properties', 'namePlaceholder', "
1078                                  "'patternProperties' and 'otherwise'"};
1079     }
1080 
1081     auto namePlaceholder = parseNamePlaceholder(
1082         subobj, "namePlaceholder"_sd, InternalSchemaAllowedPropertiesMatchExpression::kName);
1083     if (!namePlaceholder.isOK()) {
1084         return namePlaceholder.getStatus();
1085     }
1086 
1087     auto patternProperties = parsePatternProperties(subobj["patternProperties"],
1088                                                     namePlaceholder.getValue(),
1089                                                     expCtx,
1090                                                     extensionsCallback,
1091                                                     allowedFeatures,
1092                                                     currentLevel);
1093     if (!patternProperties.isOK()) {
1094         return patternProperties.getStatus();
1095     }
1096 
1097     auto otherwise = parseExprWithPlaceholder(subobj,
1098                                               "otherwise"_sd,
1099                                               InternalSchemaAllowedPropertiesMatchExpression::kName,
1100                                               namePlaceholder.getValue(),
1101                                               expCtx,
1102                                               extensionsCallback,
1103                                               allowedFeatures,
1104                                               currentLevel);
1105     if (!otherwise.isOK()) {
1106         return otherwise.getStatus();
1107     }
1108 
1109     auto properties = parseProperties(subobj["properties"]);
1110     if (!properties.isOK()) {
1111         return properties.getStatus();
1112     }
1113 
1114     auto allowedPropertiesExpr =
1115         stdx::make_unique<InternalSchemaAllowedPropertiesMatchExpression>();
1116     auto status = allowedPropertiesExpr->init(std::move(properties.getValue()),
1117                                               namePlaceholder.getValue(),
1118                                               std::move(patternProperties.getValue()),
1119                                               std::move(otherwise.getValue()));
1120     if (!status.isOK()) {
1121         return status;
1122     }
1123 
1124     return {std::move(allowedPropertiesExpr)};
1125 }
1126 
1127 /**
1128  * Parses 'elem' into an InternalSchemaMatchArrayIndexMatchExpression.
1129  */
parseInternalSchemaMatchArrayIndex(StringData path,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1130 StatusWithMatchExpression parseInternalSchemaMatchArrayIndex(
1131     StringData path,
1132     BSONElement elem,
1133     const boost::intrusive_ptr<ExpressionContext>& expCtx,
1134     const ExtensionsCallback* extensionsCallback,
1135     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1136     DocumentParseLevel currentLevel) {
1137     if (elem.type() != BSONType::Object) {
1138         return {ErrorCodes::TypeMismatch,
1139                 str::stream() << InternalSchemaMatchArrayIndexMatchExpression::kName
1140                               << " must be an object"};
1141     }
1142 
1143     auto subobj = elem.embeddedObject();
1144     if (subobj.nFields() != 3) {
1145         return {ErrorCodes::FailedToParse,
1146                 str::stream() << InternalSchemaMatchArrayIndexMatchExpression::kName
1147                               << " requires exactly three fields: 'index', "
1148                                  "'namePlaceholder' and 'expression'"};
1149     }
1150 
1151     auto index = MatchExpressionParser::parseIntegerElementToNonNegativeLong(subobj["index"]);
1152     if (!index.isOK()) {
1153         return index.getStatus();
1154     }
1155 
1156     auto namePlaceholder = parseNamePlaceholder(
1157         subobj, "namePlaceholder"_sd, InternalSchemaMatchArrayIndexMatchExpression::kName);
1158     if (!namePlaceholder.isOK()) {
1159         return namePlaceholder.getStatus();
1160     }
1161 
1162     auto expressionWithPlaceholder =
1163         parseExprWithPlaceholder(subobj,
1164                                  "expression"_sd,
1165                                  InternalSchemaMatchArrayIndexMatchExpression::kName,
1166                                  namePlaceholder.getValue(),
1167                                  expCtx,
1168                                  extensionsCallback,
1169                                  allowedFeatures,
1170                                  currentLevel);
1171     if (!expressionWithPlaceholder.isOK()) {
1172         return expressionWithPlaceholder.getStatus();
1173     }
1174 
1175     auto matchArrayIndexExpr = stdx::make_unique<InternalSchemaMatchArrayIndexMatchExpression>();
1176     auto initStatus = matchArrayIndexExpr->init(
1177         path, index.getValue(), std::move(expressionWithPlaceholder.getValue()));
1178     if (!initStatus.isOK()) {
1179         return initStatus;
1180     }
1181     return {std::move(matchArrayIndexExpr)};
1182 }
1183 
parseGeo(StringData name,PathAcceptingKeyword type,const BSONObj & section,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1184 StatusWithMatchExpression parseGeo(StringData name,
1185                                    PathAcceptingKeyword type,
1186                                    const BSONObj& section,
1187                                    MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1188     if (PathAcceptingKeyword::WITHIN == type || PathAcceptingKeyword::GEO_INTERSECTS == type) {
1189         auto gq = stdx::make_unique<GeoExpression>(name.toString());
1190         auto parseStatus = gq->parseFrom(section);
1191 
1192         if (!parseStatus.isOK())
1193             return StatusWithMatchExpression(parseStatus);
1194 
1195         auto e = stdx::make_unique<GeoMatchExpression>();
1196 
1197         auto s = e->init(name, gq.release(), section);
1198         if (!s.isOK())
1199             return StatusWithMatchExpression(s);
1200         return {std::move(e)};
1201     } else {
1202         invariant(PathAcceptingKeyword::GEO_NEAR == type);
1203 
1204         if ((allowedFeatures & MatchExpressionParser::AllowedFeatures::kGeoNear) == 0u) {
1205             return {Status(ErrorCodes::BadValue,
1206                            "$geoNear, $near, and $nearSphere are not allowed in this context")};
1207         }
1208 
1209         auto nq = stdx::make_unique<GeoNearExpression>(name.toString());
1210         auto s = nq->parseFrom(section);
1211         if (!s.isOK()) {
1212             return StatusWithMatchExpression(s);
1213         }
1214         auto e = stdx::make_unique<GeoNearMatchExpression>();
1215         s = e->init(name, nq.release(), section);
1216         if (!s.isOK())
1217             return StatusWithMatchExpression(s);
1218         return {std::move(e)};
1219     }
1220 }
1221 
1222 template <class T>
parseTreeTopLevel(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1223 StatusWithMatchExpression parseTreeTopLevel(
1224     StringData name,
1225     BSONElement elem,
1226     const boost::intrusive_ptr<ExpressionContext>& expCtx,
1227     const ExtensionsCallback* extensionsCallback,
1228     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1229     DocumentParseLevel currentLevel) {
1230     if (elem.type() != BSONType::Array) {
1231         return {Status(ErrorCodes::BadValue, str::stream() << T::kName << " must be an array")};
1232     }
1233 
1234     auto temp = stdx::make_unique<T>();
1235 
1236     auto arr = elem.Obj();
1237     if (arr.isEmpty()) {
1238         return Status(ErrorCodes::BadValue, "$and/$or/$nor must be a nonempty array");
1239     }
1240 
1241     for (auto e : arr) {
1242         if (e.type() != BSONType::Object)
1243             return Status(ErrorCodes::BadValue, "$or/$and/$nor entries need to be full objects");
1244 
1245         auto sub = parse(e.Obj(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1246         if (!sub.isOK())
1247             return sub.getStatus();
1248 
1249         temp->add(sub.getValue().release());
1250     }
1251 
1252     return {std::move(temp)};
1253 }
1254 
parseElemMatch(StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1255 StatusWithMatchExpression parseElemMatch(StringData name,
1256                                          BSONElement e,
1257                                          const boost::intrusive_ptr<ExpressionContext>& expCtx,
1258                                          const ExtensionsCallback* extensionsCallback,
1259                                          MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1260     if (e.type() != BSONType::Object)
1261         return {Status(ErrorCodes::BadValue, "$elemMatch needs an Object")};
1262 
1263     auto obj = e.Obj();
1264 
1265     // $elemMatch value case applies when the children all
1266     // work on the field 'name'.
1267     // This is the case when:
1268     //     1) the argument is an expression document; and
1269     //     2) expression is not a AND/NOR/OR logical operator. Children of
1270     //        these logical operators are initialized with field names.
1271     //     3) expression is not a WHERE operator. WHERE works on objects instead
1272     //        of specific field.
1273     bool isElemMatchValue = false;
1274     if (isExpressionDocument(e, true)) {
1275         auto elt = obj.firstElement();
1276         invariant(elt);
1277 
1278         isElemMatchValue = !retrievePathlessParser(elt.fieldNameStringData().substr(1));
1279     }
1280 
1281     if (isElemMatchValue) {
1282         // Value case.
1283 
1284         AndMatchExpression theAnd;
1285         auto s = parseSub("",
1286                           obj,
1287                           &theAnd,
1288                           expCtx,
1289                           extensionsCallback,
1290                           allowedFeatures,
1291                           DocumentParseLevel::kUserSubDocument);
1292         if (!s.isOK())
1293             return s;
1294 
1295         auto temp = stdx::make_unique<ElemMatchValueMatchExpression>();
1296         s = temp->init(name);
1297         if (!s.isOK())
1298             return s;
1299 
1300         for (size_t i = 0; i < theAnd.numChildren(); i++) {
1301             temp->add(theAnd.getChild(i));
1302         }
1303         theAnd.clearAndRelease();
1304 
1305         return {std::move(temp)};
1306     }
1307 
1308     // DBRef value case
1309     // A DBRef document under a $elemMatch should be treated as an object case because it may
1310     // contain non-DBRef fields in addition to $ref, $id and $db.
1311 
1312     // Object case.
1313 
1314     auto subRaw = parse(
1315         obj, expCtx, extensionsCallback, allowedFeatures, DocumentParseLevel::kUserSubDocument);
1316     if (!subRaw.isOK())
1317         return subRaw;
1318     auto sub = std::move(subRaw.getValue());
1319 
1320     // $where is not supported under $elemMatch because $where applies to top-level document, not
1321     // array elements in a field.
1322     if (hasNode(sub.get(), MatchExpression::WHERE)) {
1323         return {Status(ErrorCodes::BadValue, "$elemMatch cannot contain $where expression")};
1324     }
1325 
1326     auto temp = stdx::make_unique<ElemMatchObjectMatchExpression>();
1327     auto status = temp->init(name, sub.release());
1328     if (!status.isOK())
1329         return status;
1330 
1331     return {std::move(temp)};
1332 }
1333 
parseAll(StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures)1334 StatusWithMatchExpression parseAll(StringData name,
1335                                    BSONElement e,
1336                                    const boost::intrusive_ptr<ExpressionContext>& expCtx,
1337                                    const ExtensionsCallback* extensionsCallback,
1338                                    MatchExpressionParser::AllowedFeatureSet allowedFeatures) {
1339     if (e.type() != BSONType::Array)
1340         return {Status(ErrorCodes::BadValue, "$all needs an array")};
1341 
1342     auto arr = e.Obj();
1343     auto myAnd = stdx::make_unique<AndMatchExpression>();
1344     BSONObjIterator i(arr);
1345 
1346     if (arr.firstElement().type() == BSONType::Object &&
1347         "$elemMatch"_sd == arr.firstElement().Obj().firstElement().fieldNameStringData()) {
1348         // $all : [ { $elemMatch : {} } ... ]
1349 
1350         while (i.more()) {
1351             auto hopefullyElemMatchElement = i.next();
1352 
1353             if (hopefullyElemMatchElement.type() != BSONType::Object) {
1354                 // $all : [ { $elemMatch : ... }, 5 ]
1355                 return {Status(ErrorCodes::BadValue, "$all/$elemMatch has to be consistent")};
1356             }
1357 
1358             auto hopefullyElemMatchObj = hopefullyElemMatchElement.Obj();
1359             if ("$elemMatch"_sd != hopefullyElemMatchObj.firstElement().fieldNameStringData()) {
1360                 // $all : [ { $elemMatch : ... }, { x : 5 } ]
1361                 return {Status(ErrorCodes::BadValue, "$all/$elemMatch has to be consistent")};
1362             }
1363 
1364             auto inner = parseElemMatch(name,
1365                                         hopefullyElemMatchObj.firstElement(),
1366                                         expCtx,
1367                                         extensionsCallback,
1368                                         allowedFeatures);
1369             if (!inner.isOK())
1370                 return inner;
1371             myAnd->add(inner.getValue().release());
1372         }
1373 
1374         return {std::move(myAnd)};
1375     }
1376 
1377     while (i.more()) {
1378         auto e = i.next();
1379 
1380         if (e.type() == BSONType::RegEx) {
1381             auto r = stdx::make_unique<RegexMatchExpression>();
1382             auto s = r->init(name, e);
1383             if (!s.isOK())
1384                 return s;
1385             myAnd->add(r.release());
1386         } else if (e.type() == BSONType::Object &&
1387                    MatchExpressionParser::parsePathAcceptingKeyword(e.Obj().firstElement())) {
1388             return {Status(ErrorCodes::BadValue, "no $ expressions in $all")};
1389         } else {
1390             auto x = stdx::make_unique<EqualityMatchExpression>();
1391             auto s = x->init(name, e);
1392             if (!s.isOK())
1393                 return s;
1394             x->setCollator(expCtx->getCollator());
1395             myAnd->add(x.release());
1396         }
1397     }
1398 
1399     if (myAnd->numChildren() == 0) {
1400         return {stdx::make_unique<AlwaysFalseMatchExpression>()};
1401     }
1402 
1403     return {std::move(myAnd)};
1404 }
1405 
1406 /**
1407  * Parses a MatchExpression which takes a fixed-size array of MatchExpressions as arguments.
1408  */
1409 template <class T>
parseInternalSchemaFixedArityArgument(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1410 StatusWithMatchExpression parseInternalSchemaFixedArityArgument(
1411     StringData name,
1412     BSONElement elem,
1413     const boost::intrusive_ptr<ExpressionContext>& expCtx,
1414     const ExtensionsCallback* extensionsCallback,
1415     MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1416     DocumentParseLevel currentLevel) {
1417     constexpr auto arity = T::arity();
1418     if (elem.type() != BSONType::Array) {
1419         return {ErrorCodes::FailedToParse,
1420                 str::stream() << elem.fieldNameStringData() << " must be an array of " << arity
1421                               << " MatchExpressions"};
1422     }
1423 
1424     auto inputObj = elem.embeddedObject();
1425     if (static_cast<size_t>(inputObj.nFields()) != arity) {
1426         return {ErrorCodes::FailedToParse,
1427                 str::stream() << elem.fieldNameStringData() << " requires exactly " << arity
1428                               << " MatchExpressions, but got "
1429                               << inputObj.nFields()};
1430     }
1431 
1432     // Fill out 'expressions' with all of the parsed subexpressions contained in the array,
1433     // tracking our location in the array with 'position'.
1434     std::array<std::unique_ptr<MatchExpression>, arity> expressions;
1435     auto position = expressions.begin();
1436 
1437     for (auto obj : inputObj) {
1438         if (obj.type() != BSONType::Object) {
1439             return {ErrorCodes::FailedToParse,
1440                     str::stream() << elem.fieldNameStringData()
1441                                   << " must be an array of objects, but found an element of type "
1442                                   << obj.type()};
1443         }
1444 
1445         auto subexpr =
1446             parse(obj.embeddedObject(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1447         if (!subexpr.isOK()) {
1448             return subexpr.getStatus();
1449         }
1450         *position = std::move(subexpr.getValue());
1451         ++position;
1452     }
1453 
1454     auto parsedExpression = stdx::make_unique<T>();
1455     parsedExpression->init(std::move(expressions));
1456     return {std::move(parsedExpression)};
1457 }
1458 
parseNot(StringData name,BSONElement elem,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1459 StatusWithMatchExpression parseNot(StringData name,
1460                                    BSONElement elem,
1461                                    const boost::intrusive_ptr<ExpressionContext>& expCtx,
1462                                    const ExtensionsCallback* extensionsCallback,
1463                                    MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1464                                    DocumentParseLevel currentLevel) {
1465     if (elem.type() == BSONType::RegEx) {
1466         auto s = parseRegexElement(name, elem);
1467         if (!s.isOK())
1468             return s;
1469         auto n = stdx::make_unique<NotMatchExpression>();
1470         auto s2 = n->init(s.getValue().release());
1471         if (!s2.isOK())
1472             return StatusWithMatchExpression(s2);
1473         return {std::move(n)};
1474     }
1475 
1476     if (elem.type() != BSONType::Object)
1477         return StatusWithMatchExpression(ErrorCodes::BadValue, "$not needs a regex or a document");
1478 
1479     auto notObject = elem.Obj();
1480     if (notObject.isEmpty())
1481         return StatusWithMatchExpression(ErrorCodes::BadValue, "$not cannot be empty");
1482 
1483     auto theAnd = stdx::make_unique<AndMatchExpression>();
1484     auto s = parseSub(
1485         name, notObject, theAnd.get(), expCtx, extensionsCallback, allowedFeatures, currentLevel);
1486     if (!s.isOK())
1487         return StatusWithMatchExpression(s);
1488 
1489     for (size_t i = 0; i < theAnd->numChildren(); i++)
1490         if (theAnd->getChild(i)->matchType() == MatchExpression::REGEX)
1491             return StatusWithMatchExpression(ErrorCodes::BadValue, "$not cannot have a regex");
1492 
1493     auto theNot = stdx::make_unique<NotMatchExpression>();
1494     s = theNot->init(theAnd.release());
1495     if (!s.isOK())
1496         return StatusWithMatchExpression(s);
1497 
1498     return {std::move(theNot)};
1499 }
1500 
1501 /**
1502  * Parses a single field in a sub expression.
1503  * If the query is { x : { $gt : 5, $lt : 8 } },
1504  * 'e' is $gt : 5
1505  */
parseSubField(const BSONObj & context,const AndMatchExpression * andSoFar,StringData name,BSONElement e,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1506 StatusWithMatchExpression parseSubField(const BSONObj& context,
1507                                         const AndMatchExpression* andSoFar,
1508                                         StringData name,
1509                                         BSONElement e,
1510                                         const boost::intrusive_ptr<ExpressionContext>& expCtx,
1511                                         const ExtensionsCallback* extensionsCallback,
1512                                         MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1513                                         DocumentParseLevel currentLevel) {
1514     invariant(e);
1515 
1516     if ("$eq"_sd == e.fieldNameStringData()) {
1517         return parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1518     }
1519 
1520     if ("$not"_sd == e.fieldNameStringData()) {
1521         return parseNot(name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1522     }
1523 
1524     auto parseExpMatchType = MatchExpressionParser::parsePathAcceptingKeyword(e);
1525     if (!parseExpMatchType) {
1526         // $where cannot be a sub-expression because it works on top-level documents only.
1527         if ("$where"_sd == e.fieldNameStringData()) {
1528             return {Status(ErrorCodes::BadValue, "$where cannot be applied to a field")};
1529         }
1530 
1531         return {Status(ErrorCodes::BadValue,
1532                        str::stream() << "unknown operator: " << e.fieldNameStringData())};
1533     }
1534 
1535     switch (*parseExpMatchType) {
1536         case PathAcceptingKeyword::LESS_THAN:
1537             return parseComparison(name, new LTMatchExpression(), e, expCtx, allowedFeatures);
1538         case PathAcceptingKeyword::LESS_THAN_OR_EQUAL:
1539             return parseComparison(name, new LTEMatchExpression(), e, expCtx, allowedFeatures);
1540         case PathAcceptingKeyword::GREATER_THAN:
1541             return parseComparison(name, new GTMatchExpression(), e, expCtx, allowedFeatures);
1542         case PathAcceptingKeyword::GREATER_THAN_OR_EQUAL:
1543             return parseComparison(name, new GTEMatchExpression(), e, expCtx, allowedFeatures);
1544         case PathAcceptingKeyword::NOT_EQUAL: {
1545             if (BSONType::RegEx == e.type()) {
1546                 // Just because $ne can be rewritten as the negation of an equality does not mean
1547                 // that $ne of a regex is allowed. See SERVER-1705.
1548                 return {Status(ErrorCodes::BadValue, "Can't have regex as arg to $ne.")};
1549             }
1550             auto s =
1551                 parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1552             if (!s.isOK())
1553                 return s;
1554             auto n = stdx::make_unique<NotMatchExpression>();
1555             auto s2 = n->init(s.getValue().release());
1556             if (!s2.isOK())
1557                 return s2;
1558             return {std::move(n)};
1559         }
1560         case PathAcceptingKeyword::EQUALITY:
1561             return parseComparison(name, new EqualityMatchExpression(), e, expCtx, allowedFeatures);
1562 
1563         case PathAcceptingKeyword::IN_EXPR: {
1564             if (e.type() != BSONType::Array)
1565                 return {Status(ErrorCodes::BadValue, "$in needs an array")};
1566             auto temp = stdx::make_unique<InMatchExpression>();
1567             auto s = temp->init(name);
1568             if (!s.isOK())
1569                 return s;
1570             s = parseInExpression(temp.get(), e.Obj(), expCtx);
1571             if (!s.isOK())
1572                 return s;
1573             return {std::move(temp)};
1574         }
1575 
1576         case PathAcceptingKeyword::NOT_IN: {
1577             if (e.type() != BSONType::Array)
1578                 return {Status(ErrorCodes::BadValue, "$nin needs an array")};
1579             auto temp = stdx::make_unique<InMatchExpression>();
1580             auto s = temp->init(name);
1581             if (!s.isOK())
1582                 return s;
1583             s = parseInExpression(temp.get(), e.Obj(), expCtx);
1584             if (!s.isOK())
1585                 return s;
1586 
1587             auto temp2 = stdx::make_unique<NotMatchExpression>();
1588             s = temp2->init(temp.release());
1589             if (!s.isOK())
1590                 return s;
1591 
1592             return {std::move(temp2)};
1593         }
1594 
1595         case PathAcceptingKeyword::SIZE: {
1596             int size = 0;
1597             if (e.type() == BSONType::NumberInt) {
1598                 size = e.numberInt();
1599             } else if (e.type() == BSONType::NumberLong) {
1600                 if (e.numberInt() == e.numberLong()) {
1601                     size = e.numberInt();
1602                 } else {
1603                     return {Status(ErrorCodes::BadValue,
1604                                    "$size must be representable as a 32-bit integer")};
1605                 }
1606             } else if (e.type() == BSONType::NumberDouble) {
1607                 if (e.numberInt() == e.numberDouble()) {
1608                     size = e.numberInt();
1609                 } else {
1610                     return {Status(ErrorCodes::BadValue, "$size must be a whole number")};
1611                 }
1612             } else {
1613                 return {Status(ErrorCodes::BadValue, "$size needs a number")};
1614             }
1615             if (size < 0) {
1616                 return {Status(ErrorCodes::BadValue, "$size may not be negative")};
1617             }
1618 
1619             auto temp = stdx::make_unique<SizeMatchExpression>();
1620             auto s = temp->init(name, size);
1621             if (!s.isOK())
1622                 return s;
1623             return {std::move(temp)};
1624         }
1625 
1626         case PathAcceptingKeyword::EXISTS: {
1627             if (!e)
1628                 return {Status(ErrorCodes::BadValue, "$exists can't be eoo")};
1629             auto temp = stdx::make_unique<ExistsMatchExpression>();
1630             auto s = temp->init(name);
1631             if (!s.isOK())
1632                 return s;
1633             if (e.trueValue())
1634                 return {std::move(temp)};
1635             auto temp2 = stdx::make_unique<NotMatchExpression>();
1636             s = temp2->init(temp.release());
1637             if (!s.isOK())
1638                 return s;
1639             return {std::move(temp2)};
1640         }
1641 
1642         case PathAcceptingKeyword::TYPE:
1643             return parseType<TypeMatchExpression>(name, e);
1644 
1645         case PathAcceptingKeyword::MOD:
1646             return parseMOD(name, e);
1647 
1648         case PathAcceptingKeyword::OPTIONS: {
1649             // TODO: try to optimize this
1650             // we have to do this since $options can be before or after a $regex
1651             // but we validate here
1652             for (auto temp : context) {
1653                 if (MatchExpressionParser::parsePathAcceptingKeyword(temp) ==
1654                     PathAcceptingKeyword::REGEX)
1655                     return {nullptr};
1656             }
1657 
1658             return {Status(ErrorCodes::BadValue, "$options needs a $regex")};
1659         }
1660 
1661         case PathAcceptingKeyword::REGEX: {
1662             return parseRegexDocument(name, context);
1663         }
1664 
1665         case PathAcceptingKeyword::ELEM_MATCH:
1666             return parseElemMatch(name, e, expCtx, extensionsCallback, allowedFeatures);
1667 
1668         case PathAcceptingKeyword::ALL:
1669             return parseAll(name, e, expCtx, extensionsCallback, allowedFeatures);
1670 
1671         case PathAcceptingKeyword::WITHIN:
1672         case PathAcceptingKeyword::GEO_INTERSECTS:
1673             return parseGeo(name, *parseExpMatchType, context, allowedFeatures);
1674 
1675         case PathAcceptingKeyword::GEO_NEAR:
1676             return {Status(ErrorCodes::BadValue,
1677                            str::stream() << "near must be first in: " << context)};
1678 
1679         case PathAcceptingKeyword::INTERNAL_EXPR_EQ: {
1680             if (e.type() == BSONType::Undefined || e.type() == BSONType::Array) {
1681                 return {Status(ErrorCodes::BadValue,
1682                                str::stream() << InternalExprEqMatchExpression::kName
1683                                              << " cannot be used to compare to type: "
1684                                              << typeName(e.type()))};
1685             }
1686 
1687             auto exprEqExpr = stdx::make_unique<InternalExprEqMatchExpression>();
1688             auto status = exprEqExpr->init(name, e);
1689             if (!status.isOK()) {
1690                 return status;
1691             }
1692             exprEqExpr->setCollator(expCtx->getCollator());
1693             return {std::move(exprEqExpr)};
1694         }
1695 
1696         // Handles bitwise query operators.
1697         case PathAcceptingKeyword::BITS_ALL_SET: {
1698             return parseBitTest<BitsAllSetMatchExpression>(name, e);
1699         }
1700 
1701         case PathAcceptingKeyword::BITS_ALL_CLEAR: {
1702             return parseBitTest<BitsAllClearMatchExpression>(name, e);
1703         }
1704 
1705         case PathAcceptingKeyword::BITS_ANY_SET: {
1706             return parseBitTest<BitsAnySetMatchExpression>(name, e);
1707         }
1708 
1709         case PathAcceptingKeyword::BITS_ANY_CLEAR: {
1710             return parseBitTest<BitsAnyClearMatchExpression>(name, e);
1711         }
1712 
1713         case PathAcceptingKeyword::INTERNAL_SCHEMA_FMOD:
1714             return parseInternalSchemaFmod(name, e);
1715 
1716         case PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_ITEMS: {
1717             return parseInternalSchemaSingleIntegerArgument<InternalSchemaMinItemsMatchExpression>(
1718                 name, e);
1719         }
1720 
1721         case PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_ITEMS: {
1722             return parseInternalSchemaSingleIntegerArgument<InternalSchemaMaxItemsMatchExpression>(
1723                 name, e);
1724         }
1725 
1726         case PathAcceptingKeyword::INTERNAL_SCHEMA_OBJECT_MATCH: {
1727             if (e.type() != BSONType::Object) {
1728                 return Status(ErrorCodes::FailedToParse,
1729                               str::stream() << "$_internalSchemaObjectMatch must be an object");
1730             }
1731 
1732             auto parsedSubObjExpr = parse(e.Obj(),
1733                                           expCtx,
1734                                           extensionsCallback,
1735                                           allowedFeatures,
1736                                           DocumentParseLevel::kUserSubDocument);
1737             if (!parsedSubObjExpr.isOK()) {
1738                 return parsedSubObjExpr;
1739             }
1740 
1741             auto expr = stdx::make_unique<InternalSchemaObjectMatchExpression>();
1742             auto status = expr->init(std::move(parsedSubObjExpr.getValue()), name);
1743             if (!status.isOK()) {
1744                 return status;
1745             }
1746             return {std::move(expr)};
1747         }
1748 
1749         case PathAcceptingKeyword::INTERNAL_SCHEMA_UNIQUE_ITEMS: {
1750             if (!e.isBoolean() || !e.boolean()) {
1751                 return {ErrorCodes::FailedToParse,
1752                         str::stream() << name << " must be a boolean of value true"};
1753             }
1754 
1755             auto expr = stdx::make_unique<InternalSchemaUniqueItemsMatchExpression>();
1756             auto status = expr->init(name);
1757             if (!status.isOK()) {
1758                 return status;
1759             }
1760             return {std::move(expr)};
1761         }
1762 
1763         case PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_LENGTH: {
1764             return parseInternalSchemaSingleIntegerArgument<InternalSchemaMinLengthMatchExpression>(
1765                 name, e);
1766         }
1767 
1768         case PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_LENGTH: {
1769             return parseInternalSchemaSingleIntegerArgument<InternalSchemaMaxLengthMatchExpression>(
1770                 name, e);
1771         }
1772 
1773         case PathAcceptingKeyword::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX: {
1774             return parseInternalSchemaMatchArrayIndex(
1775                 name, e, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1776         }
1777 
1778         case PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: {
1779             if (e.type() != BSONType::Array) {
1780                 return Status(ErrorCodes::FailedToParse,
1781                               str::stream()
1782                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1783                                   << " must be an array");
1784             }
1785             auto elemMatchObj = e.embeddedObject();
1786             auto iter = BSONObjIterator(elemMatchObj);
1787             if (!iter.more()) {
1788                 return Status(ErrorCodes::FailedToParse,
1789                               str::stream()
1790                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1791                                   << " must be an array of size 2");
1792             }
1793             auto first = iter.next();
1794             auto parsedIndex = MatchExpressionParser::parseIntegerElementToNonNegativeLong(first);
1795             if (!parsedIndex.isOK()) {
1796                 return Status(ErrorCodes::TypeMismatch,
1797                               str::stream()
1798                                   << "first element of "
1799                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1800                                   << " must be a non-negative integer");
1801             }
1802             if (!iter.more()) {
1803                 return Status(ErrorCodes::FailedToParse,
1804                               str::stream()
1805                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1806                                   << " must be an array of size 2");
1807             }
1808             auto second = iter.next();
1809             if (iter.more()) {
1810                 return Status(ErrorCodes::FailedToParse,
1811                               str::stream()
1812                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1813                                   << " has too many elements, must be an array of size 2");
1814             }
1815             if (second.type() != BSONType::Object) {
1816                 return Status(ErrorCodes::TypeMismatch,
1817                               str::stream()
1818                                   << "second element of "
1819                                   << InternalSchemaAllElemMatchFromIndexMatchExpression::kName
1820                                   << "must be an object");
1821             }
1822 
1823             auto filter = parse(second.embeddedObject(),
1824                                 expCtx,
1825                                 extensionsCallback,
1826                                 MatchExpressionParser::kBanAllSpecialFeatures,
1827                                 DocumentParseLevel::kUserSubDocument);
1828 
1829             if (!filter.isOK()) {
1830                 return filter.getStatus();
1831             }
1832 
1833             auto exprWithPlaceholder =
1834                 ExpressionWithPlaceholder::make(std::move(filter.getValue()));
1835             if (!exprWithPlaceholder.isOK()) {
1836                 return exprWithPlaceholder.getStatus();
1837             }
1838 
1839             auto expr = stdx::make_unique<InternalSchemaAllElemMatchFromIndexMatchExpression>();
1840             auto status =
1841                 expr->init(name, parsedIndex.getValue(), std::move(exprWithPlaceholder.getValue()));
1842             if (!status.isOK()) {
1843                 return status;
1844             }
1845             return {std::move(expr)};
1846         }
1847 
1848         case PathAcceptingKeyword::INTERNAL_SCHEMA_TYPE: {
1849             return parseType<InternalSchemaTypeExpression>(name, e);
1850         }
1851 
1852         case PathAcceptingKeyword::INTERNAL_SCHEMA_EQ: {
1853             auto eqExpr = stdx::make_unique<InternalSchemaEqMatchExpression>();
1854             auto status = eqExpr->init(name, e);
1855             if (!status.isOK()) {
1856                 return status;
1857             }
1858             return {std::move(eqExpr)};
1859         }
1860     }
1861 
1862     return {
1863         Status(ErrorCodes::BadValue, str::stream() << "not handled: " << e.fieldNameStringData())};
1864 }
1865 
1866 /**
1867  * Parses a field in a sub expression.
1868  * If the query is { x : { $gt : 5, $lt : 8 } },
1869  * 'e' is { $gt : 5, $lt : 8 }
1870  */
parseSub(StringData name,const BSONObj & sub,AndMatchExpression * root,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback * extensionsCallback,MatchExpressionParser::AllowedFeatureSet allowedFeatures,DocumentParseLevel currentLevel)1871 Status parseSub(StringData name,
1872                 const BSONObj& sub,
1873                 AndMatchExpression* root,
1874                 const boost::intrusive_ptr<ExpressionContext>& expCtx,
1875                 const ExtensionsCallback* extensionsCallback,
1876                 MatchExpressionParser::AllowedFeatureSet allowedFeatures,
1877                 DocumentParseLevel currentLevel) {
1878     // The one exception to {field : {fully contained argument} } is, of course, geo.  Example:
1879     // sub == { field : {$near[Sphere]: [0,0], $maxDistance: 1000, $minDistance: 10 } }
1880     // We peek inside of 'sub' to see if it's possibly a $near.  If so, we can't iterate over its
1881     // subfields and parse them one at a time (there is no $maxDistance without $near), so we hand
1882     // the entire object over to the geo parsing routines.
1883 
1884     // Special case parsing for geoNear. This is necessary in order to support query formats like
1885     // {$near: <coords>, $maxDistance: <distance>}. No other query operators allow $-prefixed
1886     // modifiers as sibling BSON elements.
1887     BSONObjIterator geoIt(sub);
1888     if (geoIt.more()) {
1889         auto firstElt = geoIt.next();
1890         if (firstElt.isABSONObj()) {
1891             if (MatchExpressionParser::parsePathAcceptingKeyword(firstElt) ==
1892                 PathAcceptingKeyword::GEO_NEAR) {
1893                 auto s = parseGeo(name, PathAcceptingKeyword::GEO_NEAR, sub, allowedFeatures);
1894                 if (s.isOK()) {
1895                     root->add(s.getValue().release());
1896                 }
1897 
1898                 // Propagate geo parsing result to caller.
1899                 return s.getStatus();
1900             }
1901         }
1902     }
1903 
1904     for (auto deep : sub) {
1905         auto s = parseSubField(
1906             sub, root, name, deep, expCtx, extensionsCallback, allowedFeatures, currentLevel);
1907         if (!s.isOK())
1908             return s.getStatus();
1909 
1910         if (s.getValue())
1911             root->add(s.getValue().release());
1912     }
1913 
1914     return Status::OK();
1915 }
1916 
1917 }  // namespace
1918 
parse(const BSONObj & obj,const boost::intrusive_ptr<ExpressionContext> & expCtx,const ExtensionsCallback & extensionsCallback,AllowedFeatureSet allowedFeatures)1919 StatusWithMatchExpression MatchExpressionParser::parse(
1920     const BSONObj& obj,
1921     const boost::intrusive_ptr<ExpressionContext>& expCtx,
1922     const ExtensionsCallback& extensionsCallback,
1923     AllowedFeatureSet allowedFeatures) {
1924     invariant(expCtx.get());
1925     const DocumentParseLevel currentLevelCall = DocumentParseLevel::kPredicateTopLevel;
1926     try {
1927         return ::mongo::parse(obj, expCtx, &extensionsCallback, allowedFeatures, currentLevelCall);
1928     } catch (const DBException& ex) {
1929         return {ex.toStatus()};
1930     }
1931 }
1932 
1933 namespace {
1934 // Maps from query operator string name to function.
1935 std::unique_ptr<StringMap<
1936     stdx::function<StatusWithMatchExpression(StringData,
1937                                              BSONElement,
1938                                              const boost::intrusive_ptr<ExpressionContext>&,
1939                                              const ExtensionsCallback*,
1940                                              MatchExpressionParser::AllowedFeatureSet,
1941                                              DocumentParseLevel)>>>
1942     pathlessOperatorMap;
1943 
// Builds 'pathlessOperatorMap', associating each pathless operator name (stored without a leading
// '$') with the function that parses it.
MONGO_INITIALIZER(PathlessOperatorMap)(InitializerContext* context) {
    // Callback signature shared by every parser registered below.
    using PathlessParserFn =
        stdx::function<StatusWithMatchExpression(StringData,
                                                 BSONElement,
                                                 const boost::intrusive_ptr<ExpressionContext>&,
                                                 const ExtensionsCallback*,
                                                 MatchExpressionParser::AllowedFeatureSet,
                                                 DocumentParseLevel)>;
    using PathlessParserMap = StringMap<PathlessParserFn>;

    pathlessOperatorMap = stdx::make_unique<PathlessParserMap>(PathlessParserMap{
        {"_internalSchemaAllowedProperties", &parseInternalSchemaAllowedProperties},
        {"_internalSchemaCond",
         &parseInternalSchemaFixedArityArgument<InternalSchemaCondMatchExpression>},
        {"_internalSchemaMaxProperties",
         &parseTopLevelInternalSchemaSingleIntegerArgument<
             InternalSchemaMaxPropertiesMatchExpression>},
        {"_internalSchemaMinProperties",
         &parseTopLevelInternalSchemaSingleIntegerArgument<
             InternalSchemaMinPropertiesMatchExpression>},
        {"_internalSchemaRootDocEq", &parseInternalSchemaRootDocEq},
        {"_internalSchemaXor", &parseTreeTopLevel<InternalSchemaXorMatchExpression>},
        {"alwaysFalse", &parseAlwaysBoolean<AlwaysFalseMatchExpression>},
        {"alwaysTrue", &parseAlwaysBoolean<AlwaysTrueMatchExpression>},
        {"and", &parseTreeTopLevel<AndMatchExpression>},
        {"atomic", &parseAtomicOrIsolated},
        {"comment", &parseComment},
        {"db", &parseDBRef},
        {"expr", &parseExpr},
        {"id", &parseDBRef},
        {"isolated", &parseAtomicOrIsolated},
        {"jsonSchema", &parseJSONSchema},
        {"nor", &parseTreeTopLevel<NorMatchExpression>},
        {"or", &parseTreeTopLevel<OrMatchExpression>},
        {"ref", &parseDBRef},
        {"text", &parseText},
        {"where", &parseWhere},
    });
    return Status::OK();
}
1988 
// Maps from a query operator's string name, stored without the leading '$', to its
// PathAcceptingKeyword. Filled in by MONGO_INITIALIZER(MatchExpressionParser) and consulted by
// MatchExpressionParser::parsePathAcceptingKeyword().
std::unique_ptr<StringMap<PathAcceptingKeyword>> queryOperatorMap;
1991 
// Builds 'queryOperatorMap', the lookup table from a path-accepting query operator's name (stored
// without the leading '$') to its PathAcceptingKeyword. Each operator name is listed exactly once;
// several names may share a keyword (e.g. "geoNear", "near" and "nearSphere" all map to GEO_NEAR).
MONGO_INITIALIZER(MatchExpressionParser)(InitializerContext* context) {
    queryOperatorMap =
        stdx::make_unique<StringMap<PathAcceptingKeyword>>(StringMap<PathAcceptingKeyword>{
            // TODO: SERVER-19565 Add $eq after auditing callers.
            {"_internalExprEq", PathAcceptingKeyword::INTERNAL_EXPR_EQ},
            {"_internalSchemaAllElemMatchFromIndex",
             PathAcceptingKeyword::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX},
            {"_internalSchemaEq", PathAcceptingKeyword::INTERNAL_SCHEMA_EQ},
            {"_internalSchemaFmod", PathAcceptingKeyword::INTERNAL_SCHEMA_FMOD},
            {"_internalSchemaMatchArrayIndex",
             PathAcceptingKeyword::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX},
            {"_internalSchemaMaxItems", PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_ITEMS},
            {"_internalSchemaMaxLength", PathAcceptingKeyword::INTERNAL_SCHEMA_MAX_LENGTH},
            {"_internalSchemaMinItems", PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_ITEMS},
            {"_internalSchemaMinLength", PathAcceptingKeyword::INTERNAL_SCHEMA_MIN_LENGTH},
            {"_internalSchemaObjectMatch", PathAcceptingKeyword::INTERNAL_SCHEMA_OBJECT_MATCH},
            {"_internalSchemaType", PathAcceptingKeyword::INTERNAL_SCHEMA_TYPE},
            {"_internalSchemaUniqueItems", PathAcceptingKeyword::INTERNAL_SCHEMA_UNIQUE_ITEMS},
            {"all", PathAcceptingKeyword::ALL},
            {"bitsAllClear", PathAcceptingKeyword::BITS_ALL_CLEAR},
            {"bitsAllSet", PathAcceptingKeyword::BITS_ALL_SET},
            {"bitsAnyClear", PathAcceptingKeyword::BITS_ANY_CLEAR},
            {"bitsAnySet", PathAcceptingKeyword::BITS_ANY_SET},
            {"elemMatch", PathAcceptingKeyword::ELEM_MATCH},
            {"exists", PathAcceptingKeyword::EXISTS},
            {"geoIntersects", PathAcceptingKeyword::GEO_INTERSECTS},
            {"geoNear", PathAcceptingKeyword::GEO_NEAR},
            {"geoWithin", PathAcceptingKeyword::WITHIN},
            {"gt", PathAcceptingKeyword::GREATER_THAN},
            {"gte", PathAcceptingKeyword::GREATER_THAN_OR_EQUAL},
            {"in", PathAcceptingKeyword::IN_EXPR},
            {"lt", PathAcceptingKeyword::LESS_THAN},
            {"lte", PathAcceptingKeyword::LESS_THAN_OR_EQUAL},
            {"mod", PathAcceptingKeyword::MOD},
            {"ne", PathAcceptingKeyword::NOT_EQUAL},
            {"near", PathAcceptingKeyword::GEO_NEAR},
            {"nearSphere", PathAcceptingKeyword::GEO_NEAR},
            {"nin", PathAcceptingKeyword::NOT_IN},
            {"options", PathAcceptingKeyword::OPTIONS},
            {"regex", PathAcceptingKeyword::REGEX},
            {"size", PathAcceptingKeyword::SIZE},
            {"type", PathAcceptingKeyword::TYPE},
            {"within", PathAcceptingKeyword::WITHIN},
        });
    return Status::OK();
}
2039 
2040 /**
2041  * Returns the proper parser for the indicated pathless operator. Returns 'null' if 'name'
2042  * doesn't represent a known type.
2043  */
2044 stdx::function<StatusWithMatchExpression(StringData,
2045                                          BSONElement,
2046                                          const boost::intrusive_ptr<ExpressionContext>&,
2047                                          const ExtensionsCallback*,
2048                                          MatchExpressionParser::AllowedFeatureSet,
2049                                          DocumentParseLevel)>
retrievePathlessParser(StringData name)2050 retrievePathlessParser(StringData name) {
2051     auto func = pathlessOperatorMap->find(name);
2052     if (func == pathlessOperatorMap->end()) {
2053         return nullptr;
2054     }
2055     return func->second;
2056 }
2057 }  // namespace
2058 
parsePathAcceptingKeyword(BSONElement typeElem,boost::optional<PathAcceptingKeyword> defaultKeyword)2059 boost::optional<PathAcceptingKeyword> MatchExpressionParser::parsePathAcceptingKeyword(
2060     BSONElement typeElem, boost::optional<PathAcceptingKeyword> defaultKeyword) {
2061     auto fieldName = typeElem.fieldNameStringData();
2062     if (fieldName[0] == '$' && fieldName[1]) {
2063         auto opName = fieldName.substr(1);
2064         auto queryOp = queryOperatorMap->find(opName);
2065 
2066         if (queryOp == queryOperatorMap->end()) {
2067             return defaultKeyword;
2068         }
2069         return queryOp->second;
2070     }
2071     return defaultKeyword;
2072 }
2073 }  // namespace mongo
2074