1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #pragma once
32 
33 #include "mongo/bson/bsonelement_comparator.h"
34 #include "mongo/bson/bsonmisc.h"
35 #include "mongo/bson/bsonobj.h"
36 #include "mongo/db/matcher/expression.h"
37 #include "mongo/db/matcher/expression_path.h"
38 #include "mongo/db/query/collation/collator_interface.h"
39 #include "mongo/stdx/memory.h"
40 #include "mongo/stdx/unordered_map.h"
41 
42 namespace pcrecpp {
43 class RE;
44 }  // namespace pcrecpp;
45 
46 namespace mongo {
47 
48 class CollatorInterface;
49 
50 class LeafMatchExpression : public PathMatchExpression {
51 public:
LeafMatchExpression(MatchType matchType)52     LeafMatchExpression(MatchType matchType)
53         : LeafMatchExpression(matchType,
54                               ElementPath::LeafArrayBehavior::kTraverse,
55                               ElementPath::NonLeafArrayBehavior::kTraverse) {}
56 
LeafMatchExpression(MatchType matchType,ElementPath::LeafArrayBehavior leafArrBehavior,ElementPath::NonLeafArrayBehavior nonLeafArrBehavior)57     LeafMatchExpression(MatchType matchType,
58                         ElementPath::LeafArrayBehavior leafArrBehavior,
59                         ElementPath::NonLeafArrayBehavior nonLeafArrBehavior)
60         : PathMatchExpression(matchType, leafArrBehavior, nonLeafArrBehavior) {}
61 
62     virtual ~LeafMatchExpression() = default;
63 
numChildren()64     size_t numChildren() const override {
65         return 0;
66     }
67 
getChild(size_t i)68     MatchExpression* getChild(size_t i) const override {
69         MONGO_UNREACHABLE;
70     }
71 
getChildVector()72     std::vector<MatchExpression*>* getChildVector() override {
73         return nullptr;
74     }
75 
getCategory()76     MatchCategory getCategory() const override {
77         return MatchCategory::kLeaf;
78     }
79 };
80 
81 /**
82  * Base class for comparison-like match expression nodes. This includes both the comparison nodes in
83  * the match language ($eq, $gt, $gte, $lt, and $lte), as well as internal comparison nodes like
84  * $_internalExprEq.
85  */
86 class ComparisonMatchExpressionBase : public LeafMatchExpression {
87 public:
isEquality(MatchType matchType)88     static bool isEquality(MatchType matchType) {
89         switch (matchType) {
90             case MatchExpression::EQ:
91             case MatchExpression::INTERNAL_EXPR_EQ:
92                 return true;
93             default:
94                 return false;
95         }
96     }
97 
ComparisonMatchExpressionBase(MatchType type,ElementPath::LeafArrayBehavior leafArrBehavior,ElementPath::NonLeafArrayBehavior nonLeafArrBehavior)98     ComparisonMatchExpressionBase(MatchType type,
99                                   ElementPath::LeafArrayBehavior leafArrBehavior,
100                                   ElementPath::NonLeafArrayBehavior nonLeafArrBehavior)
101         : LeafMatchExpression(type, leafArrBehavior, nonLeafArrBehavior) {}
102 
103     virtual ~ComparisonMatchExpressionBase() = default;
104 
105     virtual void debugString(StringBuilder& debug, int level = 0) const;
106 
107     virtual void serialize(BSONObjBuilder* out) const;
108 
109     virtual bool equivalent(const MatchExpression* other) const;
110 
111     /**
112      * Returns the name of this MatchExpression.
113      */
114     virtual StringData name() const = 0;
115 
getData()116     const BSONElement& getData() const {
117         return _rhs;
118     }
119 
getCollator()120     const CollatorInterface* getCollator() const {
121         return _collator;
122     }
123 
124 protected:
125     /**
126      * 'collator' must outlive the ComparisonMatchExpression and any clones made of it.
127      */
_doSetCollator(const CollatorInterface * collator)128     virtual void _doSetCollator(const CollatorInterface* collator) {
129         _collator = collator;
130     }
131 
132     BSONElement _rhs;
133 
134     // Collator used to compare elements. By default, simple binary comparison will be used.
135     const CollatorInterface* _collator = nullptr;
136 
137 private:
getOptimizer()138     ExpressionOptimizerFunc getOptimizer() const final {
139         return [](std::unique_ptr<MatchExpression> expression) { return expression; };
140     }
141 };
142 
143 /**
144  * EQ, LTE, LT, GT, GTE subclass from ComparisonMatchExpression.
145  */
146 class ComparisonMatchExpression : public ComparisonMatchExpressionBase {
147 public:
148     /**
149      * Returns true if the MatchExpression is a ComparisonMatchExpression.
150      */
isComparisonMatchExpression(const MatchExpression * expr)151     static bool isComparisonMatchExpression(const MatchExpression* expr) {
152         switch (expr->matchType()) {
153             case MatchExpression::LT:
154             case MatchExpression::LTE:
155             case MatchExpression::EQ:
156             case MatchExpression::GTE:
157             case MatchExpression::GT:
158                 return true;
159             default:
160                 return false;
161         }
162     }
163 
ComparisonMatchExpression(MatchType type)164     explicit ComparisonMatchExpression(MatchType type)
165         : ComparisonMatchExpressionBase(type,
166                                         ElementPath::LeafArrayBehavior::kTraverse,
167                                         ElementPath::NonLeafArrayBehavior::kTraverse) {}
168 
169     virtual ~ComparisonMatchExpression() = default;
170 
171     Status init(StringData path, BSONElement rhs);
172 
173     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
174 };
175 
176 class EqualityMatchExpression final : public ComparisonMatchExpression {
177 public:
178     static constexpr StringData kName = "$eq"_sd;
179 
EqualityMatchExpression()180     EqualityMatchExpression() : ComparisonMatchExpression(EQ) {}
181 
name()182     StringData name() const final {
183         return kName;
184     }
185 
shallowClone()186     virtual std::unique_ptr<MatchExpression> shallowClone() const {
187         std::unique_ptr<ComparisonMatchExpression> e = stdx::make_unique<EqualityMatchExpression>();
188         invariantOK(e->init(path(), _rhs));
189         if (getTag()) {
190             e->setTag(getTag()->clone());
191         }
192         e->setCollator(_collator);
193         return std::move(e);
194     }
195 };
196 
197 class LTEMatchExpression final : public ComparisonMatchExpression {
198 public:
199     static constexpr StringData kName = "$lte"_sd;
200 
LTEMatchExpression()201     LTEMatchExpression() : ComparisonMatchExpression(LTE) {}
202 
name()203     StringData name() const final {
204         return kName;
205     }
206 
shallowClone()207     virtual std::unique_ptr<MatchExpression> shallowClone() const {
208         std::unique_ptr<ComparisonMatchExpression> e = stdx::make_unique<LTEMatchExpression>();
209         invariantOK(e->init(path(), _rhs));
210         if (getTag()) {
211             e->setTag(getTag()->clone());
212         }
213         e->setCollator(_collator);
214         return std::move(e);
215     }
216 };
217 
218 class LTMatchExpression final : public ComparisonMatchExpression {
219 public:
220     static constexpr StringData kName = "$lt"_sd;
221 
LTMatchExpression()222     LTMatchExpression() : ComparisonMatchExpression(LT) {}
223 
name()224     StringData name() const final {
225         return kName;
226     }
227 
shallowClone()228     virtual std::unique_ptr<MatchExpression> shallowClone() const {
229         std::unique_ptr<ComparisonMatchExpression> e = stdx::make_unique<LTMatchExpression>();
230         invariantOK(e->init(path(), _rhs));
231         if (getTag()) {
232             e->setTag(getTag()->clone());
233         }
234         e->setCollator(_collator);
235         return std::move(e);
236     }
237 };
238 
239 class GTMatchExpression final : public ComparisonMatchExpression {
240 public:
241     static constexpr StringData kName = "$gt"_sd;
242 
GTMatchExpression()243     GTMatchExpression() : ComparisonMatchExpression(GT) {}
244 
name()245     StringData name() const final {
246         return kName;
247     }
248 
shallowClone()249     virtual std::unique_ptr<MatchExpression> shallowClone() const {
250         std::unique_ptr<ComparisonMatchExpression> e = stdx::make_unique<GTMatchExpression>();
251         invariantOK(e->init(path(), _rhs));
252         if (getTag()) {
253             e->setTag(getTag()->clone());
254         }
255         e->setCollator(_collator);
256         return std::move(e);
257     }
258 };
259 
260 class GTEMatchExpression : public ComparisonMatchExpression {
261 public:
262     static constexpr StringData kName = "$gte"_sd;
263 
GTEMatchExpression()264     GTEMatchExpression() : ComparisonMatchExpression(GTE) {}
265 
name()266     StringData name() const final {
267         return kName;
268     }
269 
shallowClone()270     virtual std::unique_ptr<MatchExpression> shallowClone() const {
271         std::unique_ptr<ComparisonMatchExpression> e = stdx::make_unique<GTEMatchExpression>();
272         invariantOK(e->init(path(), _rhs));
273         if (getTag()) {
274             e->setTag(getTag()->clone());
275         }
276         e->setCollator(_collator);
277         return std::move(e);
278     }
279 };
280 
281 class RegexMatchExpression : public LeafMatchExpression {
282 public:
283     /**
284      * Maximum pattern size which pcre v8.3 can do matches correctly with
285      * LINK_SIZE define macro set to 2 @ pcre's config.h (based on
286      * experiments)
287      */
288     static const size_t MaxPatternSize = 32764;
289 
290     RegexMatchExpression();
291     ~RegexMatchExpression();
292 
293     Status init(StringData path, StringData regex, StringData options);
294     Status init(StringData path, const BSONElement& e);
295 
shallowClone()296     virtual std::unique_ptr<MatchExpression> shallowClone() const {
297         std::unique_ptr<RegexMatchExpression> e = stdx::make_unique<RegexMatchExpression>();
298         invariantOK(e->init(path(), _regex, _flags));
299         if (getTag()) {
300             e->setTag(getTag()->clone());
301         }
302         return std::move(e);
303     }
304 
305     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
306 
307     virtual void debugString(StringBuilder& debug, int level) const;
308 
309     virtual void serialize(BSONObjBuilder* out) const;
310 
311     void serializeToBSONTypeRegex(BSONObjBuilder* out) const;
312 
313     void shortDebugString(StringBuilder& debug) const;
314 
315     virtual bool equivalent(const MatchExpression* other) const;
316 
getString()317     const std::string& getString() const {
318         return _regex;
319     }
getFlags()320     const std::string& getFlags() const {
321         return _flags;
322     }
323 
324 private:
getOptimizer()325     ExpressionOptimizerFunc getOptimizer() const final {
326         return [](std::unique_ptr<MatchExpression> expression) { return expression; };
327     }
328 
329     std::string _regex;
330     std::string _flags;
331     std::unique_ptr<pcrecpp::RE> _re;
332 };
333 
334 class ModMatchExpression : public LeafMatchExpression {
335 public:
ModMatchExpression()336     ModMatchExpression() : LeafMatchExpression(MOD) {}
337 
338     Status init(StringData path, long long divisor, long long remainder);
339 
shallowClone()340     virtual std::unique_ptr<MatchExpression> shallowClone() const {
341         std::unique_ptr<ModMatchExpression> m = stdx::make_unique<ModMatchExpression>();
342         invariantOK(m->init(path(), _divisor, _remainder));
343         if (getTag()) {
344             m->setTag(getTag()->clone());
345         }
346         return std::move(m);
347     }
348 
349     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
350 
351     virtual void debugString(StringBuilder& debug, int level) const;
352 
353     virtual void serialize(BSONObjBuilder* out) const;
354 
355     virtual bool equivalent(const MatchExpression* other) const;
356 
getDivisor()357     long long getDivisor() const {
358         return _divisor;
359     }
getRemainder()360     long long getRemainder() const {
361         return _remainder;
362     }
363 
truncateToLong(const BSONElement & element)364     static long long truncateToLong(const BSONElement& element) {
365         if (element.type() == BSONType::NumberDecimal) {
366             return element.numberDecimal().toLong(Decimal128::kRoundTowardZero);
367         }
368         return element.numberLong();
369     }
370 
371 private:
getOptimizer()372     ExpressionOptimizerFunc getOptimizer() const final {
373         return [](std::unique_ptr<MatchExpression> expression) { return expression; };
374     }
375 
376     long long _divisor;
377     long long _remainder;
378 };
379 
380 class ExistsMatchExpression : public LeafMatchExpression {
381 public:
ExistsMatchExpression()382     ExistsMatchExpression() : LeafMatchExpression(EXISTS) {}
383 
384     Status init(StringData path);
385 
shallowClone()386     virtual std::unique_ptr<MatchExpression> shallowClone() const {
387         std::unique_ptr<ExistsMatchExpression> e = stdx::make_unique<ExistsMatchExpression>();
388         invariantOK(e->init(path()));
389         if (getTag()) {
390             e->setTag(getTag()->clone());
391         }
392         return std::move(e);
393     }
394 
395     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
396 
397     virtual void debugString(StringBuilder& debug, int level) const;
398 
399     virtual void serialize(BSONObjBuilder* out) const;
400 
401     virtual bool equivalent(const MatchExpression* other) const;
402 
403 private:
getOptimizer()404     ExpressionOptimizerFunc getOptimizer() const final {
405         return [](std::unique_ptr<MatchExpression> expression) { return expression; };
406     }
407 };
408 
409 /**
410  * query operator: $in
411  */
412 class InMatchExpression : public LeafMatchExpression {
413 public:
InMatchExpression()414     InMatchExpression()
415         : LeafMatchExpression(MATCH_IN),
416           _eltCmp(BSONElementComparator::FieldNamesMode::kIgnore, _collator),
417           _equalitySet(_eltCmp.makeBSONEltFlatSet(_originalEqualityVector)) {}
418 
419     Status init(StringData path);
420 
421     virtual std::unique_ptr<MatchExpression> shallowClone() const;
422 
423     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
424 
425     virtual void debugString(StringBuilder& debug, int level) const;
426 
427     virtual void serialize(BSONObjBuilder* out) const;
428 
429     virtual bool equivalent(const MatchExpression* other) const;
430 
431     /**
432      * 'collator' must outlive the InMatchExpression and any clones made of it.
433      */
434     virtual void _doSetCollator(const CollatorInterface* collator);
435 
436     Status setEqualities(std::vector<BSONElement> equalities);
437 
438     Status addRegex(std::unique_ptr<RegexMatchExpression> expr);
439 
getEqualities()440     const BSONEltFlatSet& getEqualities() const {
441         return _equalitySet;
442     }
443 
getRegexes()444     const std::vector<std::unique_ptr<RegexMatchExpression>>& getRegexes() const {
445         return _regexes;
446     }
447 
getCollator()448     const CollatorInterface* getCollator() const {
449         return _collator;
450     }
451 
hasNull()452     bool hasNull() const {
453         return _hasNull;
454     }
455 
hasEmptyArray()456     bool hasEmptyArray() const {
457         return _hasEmptyArray;
458     }
459 
460 private:
461     ExpressionOptimizerFunc getOptimizer() const final;
462 
463     // Whether or not '_equalities' has a jstNULL element in it.
464     bool _hasNull = false;
465 
466     // Whether or not '_equalities' has an empty array element in it.
467     bool _hasEmptyArray = false;
468 
469     // Collator used to construct '_eltCmp';
470     const CollatorInterface* _collator = nullptr;
471 
472     // Comparator used to compare elements. By default, simple binary comparison will be used.
473     BSONElementComparator _eltCmp;
474 
475     // Original container of equality elements, including duplicates. Needed for re-computing
476     // '_equalitySet' in case '_collator' changes after elements have been added.
477     //
478     // We keep the equalities in sorted order according to the current BSON element comparator. This
479     // list of equalities will be used to construct a boost::flat_set, which maintains the set of
480     // elements in sorted order within a contiguous region of memory. Sorting and then constructing
481     // a flat_set is O(n log n), whereas the boost::flat_set constructor is O(n ^ 2) due to
482     // https://svn.boost.org/trac10/ticket/13140.
483     std::vector<BSONElement> _originalEqualityVector;
484 
485     // Set of equality elements associated with this expression. '_eltCmp' is used as a comparator
486     // for this set.
487     BSONEltFlatSet _equalitySet;
488 
489     // Container of regex elements this object owns.
490     std::vector<std::unique_ptr<RegexMatchExpression>> _regexes;
491 };
492 
493 /**
494  * Bit test query operators include $bitsAllSet, $bitsAllClear, $bitsAnySet, and $bitsAnyClear.
495  */
496 class BitTestMatchExpression : public LeafMatchExpression {
497 public:
BitTestMatchExpression(MatchType type)498     explicit BitTestMatchExpression(MatchType type) : LeafMatchExpression(type) {}
~BitTestMatchExpression()499     virtual ~BitTestMatchExpression() {}
500 
501     /**
502      * Initialize with either bit positions, a 64-bit numeric bitmask, or a char array
503      * bitmask.
504      */
505     Status init(StringData path, std::vector<uint32_t> bitPositions);
506     Status init(StringData path, uint64_t bitMask);
507     Status init(StringData path, const char* bitMaskBinary, uint32_t bitMaskLen);
508 
509     bool matchesSingleElement(const BSONElement&, MatchDetails* details = nullptr) const final;
510 
511     virtual void debugString(StringBuilder& debug, int level) const;
512 
513     virtual void serialize(BSONObjBuilder* out) const;
514 
515     virtual bool equivalent(const MatchExpression* other) const;
516 
numBitPositions()517     size_t numBitPositions() const {
518         return _bitPositions.size();
519     }
520 
getBitPositions()521     const std::vector<uint32_t>& getBitPositions() const {
522         return _bitPositions;
523     }
524 
525 protected:
526     /**
527      * Used to copy this match expression to another BitTestMatchExpression. Does not take
528      * ownership.
529      */
initClone(BitTestMatchExpression * clone)530     void initClone(BitTestMatchExpression* clone) const {
531         invariantOK(clone->init(path(), _bitPositions));
532         if (getTag()) {
533             clone->setTag(getTag()->clone());
534         }
535     }
536 
537 private:
getOptimizer()538     ExpressionOptimizerFunc getOptimizer() const final {
539         return [](std::unique_ptr<MatchExpression> expression) { return expression; };
540     }
541 
542     /**
543      * Performs bit test using bit positions on 'eValue' and returns whether or not the bit test
544      * passes.
545      */
546     bool performBitTest(long long eValue) const;
547 
548     /**
549      * Performs bit test using bit positions on 'eBinary' with length (in bytes) 'eBinaryLen' and
550      * returns whether or not the bit test passes.
551      */
552     bool performBitTest(const char* eBinary, uint32_t eBinaryLen) const;
553 
554     /**
555      * Helper function for performBitTest(...).
556      *
557      * needFurtherBitTests() determines if the result of a bit-test ('isBitSet') is enough
558      * information to skip the rest of the bit tests.
559      **/
560     bool needFurtherBitTests(bool isBitSet) const;
561 
562     // Vector of bit positions to test, with bit position 0 being the least significant bit.
563     // Used to perform bit tests against BinData.
564     std::vector<uint32_t> _bitPositions;
565 
566     // Used to perform bit tests against numbers using a single bitwise operation.
567     uint64_t _bitMask = 0;
568 };
569 
570 class BitsAllSetMatchExpression : public BitTestMatchExpression {
571 public:
BitsAllSetMatchExpression()572     BitsAllSetMatchExpression() : BitTestMatchExpression(BITS_ALL_SET) {}
shallowClone()573     virtual std::unique_ptr<MatchExpression> shallowClone() const {
574         std::unique_ptr<BitTestMatchExpression> bitTestMatchExpression =
575             stdx::make_unique<BitsAllSetMatchExpression>();
576         initClone(bitTestMatchExpression.get());
577         return std::move(bitTestMatchExpression);
578     }
579 };
580 
581 class BitsAllClearMatchExpression : public BitTestMatchExpression {
582 public:
BitsAllClearMatchExpression()583     BitsAllClearMatchExpression() : BitTestMatchExpression(BITS_ALL_CLEAR) {}
shallowClone()584     virtual std::unique_ptr<MatchExpression> shallowClone() const {
585         std::unique_ptr<BitTestMatchExpression> bitTestMatchExpression =
586             stdx::make_unique<BitsAllClearMatchExpression>();
587         initClone(bitTestMatchExpression.get());
588         return std::move(bitTestMatchExpression);
589     }
590 };
591 
592 class BitsAnySetMatchExpression : public BitTestMatchExpression {
593 public:
BitsAnySetMatchExpression()594     BitsAnySetMatchExpression() : BitTestMatchExpression(BITS_ANY_SET) {}
shallowClone()595     virtual std::unique_ptr<MatchExpression> shallowClone() const {
596         std::unique_ptr<BitTestMatchExpression> bitTestMatchExpression =
597             stdx::make_unique<BitsAnySetMatchExpression>();
598         initClone(bitTestMatchExpression.get());
599         return std::move(bitTestMatchExpression);
600     }
601 };
602 
603 class BitsAnyClearMatchExpression : public BitTestMatchExpression {
604 public:
BitsAnyClearMatchExpression()605     BitsAnyClearMatchExpression() : BitTestMatchExpression(BITS_ANY_CLEAR) {}
shallowClone()606     virtual std::unique_ptr<MatchExpression> shallowClone() const {
607         std::unique_ptr<BitTestMatchExpression> bitTestMatchExpression =
608             stdx::make_unique<BitsAnyClearMatchExpression>();
609         initClone(bitTestMatchExpression.get());
610         return std::move(bitTestMatchExpression);
611     }
612 };
613 
614 }  // namespace mongo
615