1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #include "mongo/platform/basic.h"
32 
33 #include "mongo/db/pipeline/value.h"
34 
35 #include <boost/functional/hash.hpp>
36 #include <cmath>
37 #include <limits>
38 
39 #include "mongo/base/compare_numbers.h"
40 #include "mongo/base/data_type_endian.h"
41 #include "mongo/base/simple_string_data_comparator.h"
42 #include "mongo/bson/bson_depth.h"
43 #include "mongo/bson/simple_bsonobj_comparator.h"
44 #include "mongo/db/jsobj.h"
45 #include "mongo/db/pipeline/document.h"
46 #include "mongo/db/query/datetime/date_time_support.h"
47 #include "mongo/platform/decimal128.h"
48 #include "mongo/util/hex.h"
49 #include "mongo/util/mongoutils/str.h"
50 
51 namespace mongo {
52 using namespace mongoutils;
53 using boost::intrusive_ptr;
54 using std::min;
55 using std::numeric_limits;
56 using std::ostream;
57 using std::string;
58 using std::stringstream;
59 using std::vector;
60 
namespace {
// Format pattern passed to formatDate() (on the UTC zone) by Value::coerceToString() when
// rendering a Date value as a string.
constexpr StringData kISOFormatString = "%Y-%m-%dT%H:%M:%S.%LZ"_sd;
}
64 
verifyRefCountingIfShould() const65 void ValueStorage::verifyRefCountingIfShould() const {
66     switch (type) {
67         case MinKey:
68         case MaxKey:
69         case jstOID:
70         case Date:
71         case bsonTimestamp:
72         case EOO:
73         case jstNULL:
74         case Undefined:
75         case Bool:
76         case NumberInt:
77         case NumberLong:
78         case NumberDouble:
79             // the above types never reference external data
80             verify(!refCounter);
81             break;
82 
83         case String:
84         case RegEx:
85         case Code:
86         case Symbol:
87             // the above types reference data when not using short-string optimization
88             verify(refCounter == !shortStr);
89             break;
90 
91         case NumberDecimal:
92         case BinData:  // TODO this should probably support short-string optimization
93         case Array:    // TODO this should probably support empty-is-NULL optimization
94         case DBRef:
95         case CodeWScope:
96             // the above types always reference external data.
97             verify(refCounter);
98             verify(bool(genericRCPtr));
99             break;
100 
101         case Object:
102             // Objects either hold a NULL ptr or should be ref-counting
103             verify(refCounter == bool(genericRCPtr));
104             break;
105     }
106 }
107 
putString(StringData s)108 void ValueStorage::putString(StringData s) {
109     // Note: this also stores data portion of BinData
110     const size_t sizeNoNUL = s.size();
111     if (sizeNoNUL <= sizeof(shortStrStorage)) {
112         shortStr = true;
113         shortStrSize = s.size();
114         s.copyTo(shortStrStorage, false);  // no NUL
115 
116         // All memory is zeroed before this is called, so we know that
117         // the nulTerminator field will definitely contain a NUL byte.
118         dassert(((sizeNoNUL < sizeof(shortStrStorage)) && (shortStrStorage[sizeNoNUL] == '\0')) ||
119                 (((shortStrStorage + sizeNoNUL) == &nulTerminator) && (nulTerminator == '\0')));
120     } else {
121         putRefCountable(RCString::create(s));
122     }
123 }
124 
putDocument(const Document & d)125 void ValueStorage::putDocument(const Document& d) {
126     putRefCountable(d._storage);
127 }
128 
putVector(const RCVector * vec)129 void ValueStorage::putVector(const RCVector* vec) {
130     fassert(16485, vec);
131     putRefCountable(vec);
132 }
133 
putRegEx(const BSONRegEx & re)134 void ValueStorage::putRegEx(const BSONRegEx& re) {
135     const size_t patternLen = re.pattern.size();
136     const size_t flagsLen = re.flags.size();
137     const size_t totalLen = patternLen + 1 /*middle NUL*/ + flagsLen;
138 
139     // Need to copy since putString doesn't support scatter-gather.
140     std::unique_ptr<char[]> buf(new char[totalLen]);
141     re.pattern.copyTo(buf.get(), true);
142     re.flags.copyTo(buf.get() + patternLen + 1, false);  // no NUL
143     putString(StringData(buf.get(), totalLen));
144 }
145 
getDocument() const146 Document ValueStorage::getDocument() const {
147     if (!genericRCPtr)
148         return Document();
149 
150     dassert(typeid(*genericRCPtr) == typeid(const DocumentStorage));
151     const DocumentStorage* documentPtr = static_cast<const DocumentStorage*>(genericRCPtr);
152     return Document(documentPtr);
153 }
154 
// Builds an Object-typed Value by converting 'obj' to a Document.
// Defined here rather than in the header because Document is only forward declared there.
Value::Value(const BSONObj& obj) : _storage(Object, Document(obj)) {}
157 
Value(const BSONElement & elem)158 Value::Value(const BSONElement& elem) : _storage(elem.type()) {
159     switch (elem.type()) {
160         // These are all type-only, no data
161         case EOO:
162         case MinKey:
163         case MaxKey:
164         case Undefined:
165         case jstNULL:
166             break;
167 
168         case NumberDouble:
169             _storage.doubleValue = elem.Double();
170             break;
171 
172         case Code:
173         case Symbol:
174         case String:
175             _storage.putString(elem.valueStringData());
176             break;
177 
178         case Object: {
179             _storage.putDocument(Document(elem.embeddedObject()));
180             break;
181         }
182 
183         case Array: {
184             intrusive_ptr<RCVector> vec(new RCVector);
185             BSONForEach(sub, elem.embeddedObject()) {
186                 vec->vec.push_back(Value(sub));
187             }
188             _storage.putVector(vec.get());
189             break;
190         }
191 
192         case jstOID:
193             MONGO_STATIC_ASSERT(sizeof(_storage.oid) == OID::kOIDSize);
194             memcpy(_storage.oid, elem.OID().view().view(), OID::kOIDSize);
195             break;
196 
197         case Bool:
198             _storage.boolValue = elem.boolean();
199             break;
200 
201         case Date:
202             _storage.dateValue = elem.date().toMillisSinceEpoch();
203             break;
204 
205         case RegEx: {
206             _storage.putRegEx(BSONRegEx(elem.regex(), elem.regexFlags()));
207             break;
208         }
209 
210         case NumberInt:
211             _storage.intValue = elem.numberInt();
212             break;
213 
214         case bsonTimestamp:
215             _storage.timestampValue = elem.timestamp().asULL();
216             break;
217 
218         case NumberLong:
219             _storage.longValue = elem.numberLong();
220             break;
221 
222         case NumberDecimal:
223             _storage.putDecimal(elem.numberDecimal());
224             break;
225 
226         case CodeWScope: {
227             StringData code(elem.codeWScopeCode(), elem.codeWScopeCodeLen() - 1);
228             _storage.putCodeWScope(BSONCodeWScope(code, elem.codeWScopeObject()));
229             break;
230         }
231 
232         case BinData: {
233             int len;
234             const char* data = elem.binData(len);
235             _storage.putBinData(BSONBinData(data, len, elem.binDataType()));
236             break;
237         }
238 
239         case DBRef:
240             _storage.putDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID()));
241             break;
242     }
243 }
244 
Value(const BSONArray & arr)245 Value::Value(const BSONArray& arr) : _storage(Array) {
246     intrusive_ptr<RCVector> vec(new RCVector);
247     BSONForEach(sub, arr) {
248         vec->vec.push_back(Value(sub));
249     }
250     _storage.putVector(vec.get());
251 }
252 
Value(const vector<BSONObj> & vec)253 Value::Value(const vector<BSONObj>& vec) : _storage(Array) {
254     intrusive_ptr<RCVector> storageVec(new RCVector);
255     storageVec->vec.reserve(vec.size());
256     for (auto&& obj : vec) {
257         storageVec->vec.push_back(Value(obj));
258     }
259     _storage.putVector(storageVec.get());
260 }
261 
Value(const vector<Document> & vec)262 Value::Value(const vector<Document>& vec) : _storage(Array) {
263     intrusive_ptr<RCVector> storageVec(new RCVector);
264     storageVec->vec.reserve(vec.size());
265     for (auto&& obj : vec) {
266         storageVec->vec.push_back(Value(obj));
267     }
268     _storage.putVector(storageVec.get());
269 }
270 
createIntOrLong(long long longValue)271 Value Value::createIntOrLong(long long longValue) {
272     int intValue = longValue;
273     if (intValue != longValue) {
274         // it is too large to be an int and should remain a long
275         return Value(longValue);
276     }
277 
278     // should be an int since all arguments were int and it fits
279     return Value(intValue);
280 }
281 
getDecimal() const282 Decimal128 Value::getDecimal() const {
283     BSONType type = getType();
284     if (type == NumberInt)
285         return Decimal128(static_cast<int32_t>(_storage.intValue));
286     if (type == NumberLong)
287         return Decimal128(static_cast<int64_t>(_storage.longValue));
288     if (type == NumberDouble)
289         return Decimal128(_storage.doubleValue);
290     invariant(type == NumberDecimal);
291     return _storage.getDecimal();
292 }
293 
getDouble() const294 double Value::getDouble() const {
295     BSONType type = getType();
296     if (type == NumberInt)
297         return _storage.intValue;
298     if (type == NumberLong)
299         return static_cast<double>(_storage.longValue);
300     if (type == NumberDecimal)
301         return _storage.getDecimal().toDouble();
302 
303     verify(type == NumberDouble);
304     return _storage.doubleValue;
305 }
306 
// Returns the Document held by this Value. The Value must be of type Object; any other type
// fails the verify() below.
Document Value::getDocument() const {
    verify(getType() == Object);
    return _storage.getDocument();
}
311 
operator [](size_t index) const312 Value Value::operator[](size_t index) const {
313     if (getType() != Array || index >= getArrayLength())
314         return Value();
315 
316     return getArray()[index];
317 }
318 
operator [](StringData name) const319 Value Value::operator[](StringData name) const {
320     if (getType() != Object)
321         return Value();
322 
323     return getDocument()[name];
324 }
325 
operator <<(BSONObjBuilderValueStream & builder,const Value & val)326 BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Value& val) {
327     switch (val.getType()) {
328         case EOO:
329             return builder.builder();  // nothing appended
330         case MinKey:
331             return builder << MINKEY;
332         case MaxKey:
333             return builder << MAXKEY;
334         case jstNULL:
335             return builder << BSONNULL;
336         case Undefined:
337             return builder << BSONUndefined;
338         case jstOID:
339             return builder << val.getOid();
340         case NumberInt:
341             return builder << val.getInt();
342         case NumberLong:
343             return builder << val.getLong();
344         case NumberDouble:
345             return builder << val.getDouble();
346         case NumberDecimal:
347             return builder << val.getDecimal();
348         case String:
349             return builder << val.getStringData();
350         case Bool:
351             return builder << val.getBool();
352         case Date:
353             return builder << val.getDate();
354         case bsonTimestamp:
355             return builder << val.getTimestamp();
356         case Object:
357             return builder << val.getDocument();
358         case Symbol:
359             return builder << BSONSymbol(val.getStringData());
360         case Code:
361             return builder << BSONCode(val.getStringData());
362         case RegEx:
363             return builder << BSONRegEx(val.getRegex(), val.getRegexFlags());
364 
365         case DBRef:
366             return builder << BSONDBRef(val._storage.getDBRef()->ns, val._storage.getDBRef()->oid);
367 
368         case BinData:
369             return builder << BSONBinData(val.getStringData().rawData(),  // looking for void*
370                                           val.getStringData().size(),
371                                           val._storage.binDataType());
372 
373         case CodeWScope:
374             return builder << BSONCodeWScope(val._storage.getCodeWScope()->code,
375                                              val._storage.getCodeWScope()->scope);
376 
377         case Array: {
378             BSONArrayBuilder arrayBuilder(builder.subarrayStart());
379             for (auto&& value : val.getArray()) {
380                 value.addToBsonArray(&arrayBuilder);
381             }
382             arrayBuilder.doneFast();
383             return builder.builder();
384         }
385     }
386     verify(false);
387 }
388 
addToBsonObj(BSONObjBuilder * builder,StringData fieldName,size_t recursionLevel) const389 void Value::addToBsonObj(BSONObjBuilder* builder,
390                          StringData fieldName,
391                          size_t recursionLevel) const {
392     uassert(ErrorCodes::Overflow,
393             str::stream() << "cannot convert document to BSON because it exceeds the limit of "
394                           << BSONDepth::getMaxAllowableDepth()
395                           << " levels of nesting",
396             recursionLevel <= BSONDepth::getMaxAllowableDepth());
397 
398     if (getType() == BSONType::Object) {
399         BSONObjBuilder subobjBuilder(builder->subobjStart(fieldName));
400         getDocument().toBson(&subobjBuilder, recursionLevel + 1);
401         subobjBuilder.doneFast();
402     } else if (getType() == BSONType::Array) {
403         BSONArrayBuilder subarrBuilder(builder->subarrayStart(fieldName));
404         for (auto&& value : getArray()) {
405             value.addToBsonArray(&subarrBuilder, recursionLevel + 1);
406         }
407         subarrBuilder.doneFast();
408     } else {
409         *builder << fieldName << *this;
410     }
411 }
412 
addToBsonArray(BSONArrayBuilder * builder,size_t recursionLevel) const413 void Value::addToBsonArray(BSONArrayBuilder* builder, size_t recursionLevel) const {
414     uassert(ErrorCodes::Overflow,
415             str::stream() << "cannot convert document to BSON because it exceeds the limit of "
416                           << BSONDepth::getMaxAllowableDepth()
417                           << " levels of nesting",
418             recursionLevel <= BSONDepth::getMaxAllowableDepth());
419 
420     // If this Value is empty, do nothing to avoid incrementing the builder's counter.
421     if (missing()) {
422         return;
423     }
424 
425     if (getType() == BSONType::Object) {
426         BSONObjBuilder subobjBuilder(builder->subobjStart());
427         getDocument().toBson(&subobjBuilder, recursionLevel + 1);
428         subobjBuilder.doneFast();
429     } else if (getType() == BSONType::Array) {
430         BSONArrayBuilder subarrBuilder(builder->subarrayStart());
431         for (auto&& value : getArray()) {
432             value.addToBsonArray(&subarrBuilder, recursionLevel + 1);
433         }
434         subarrBuilder.doneFast();
435     } else {
436         *builder << *this;
437     }
438 }
439 
coerceToBool() const440 bool Value::coerceToBool() const {
441     // TODO Unify the implementation with BSONElement::trueValue().
442     switch (getType()) {
443         case CodeWScope:
444         case MinKey:
445         case DBRef:
446         case Code:
447         case MaxKey:
448         case String:
449         case Object:
450         case Array:
451         case BinData:
452         case jstOID:
453         case Date:
454         case RegEx:
455         case Symbol:
456         case bsonTimestamp:
457             return true;
458 
459         case EOO:
460         case jstNULL:
461         case Undefined:
462             return false;
463 
464         case Bool:
465             return _storage.boolValue;
466         case NumberInt:
467             return _storage.intValue;
468         case NumberLong:
469             return _storage.longValue;
470         case NumberDouble:
471             return _storage.doubleValue;
472         case NumberDecimal:
473             return !_storage.getDecimal().isZero();
474     }
475     verify(false);
476 }
477 
coerceToInt() const478 int Value::coerceToInt() const {
479     switch (getType()) {
480         case NumberInt:
481             return _storage.intValue;
482 
483         case NumberLong:
484             return static_cast<int>(_storage.longValue);
485 
486         case NumberDouble:
487             return static_cast<int>(_storage.doubleValue);
488 
489         case NumberDecimal:
490             return (_storage.getDecimal()).toInt();
491 
492         default:
493             uassert(16003,
494                     str::stream() << "can't convert from BSON type " << typeName(getType())
495                                   << " to int",
496                     false);
497     }  // switch(getType())
498 }
499 
coerceToLong() const500 long long Value::coerceToLong() const {
501     switch (getType()) {
502         case NumberLong:
503             return _storage.longValue;
504 
505         case NumberInt:
506             return static_cast<long long>(_storage.intValue);
507 
508         case NumberDouble:
509             return static_cast<long long>(_storage.doubleValue);
510 
511         case NumberDecimal:
512             return (_storage.getDecimal()).toLong();
513 
514         default:
515             uassert(16004,
516                     str::stream() << "can't convert from BSON type " << typeName(getType())
517                                   << " to long",
518                     false);
519     }  // switch(getType())
520 }
521 
coerceToDouble() const522 double Value::coerceToDouble() const {
523     switch (getType()) {
524         case NumberDouble:
525             return _storage.doubleValue;
526 
527         case NumberInt:
528             return static_cast<double>(_storage.intValue);
529 
530         case NumberLong:
531             return static_cast<double>(_storage.longValue);
532 
533         case NumberDecimal:
534             return (_storage.getDecimal()).toDouble();
535 
536         default:
537             uassert(16005,
538                     str::stream() << "can't convert from BSON type " << typeName(getType())
539                                   << " to double",
540                     false);
541     }  // switch(getType())
542 }
543 
coerceToDecimal() const544 Decimal128 Value::coerceToDecimal() const {
545     switch (getType()) {
546         case NumberDecimal:
547             return _storage.getDecimal();
548 
549         case NumberInt:
550             return Decimal128(static_cast<int32_t>(_storage.intValue));
551 
552         case NumberLong:
553             return Decimal128(static_cast<int64_t>(_storage.longValue));
554 
555         case NumberDouble:
556             return Decimal128(_storage.doubleValue);
557 
558         default:
559             uassert(16008,
560                     str::stream() << "can't convert from BSON type " << typeName(getType())
561                                   << " to decimal",
562                     false);
563     }  // switch(getType())
564 }
565 
coerceToDate() const566 Date_t Value::coerceToDate() const {
567     switch (getType()) {
568         case Date:
569             return getDate();
570 
571         case bsonTimestamp:
572             return Date_t::fromMillisSinceEpoch(getTimestamp().getSecs() * 1000LL);
573 
574         case jstOID:
575             return getOid().asDateT();
576 
577         default:
578             uassert(16006,
579                     str::stream() << "can't convert from BSON type " << typeName(getType())
580                                   << " to Date",
581                     false);
582     }  // switch(getType())
583 }
584 
coerceToString() const585 string Value::coerceToString() const {
586     switch (getType()) {
587         case NumberDouble:
588             return str::stream() << _storage.doubleValue;
589 
590         case NumberInt:
591             return str::stream() << _storage.intValue;
592 
593         case NumberLong:
594             return str::stream() << _storage.longValue;
595 
596         case NumberDecimal:
597             return str::stream() << _storage.getDecimal().toString();
598 
599         case Code:
600         case Symbol:
601         case String:
602             return getStringData().toString();
603 
604         case bsonTimestamp:
605             return getTimestamp().toStringPretty();
606 
607         case Date:
608             return TimeZoneDatabase::utcZone().formatDate(kISOFormatString, getDate());
609 
610         case EOO:
611         case jstNULL:
612         case Undefined:
613             return "";
614 
615         default:
616             uassert(16007,
617                     str::stream() << "can't convert from BSON type " << typeName(getType())
618                                   << " to String",
619                     false);
620     }  // switch(getType())
621 }
622 
coerceToTimestamp() const623 Timestamp Value::coerceToTimestamp() const {
624     switch (getType()) {
625         case bsonTimestamp:
626             return getTimestamp();
627 
628         default:
629             uassert(16378,
630                     str::stream() << "can't convert from BSON type " << typeName(getType())
631                                   << " to timestamp",
632                     false);
633     }  // switch(getType())
634 }
635 
// Three-way comparison helper used by Value::compare: yields -1, 0 or 1.
// Preferable to computing (left - right), which misbehaves when the difference exceeds MAX_INT.
template <typename T>
inline static int cmp(const T& left, const T& right) {
    if (left < right)
        return -1;

    if (left == right)
        return 0;

    // Neither less nor equal, so left must be greater (checked in debug builds).
    dassert(left > right);
    return 1;
}
649 
compare(const Value & rL,const Value & rR,const StringData::ComparatorInterface * stringComparator)650 int Value::compare(const Value& rL,
651                    const Value& rR,
652                    const StringData::ComparatorInterface* stringComparator) {
653     // Note, this function needs to behave identically to BSONElement::compareElements().
654     // Additionally, any changes here must be replicated in hash_combine().
655     BSONType lType = rL.getType();
656     BSONType rType = rR.getType();
657 
658     int ret = lType == rType ? 0  // fast-path common case
659                              : cmp(canonicalizeBSONType(lType), canonicalizeBSONType(rType));
660 
661     if (ret)
662         return ret;
663 
664     switch (lType) {
665         // Order of types is the same as in BSONElement::compareElements() to make it easier to
666         // verify.
667 
668         // These are valueless types
669         case EOO:
670         case Undefined:
671         case jstNULL:
672         case MaxKey:
673         case MinKey:
674             return ret;
675 
676         case Bool:
677             return rL.getBool() - rR.getBool();
678 
679         case bsonTimestamp:  // unsigned
680             return cmp(rL._storage.timestampValue, rR._storage.timestampValue);
681 
682         case Date:  // signed
683             return cmp(rL._storage.dateValue, rR._storage.dateValue);
684 
685         // Numbers should compare by equivalence even if different types
686 
687         case NumberDecimal: {
688             switch (rType) {
689                 case NumberDecimal:
690                     return compareDecimals(rL._storage.getDecimal(), rR._storage.getDecimal());
691                 case NumberInt:
692                     return compareDecimalToInt(rL._storage.getDecimal(), rR._storage.intValue);
693                 case NumberLong:
694                     return compareDecimalToLong(rL._storage.getDecimal(), rR._storage.longValue);
695                 case NumberDouble:
696                     return compareDecimalToDouble(rL._storage.getDecimal(),
697                                                   rR._storage.doubleValue);
698                 default:
699                     invariant(false);
700             }
701         }
702 
703         case NumberInt: {
704             // All types can precisely represent all NumberInts, so it is safe to simply convert to
705             // whatever rhs's type is.
706             switch (rType) {
707                 case NumberInt:
708                     return compareInts(rL._storage.intValue, rR._storage.intValue);
709                 case NumberLong:
710                     return compareLongs(rL._storage.intValue, rR._storage.longValue);
711                 case NumberDouble:
712                     return compareDoubles(rL._storage.intValue, rR._storage.doubleValue);
713                 case NumberDecimal:
714                     return compareIntToDecimal(rL._storage.intValue, rR._storage.getDecimal());
715                 default:
716                     invariant(false);
717             }
718         }
719 
720         case NumberLong: {
721             switch (rType) {
722                 case NumberLong:
723                     return compareLongs(rL._storage.longValue, rR._storage.longValue);
724                 case NumberInt:
725                     return compareLongs(rL._storage.longValue, rR._storage.intValue);
726                 case NumberDouble:
727                     return compareLongToDouble(rL._storage.longValue, rR._storage.doubleValue);
728                 case NumberDecimal:
729                     return compareLongToDecimal(rL._storage.longValue, rR._storage.getDecimal());
730                 default:
731                     invariant(false);
732             }
733         }
734 
735         case NumberDouble: {
736             switch (rType) {
737                 case NumberDouble:
738                     return compareDoubles(rL._storage.doubleValue, rR._storage.doubleValue);
739                 case NumberInt:
740                     return compareDoubles(rL._storage.doubleValue, rR._storage.intValue);
741                 case NumberLong:
742                     return compareDoubleToLong(rL._storage.doubleValue, rR._storage.longValue);
743                 case NumberDecimal:
744                     return compareDoubleToDecimal(rL._storage.doubleValue,
745                                                   rR._storage.getDecimal());
746                 default:
747                     invariant(false);
748             }
749         }
750 
751         case jstOID:
752             return memcmp(rL._storage.oid, rR._storage.oid, OID::kOIDSize);
753 
754         case String: {
755             if (!stringComparator) {
756                 return rL.getStringData().compare(rR.getStringData());
757             }
758 
759             return stringComparator->compare(rL.getStringData(), rR.getStringData());
760         }
761 
762         case Code:
763         case Symbol:
764             return rL.getStringData().compare(rR.getStringData());
765 
766         case Object:
767             return Document::compare(rL.getDocument(), rR.getDocument(), stringComparator);
768 
769         case Array: {
770             const vector<Value>& lArr = rL.getArray();
771             const vector<Value>& rArr = rR.getArray();
772 
773             const size_t elems = std::min(lArr.size(), rArr.size());
774             for (size_t i = 0; i < elems; i++) {
775                 // compare the two corresponding elements
776                 ret = Value::compare(lArr[i], rArr[i], stringComparator);
777                 if (ret)
778                     return ret;  // values are unequal
779             }
780 
781             // if we get here we are either equal or one is prefix of the other
782             return cmp(lArr.size(), rArr.size());
783         }
784 
785         case DBRef: {
786             intrusive_ptr<const RCDBRef> l = rL._storage.getDBRef();
787             intrusive_ptr<const RCDBRef> r = rR._storage.getDBRef();
788             ret = cmp(l->ns.size(), r->ns.size());
789             if (ret)
790                 return ret;
791 
792             return l->oid.compare(r->oid);
793         }
794 
795         case BinData: {
796             ret = cmp(rL.getStringData().size(), rR.getStringData().size());
797             if (ret)
798                 return ret;
799 
800             // Need to compare as an unsigned char rather than enum since BSON uses memcmp
801             ret = cmp(rL._storage.binSubType, rR._storage.binSubType);
802             if (ret)
803                 return ret;
804 
805             return rL.getStringData().compare(rR.getStringData());
806         }
807 
808         case RegEx:
809             // same as String in this impl but keeping order same as
810             // BSONElement::compareElements().
811             return rL.getStringData().compare(rR.getStringData());
812 
813         case CodeWScope: {
814             intrusive_ptr<const RCCodeWScope> l = rL._storage.getCodeWScope();
815             intrusive_ptr<const RCCodeWScope> r = rR._storage.getCodeWScope();
816 
817             ret = l->code.compare(r->code);
818             if (ret)
819                 return ret;
820 
821             return l->scope.woCompare(r->scope);
822         }
823     }
824     verify(false);
825 }
826 
hash_combine(size_t & seed,const StringData::ComparatorInterface * stringComparator) const827 void Value::hash_combine(size_t& seed,
828                          const StringData::ComparatorInterface* stringComparator) const {
829     BSONType type = getType();
830 
831     boost::hash_combine(seed, canonicalizeBSONType(type));
832 
833     switch (type) {
834         // Order of types is the same as in Value::compare() and BSONElement::compareElements().
835 
836         // These are valueless types
837         case EOO:
838         case Undefined:
839         case jstNULL:
840         case MaxKey:
841         case MinKey:
842             return;
843 
844         case Bool:
845             boost::hash_combine(seed, getBool());
846             break;
847 
848         case bsonTimestamp:
849         case Date:
850             MONGO_STATIC_ASSERT(sizeof(_storage.dateValue) == sizeof(_storage.timestampValue));
851             boost::hash_combine(seed, _storage.dateValue);
852             break;
853 
854         case mongo::NumberDecimal: {
855             const Decimal128 dcml = getDecimal();
856             if (dcml.toAbs().isGreater(Decimal128(std::numeric_limits<double>::max(),
857                                                   Decimal128::kRoundTo34Digits,
858                                                   Decimal128::kRoundTowardZero)) &&
859                 !dcml.isInfinite() && !dcml.isNaN()) {
860                 // Normalize our decimal to force equivalent decimals
861                 // in the same cohort to hash to the same value
862                 Decimal128 dcmlNorm(dcml.normalize());
863                 boost::hash_combine(seed, dcmlNorm.getValue().low64);
864                 boost::hash_combine(seed, dcmlNorm.getValue().high64);
865                 break;
866             }
867             // Else, fall through and convert the decimal to a double and hash.
868             // At this point the decimal fits into the range of doubles, is infinity, or is NaN,
869             // which doubles have a cheaper representation for.
870         }
871         // This converts all numbers to doubles, which ignores the low-order bits of
872         // NumberLongs > 2**53 and precise decimal numbers without double representations,
873         // but that is ok since the hash will still be the same for equal numbers and is
874         // still likely to be different for different numbers. (Note: this issue only
875         // applies for decimals when they are inside of the valid double range. See
876         // the above case.)
877         // SERVER-16851
878         case NumberDouble:
879         case NumberLong:
880         case NumberInt: {
881             const double dbl = getDouble();
882             if (std::isnan(dbl)) {
883                 boost::hash_combine(seed, numeric_limits<double>::quiet_NaN());
884             } else {
885                 boost::hash_combine(seed, dbl);
886             }
887             break;
888         }
889 
890         case jstOID:
891             getOid().hash_combine(seed);
892             break;
893 
894         case Code:
895         case Symbol: {
896             StringData sd = getStringData();
897             MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
898             break;
899         }
900 
901         case String: {
902             StringData sd = getStringData();
903             if (stringComparator) {
904                 stringComparator->hash_combine(seed, sd);
905             } else {
906                 MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
907             }
908             break;
909         }
910 
911         case Object:
912             getDocument().hash_combine(seed, stringComparator);
913             break;
914 
915         case Array: {
916             const vector<Value>& vec = getArray();
917             for (size_t i = 0; i < vec.size(); i++)
918                 vec[i].hash_combine(seed, stringComparator);
919             break;
920         }
921 
922         case DBRef:
923             boost::hash_combine(seed, _storage.getDBRef()->ns);
924             _storage.getDBRef()->oid.hash_combine(seed);
925             break;
926 
927 
928         case BinData: {
929             StringData sd = getStringData();
930             MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
931             boost::hash_combine(seed, _storage.binDataType());
932             break;
933         }
934 
935         case RegEx: {
936             StringData sd = getStringData();
937             MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
938             break;
939         }
940 
941         case CodeWScope: {
942             intrusive_ptr<const RCCodeWScope> cws = _storage.getCodeWScope();
943             SimpleStringDataComparator::kInstance.hash_combine(seed, cws->code);
944             SimpleBSONObjComparator::kInstance.hash_combine(seed, cws->scope);
945             break;
946         }
947     }
948 }
949 
getWidestNumeric(BSONType lType,BSONType rType)950 BSONType Value::getWidestNumeric(BSONType lType, BSONType rType) {
951     if (lType == NumberDouble) {
952         switch (rType) {
953             case NumberDecimal:
954                 return NumberDecimal;
955 
956             case NumberDouble:
957             case NumberLong:
958             case NumberInt:
959                 return NumberDouble;
960 
961             default:
962                 break;
963         }
964     } else if (lType == NumberLong) {
965         switch (rType) {
966             case NumberDecimal:
967                 return NumberDecimal;
968 
969             case NumberDouble:
970                 return NumberDouble;
971 
972             case NumberLong:
973             case NumberInt:
974                 return NumberLong;
975 
976             default:
977                 break;
978         }
979     } else if (lType == NumberInt) {
980         switch (rType) {
981             case NumberDecimal:
982                 return NumberDecimal;
983 
984             case NumberDouble:
985                 return NumberDouble;
986 
987             case NumberLong:
988                 return NumberLong;
989 
990             case NumberInt:
991                 return NumberInt;
992 
993             default:
994                 break;
995         }
996     } else if (lType == NumberDecimal) {
997         switch (rType) {
998             case NumberInt:
999             case NumberLong:
1000             case NumberDouble:
1001             case NumberDecimal:
1002                 return NumberDecimal;
1003 
1004             default:
1005                 break;
1006         }
1007     }
1008 
1009     // Reachable, but callers must subsequently err out in this case.
1010     return Undefined;
1011 }
1012 
integral() const1013 bool Value::integral() const {
1014     switch (getType()) {
1015         case NumberInt:
1016             return true;
1017         case NumberLong:
1018             return (_storage.longValue <= numeric_limits<int>::max() &&
1019                     _storage.longValue >= numeric_limits<int>::min());
1020         case NumberDouble:
1021             return (_storage.doubleValue <= numeric_limits<int>::max() &&
1022                     _storage.doubleValue >= numeric_limits<int>::min() &&
1023                     _storage.doubleValue == static_cast<int>(_storage.doubleValue));
1024         case NumberDecimal: {
1025             // If we are able to convert the decimal to an int32_t without an rounding errors,
1026             // then it is integral.
1027             uint32_t signalingFlags = Decimal128::kNoFlag;
1028             (void)_storage.getDecimal().toIntExact(&signalingFlags);
1029             return signalingFlags == Decimal128::kNoFlag;
1030         }
1031         default:
1032             return false;
1033     }
1034 }
1035 
getApproximateSize() const1036 size_t Value::getApproximateSize() const {
1037     switch (getType()) {
1038         case Code:
1039         case RegEx:
1040         case Symbol:
1041         case BinData:
1042         case String:
1043             return sizeof(Value) + (_storage.shortStr
1044                                         ? 0  // string stored inline, so no extra mem usage
1045                                         : sizeof(RCString) + _storage.getString().size());
1046 
1047         case Object:
1048             return sizeof(Value) + getDocument().getApproximateSize();
1049 
1050         case Array: {
1051             size_t size = sizeof(Value);
1052             size += sizeof(RCVector);
1053             const size_t n = getArray().size();
1054             for (size_t i = 0; i < n; ++i) {
1055                 size += getArray()[i].getApproximateSize();
1056             }
1057             return size;
1058         }
1059 
1060         case CodeWScope:
1061             return sizeof(Value) + sizeof(RCCodeWScope) + _storage.getCodeWScope()->code.size() +
1062                 _storage.getCodeWScope()->scope.objsize();
1063 
1064         case DBRef:
1065             return sizeof(Value) + sizeof(RCDBRef) + _storage.getDBRef()->ns.size();
1066 
1067         case NumberDecimal:
1068             return sizeof(Value) + sizeof(RCDecimal);
1069 
1070         // These types are always contained within the Value
1071         case EOO:
1072         case MinKey:
1073         case MaxKey:
1074         case NumberDouble:
1075         case jstOID:
1076         case Bool:
1077         case Date:
1078         case NumberInt:
1079         case bsonTimestamp:
1080         case NumberLong:
1081         case jstNULL:
1082         case Undefined:
1083             return sizeof(Value);
1084     }
1085     verify(false);
1086 }
1087 
toString() const1088 string Value::toString() const {
1089     // TODO use StringBuilder when operator << is ready
1090     stringstream out;
1091     out << *this;
1092     return out.str();
1093 }
1094 
operator <<(ostream & out,const Value & val)1095 ostream& operator<<(ostream& out, const Value& val) {
1096     switch (val.getType()) {
1097         case EOO:
1098             return out << "MISSING";
1099         case MinKey:
1100             return out << "MinKey";
1101         case MaxKey:
1102             return out << "MaxKey";
1103         case jstOID:
1104             return out << val.getOid();
1105         case String:
1106             return out << '"' << val.getString() << '"';
1107         case RegEx:
1108             return out << '/' << val.getRegex() << '/' << val.getRegexFlags();
1109         case Symbol:
1110             return out << "Symbol(\"" << val.getSymbol() << "\")";
1111         case Code:
1112             return out << "Code(\"" << val.getCode() << "\")";
1113         case Bool:
1114             return out << (val.getBool() ? "true" : "false");
1115         case NumberDecimal:
1116             return out << val.getDecimal().toString();
1117         case NumberDouble:
1118             return out << val.getDouble();
1119         case NumberLong:
1120             return out << val.getLong();
1121         case NumberInt:
1122             return out << val.getInt();
1123         case jstNULL:
1124             return out << "null";
1125         case Undefined:
1126             return out << "undefined";
1127         case Date:
1128             return out << TimeZoneDatabase::utcZone().formatDate(kISOFormatString,
1129                                                                  val.coerceToDate());
1130         case bsonTimestamp:
1131             return out << val.getTimestamp().toString();
1132         case Object:
1133             return out << val.getDocument().toString();
1134         case Array: {
1135             out << "[";
1136             const size_t n = val.getArray().size();
1137             for (size_t i = 0; i < n; i++) {
1138                 if (i)
1139                     out << ", ";
1140                 out << val.getArray()[i];
1141             }
1142             out << "]";
1143             return out;
1144         }
1145 
1146         case CodeWScope:
1147             return out << "CodeWScope(\"" << val._storage.getCodeWScope()->code << "\", "
1148                        << val._storage.getCodeWScope()->scope << ')';
1149 
1150         case BinData:
1151             return out << "BinData(" << val._storage.binDataType() << ", \""
1152                        << toHex(val._storage.getString().rawData(), val._storage.getString().size())
1153                        << "\")";
1154 
1155         case DBRef:
1156             return out << "DBRef(\"" << val._storage.getDBRef()->ns << "\", "
1157                        << val._storage.getDBRef()->oid << ')';
1158     }
1159 
1160     // Not in default case to trigger better warning if a case is missing
1161     verify(false);
1162 }
1163 
serializeForSorter(BufBuilder & buf) const1164 void Value::serializeForSorter(BufBuilder& buf) const {
1165     buf.appendChar(getType());
1166     switch (getType()) {
1167         // type-only types
1168         case EOO:
1169         case MinKey:
1170         case MaxKey:
1171         case jstNULL:
1172         case Undefined:
1173             break;
1174 
1175         // simple types
1176         case jstOID:
1177             buf.appendStruct(_storage.oid);
1178             break;
1179         case NumberInt:
1180             buf.appendNum(_storage.intValue);
1181             break;
1182         case NumberLong:
1183             buf.appendNum(_storage.longValue);
1184             break;
1185         case NumberDouble:
1186             buf.appendNum(_storage.doubleValue);
1187             break;
1188         case NumberDecimal:
1189             buf.appendNum(_storage.getDecimal());
1190             break;
1191         case Bool:
1192             buf.appendChar(_storage.boolValue);
1193             break;
1194         case Date:
1195             buf.appendNum(_storage.dateValue);
1196             break;
1197         case bsonTimestamp:
1198             buf.appendStruct(getTimestamp());
1199             break;
1200 
1201         // types that are like strings
1202         case String:
1203         case Symbol:
1204         case Code: {
1205             StringData str = getStringData();
1206             buf.appendNum(int(str.size()));
1207             buf.appendStr(str, /*NUL byte*/ false);
1208             break;
1209         }
1210 
1211         case BinData: {
1212             StringData str = getStringData();
1213             buf.appendChar(_storage.binDataType());
1214             buf.appendNum(int(str.size()));
1215             buf.appendStr(str, /*NUL byte*/ false);
1216             break;
1217         }
1218 
1219         case RegEx:
1220             buf.appendStr(getRegex(), /*NUL byte*/ true);
1221             buf.appendStr(getRegexFlags(), /*NUL byte*/ true);
1222             break;
1223 
1224         case Object:
1225             getDocument().serializeForSorter(buf);
1226             break;
1227 
1228         case DBRef:
1229             buf.appendStruct(_storage.getDBRef()->oid);
1230             buf.appendStr(_storage.getDBRef()->ns, /*NUL byte*/ true);
1231             break;
1232 
1233         case CodeWScope: {
1234             intrusive_ptr<const RCCodeWScope> cws = _storage.getCodeWScope();
1235             buf.appendNum(int(cws->code.size()));
1236             buf.appendStr(cws->code, /*NUL byte*/ false);
1237             cws->scope.serializeForSorter(buf);
1238             break;
1239         }
1240 
1241         case Array: {
1242             const vector<Value>& array = getArray();
1243             const int numElems = array.size();
1244             buf.appendNum(numElems);
1245             for (int i = 0; i < numElems; i++)
1246                 array[i].serializeForSorter(buf);
1247             break;
1248         }
1249     }
1250 }
1251 
deserializeForSorter(BufReader & buf,const SorterDeserializeSettings & settings)1252 Value Value::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings& settings) {
1253     const BSONType type = BSONType(buf.read<signed char>());  // need sign extension for MinKey
1254     switch (type) {
1255         // type-only types
1256         case EOO:
1257         case MinKey:
1258         case MaxKey:
1259         case jstNULL:
1260         case Undefined:
1261             return Value(ValueStorage(type));
1262 
1263         // simple types
1264         case jstOID:
1265             return Value(OID::from(buf.skip(OID::kOIDSize)));
1266         case NumberInt:
1267             return Value(buf.read<LittleEndian<int>>().value);
1268         case NumberLong:
1269             return Value(buf.read<LittleEndian<long long>>().value);
1270         case NumberDouble:
1271             return Value(buf.read<LittleEndian<double>>().value);
1272         case NumberDecimal:
1273             return Value(Decimal128(buf.read<LittleEndian<Decimal128::Value>>().value));
1274         case Bool:
1275             return Value(bool(buf.read<char>()));
1276         case Date:
1277             return Value(Date_t::fromMillisSinceEpoch(buf.read<LittleEndian<long long>>().value));
1278         case bsonTimestamp:
1279             return Value(buf.read<Timestamp>());
1280 
1281         // types that are like strings
1282         case String:
1283         case Symbol:
1284         case Code: {
1285             int size = buf.read<LittleEndian<int>>();
1286             const char* str = static_cast<const char*>(buf.skip(size));
1287             return Value(ValueStorage(type, StringData(str, size)));
1288         }
1289 
1290         case BinData: {
1291             BinDataType bdt = BinDataType(buf.read<unsigned char>());
1292             int size = buf.read<LittleEndian<int>>();
1293             const void* data = buf.skip(size);
1294             return Value(BSONBinData(data, size, bdt));
1295         }
1296 
1297         case RegEx: {
1298             StringData regex = buf.readCStr();
1299             StringData flags = buf.readCStr();
1300             return Value(BSONRegEx(regex, flags));
1301         }
1302 
1303         case Object:
1304             return Value(
1305                 Document::deserializeForSorter(buf, Document::SorterDeserializeSettings()));
1306 
1307         case DBRef: {
1308             OID oid = OID::from(buf.skip(OID::kOIDSize));
1309             StringData ns = buf.readCStr();
1310             return Value(BSONDBRef(ns, oid));
1311         }
1312 
1313         case CodeWScope: {
1314             int size = buf.read<LittleEndian<int>>();
1315             const char* str = static_cast<const char*>(buf.skip(size));
1316             BSONObj bson = BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings());
1317             return Value(BSONCodeWScope(StringData(str, size), bson));
1318         }
1319 
1320         case Array: {
1321             const int numElems = buf.read<LittleEndian<int>>();
1322             vector<Value> array;
1323             array.reserve(numElems);
1324             for (int i = 0; i < numElems; i++)
1325                 array.push_back(deserializeForSorter(buf, settings));
1326             return Value(std::move(array));
1327         }
1328     }
1329     verify(false);
1330 }
1331 
// IDL serialization hook for object fields: adds this Value to 'builder' under 'fieldName' by
// delegating to addToBsonObj().
void Value::serializeForIDL(StringData fieldName, BSONObjBuilder* builder) const {
    addToBsonObj(builder, fieldName);
}
1335 
// IDL serialization hook for array elements: adds this Value to 'builder' by delegating to
// addToBsonArray().
void Value::serializeForIDL(BSONArrayBuilder* builder) const {
    addToBsonArray(builder);
}
1339 
// IDL deserialization hook: constructs a Value directly from the given BSON element.
Value Value::deserializeForIDL(const BSONElement& element) {
    return Value(element);
}
1343 
1344 }  // namespace mongo
1345