1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #include "mongo/platform/basic.h"
32
33 #include "mongo/db/pipeline/value.h"
34
35 #include <boost/functional/hash.hpp>
36 #include <cmath>
37 #include <limits>
38
39 #include "mongo/base/compare_numbers.h"
40 #include "mongo/base/data_type_endian.h"
41 #include "mongo/base/simple_string_data_comparator.h"
42 #include "mongo/bson/bson_depth.h"
43 #include "mongo/bson/simple_bsonobj_comparator.h"
44 #include "mongo/db/jsobj.h"
45 #include "mongo/db/pipeline/document.h"
46 #include "mongo/db/query/datetime/date_time_support.h"
47 #include "mongo/platform/decimal128.h"
48 #include "mongo/util/hex.h"
49 #include "mongo/util/mongoutils/str.h"
50
51 namespace mongo {
52 using namespace mongoutils;
53 using boost::intrusive_ptr;
54 using std::min;
55 using std::numeric_limits;
56 using std::ostream;
57 using std::string;
58 using std::stringstream;
59 using std::vector;
60
61 namespace {
62 constexpr StringData kISOFormatString = "%Y-%m-%dT%H:%M:%S.%LZ"_sd;
63 }
64
verifyRefCountingIfShould() const65 void ValueStorage::verifyRefCountingIfShould() const {
66 switch (type) {
67 case MinKey:
68 case MaxKey:
69 case jstOID:
70 case Date:
71 case bsonTimestamp:
72 case EOO:
73 case jstNULL:
74 case Undefined:
75 case Bool:
76 case NumberInt:
77 case NumberLong:
78 case NumberDouble:
79 // the above types never reference external data
80 verify(!refCounter);
81 break;
82
83 case String:
84 case RegEx:
85 case Code:
86 case Symbol:
87 // the above types reference data when not using short-string optimization
88 verify(refCounter == !shortStr);
89 break;
90
91 case NumberDecimal:
92 case BinData: // TODO this should probably support short-string optimization
93 case Array: // TODO this should probably support empty-is-NULL optimization
94 case DBRef:
95 case CodeWScope:
96 // the above types always reference external data.
97 verify(refCounter);
98 verify(bool(genericRCPtr));
99 break;
100
101 case Object:
102 // Objects either hold a NULL ptr or should be ref-counting
103 verify(refCounter == bool(genericRCPtr));
104 break;
105 }
106 }
107
putString(StringData s)108 void ValueStorage::putString(StringData s) {
109 // Note: this also stores data portion of BinData
110 const size_t sizeNoNUL = s.size();
111 if (sizeNoNUL <= sizeof(shortStrStorage)) {
112 shortStr = true;
113 shortStrSize = s.size();
114 s.copyTo(shortStrStorage, false); // no NUL
115
116 // All memory is zeroed before this is called, so we know that
117 // the nulTerminator field will definitely contain a NUL byte.
118 dassert(((sizeNoNUL < sizeof(shortStrStorage)) && (shortStrStorage[sizeNoNUL] == '\0')) ||
119 (((shortStrStorage + sizeNoNUL) == &nulTerminator) && (nulTerminator == '\0')));
120 } else {
121 putRefCountable(RCString::create(s));
122 }
123 }
124
putDocument(const Document & d)125 void ValueStorage::putDocument(const Document& d) {
126 putRefCountable(d._storage);
127 }
128
putVector(const RCVector * vec)129 void ValueStorage::putVector(const RCVector* vec) {
130 fassert(16485, vec);
131 putRefCountable(vec);
132 }
133
putRegEx(const BSONRegEx & re)134 void ValueStorage::putRegEx(const BSONRegEx& re) {
135 const size_t patternLen = re.pattern.size();
136 const size_t flagsLen = re.flags.size();
137 const size_t totalLen = patternLen + 1 /*middle NUL*/ + flagsLen;
138
139 // Need to copy since putString doesn't support scatter-gather.
140 std::unique_ptr<char[]> buf(new char[totalLen]);
141 re.pattern.copyTo(buf.get(), true);
142 re.flags.copyTo(buf.get() + patternLen + 1, false); // no NUL
143 putString(StringData(buf.get(), totalLen));
144 }
145
getDocument() const146 Document ValueStorage::getDocument() const {
147 if (!genericRCPtr)
148 return Document();
149
150 dassert(typeid(*genericRCPtr) == typeid(const DocumentStorage));
151 const DocumentStorage* documentPtr = static_cast<const DocumentStorage*>(genericRCPtr);
152 return Document(documentPtr);
153 }
154
// Constructs an Object-typed Value from a BSONObj by first converting it to a Document.
// Defined here rather than in the header because Document is only forward declared there.
Value::Value(const BSONObj& obj) : _storage(Object, Document(obj)) {}
157
Value(const BSONElement & elem)158 Value::Value(const BSONElement& elem) : _storage(elem.type()) {
159 switch (elem.type()) {
160 // These are all type-only, no data
161 case EOO:
162 case MinKey:
163 case MaxKey:
164 case Undefined:
165 case jstNULL:
166 break;
167
168 case NumberDouble:
169 _storage.doubleValue = elem.Double();
170 break;
171
172 case Code:
173 case Symbol:
174 case String:
175 _storage.putString(elem.valueStringData());
176 break;
177
178 case Object: {
179 _storage.putDocument(Document(elem.embeddedObject()));
180 break;
181 }
182
183 case Array: {
184 intrusive_ptr<RCVector> vec(new RCVector);
185 BSONForEach(sub, elem.embeddedObject()) {
186 vec->vec.push_back(Value(sub));
187 }
188 _storage.putVector(vec.get());
189 break;
190 }
191
192 case jstOID:
193 MONGO_STATIC_ASSERT(sizeof(_storage.oid) == OID::kOIDSize);
194 memcpy(_storage.oid, elem.OID().view().view(), OID::kOIDSize);
195 break;
196
197 case Bool:
198 _storage.boolValue = elem.boolean();
199 break;
200
201 case Date:
202 _storage.dateValue = elem.date().toMillisSinceEpoch();
203 break;
204
205 case RegEx: {
206 _storage.putRegEx(BSONRegEx(elem.regex(), elem.regexFlags()));
207 break;
208 }
209
210 case NumberInt:
211 _storage.intValue = elem.numberInt();
212 break;
213
214 case bsonTimestamp:
215 _storage.timestampValue = elem.timestamp().asULL();
216 break;
217
218 case NumberLong:
219 _storage.longValue = elem.numberLong();
220 break;
221
222 case NumberDecimal:
223 _storage.putDecimal(elem.numberDecimal());
224 break;
225
226 case CodeWScope: {
227 StringData code(elem.codeWScopeCode(), elem.codeWScopeCodeLen() - 1);
228 _storage.putCodeWScope(BSONCodeWScope(code, elem.codeWScopeObject()));
229 break;
230 }
231
232 case BinData: {
233 int len;
234 const char* data = elem.binData(len);
235 _storage.putBinData(BSONBinData(data, len, elem.binDataType()));
236 break;
237 }
238
239 case DBRef:
240 _storage.putDBRef(BSONDBRef(elem.dbrefNS(), elem.dbrefOID()));
241 break;
242 }
243 }
244
Value(const BSONArray & arr)245 Value::Value(const BSONArray& arr) : _storage(Array) {
246 intrusive_ptr<RCVector> vec(new RCVector);
247 BSONForEach(sub, arr) {
248 vec->vec.push_back(Value(sub));
249 }
250 _storage.putVector(vec.get());
251 }
252
Value(const vector<BSONObj> & vec)253 Value::Value(const vector<BSONObj>& vec) : _storage(Array) {
254 intrusive_ptr<RCVector> storageVec(new RCVector);
255 storageVec->vec.reserve(vec.size());
256 for (auto&& obj : vec) {
257 storageVec->vec.push_back(Value(obj));
258 }
259 _storage.putVector(storageVec.get());
260 }
261
Value(const vector<Document> & vec)262 Value::Value(const vector<Document>& vec) : _storage(Array) {
263 intrusive_ptr<RCVector> storageVec(new RCVector);
264 storageVec->vec.reserve(vec.size());
265 for (auto&& obj : vec) {
266 storageVec->vec.push_back(Value(obj));
267 }
268 _storage.putVector(storageVec.get());
269 }
270
createIntOrLong(long long longValue)271 Value Value::createIntOrLong(long long longValue) {
272 int intValue = longValue;
273 if (intValue != longValue) {
274 // it is too large to be an int and should remain a long
275 return Value(longValue);
276 }
277
278 // should be an int since all arguments were int and it fits
279 return Value(intValue);
280 }
281
getDecimal() const282 Decimal128 Value::getDecimal() const {
283 BSONType type = getType();
284 if (type == NumberInt)
285 return Decimal128(static_cast<int32_t>(_storage.intValue));
286 if (type == NumberLong)
287 return Decimal128(static_cast<int64_t>(_storage.longValue));
288 if (type == NumberDouble)
289 return Decimal128(_storage.doubleValue);
290 invariant(type == NumberDecimal);
291 return _storage.getDecimal();
292 }
293
getDouble() const294 double Value::getDouble() const {
295 BSONType type = getType();
296 if (type == NumberInt)
297 return _storage.intValue;
298 if (type == NumberLong)
299 return static_cast<double>(_storage.longValue);
300 if (type == NumberDecimal)
301 return _storage.getDecimal().toDouble();
302
303 verify(type == NumberDouble);
304 return _storage.doubleValue;
305 }
306
// Returns the Document held by this Value. The Value must be of type Object; any other type
// trips the verify().
Document Value::getDocument() const {
    verify(getType() == Object);
    return _storage.getDocument();
}
311
operator [](size_t index) const312 Value Value::operator[](size_t index) const {
313 if (getType() != Array || index >= getArrayLength())
314 return Value();
315
316 return getArray()[index];
317 }
318
operator [](StringData name) const319 Value Value::operator[](StringData name) const {
320 if (getType() != Object)
321 return Value();
322
323 return getDocument()[name];
324 }
325
operator <<(BSONObjBuilderValueStream & builder,const Value & val)326 BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Value& val) {
327 switch (val.getType()) {
328 case EOO:
329 return builder.builder(); // nothing appended
330 case MinKey:
331 return builder << MINKEY;
332 case MaxKey:
333 return builder << MAXKEY;
334 case jstNULL:
335 return builder << BSONNULL;
336 case Undefined:
337 return builder << BSONUndefined;
338 case jstOID:
339 return builder << val.getOid();
340 case NumberInt:
341 return builder << val.getInt();
342 case NumberLong:
343 return builder << val.getLong();
344 case NumberDouble:
345 return builder << val.getDouble();
346 case NumberDecimal:
347 return builder << val.getDecimal();
348 case String:
349 return builder << val.getStringData();
350 case Bool:
351 return builder << val.getBool();
352 case Date:
353 return builder << val.getDate();
354 case bsonTimestamp:
355 return builder << val.getTimestamp();
356 case Object:
357 return builder << val.getDocument();
358 case Symbol:
359 return builder << BSONSymbol(val.getStringData());
360 case Code:
361 return builder << BSONCode(val.getStringData());
362 case RegEx:
363 return builder << BSONRegEx(val.getRegex(), val.getRegexFlags());
364
365 case DBRef:
366 return builder << BSONDBRef(val._storage.getDBRef()->ns, val._storage.getDBRef()->oid);
367
368 case BinData:
369 return builder << BSONBinData(val.getStringData().rawData(), // looking for void*
370 val.getStringData().size(),
371 val._storage.binDataType());
372
373 case CodeWScope:
374 return builder << BSONCodeWScope(val._storage.getCodeWScope()->code,
375 val._storage.getCodeWScope()->scope);
376
377 case Array: {
378 BSONArrayBuilder arrayBuilder(builder.subarrayStart());
379 for (auto&& value : val.getArray()) {
380 value.addToBsonArray(&arrayBuilder);
381 }
382 arrayBuilder.doneFast();
383 return builder.builder();
384 }
385 }
386 verify(false);
387 }
388
addToBsonObj(BSONObjBuilder * builder,StringData fieldName,size_t recursionLevel) const389 void Value::addToBsonObj(BSONObjBuilder* builder,
390 StringData fieldName,
391 size_t recursionLevel) const {
392 uassert(ErrorCodes::Overflow,
393 str::stream() << "cannot convert document to BSON because it exceeds the limit of "
394 << BSONDepth::getMaxAllowableDepth()
395 << " levels of nesting",
396 recursionLevel <= BSONDepth::getMaxAllowableDepth());
397
398 if (getType() == BSONType::Object) {
399 BSONObjBuilder subobjBuilder(builder->subobjStart(fieldName));
400 getDocument().toBson(&subobjBuilder, recursionLevel + 1);
401 subobjBuilder.doneFast();
402 } else if (getType() == BSONType::Array) {
403 BSONArrayBuilder subarrBuilder(builder->subarrayStart(fieldName));
404 for (auto&& value : getArray()) {
405 value.addToBsonArray(&subarrBuilder, recursionLevel + 1);
406 }
407 subarrBuilder.doneFast();
408 } else {
409 *builder << fieldName << *this;
410 }
411 }
412
addToBsonArray(BSONArrayBuilder * builder,size_t recursionLevel) const413 void Value::addToBsonArray(BSONArrayBuilder* builder, size_t recursionLevel) const {
414 uassert(ErrorCodes::Overflow,
415 str::stream() << "cannot convert document to BSON because it exceeds the limit of "
416 << BSONDepth::getMaxAllowableDepth()
417 << " levels of nesting",
418 recursionLevel <= BSONDepth::getMaxAllowableDepth());
419
420 // If this Value is empty, do nothing to avoid incrementing the builder's counter.
421 if (missing()) {
422 return;
423 }
424
425 if (getType() == BSONType::Object) {
426 BSONObjBuilder subobjBuilder(builder->subobjStart());
427 getDocument().toBson(&subobjBuilder, recursionLevel + 1);
428 subobjBuilder.doneFast();
429 } else if (getType() == BSONType::Array) {
430 BSONArrayBuilder subarrBuilder(builder->subarrayStart());
431 for (auto&& value : getArray()) {
432 value.addToBsonArray(&subarrBuilder, recursionLevel + 1);
433 }
434 subarrBuilder.doneFast();
435 } else {
436 *builder << *this;
437 }
438 }
439
coerceToBool() const440 bool Value::coerceToBool() const {
441 // TODO Unify the implementation with BSONElement::trueValue().
442 switch (getType()) {
443 case CodeWScope:
444 case MinKey:
445 case DBRef:
446 case Code:
447 case MaxKey:
448 case String:
449 case Object:
450 case Array:
451 case BinData:
452 case jstOID:
453 case Date:
454 case RegEx:
455 case Symbol:
456 case bsonTimestamp:
457 return true;
458
459 case EOO:
460 case jstNULL:
461 case Undefined:
462 return false;
463
464 case Bool:
465 return _storage.boolValue;
466 case NumberInt:
467 return _storage.intValue;
468 case NumberLong:
469 return _storage.longValue;
470 case NumberDouble:
471 return _storage.doubleValue;
472 case NumberDecimal:
473 return !_storage.getDecimal().isZero();
474 }
475 verify(false);
476 }
477
coerceToInt() const478 int Value::coerceToInt() const {
479 switch (getType()) {
480 case NumberInt:
481 return _storage.intValue;
482
483 case NumberLong:
484 return static_cast<int>(_storage.longValue);
485
486 case NumberDouble:
487 return static_cast<int>(_storage.doubleValue);
488
489 case NumberDecimal:
490 return (_storage.getDecimal()).toInt();
491
492 default:
493 uassert(16003,
494 str::stream() << "can't convert from BSON type " << typeName(getType())
495 << " to int",
496 false);
497 } // switch(getType())
498 }
499
coerceToLong() const500 long long Value::coerceToLong() const {
501 switch (getType()) {
502 case NumberLong:
503 return _storage.longValue;
504
505 case NumberInt:
506 return static_cast<long long>(_storage.intValue);
507
508 case NumberDouble:
509 return static_cast<long long>(_storage.doubleValue);
510
511 case NumberDecimal:
512 return (_storage.getDecimal()).toLong();
513
514 default:
515 uassert(16004,
516 str::stream() << "can't convert from BSON type " << typeName(getType())
517 << " to long",
518 false);
519 } // switch(getType())
520 }
521
coerceToDouble() const522 double Value::coerceToDouble() const {
523 switch (getType()) {
524 case NumberDouble:
525 return _storage.doubleValue;
526
527 case NumberInt:
528 return static_cast<double>(_storage.intValue);
529
530 case NumberLong:
531 return static_cast<double>(_storage.longValue);
532
533 case NumberDecimal:
534 return (_storage.getDecimal()).toDouble();
535
536 default:
537 uassert(16005,
538 str::stream() << "can't convert from BSON type " << typeName(getType())
539 << " to double",
540 false);
541 } // switch(getType())
542 }
543
coerceToDecimal() const544 Decimal128 Value::coerceToDecimal() const {
545 switch (getType()) {
546 case NumberDecimal:
547 return _storage.getDecimal();
548
549 case NumberInt:
550 return Decimal128(static_cast<int32_t>(_storage.intValue));
551
552 case NumberLong:
553 return Decimal128(static_cast<int64_t>(_storage.longValue));
554
555 case NumberDouble:
556 return Decimal128(_storage.doubleValue);
557
558 default:
559 uassert(16008,
560 str::stream() << "can't convert from BSON type " << typeName(getType())
561 << " to decimal",
562 false);
563 } // switch(getType())
564 }
565
coerceToDate() const566 Date_t Value::coerceToDate() const {
567 switch (getType()) {
568 case Date:
569 return getDate();
570
571 case bsonTimestamp:
572 return Date_t::fromMillisSinceEpoch(getTimestamp().getSecs() * 1000LL);
573
574 case jstOID:
575 return getOid().asDateT();
576
577 default:
578 uassert(16006,
579 str::stream() << "can't convert from BSON type " << typeName(getType())
580 << " to Date",
581 false);
582 } // switch(getType())
583 }
584
coerceToString() const585 string Value::coerceToString() const {
586 switch (getType()) {
587 case NumberDouble:
588 return str::stream() << _storage.doubleValue;
589
590 case NumberInt:
591 return str::stream() << _storage.intValue;
592
593 case NumberLong:
594 return str::stream() << _storage.longValue;
595
596 case NumberDecimal:
597 return str::stream() << _storage.getDecimal().toString();
598
599 case Code:
600 case Symbol:
601 case String:
602 return getStringData().toString();
603
604 case bsonTimestamp:
605 return getTimestamp().toStringPretty();
606
607 case Date:
608 return TimeZoneDatabase::utcZone().formatDate(kISOFormatString, getDate());
609
610 case EOO:
611 case jstNULL:
612 case Undefined:
613 return "";
614
615 default:
616 uassert(16007,
617 str::stream() << "can't convert from BSON type " << typeName(getType())
618 << " to String",
619 false);
620 } // switch(getType())
621 }
622
coerceToTimestamp() const623 Timestamp Value::coerceToTimestamp() const {
624 switch (getType()) {
625 case bsonTimestamp:
626 return getTimestamp();
627
628 default:
629 uassert(16378,
630 str::stream() << "can't convert from BSON type " << typeName(getType())
631 << " to timestamp",
632 false);
633 } // switch(getType())
634 }
635
// Three-way comparison helper for Value::compare: returns -1, 0 or 1.
// Preferable to computing left - right, whose result may not fit in an int.
template <typename T>
inline static int cmp(const T& left, const T& right) {
    if (left < right)
        return -1;

    if (left == right)
        return 0;

    // Neither less nor equal, so 'left' is expected to be greater.
    dassert(left > right);
    return 1;
}
649
compare(const Value & rL,const Value & rR,const StringData::ComparatorInterface * stringComparator)650 int Value::compare(const Value& rL,
651 const Value& rR,
652 const StringData::ComparatorInterface* stringComparator) {
653 // Note, this function needs to behave identically to BSONElement::compareElements().
654 // Additionally, any changes here must be replicated in hash_combine().
655 BSONType lType = rL.getType();
656 BSONType rType = rR.getType();
657
658 int ret = lType == rType ? 0 // fast-path common case
659 : cmp(canonicalizeBSONType(lType), canonicalizeBSONType(rType));
660
661 if (ret)
662 return ret;
663
664 switch (lType) {
665 // Order of types is the same as in BSONElement::compareElements() to make it easier to
666 // verify.
667
668 // These are valueless types
669 case EOO:
670 case Undefined:
671 case jstNULL:
672 case MaxKey:
673 case MinKey:
674 return ret;
675
676 case Bool:
677 return rL.getBool() - rR.getBool();
678
679 case bsonTimestamp: // unsigned
680 return cmp(rL._storage.timestampValue, rR._storage.timestampValue);
681
682 case Date: // signed
683 return cmp(rL._storage.dateValue, rR._storage.dateValue);
684
685 // Numbers should compare by equivalence even if different types
686
687 case NumberDecimal: {
688 switch (rType) {
689 case NumberDecimal:
690 return compareDecimals(rL._storage.getDecimal(), rR._storage.getDecimal());
691 case NumberInt:
692 return compareDecimalToInt(rL._storage.getDecimal(), rR._storage.intValue);
693 case NumberLong:
694 return compareDecimalToLong(rL._storage.getDecimal(), rR._storage.longValue);
695 case NumberDouble:
696 return compareDecimalToDouble(rL._storage.getDecimal(),
697 rR._storage.doubleValue);
698 default:
699 invariant(false);
700 }
701 }
702
703 case NumberInt: {
704 // All types can precisely represent all NumberInts, so it is safe to simply convert to
705 // whatever rhs's type is.
706 switch (rType) {
707 case NumberInt:
708 return compareInts(rL._storage.intValue, rR._storage.intValue);
709 case NumberLong:
710 return compareLongs(rL._storage.intValue, rR._storage.longValue);
711 case NumberDouble:
712 return compareDoubles(rL._storage.intValue, rR._storage.doubleValue);
713 case NumberDecimal:
714 return compareIntToDecimal(rL._storage.intValue, rR._storage.getDecimal());
715 default:
716 invariant(false);
717 }
718 }
719
720 case NumberLong: {
721 switch (rType) {
722 case NumberLong:
723 return compareLongs(rL._storage.longValue, rR._storage.longValue);
724 case NumberInt:
725 return compareLongs(rL._storage.longValue, rR._storage.intValue);
726 case NumberDouble:
727 return compareLongToDouble(rL._storage.longValue, rR._storage.doubleValue);
728 case NumberDecimal:
729 return compareLongToDecimal(rL._storage.longValue, rR._storage.getDecimal());
730 default:
731 invariant(false);
732 }
733 }
734
735 case NumberDouble: {
736 switch (rType) {
737 case NumberDouble:
738 return compareDoubles(rL._storage.doubleValue, rR._storage.doubleValue);
739 case NumberInt:
740 return compareDoubles(rL._storage.doubleValue, rR._storage.intValue);
741 case NumberLong:
742 return compareDoubleToLong(rL._storage.doubleValue, rR._storage.longValue);
743 case NumberDecimal:
744 return compareDoubleToDecimal(rL._storage.doubleValue,
745 rR._storage.getDecimal());
746 default:
747 invariant(false);
748 }
749 }
750
751 case jstOID:
752 return memcmp(rL._storage.oid, rR._storage.oid, OID::kOIDSize);
753
754 case String: {
755 if (!stringComparator) {
756 return rL.getStringData().compare(rR.getStringData());
757 }
758
759 return stringComparator->compare(rL.getStringData(), rR.getStringData());
760 }
761
762 case Code:
763 case Symbol:
764 return rL.getStringData().compare(rR.getStringData());
765
766 case Object:
767 return Document::compare(rL.getDocument(), rR.getDocument(), stringComparator);
768
769 case Array: {
770 const vector<Value>& lArr = rL.getArray();
771 const vector<Value>& rArr = rR.getArray();
772
773 const size_t elems = std::min(lArr.size(), rArr.size());
774 for (size_t i = 0; i < elems; i++) {
775 // compare the two corresponding elements
776 ret = Value::compare(lArr[i], rArr[i], stringComparator);
777 if (ret)
778 return ret; // values are unequal
779 }
780
781 // if we get here we are either equal or one is prefix of the other
782 return cmp(lArr.size(), rArr.size());
783 }
784
785 case DBRef: {
786 intrusive_ptr<const RCDBRef> l = rL._storage.getDBRef();
787 intrusive_ptr<const RCDBRef> r = rR._storage.getDBRef();
788 ret = cmp(l->ns.size(), r->ns.size());
789 if (ret)
790 return ret;
791
792 return l->oid.compare(r->oid);
793 }
794
795 case BinData: {
796 ret = cmp(rL.getStringData().size(), rR.getStringData().size());
797 if (ret)
798 return ret;
799
800 // Need to compare as an unsigned char rather than enum since BSON uses memcmp
801 ret = cmp(rL._storage.binSubType, rR._storage.binSubType);
802 if (ret)
803 return ret;
804
805 return rL.getStringData().compare(rR.getStringData());
806 }
807
808 case RegEx:
809 // same as String in this impl but keeping order same as
810 // BSONElement::compareElements().
811 return rL.getStringData().compare(rR.getStringData());
812
813 case CodeWScope: {
814 intrusive_ptr<const RCCodeWScope> l = rL._storage.getCodeWScope();
815 intrusive_ptr<const RCCodeWScope> r = rR._storage.getCodeWScope();
816
817 ret = l->code.compare(r->code);
818 if (ret)
819 return ret;
820
821 return l->scope.woCompare(r->scope);
822 }
823 }
824 verify(false);
825 }
826
hash_combine(size_t & seed,const StringData::ComparatorInterface * stringComparator) const827 void Value::hash_combine(size_t& seed,
828 const StringData::ComparatorInterface* stringComparator) const {
829 BSONType type = getType();
830
831 boost::hash_combine(seed, canonicalizeBSONType(type));
832
833 switch (type) {
834 // Order of types is the same as in Value::compare() and BSONElement::compareElements().
835
836 // These are valueless types
837 case EOO:
838 case Undefined:
839 case jstNULL:
840 case MaxKey:
841 case MinKey:
842 return;
843
844 case Bool:
845 boost::hash_combine(seed, getBool());
846 break;
847
848 case bsonTimestamp:
849 case Date:
850 MONGO_STATIC_ASSERT(sizeof(_storage.dateValue) == sizeof(_storage.timestampValue));
851 boost::hash_combine(seed, _storage.dateValue);
852 break;
853
854 case mongo::NumberDecimal: {
855 const Decimal128 dcml = getDecimal();
856 if (dcml.toAbs().isGreater(Decimal128(std::numeric_limits<double>::max(),
857 Decimal128::kRoundTo34Digits,
858 Decimal128::kRoundTowardZero)) &&
859 !dcml.isInfinite() && !dcml.isNaN()) {
860 // Normalize our decimal to force equivalent decimals
861 // in the same cohort to hash to the same value
862 Decimal128 dcmlNorm(dcml.normalize());
863 boost::hash_combine(seed, dcmlNorm.getValue().low64);
864 boost::hash_combine(seed, dcmlNorm.getValue().high64);
865 break;
866 }
867 // Else, fall through and convert the decimal to a double and hash.
868 // At this point the decimal fits into the range of doubles, is infinity, or is NaN,
869 // which doubles have a cheaper representation for.
870 }
871 // This converts all numbers to doubles, which ignores the low-order bits of
872 // NumberLongs > 2**53 and precise decimal numbers without double representations,
873 // but that is ok since the hash will still be the same for equal numbers and is
874 // still likely to be different for different numbers. (Note: this issue only
875 // applies for decimals when they are inside of the valid double range. See
876 // the above case.)
877 // SERVER-16851
878 case NumberDouble:
879 case NumberLong:
880 case NumberInt: {
881 const double dbl = getDouble();
882 if (std::isnan(dbl)) {
883 boost::hash_combine(seed, numeric_limits<double>::quiet_NaN());
884 } else {
885 boost::hash_combine(seed, dbl);
886 }
887 break;
888 }
889
890 case jstOID:
891 getOid().hash_combine(seed);
892 break;
893
894 case Code:
895 case Symbol: {
896 StringData sd = getStringData();
897 MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
898 break;
899 }
900
901 case String: {
902 StringData sd = getStringData();
903 if (stringComparator) {
904 stringComparator->hash_combine(seed, sd);
905 } else {
906 MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
907 }
908 break;
909 }
910
911 case Object:
912 getDocument().hash_combine(seed, stringComparator);
913 break;
914
915 case Array: {
916 const vector<Value>& vec = getArray();
917 for (size_t i = 0; i < vec.size(); i++)
918 vec[i].hash_combine(seed, stringComparator);
919 break;
920 }
921
922 case DBRef:
923 boost::hash_combine(seed, _storage.getDBRef()->ns);
924 _storage.getDBRef()->oid.hash_combine(seed);
925 break;
926
927
928 case BinData: {
929 StringData sd = getStringData();
930 MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
931 boost::hash_combine(seed, _storage.binDataType());
932 break;
933 }
934
935 case RegEx: {
936 StringData sd = getStringData();
937 MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
938 break;
939 }
940
941 case CodeWScope: {
942 intrusive_ptr<const RCCodeWScope> cws = _storage.getCodeWScope();
943 SimpleStringDataComparator::kInstance.hash_combine(seed, cws->code);
944 SimpleBSONObjComparator::kInstance.hash_combine(seed, cws->scope);
945 break;
946 }
947 }
948 }
949
getWidestNumeric(BSONType lType,BSONType rType)950 BSONType Value::getWidestNumeric(BSONType lType, BSONType rType) {
951 if (lType == NumberDouble) {
952 switch (rType) {
953 case NumberDecimal:
954 return NumberDecimal;
955
956 case NumberDouble:
957 case NumberLong:
958 case NumberInt:
959 return NumberDouble;
960
961 default:
962 break;
963 }
964 } else if (lType == NumberLong) {
965 switch (rType) {
966 case NumberDecimal:
967 return NumberDecimal;
968
969 case NumberDouble:
970 return NumberDouble;
971
972 case NumberLong:
973 case NumberInt:
974 return NumberLong;
975
976 default:
977 break;
978 }
979 } else if (lType == NumberInt) {
980 switch (rType) {
981 case NumberDecimal:
982 return NumberDecimal;
983
984 case NumberDouble:
985 return NumberDouble;
986
987 case NumberLong:
988 return NumberLong;
989
990 case NumberInt:
991 return NumberInt;
992
993 default:
994 break;
995 }
996 } else if (lType == NumberDecimal) {
997 switch (rType) {
998 case NumberInt:
999 case NumberLong:
1000 case NumberDouble:
1001 case NumberDecimal:
1002 return NumberDecimal;
1003
1004 default:
1005 break;
1006 }
1007 }
1008
1009 // Reachable, but callers must subsequently err out in this case.
1010 return Undefined;
1011 }
1012
integral() const1013 bool Value::integral() const {
1014 switch (getType()) {
1015 case NumberInt:
1016 return true;
1017 case NumberLong:
1018 return (_storage.longValue <= numeric_limits<int>::max() &&
1019 _storage.longValue >= numeric_limits<int>::min());
1020 case NumberDouble:
1021 return (_storage.doubleValue <= numeric_limits<int>::max() &&
1022 _storage.doubleValue >= numeric_limits<int>::min() &&
1023 _storage.doubleValue == static_cast<int>(_storage.doubleValue));
1024 case NumberDecimal: {
1025 // If we are able to convert the decimal to an int32_t without an rounding errors,
1026 // then it is integral.
1027 uint32_t signalingFlags = Decimal128::kNoFlag;
1028 (void)_storage.getDecimal().toIntExact(&signalingFlags);
1029 return signalingFlags == Decimal128::kNoFlag;
1030 }
1031 default:
1032 return false;
1033 }
1034 }
1035
getApproximateSize() const1036 size_t Value::getApproximateSize() const {
1037 switch (getType()) {
1038 case Code:
1039 case RegEx:
1040 case Symbol:
1041 case BinData:
1042 case String:
1043 return sizeof(Value) + (_storage.shortStr
1044 ? 0 // string stored inline, so no extra mem usage
1045 : sizeof(RCString) + _storage.getString().size());
1046
1047 case Object:
1048 return sizeof(Value) + getDocument().getApproximateSize();
1049
1050 case Array: {
1051 size_t size = sizeof(Value);
1052 size += sizeof(RCVector);
1053 const size_t n = getArray().size();
1054 for (size_t i = 0; i < n; ++i) {
1055 size += getArray()[i].getApproximateSize();
1056 }
1057 return size;
1058 }
1059
1060 case CodeWScope:
1061 return sizeof(Value) + sizeof(RCCodeWScope) + _storage.getCodeWScope()->code.size() +
1062 _storage.getCodeWScope()->scope.objsize();
1063
1064 case DBRef:
1065 return sizeof(Value) + sizeof(RCDBRef) + _storage.getDBRef()->ns.size();
1066
1067 case NumberDecimal:
1068 return sizeof(Value) + sizeof(RCDecimal);
1069
1070 // These types are always contained within the Value
1071 case EOO:
1072 case MinKey:
1073 case MaxKey:
1074 case NumberDouble:
1075 case jstOID:
1076 case Bool:
1077 case Date:
1078 case NumberInt:
1079 case bsonTimestamp:
1080 case NumberLong:
1081 case jstNULL:
1082 case Undefined:
1083 return sizeof(Value);
1084 }
1085 verify(false);
1086 }
1087
toString() const1088 string Value::toString() const {
1089 // TODO use StringBuilder when operator << is ready
1090 stringstream out;
1091 out << *this;
1092 return out.str();
1093 }
1094
operator <<(ostream & out,const Value & val)1095 ostream& operator<<(ostream& out, const Value& val) {
1096 switch (val.getType()) {
1097 case EOO:
1098 return out << "MISSING";
1099 case MinKey:
1100 return out << "MinKey";
1101 case MaxKey:
1102 return out << "MaxKey";
1103 case jstOID:
1104 return out << val.getOid();
1105 case String:
1106 return out << '"' << val.getString() << '"';
1107 case RegEx:
1108 return out << '/' << val.getRegex() << '/' << val.getRegexFlags();
1109 case Symbol:
1110 return out << "Symbol(\"" << val.getSymbol() << "\")";
1111 case Code:
1112 return out << "Code(\"" << val.getCode() << "\")";
1113 case Bool:
1114 return out << (val.getBool() ? "true" : "false");
1115 case NumberDecimal:
1116 return out << val.getDecimal().toString();
1117 case NumberDouble:
1118 return out << val.getDouble();
1119 case NumberLong:
1120 return out << val.getLong();
1121 case NumberInt:
1122 return out << val.getInt();
1123 case jstNULL:
1124 return out << "null";
1125 case Undefined:
1126 return out << "undefined";
1127 case Date:
1128 return out << TimeZoneDatabase::utcZone().formatDate(kISOFormatString,
1129 val.coerceToDate());
1130 case bsonTimestamp:
1131 return out << val.getTimestamp().toString();
1132 case Object:
1133 return out << val.getDocument().toString();
1134 case Array: {
1135 out << "[";
1136 const size_t n = val.getArray().size();
1137 for (size_t i = 0; i < n; i++) {
1138 if (i)
1139 out << ", ";
1140 out << val.getArray()[i];
1141 }
1142 out << "]";
1143 return out;
1144 }
1145
1146 case CodeWScope:
1147 return out << "CodeWScope(\"" << val._storage.getCodeWScope()->code << "\", "
1148 << val._storage.getCodeWScope()->scope << ')';
1149
1150 case BinData:
1151 return out << "BinData(" << val._storage.binDataType() << ", \""
1152 << toHex(val._storage.getString().rawData(), val._storage.getString().size())
1153 << "\")";
1154
1155 case DBRef:
1156 return out << "DBRef(\"" << val._storage.getDBRef()->ns << "\", "
1157 << val._storage.getDBRef()->oid << ')';
1158 }
1159
1160 // Not in default case to trigger better warning if a case is missing
1161 verify(false);
1162 }
1163
serializeForSorter(BufBuilder & buf) const1164 void Value::serializeForSorter(BufBuilder& buf) const {
1165 buf.appendChar(getType());
1166 switch (getType()) {
1167 // type-only types
1168 case EOO:
1169 case MinKey:
1170 case MaxKey:
1171 case jstNULL:
1172 case Undefined:
1173 break;
1174
1175 // simple types
1176 case jstOID:
1177 buf.appendStruct(_storage.oid);
1178 break;
1179 case NumberInt:
1180 buf.appendNum(_storage.intValue);
1181 break;
1182 case NumberLong:
1183 buf.appendNum(_storage.longValue);
1184 break;
1185 case NumberDouble:
1186 buf.appendNum(_storage.doubleValue);
1187 break;
1188 case NumberDecimal:
1189 buf.appendNum(_storage.getDecimal());
1190 break;
1191 case Bool:
1192 buf.appendChar(_storage.boolValue);
1193 break;
1194 case Date:
1195 buf.appendNum(_storage.dateValue);
1196 break;
1197 case bsonTimestamp:
1198 buf.appendStruct(getTimestamp());
1199 break;
1200
1201 // types that are like strings
1202 case String:
1203 case Symbol:
1204 case Code: {
1205 StringData str = getStringData();
1206 buf.appendNum(int(str.size()));
1207 buf.appendStr(str, /*NUL byte*/ false);
1208 break;
1209 }
1210
1211 case BinData: {
1212 StringData str = getStringData();
1213 buf.appendChar(_storage.binDataType());
1214 buf.appendNum(int(str.size()));
1215 buf.appendStr(str, /*NUL byte*/ false);
1216 break;
1217 }
1218
1219 case RegEx:
1220 buf.appendStr(getRegex(), /*NUL byte*/ true);
1221 buf.appendStr(getRegexFlags(), /*NUL byte*/ true);
1222 break;
1223
1224 case Object:
1225 getDocument().serializeForSorter(buf);
1226 break;
1227
1228 case DBRef:
1229 buf.appendStruct(_storage.getDBRef()->oid);
1230 buf.appendStr(_storage.getDBRef()->ns, /*NUL byte*/ true);
1231 break;
1232
1233 case CodeWScope: {
1234 intrusive_ptr<const RCCodeWScope> cws = _storage.getCodeWScope();
1235 buf.appendNum(int(cws->code.size()));
1236 buf.appendStr(cws->code, /*NUL byte*/ false);
1237 cws->scope.serializeForSorter(buf);
1238 break;
1239 }
1240
1241 case Array: {
1242 const vector<Value>& array = getArray();
1243 const int numElems = array.size();
1244 buf.appendNum(numElems);
1245 for (int i = 0; i < numElems; i++)
1246 array[i].serializeForSorter(buf);
1247 break;
1248 }
1249 }
1250 }
1251
deserializeForSorter(BufReader & buf,const SorterDeserializeSettings & settings)1252 Value Value::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings& settings) {
1253 const BSONType type = BSONType(buf.read<signed char>()); // need sign extension for MinKey
1254 switch (type) {
1255 // type-only types
1256 case EOO:
1257 case MinKey:
1258 case MaxKey:
1259 case jstNULL:
1260 case Undefined:
1261 return Value(ValueStorage(type));
1262
1263 // simple types
1264 case jstOID:
1265 return Value(OID::from(buf.skip(OID::kOIDSize)));
1266 case NumberInt:
1267 return Value(buf.read<LittleEndian<int>>().value);
1268 case NumberLong:
1269 return Value(buf.read<LittleEndian<long long>>().value);
1270 case NumberDouble:
1271 return Value(buf.read<LittleEndian<double>>().value);
1272 case NumberDecimal:
1273 return Value(Decimal128(buf.read<LittleEndian<Decimal128::Value>>().value));
1274 case Bool:
1275 return Value(bool(buf.read<char>()));
1276 case Date:
1277 return Value(Date_t::fromMillisSinceEpoch(buf.read<LittleEndian<long long>>().value));
1278 case bsonTimestamp:
1279 return Value(buf.read<Timestamp>());
1280
1281 // types that are like strings
1282 case String:
1283 case Symbol:
1284 case Code: {
1285 int size = buf.read<LittleEndian<int>>();
1286 const char* str = static_cast<const char*>(buf.skip(size));
1287 return Value(ValueStorage(type, StringData(str, size)));
1288 }
1289
1290 case BinData: {
1291 BinDataType bdt = BinDataType(buf.read<unsigned char>());
1292 int size = buf.read<LittleEndian<int>>();
1293 const void* data = buf.skip(size);
1294 return Value(BSONBinData(data, size, bdt));
1295 }
1296
1297 case RegEx: {
1298 StringData regex = buf.readCStr();
1299 StringData flags = buf.readCStr();
1300 return Value(BSONRegEx(regex, flags));
1301 }
1302
1303 case Object:
1304 return Value(
1305 Document::deserializeForSorter(buf, Document::SorterDeserializeSettings()));
1306
1307 case DBRef: {
1308 OID oid = OID::from(buf.skip(OID::kOIDSize));
1309 StringData ns = buf.readCStr();
1310 return Value(BSONDBRef(ns, oid));
1311 }
1312
1313 case CodeWScope: {
1314 int size = buf.read<LittleEndian<int>>();
1315 const char* str = static_cast<const char*>(buf.skip(size));
1316 BSONObj bson = BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings());
1317 return Value(BSONCodeWScope(StringData(str, size), bson));
1318 }
1319
1320 case Array: {
1321 const int numElems = buf.read<LittleEndian<int>>();
1322 vector<Value> array;
1323 array.reserve(numElems);
1324 for (int i = 0; i < numElems; i++)
1325 array.push_back(deserializeForSorter(buf, settings));
1326 return Value(std::move(array));
1327 }
1328 }
1329 verify(false);
1330 }
1331
serializeForIDL(StringData fieldName,BSONObjBuilder * builder) const1332 void Value::serializeForIDL(StringData fieldName, BSONObjBuilder* builder) const {
1333 addToBsonObj(builder, fieldName);
1334 }
1335
serializeForIDL(BSONArrayBuilder * builder) const1336 void Value::serializeForIDL(BSONArrayBuilder* builder) const {
1337 addToBsonArray(builder);
1338 }
1339
deserializeForIDL(const BSONElement & element)1340 Value Value::deserializeForIDL(const BSONElement& element) {
1341 return Value(element);
1342 }
1343
1344 } // namespace mongo
1345