2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
31 #pragma once
33 #include "mongo/db/pipeline/document_internal.h"
35 #include <boost/functional/hash.hpp>
36 #include <boost/intrusive_ptr.hpp>
38 #include "mongo/base/string_data.h"
39 #include "mongo/base/string_data_comparator_interface.h"
40 #include "mongo/bson/util/builder.h"
42 namespace mongo {
43 class BSONObj;
44 class FieldIterator;
45 class FieldPath;
46 class Value;
47 class MutableDocument;
49 /** An internal class that represents the position of a field in a document.
50  *
51  *  This is a low-level class that you usually don't need to worry about.
52  *
53  *  The main use of this class for clients is to allow refetching or
54  *  setting a field without looking it up again. It has a default
55  *  constructor that represents a field not being in a document. It also
56  *  has a method 'bool found()' that tells you if a field was found.
57  *
58  *  For more details see document_internal.h
59  */
60 class Position;
62 /** A Document is similar to a BSONObj but with a different in-memory representation.
63  *
64  *  A Document can be treated as a const std::map<std::string, const Value> that is
65  *  very cheap to copy and is Assignable.  Therefore, it is acceptable to
66  *  pass and return by Value. Note that the data in a Document is
67  *  immutable, but you can replace a Document instance with assignment.
68  *
69  *  See Also: Value class in Value.h
70  */
71 class Document {
72 public:
73     /**
74      * Operator overloads for relops return a DeferredComparison which can subsequently be evaluated
75      * by a DocumentComparator.
76      */
77     struct DeferredComparison {
78         enum class Type {
79             kLT,
80             kLTE,
81             kEQ,
82             kGT,
83             kGTE,
84             kNE,
85         };
DeferredComparisonDeferredComparison87         DeferredComparison(Type type, const Document& lhs, const Document& rhs)
88             : type(type), lhs(lhs), rhs(rhs) {}
90         Type type;
91         const Document& lhs;
92         const Document& rhs;
93     };
95     static constexpr StringData metaFieldTextScore = "$textScore"_sd;
96     static constexpr StringData metaFieldRandVal = "$randVal"_sd;
97     static constexpr StringData metaFieldSortKey = "$sortKey"_sd;
99     static const std::vector<StringData> allMetadataFieldNames;
101     /// Empty Document (does no allocation)
Document()102     Document() {}
104     /// Create a new Document deep-converted from the given BSONObj.
105     explicit Document(const BSONObj& bson);
107     /**
108      * Create a new document from key, value pairs. Enables constructing a document using this
109      * syntax:
110      * auto document = Document{{"hello", "world"}, {"number": 1}};
111      */
112     Document(std::initializer_list<std::pair<StringData, ImplicitValue>> initializerList);
swap(Document & rhs)114     void swap(Document& rhs) {
115         _storage.swap(rhs._storage);
116     }
118     /// Look up a field by key name. Returns Value() if no such field. O(1)
119     const Value operator[](StringData key) const {
120         return getField(key);
121     }
getField(StringData key)122     const Value getField(StringData key) const {
123         return storage().getField(key);
124     }
126     /// Look up a field by Position. See positionOf and getNestedField.
127     const Value operator[](Position pos) const {
128         return getField(pos);
129     }
getField(Position pos)130     const Value getField(Position pos) const {
131         return storage().getField(pos).val;
132     }
134     /**
135      * Returns the Value stored at the location given by 'path', or Value() if no such path exists.
136      * If 'positions' is non-null, it will be filled with a path suitable to pass to
137      * MutableDocument::setNestedField().
138      */
139     const Value getNestedField(const FieldPath& path,
140                                std::vector<Position>* positions = nullptr) const;
142     /// Number of fields in this document. O(n)
size()143     size_t size() const {
144         return storage().size();
145     }
147     /// True if this document has no fields.
empty()148     bool empty() const {
149         return !_storage || storage().iterator().atEnd();
150     }
152     /// Create a new FieldIterator that can be used to examine the Document's fields in order.
153     FieldIterator fieldIterator() const;
155     /// Convenience type for dealing with fields. Used by FieldIterator.
156     typedef std::pair<StringData, Value> FieldPair;
158     /** Get the approximate storage size of the document and sub-values in bytes.
159      *  Note: Some memory may be shared with other Documents or between fields within
160      *        a single Document so this can overestimate usage.
161      */
162     size_t getApproximateSize() const;
164     /**
165      * Compare two documents. Most callers should prefer using DocumentComparator instead. See
166      * document_comparator.h for details.
167      *
168      *  BSON document field order is significant, so this just goes through
169      *  the fields in order.  The comparison is done in roughly the same way
170      *  as strings are compared, but comparing one field at a time instead
171      *  of one character at a time.
172      *
173      *  Pass a non-null StringData::ComparatorInterface if special string comparison semantics are
174      *  required. If the comparator is null, then a simple binary compare is used for strings. This
175      *  comparator is only used for string *values*; field names are always compared using simple
176      *  binary compare.
177      *
178      *  Note: This does not consider metadata when comparing documents.
179      *
180      *  @returns an integer less than zero, zero, or an integer greater than
181      *           zero, depending on whether lhs < rhs, lhs == rhs, or lhs > rhs
182      *  Warning: may return values other than -1, 0, or 1
183      */
184     static int compare(const Document& lhs,
185                        const Document& rhs,
186                        const StringData::ComparatorInterface* stringComparator);
188     std::string toString() const;
190     friend std::ostream& operator<<(std::ostream& out, const Document& doc) {
191         return out << doc.toString();
192     }
194     /** Calculate a hash value.
195      *
196      * Meant to be used to create composite hashes suitable for
197      * hashed container classes such as unordered_map.
198      */
199     void hash_combine(size_t& seed, const StringData::ComparatorInterface* stringComparator) const;
201     /**
202      * Serializes this document to the BSONObj under construction in 'builder'. Metadata is not
203      * included. Throws a AssertionException if 'recursionLevel' exceeds the maximum allowable
204      * depth.
205      */
206     void toBson(BSONObjBuilder* builder, size_t recursionLevel = 1) const;
207     BSONObj toBson() const;
209     /**
210      * Like toBson, but includes metadata at the top-level.
211      * Output is parseable by fromBsonWithMetaData
212      */
213     BSONObj toBsonWithMetaData(bool includeSortKey = true) const;
215     /**
216      * Like Document(BSONObj) but treats top-level fields with special names as metadata.
217      * Special field names are available as static constants on this class with names starting
218      * with metaField.
219      */
220     static Document fromBsonWithMetaData(const BSONObj& bson);
222     /**
223      * Given a BSON object that may have metadata fields added as part of toBsonWithMetadata(),
224      * returns the same object without any of the metadata fields.
225      */
226     static BSONObj stripMetadataFields(const BSONObj& bsonWithMetadata);
228     // Support BSONObjBuilder and BSONArrayBuilder "stream" API
229     friend BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& d);
231     /** Return the abstract Position of a field, suitable to pass to operator[] or getField().
232      *  This can potentially save time if you need to refer to a field multiple times.
233      */
positionOf(StringData fieldName)234     Position positionOf(StringData fieldName) const {
235         return storage().findField(fieldName);
236     }
238     /** Clone a document.
239      *
240      *  This should only be called by MutableDocument and tests
241      *
242      *  The new document shares all the fields' values with the original.
243      *  This is not a deep copy.  Only the fields on the top-level document
244      *  are cloned.
245      */
clone()246     Document clone() const {
247         return Document(storage().clone().get());
248     }
hasTextScore()250     bool hasTextScore() const {
251         return storage().hasTextScore();
252     }
getTextScore()253     double getTextScore() const {
254         return storage().getTextScore();
255     }
hasRandMetaField()257     bool hasRandMetaField() const {
258         return storage().hasRandMetaField();
259     }
getRandMetaField()260     double getRandMetaField() const {
261         return storage().getRandMetaField();
262     }
hasSortKeyMetaField()264     bool hasSortKeyMetaField() const {
265         return storage().hasSortKeyMetaField();
266     }
getSortKeyMetaField()267     BSONObj getSortKeyMetaField() const {
268         return storage().getSortKeyMetaField();
269     }
271     /// members for Sorter
272     struct SorterDeserializeSettings {};  // unused
273     void serializeForSorter(BufBuilder& buf) const;
274     static Document deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&);
memUsageForSorter()275     int memUsageForSorter() const {
276         return getApproximateSize();
277     }
getOwned()278     Document getOwned() const {
279         return *this;
280     }
282     /// only for testing
getPtr()283     const void* getPtr() const {
284         return _storage.get();
285     }
287 private:
288     friend class FieldIterator;
289     friend class ValueStorage;
290     friend class MutableDocument;
291     friend class MutableValue;
Document(const DocumentStorage * ptr)293     explicit Document(const DocumentStorage* ptr) : _storage(ptr){};
storage()295     const DocumentStorage& storage() const {
296         return (_storage ? *_storage : DocumentStorage::emptyDoc());
297     }
298     boost::intrusive_ptr<const DocumentStorage> _storage;
299 };
301 //
302 // Comparison API.
303 //
304 // Document instances can be compared either using Document::compare() or via operator overloads.
305 // Most callers should prefer operator overloads. Note that the operator overloads return a
306 // DeferredComparison, which must be subsequently evaluated by a DocumentComparator. See
307 // document_comparator.h for details.
308 //
310 inline Document::DeferredComparison operator==(const Document& lhs, const Document& rhs) {
311     return Document::DeferredComparison(Document::DeferredComparison::Type::kEQ, lhs, rhs);
312 }
314 inline Document::DeferredComparison operator!=(const Document& lhs, const Document& rhs) {
315     return Document::DeferredComparison(Document::DeferredComparison::Type::kNE, lhs, rhs);
316 }
318 inline Document::DeferredComparison operator<(const Document& lhs, const Document& rhs) {
319     return Document::DeferredComparison(Document::DeferredComparison::Type::kLT, lhs, rhs);
320 }
322 inline Document::DeferredComparison operator<=(const Document& lhs, const Document& rhs) {
323     return Document::DeferredComparison(Document::DeferredComparison::Type::kLTE, lhs, rhs);
324 }
326 inline Document::DeferredComparison operator>(const Document& lhs, const Document& rhs) {
327     return Document::DeferredComparison(Document::DeferredComparison::Type::kGT, lhs, rhs);
328 }
330 inline Document::DeferredComparison operator>=(const Document& lhs, const Document& rhs) {
331     return Document::DeferredComparison(Document::DeferredComparison::Type::kGTE, lhs, rhs);
332 }
334 /** This class is returned by MutableDocument to allow you to modify its values.
335  *  You are not allowed to hold variables of this type (enforced by the type system).
336  */
337 class MutableValue {
338 public:
339     void operator=(const Value& v) {
340         _val = v;
341     }
343     void operator=(Value&& v) {
344         _val = std::move(v);
345     }
347     /** These are designed to allow things like mutDoc["a"]["b"]["c"] = Value(10);
348      *  It is safe to use even on nonexistent fields.
349      */
350     MutableValue operator[](StringData key) {
351         return getField(key);
352     }
353     MutableValue operator[](Position pos) {
354         return getField(pos);
355     }
357     MutableValue getField(StringData key);
358     MutableValue getField(Position pos);
360 private:
361     friend class MutableDocument;
363     /// can only be constructed or copied by self and friends
MutableValue(const MutableValue & other)364     MutableValue(const MutableValue& other) : _val(other._val) {}
MutableValue(Value & val)365     explicit MutableValue(Value& val) : _val(val) {}
367     /// Used by MutableDocument(MutableValue)
getDocPtr()368     const RefCountable*& getDocPtr() {
369         if (_val.getType() != Object || _val._storage.genericRCPtr == NULL) {
370             // If the current value isn't an object we replace it with a Object-typed Value.
371             // Note that we can't just use Document() here because that is a NULL pointer and
372             // Value doesn't refcount NULL pointers. This led to a memory leak (SERVER-10554)
373             // because MutableDocument::newStorage() would set a non-NULL pointer into the Value
374             // without setting the refCounter bit. While allocating a DocumentStorage here could
375             // result in an allocation where none is needed, in practice this is only called
376             // when we are about to add a field to the sub-document so this just changes where
377             // the allocation is done.
378             _val = Value(Document(new DocumentStorage()));
379         }
381         return _val._storage.genericRCPtr;
382     }
384     MutableValue& operator=(const MutableValue&);  // not assignable with another MutableValue
386     Value& _val;
387 };
389 /** MutableDocument is a Document builder that supports both adding and updating fields.
390  *
391  *  This class fills a similar role to BSONObjBuilder, but allows you to
392  *  change existing fields and more easily write to sub-Documents.
393  *
394  *  To preserve the immutability of Documents, MutableDocument will
395  *  shallow-clone its storage on write (COW) if it is shared with any other
396  *  Documents.
397  */
398 class MutableDocument {
399     MONGO_DISALLOW_COPYING(MutableDocument);
401 public:
402     /** Create a new empty Document.
403      *
404      *  @param expectedFields a hint at what the number of fields will be, if known.
405      *         this can be used to increase memory allocation efficiency. There is
406      *         no impact on correctness if this field over or under estimates.
407      *
408      *  TODO: find some way to convey field-name sizes to make even more efficient
409      */
MutableDocument()410     MutableDocument() : _storageHolder(NULL), _storage(_storageHolder) {}
411     explicit MutableDocument(size_t expectedFields);
413     /// No copy of data yet. Copy-on-write. See storage()
MutableDocument(Document d)414     explicit MutableDocument(Document d) : _storageHolder(NULL), _storage(_storageHolder) {
415         reset(std::move(d));
416     }
~MutableDocument()418     ~MutableDocument() {
419         if (_storageHolder)
420             intrusive_ptr_release(_storageHolder);
421     }
423     /** Replace the current base Document with the argument
424      *
425      *  All Positions from the passed in Document are valid and refer to the
426      *  same field in this MutableDocument.
427      */
428     void reset(Document d = Document()) {
429         reset(std::move(d._storage));
430     }
432     /** Add the given field to the Document.
433      *
434      *  BSON documents' fields are ordered; the new Field will be
435      *  appended to the current list of fields.
436      *
437      *  Unlike getField/setField, addField does not look for a field with the
438      *  same name and therefore cannot be used to update fields.
439      *
440      *  It is an error to add a field that has the same name as another field.
441      *
442      *  TODO: This is currently allowed but getField only gets first field.
443      *        Decide what level of support is needed for duplicate fields.
444      *        If duplicates are not allowed, consider removing this method.
445      */
addField(StringData fieldName,const Value & val)446     void addField(StringData fieldName, const Value& val) {
447         storage().appendField(fieldName) = val;
448     }
450     /** Update field by key. If there is no field with that key, add one.
451      *
452      *  If the new value is missing(), the field is logically removed.
453      */
454     MutableValue operator[](StringData key) {
455         return getField(key);
456     }
setField(StringData key,const Value & val)457     void setField(StringData key, const Value& val) {
458         getField(key) = val;
459     }
getField(StringData key)460     MutableValue getField(StringData key) {
461         return MutableValue(storage().getField(key));
462     }
464     /// Update field by Position. Must already be a valid Position.
465     MutableValue operator[](Position pos) {
466         return getField(pos);
467     }
setField(Position pos,const Value & val)468     void setField(Position pos, const Value& val) {
469         getField(pos) = val;
470     }
getField(Position pos)471     MutableValue getField(Position pos) {
472         return MutableValue(storage().getField(pos).val);
473     }
475     /// Logically remove a field. Note that memory usage does not decrease.
remove(StringData key)476     void remove(StringData key) {
477         getField(key) = Value();
478     }
removeNestedField(const std::vector<Position> & positions)479     void removeNestedField(const std::vector<Position>& positions) {
480         getNestedField(positions) = Value();
481     }
483     /** Gets/Sets a nested field given a path.
484      *
485      *  All fields along path are created as empty Documents if they don't exist
486      *  or are any other type.
487      */
488     MutableValue getNestedField(const FieldPath& dottedField);
setNestedField(const FieldPath & dottedField,const Value & val)489     void setNestedField(const FieldPath& dottedField, const Value& val) {
490         getNestedField(dottedField) = val;
491     }
493     /// Takes positions vector from Document::getNestedField. All fields in path must exist.
494     MutableValue getNestedField(const std::vector<Position>& positions);
setNestedField(const std::vector<Position> & positions,const Value & val)495     void setNestedField(const std::vector<Position>& positions, const Value& val) {
496         getNestedField(positions) = val;
497     }
499     /**
500      * Copies all metadata from source if it has any.
501      * Note: does not clear metadata from this.
502      */
copyMetaDataFrom(const Document & source)503     void copyMetaDataFrom(const Document& source) {
504         storage().copyMetaDataFrom(source.storage());
505     }
setTextScore(double score)507     void setTextScore(double score) {
508         storage().setTextScore(score);
509     }
setRandMetaField(double val)511     void setRandMetaField(double val) {
512         storage().setRandMetaField(val);
513     }
setSortKeyMetaField(BSONObj sortKey)515     void setSortKeyMetaField(BSONObj sortKey) {
516         storage().setSortKeyMetaField(sortKey);
517     }
519     /** Convert to a read-only document and release reference.
520      *
521      *  Call this to indicate that you are done with this Document and will
522      *  not be making further changes from this MutableDocument.
523      *
524      *  TODO: there are some optimizations that may make sense at freeze time.
525      */
freeze()526     Document freeze() {
527         // This essentially moves _storage into a new Document by way of temp.
528         Document ret;
529         boost::intrusive_ptr<const DocumentStorage> temp(storagePtr(), /*inc_ref_count=*/false);
530         temp.swap(ret._storage);
531         _storage = NULL;
532         return ret;
533     }
535     /// Used to simplify the common pattern of creating a value of the document.
freezeToValue()536     Value freezeToValue() {
537         return Value(freeze());
538     }
540     /** Borrow a readable reference to this Document.
541      *
542      *  Note that unlike freeze(), this indicates intention to continue
543      *  modifying this document. The returned Document will not observe
544      *  future changes to this MutableDocument.
545      */
peek()546     Document peek() {
547         return Document(storagePtr());
548     }
getApproximateSize()550     size_t getApproximateSize() {
551         return peek().getApproximateSize();
552     }
554 private:
555     friend class MutableValue;  // for access to next constructor
MutableDocument(MutableValue mv)556     explicit MutableDocument(MutableValue mv) : _storageHolder(NULL), _storage(mv.getDocPtr()) {}
reset(boost::intrusive_ptr<const DocumentStorage> ds)558     void reset(boost::intrusive_ptr<const DocumentStorage> ds) {
559         if (_storage)
560             intrusive_ptr_release(_storage);
561         _storage = ds.detach();
562     }
564     // This is split into 3 functions to speed up the fast-path
storage()565     DocumentStorage& storage() {
566         if (MONGO_unlikely(!_storage))
567             return newStorage();
569         if (MONGO_unlikely(_storage->isShared()))
570             return clonedStorage();
572         // This function exists to ensure this is safe
573         return const_cast<DocumentStorage&>(*storagePtr());
574     }
newStorage()575     DocumentStorage& newStorage() {
576         reset(new DocumentStorage);
577         return const_cast<DocumentStorage&>(*storagePtr());
578     }
clonedStorage()579     DocumentStorage& clonedStorage() {
580         reset(storagePtr()->clone());
581         return const_cast<DocumentStorage&>(*storagePtr());
582     }
584     // recursive helpers for same-named public methods
585     MutableValue getNestedFieldHelper(const FieldPath& dottedField, size_t level);
586     MutableValue getNestedFieldHelper(const std::vector<Position>& positions, size_t level);
588     // this should only be called by storage methods and peek/freeze
storagePtr()589     const DocumentStorage* storagePtr() const {
590         dassert(!_storage || typeid(*_storage) == typeid(const DocumentStorage));
591         return static_cast<const DocumentStorage*>(_storage);
592     }
594     // These are both const to prevent modifications bypassing storage() method.
595     // They always point to NULL or an object with dynamic type DocumentStorage.
596     const RefCountable* _storageHolder;  // Only used in constructors and destructor
597     const RefCountable*& _storage;  // references either above member or genericRCPtr in a Value
598 };
600 /// This is the public iterator over a document
601 class FieldIterator {
602 public:
FieldIterator(const Document & doc)603     explicit FieldIterator(const Document& doc) : _doc(doc), _it(_doc.storage().iterator()) {}
605     /// Ask if there are more fields to return.
more()606     bool more() const {
607         return !_it.atEnd();
608     }
610     /// Get next item and advance iterator
next()611     Document::FieldPair next() {
612         verify(more());
614         Document::FieldPair fp(_it->nameSD(), _it->val);
615         _it.advance();
616         return fp;
617     }
619 private:
620     // We'll hang on to the original document to ensure we keep its storage alive
621     Document _doc;
622     DocumentStorageIterator _it;
623 };
/// Macro to create Document literals. Syntax is the same as the BSON("name" << 123) macro.
/// Expands to a temporary DocumentStream that 'fields' is streamed into, followed by done().
/// Note: 'fields' is not parenthesized in the expansion, so a `<<` chain passed as the
/// argument continues the stream expression started by the DocumentStream temporary.
#define DOC(fields) ((DocumentStream() << fields).done())
/** Macro to create Array-typed Value literals.
 *  Syntax is the same as the BSON_ARRAY(123 << "foo") macro.
 *
 *  Expands to a temporary ValueArrayStream that 'fields' is streamed into, followed by
 *  done(). 'fields' is not parenthesized in the expansion, so a `<<` chain passed as the
 *  argument continues the stream expression started by the ValueArrayStream temporary.
 */
#define DOC_ARRAY(fields) ((ValueArrayStream() << fields).done())
634 // These classes are only for the implementation of the DOC and DOC_ARRAY macros.
635 // They should not be used for any other reason.
636 class DocumentStream {
637     // The stream alternates between DocumentStream taking a fieldname
638     // and ValueStream taking a Value.
639     class ValueStream {
640     public:
ValueStream(DocumentStream & builder)641         ValueStream(DocumentStream& builder) : builder(builder) {}
643         DocumentStream& operator<<(const Value& val) {
644             builder._md[name] = val;
645             return builder;
646         }
648         /// support anything directly supported by a value constructor
649         template <typename T>
650         DocumentStream& operator<<(const T& val) {
651             return *this << Value(val);
652         }
654         StringData name;
655         DocumentStream& builder;
656     };
658 public:
DocumentStream()659     DocumentStream() : _stream(*this) {}
661     ValueStream& operator<<(StringData name) {
662         _stream.name = name;
663         return _stream;
664     }
done()666     Document done() {
667         return _md.freeze();
668     }
670 private:
671     ValueStream _stream;
672     MutableDocument _md;
673 };
675 class ValueArrayStream {
676 public:
677     ValueArrayStream& operator<<(const Value& val) {
678         _array.push_back(val);
679         return *this;
680     }
682     /// support anything directly supported by a value constructor
683     template <typename T>
684     ValueArrayStream& operator<<(const T& val) {
685         return *this << Value(val);
686     }
done()688     Value done() {
689         return Value(std::move(_array));
690     }
692 private:
693     std::vector<Value> _array;
694 };
swap(mongo::Document & lhs,mongo::Document & rhs)696 inline void swap(mongo::Document& lhs, mongo::Document& rhs) {
697     lhs.swap(rhs);
698 }
700 /* ======================= INLINED IMPLEMENTATIONS ========================== */
fieldIterator()702 inline FieldIterator Document::fieldIterator() const {
703     return FieldIterator(*this);
704 }
getField(Position pos)706 inline MutableValue MutableValue::getField(Position pos) {
707     return MutableDocument(*this).getField(pos);
708 }
getField(StringData key)709 inline MutableValue MutableValue::getField(StringData key) {
710     return MutableDocument(*this).getField(key);
711 }
712 }