1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #include "mongo/platform/basic.h"
32 
33 #include "mongo/db/pipeline/document.h"
34 
35 #include <boost/functional/hash.hpp>
36 
37 #include "mongo/bson/bson_depth.h"
38 #include "mongo/db/jsobj.h"
39 #include "mongo/db/pipeline/field_path.h"
40 #include "mongo/util/mongoutils/str.h"
41 
42 namespace mongo {
43 using namespace mongoutils;
44 using boost::intrusive_ptr;
45 using std::string;
46 using std::vector;
47 
48 const DocumentStorage DocumentStorage::kEmptyDoc;
49 
50 const std::vector<StringData> Document::allMetadataFieldNames = {
51     Document::metaFieldTextScore, Document::metaFieldRandVal, Document::metaFieldSortKey};
52 
findField(StringData requested) const53 Position DocumentStorage::findField(StringData requested) const {
54     int reqSize = requested.size();  // get size calculation out of the way if needed
55 
56     if (_numFields >= HASH_TAB_MIN) {  // hash lookup
57         const unsigned bucket = bucketForKey(requested);
58 
59         Position pos = _hashTab[bucket];
60         while (pos.found()) {
61             const ValueElement& elem = getField(pos);
62             if (elem.nameLen == reqSize && memcmp(requested.rawData(), elem._name, reqSize) == 0) {
63                 return pos;
64             }
65 
66             // possible collision
67             pos = elem.nextCollision;
68         }
69     } else {  // linear scan
70         for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
71             if (it->nameLen == reqSize && memcmp(requested.rawData(), it->_name, reqSize) == 0) {
72                 return it.position();
73             }
74         }
75     }
76 
77     // if we got here, there's no such field
78     return Position();
79 }
80 
appendField(StringData name)81 Value& DocumentStorage::appendField(StringData name) {
82     Position pos = getNextPosition();
83     const int nameSize = name.size();
84 
85     // these are the same for everyone
86     const Position nextCollision;
87     const Value value;
88 
89     // Make room for new field (and padding at end for alignment)
90     const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize);
91     if (_buffer + newUsed > _bufferEnd)
92         alloc(newUsed);
93     _usedBytes = newUsed;
94 
95     // Append structure of a ValueElement
96     char* dest = _buffer + pos.index;  // must be after alloc since it changes _buffer
97 #define append(x)                  \
98     memcpy(dest, &(x), sizeof(x)); \
99     dest += sizeof(x)
100     append(value);
101     append(nextCollision);
102     append(nameSize);
103     name.copyTo(dest, true);
104 // Padding for alignment handled above
105 #undef append
106 
107     // Make sure next field starts where we expect it
108     fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes);
109 
110     _numFields++;
111 
112     if (_numFields > HASH_TAB_MIN) {
113         addFieldToHashTable(pos);
114     } else if (_numFields == HASH_TAB_MIN) {
115         // adds all fields to hash table (including the one we just added)
116         rehash();
117     }
118 
119     return getField(pos).val;
120 }
121 
122 // Call after adding field to _fields and increasing _numFields
addFieldToHashTable(Position pos)123 void DocumentStorage::addFieldToHashTable(Position pos) {
124     ValueElement& elem = getField(pos);
125     elem.nextCollision = Position();
126 
127     const unsigned bucket = bucketForKey(elem.nameSD());
128 
129     Position* posPtr = &_hashTab[bucket];
130     while (posPtr->found()) {
131         // collision: walk links and add new to end
132         posPtr = &getField(*posPtr).nextCollision;
133     }
134     *posPtr = Position(pos.index);
135 }
136 
alloc(unsigned newSize)137 void DocumentStorage::alloc(unsigned newSize) {
138     const bool firstAlloc = !_buffer;
139     const bool doingRehash = needRehash();
140     const size_t oldCapacity = _bufferEnd - _buffer;
141 
142     // make new bucket count big enough
143     while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE)
144         _hashTabMask = hashTabBuckets() * 2 - 1;
145 
146     // only allocate power-of-two sized space > 128 bytes
147     size_t capacity = 128;
148     while (capacity < newSize + hashTabBytes())
149         capacity *= 2;
150 
151     uassert(16490, "Tried to make oversized document", capacity <= size_t(BufferMaxSize));
152 
153     std::unique_ptr<char[]> oldBuf(_buffer);
154     _buffer = new char[capacity];
155     _bufferEnd = _buffer + capacity - hashTabBytes();
156 
157     if (!firstAlloc) {
158         // This just copies the elements
159         memcpy(_buffer, oldBuf.get(), _usedBytes);
160 
161         if (_numFields >= HASH_TAB_MIN) {
162             // if we were hashing, deal with the hash table
163             if (doingRehash) {
164                 rehash();
165             } else {
166                 // no rehash needed so just slide table down to new position
167                 memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes());
168             }
169         }
170     }
171 }
172 
reserveFields(size_t expectedFields)173 void DocumentStorage::reserveFields(size_t expectedFields) {
174     fassert(16487, !_buffer);
175 
176     unsigned buckets = HASH_TAB_INIT_SIZE;
177     while (buckets < expectedFields)
178         buckets *= 2;
179     _hashTabMask = buckets - 1;
180 
181     // Using expectedFields+1 to allow space for long field names
182     const size_t newSize = (expectedFields + 1) * ValueElement::align(sizeof(ValueElement));
183 
184     uassert(16491, "Tried to make oversized document", newSize <= size_t(BufferMaxSize));
185 
186     _buffer = new char[newSize + hashTabBytes()];
187     _bufferEnd = _buffer + newSize;
188 }
189 
clone() const190 intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
191     intrusive_ptr<DocumentStorage> out(new DocumentStorage());
192 
193     // Make a copy of the buffer.
194     // It is very important that the positions of each field are the same after cloning.
195     const size_t bufferBytes = allocatedBytes();
196     out->_buffer = new char[bufferBytes];
197     out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer);
198     if (bufferBytes > 0) {
199         memcpy(out->_buffer, _buffer, bufferBytes);
200     }
201 
202     // Copy remaining fields
203     out->_usedBytes = _usedBytes;
204     out->_numFields = _numFields;
205     out->_hashTabMask = _hashTabMask;
206     out->_metaFields = _metaFields;
207     out->_textScore = _textScore;
208     out->_randVal = _randVal;
209     out->_sortKey = _sortKey.getOwned();
210 
211     // Tell values that they have been memcpyed (updates ref counts)
212     for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) {
213         it->val.memcpyed();
214     }
215 
216     return out;
217 }
218 
~DocumentStorage()219 DocumentStorage::~DocumentStorage() {
220     std::unique_ptr<char[]> deleteBufferAtScopeEnd(_buffer);
221 
222     for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
223         it->val.~Value();  // explicit destructor call
224     }
225 }
226 
Document(const BSONObj & bson)227 Document::Document(const BSONObj& bson) {
228     MutableDocument md(bson.nFields());
229 
230     BSONObjIterator it(bson);
231     while (it.more()) {
232         BSONElement bsonElement(it.next());
233         md.addField(bsonElement.fieldNameStringData(), Value(bsonElement));
234     }
235 
236     *this = md.freeze();
237 }
238 
Document(std::initializer_list<std::pair<StringData,ImplicitValue>> initializerList)239 Document::Document(std::initializer_list<std::pair<StringData, ImplicitValue>> initializerList) {
240     MutableDocument mutableDoc(initializerList.size());
241 
242     for (auto&& pair : initializerList) {
243         mutableDoc.addField(pair.first, pair.second);
244     }
245 
246     *this = mutableDoc.freeze();
247 }
248 
operator <<(BSONObjBuilderValueStream & builder,const Document & doc)249 BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& doc) {
250     BSONObjBuilder subobj(builder.subobjStart());
251     doc.toBson(&subobj);
252     subobj.doneFast();
253     return builder.builder();
254 }
255 
toBson(BSONObjBuilder * builder,size_t recursionLevel) const256 void Document::toBson(BSONObjBuilder* builder, size_t recursionLevel) const {
257     uassert(ErrorCodes::Overflow,
258             str::stream() << "cannot convert document to BSON because it exceeds the limit of "
259                           << BSONDepth::getMaxAllowableDepth()
260                           << " levels of nesting",
261             recursionLevel <= BSONDepth::getMaxAllowableDepth());
262 
263     for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
264         it->val.addToBsonObj(builder, it->nameSD(), recursionLevel);
265     }
266 }
267 
toBson() const268 BSONObj Document::toBson() const {
269     BSONObjBuilder bb;
270     toBson(&bb);
271     return bb.obj();
272 }
273 
274 constexpr StringData Document::metaFieldTextScore;
275 constexpr StringData Document::metaFieldRandVal;
276 constexpr StringData Document::metaFieldSortKey;
277 
toBsonWithMetaData(bool includeSortKey) const278 BSONObj Document::toBsonWithMetaData(bool includeSortKey) const {
279     BSONObjBuilder bb;
280     toBson(&bb);
281     if (hasTextScore())
282         bb.append(metaFieldTextScore, getTextScore());
283     if (hasRandMetaField())
284         bb.append(metaFieldRandVal, getRandMetaField());
285     if (includeSortKey && hasSortKeyMetaField())
286         bb.append(metaFieldSortKey, getSortKeyMetaField());
287     return bb.obj();
288 }
289 
fromBsonWithMetaData(const BSONObj & bson)290 Document Document::fromBsonWithMetaData(const BSONObj& bson) {
291     MutableDocument md;
292 
293     BSONObjIterator it(bson);
294     while (it.more()) {
295         BSONElement elem(it.next());
296         auto fieldName = elem.fieldNameStringData();
297         if (fieldName[0] == '$') {
298             if (fieldName == metaFieldTextScore) {
299                 md.setTextScore(elem.Double());
300                 continue;
301             } else if (fieldName == metaFieldRandVal) {
302                 md.setRandMetaField(elem.Double());
303                 continue;
304             } else if (fieldName == metaFieldSortKey) {
305                 md.setSortKeyMetaField(elem.Obj());
306                 continue;
307             }
308         }
309 
310         // Note: this will not parse out metadata in embedded documents.
311         md.addField(fieldName, Value(elem));
312     }
313 
314     return md.freeze();
315 }
316 
MutableDocument(size_t expectedFields)317 MutableDocument::MutableDocument(size_t expectedFields)
318     : _storageHolder(NULL), _storage(_storageHolder) {
319     if (expectedFields) {
320         storage().reserveFields(expectedFields);
321     }
322 }
323 
getNestedFieldHelper(const FieldPath & dottedField,size_t level)324 MutableValue MutableDocument::getNestedFieldHelper(const FieldPath& dottedField, size_t level) {
325     if (level == dottedField.getPathLength() - 1) {
326         return getField(dottedField.getFieldName(level));
327     } else {
328         MutableDocument nested(getField(dottedField.getFieldName(level)));
329         return nested.getNestedFieldHelper(dottedField, level + 1);
330     }
331 }
332 
getNestedField(const FieldPath & dottedField)333 MutableValue MutableDocument::getNestedField(const FieldPath& dottedField) {
334     fassert(16601, dottedField.getPathLength());
335     return getNestedFieldHelper(dottedField, 0);
336 }
337 
getNestedFieldHelper(const vector<Position> & positions,size_t level)338 MutableValue MutableDocument::getNestedFieldHelper(const vector<Position>& positions,
339                                                    size_t level) {
340     if (level == positions.size() - 1) {
341         return getField(positions[level]);
342     } else {
343         MutableDocument nested(getField(positions[level]));
344         return nested.getNestedFieldHelper(positions, level + 1);
345     }
346 }
347 
getNestedField(const vector<Position> & positions)348 MutableValue MutableDocument::getNestedField(const vector<Position>& positions) {
349     fassert(16488, !positions.empty());
350     return getNestedFieldHelper(positions, 0);
351 }
352 
getNestedFieldHelper(const Document & doc,const FieldPath & fieldNames,vector<Position> * positions,size_t level)353 static Value getNestedFieldHelper(const Document& doc,
354                                   const FieldPath& fieldNames,
355                                   vector<Position>* positions,
356                                   size_t level) {
357     const auto fieldName = fieldNames.getFieldName(level);
358     const Position pos = doc.positionOf(fieldName);
359 
360     if (!pos.found())
361         return Value();
362 
363     if (positions)
364         positions->push_back(pos);
365 
366     if (level == fieldNames.getPathLength() - 1)
367         return doc.getField(pos);
368 
369     Value val = doc.getField(pos);
370     if (val.getType() != Object)
371         return Value();
372 
373     return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level + 1);
374 }
375 
getNestedField(const FieldPath & path,vector<Position> * positions) const376 const Value Document::getNestedField(const FieldPath& path, vector<Position>* positions) const {
377     fassert(16489, path.getPathLength());
378     return getNestedFieldHelper(*this, path, positions, 0);
379 }
380 
getApproximateSize() const381 size_t Document::getApproximateSize() const {
382     if (!_storage)
383         return 0;  // we've allocated no memory
384 
385     size_t size = sizeof(DocumentStorage);
386     size += storage().allocatedBytes();
387 
388     for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
389         size += it->val.getApproximateSize();
390         size -= sizeof(Value);  // already accounted for above
391     }
392 
393     return size;
394 }
395 
hash_combine(size_t & seed,const StringData::ComparatorInterface * stringComparator) const396 void Document::hash_combine(size_t& seed,
397                             const StringData::ComparatorInterface* stringComparator) const {
398     for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
399         StringData name = it->nameSD();
400         boost::hash_range(seed, name.rawData(), name.rawData() + name.size());
401         it->val.hash_combine(seed, stringComparator);
402     }
403 }
404 
compare(const Document & rL,const Document & rR,const StringData::ComparatorInterface * stringComparator)405 int Document::compare(const Document& rL,
406                       const Document& rR,
407                       const StringData::ComparatorInterface* stringComparator) {
408     DocumentStorageIterator lIt = rL.storage().iterator();
409     DocumentStorageIterator rIt = rR.storage().iterator();
410 
411     while (true) {
412         if (lIt.atEnd()) {
413             if (rIt.atEnd())
414                 return 0;  // documents are the same length
415 
416             return -1;  // left document is shorter
417         }
418 
419         if (rIt.atEnd())
420             return 1;  // right document is shorter
421 
422         const ValueElement& rField = rIt.get();
423         const ValueElement& lField = lIt.get();
424 
425         // For compatibility with BSONObj::woCompare() consider the canonical type of values
426         // before considerting their names.
427         if (lField.val.getType() != rField.val.getType()) {
428             const int rCType = canonicalizeBSONType(rField.val.getType());
429             const int lCType = canonicalizeBSONType(lField.val.getType());
430             if (lCType != rCType)
431                 return lCType < rCType ? -1 : 1;
432         }
433 
434         const int nameCmp = lField.nameSD().compare(rField.nameSD());
435         if (nameCmp)
436             return nameCmp;  // field names are unequal
437 
438         const int valueCmp = Value::compare(lField.val, rField.val, stringComparator);
439         if (valueCmp)
440             return valueCmp;  // fields are unequal
441 
442         rIt.advance();
443         lIt.advance();
444     }
445 }
446 
toString() const447 string Document::toString() const {
448     if (empty())
449         return "{}";
450 
451     StringBuilder out;
452     const char* prefix = "{";
453 
454     for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
455         out << prefix << it->nameSD() << ": " << it->val.toString();
456         prefix = ", ";
457     }
458     out << '}';
459 
460     return out.str();
461 }
462 
serializeForSorter(BufBuilder & buf) const463 void Document::serializeForSorter(BufBuilder& buf) const {
464     const int numElems = size();
465     buf.appendNum(numElems);
466 
467     for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
468         buf.appendStr(it->nameSD(), /*NUL byte*/ true);
469         it->val.serializeForSorter(buf);
470     }
471 
472     if (hasTextScore()) {
473         buf.appendNum(char(DocumentStorage::MetaType::TEXT_SCORE + 1));
474         buf.appendNum(getTextScore());
475     }
476     if (hasRandMetaField()) {
477         buf.appendNum(char(DocumentStorage::MetaType::RAND_VAL + 1));
478         buf.appendNum(getRandMetaField());
479     }
480     if (hasSortKeyMetaField()) {
481         buf.appendNum(char(DocumentStorage::MetaType::SORT_KEY + 1));
482         getSortKeyMetaField().appendSelfToBufBuilder(buf);
483     }
484     buf.appendNum(char(0));
485 }
486 
deserializeForSorter(BufReader & buf,const SorterDeserializeSettings &)487 Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) {
488     const int numElems = buf.read<LittleEndian<int>>();
489     MutableDocument doc(numElems);
490     for (int i = 0; i < numElems; i++) {
491         StringData name = buf.readCStr();
492         doc.addField(name, Value::deserializeForSorter(buf, Value::SorterDeserializeSettings()));
493     }
494 
495     while (char marker = buf.read<char>()) {
496         if (marker == char(DocumentStorage::MetaType::TEXT_SCORE) + 1) {
497             doc.setTextScore(buf.read<LittleEndian<double>>());
498         } else if (marker == char(DocumentStorage::MetaType::RAND_VAL) + 1) {
499             doc.setRandMetaField(buf.read<LittleEndian<double>>());
500         } else if (marker == char(DocumentStorage::MetaType::SORT_KEY) + 1) {
501             doc.setSortKeyMetaField(
502                 BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings()));
503         } else {
504             uasserted(28744, "Unrecognized marker, unable to deserialize buffer");
505         }
506     }
507 
508     return doc.freeze();
509 }
510 }
511