/**
 * Copyright (C) 2018-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#include "mongo/platform/basic.h"

#include "mongo/db/pipeline/document.h"

#include <boost/functional/hash.hpp>

#include "mongo/bson/bson_depth.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/pipeline/field_path.h"
#include "mongo/util/mongoutils/str.h"

namespace mongo {
using namespace mongoutils;
using boost::intrusive_ptr;
using std::string;
using std::vector;

const DocumentStorage DocumentStorage::kEmptyDoc;

const std::vector<StringData> Document::allMetadataFieldNames = {
    Document::metaFieldTextScore, Document::metaFieldRandVal, Document::metaFieldSortKey};

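// Returns the Position of the field named 'requested', or an unfound Position if no such field
// exists. Once the document holds HASH_TAB_MIN or more fields, the lookup goes through the hash
// table (walking the collision chain on bucket conflicts); smaller documents are scanned
// linearly.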
Position DocumentStorage::findField(StringData requested) const {
    int reqSize = requested.size();  // get size calculation out of the way if needed

    if (_numFields >= HASH_TAB_MIN) {  // hash lookup
        const unsigned bucket = bucketForKey(requested);

        Position pos = _hashTab[bucket];
        while (pos.found()) {
            const ValueElement& elem = getField(pos);
            if (elem.nameLen == reqSize && memcmp(requested.rawData(), elem._name, reqSize) == 0) {
                return pos;
            }

            // possible collision
            pos = elem.nextCollision;
        }
    } else {  // linear scan
        for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
            if (it->nameLen == reqSize && memcmp(requested.rawData(), it->_name, reqSize) == 0) {
                return it.position();
            }
        }
    }

    // if we got here, there's no such field
    return Position();
}

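// Appends a new field named 'name' at the end of the buffer and returns a reference to its
// (still empty) Value. The field is laid out as a ValueElement: the Value, the next-collision
// Position, the name length, and the NUL-terminated name, padded to ValueElement alignment.
// The hash table is updated (or first built, once HASH_TAB_MIN fields is reached) to include it.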
Value& DocumentStorage::appendField(StringData name) {
    Position pos = getNextPosition();
    const int nameSize = name.size();

    // these are the same for everyone
    const Position nextCollision;
    const Value value;

    // Make room for new field (and padding at end for alignment)
    const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize);
    if (_buffer + newUsed > _bufferEnd)
        alloc(newUsed);
    _usedBytes = newUsed;

    // Append structure of a ValueElement
    char* dest = _buffer + pos.index;  // must be after alloc since it changes _buffer
#define append(x)                  \
    memcpy(dest, &(x), sizeof(x)); \
    dest += sizeof(x)
    append(value);
    append(nextCollision);
    append(nameSize);
    name.copyTo(dest, true);
    // Padding for alignment handled above
#undef append

    // Make sure next field starts where we expect it
    fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes);

    _numFields++;

    if (_numFields > HASH_TAB_MIN) {
        addFieldToHashTable(pos);
    } else if (_numFields == HASH_TAB_MIN) {
        // adds all fields to hash table (including the one we just added)
        rehash();
    }

    return getField(pos).val;
}

// Call after adding field to _fields and increasing _numFields
void DocumentStorage::addFieldToHashTable(Position pos) {
    ValueElement& elem = getField(pos);
    elem.nextCollision = Position();

    const unsigned bucket = bucketForKey(elem.nameSD());

    Position* posPtr = &_hashTab[bucket];
    while (posPtr->found()) {
        // collision: walk links and add new to end
        posPtr = &getField(*posPtr).nextCollision;
    }
    *posPtr = Position(pos.index);
}

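// Grows the backing buffer so that at least 'newSize' bytes are available for field storage.
// Capacity is always a power of two (128 bytes minimum) and includes room for the hash table,
// which lives at the end of the allocation, past _bufferEnd. Existing elements are memcpy'd
// into the new buffer; the hash table is either rehashed or slid down to its new position.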
void DocumentStorage::alloc(unsigned newSize) {
    const bool firstAlloc = !_buffer;
    const bool doingRehash = needRehash();
    const size_t oldCapacity = _bufferEnd - _buffer;

    // make new bucket count big enough
    while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE)
        _hashTabMask = hashTabBuckets() * 2 - 1;

    // only allocate power-of-two sized space >= 128 bytes
    size_t capacity = 128;
    while (capacity < newSize + hashTabBytes())
        capacity *= 2;

    uassert(16490, "Tried to make oversized document", capacity <= size_t(BufferMaxSize));

    std::unique_ptr<char[]> oldBuf(_buffer);
    _buffer = new char[capacity];
    _bufferEnd = _buffer + capacity - hashTabBytes();

    if (!firstAlloc) {
        // This just copies the elements
        memcpy(_buffer, oldBuf.get(), _usedBytes);

        if (_numFields >= HASH_TAB_MIN) {
            // if we were hashing, deal with the hash table
            if (doingRehash) {
                rehash();
            } else {
                // no rehash needed so just slide table down to new position
                memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes());
            }
        }
    }
}

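// Pre-sizes an empty storage for roughly 'expectedFields' fields: picks a bucket count large
// enough for that many fields and allocates one ValueElement-sized slot per field (plus one
// spare slot to absorb long field names). Must be called before anything has been allocated.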
void DocumentStorage::reserveFields(size_t expectedFields) {
    fassert(16487, !_buffer);

    unsigned buckets = HASH_TAB_INIT_SIZE;
    while (buckets < expectedFields)
        buckets *= 2;
    _hashTabMask = buckets - 1;

    // Using expectedFields+1 to allow space for long field names
    const size_t newSize = (expectedFields + 1) * ValueElement::align(sizeof(ValueElement));

    uassert(16491, "Tried to make oversized document", newSize <= size_t(BufferMaxSize));

    _buffer = new char[newSize + hashTabBytes()];
    _bufferEnd = _buffer + newSize;
}

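// Produces a deep copy of this storage. The raw buffer is memcpy'd so every field keeps the
// same Position after cloning, then each copied Value is told it was memcpy'd so reference
// counts stay correct. Metadata (text score, rand value, sort key) is copied as well.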
intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
    intrusive_ptr<DocumentStorage> out(new DocumentStorage());

    // Make a copy of the buffer.
    // It is very important that the positions of each field are the same after cloning.
    const size_t bufferBytes = allocatedBytes();
    out->_buffer = new char[bufferBytes];
    out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer);
    if (bufferBytes > 0) {
        memcpy(out->_buffer, _buffer, bufferBytes);
    }

    // Copy remaining fields
    out->_usedBytes = _usedBytes;
    out->_numFields = _numFields;
    out->_hashTabMask = _hashTabMask;
    out->_metaFields = _metaFields;
    out->_textScore = _textScore;
    out->_randVal = _randVal;
    out->_sortKey = _sortKey.getOwned();

    // Tell values that they have been memcpyed (updates ref counts)
    for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) {
        it->val.memcpyed();
    }

    return out;
}

DocumentStorage::~DocumentStorage() {
    std::unique_ptr<char[]> deleteBufferAtScopeEnd(_buffer);

    for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
        it->val.~Value();  // explicit destructor call
    }
}

Document::Document(const BSONObj& bson) {
    MutableDocument md(bson.nFields());

    BSONObjIterator it(bson);
    while (it.more()) {
        BSONElement bsonElement(it.next());
        md.addField(bsonElement.fieldNameStringData(), Value(bsonElement));
    }

    *this = md.freeze();
}

Document::Document(std::initializer_list<std::pair<StringData, ImplicitValue>> initializerList) {
    MutableDocument mutableDoc(initializerList.size());

    for (auto&& pair : initializerList) {
        mutableDoc.addField(pair.first, pair.second);
    }

    *this = mutableDoc.freeze();
}

BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& doc) {
    BSONObjBuilder subobj(builder.subobjStart());
    doc.toBson(&subobj);
    subobj.doneFast();
    return builder.builder();
}

void Document::toBson(BSONObjBuilder* builder, size_t recursionLevel) const {
    uassert(ErrorCodes::Overflow,
            str::stream() << "cannot convert document to BSON because it exceeds the limit of "
                          << BSONDepth::getMaxAllowableDepth()
                          << " levels of nesting",
            recursionLevel <= BSONDepth::getMaxAllowableDepth());

    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
        it->val.addToBsonObj(builder, it->nameSD(), recursionLevel);
    }
}

BSONObj Document::toBson() const {
    BSONObjBuilder bb;
    toBson(&bb);
    return bb.obj();
}

constexpr StringData Document::metaFieldTextScore;
constexpr StringData Document::metaFieldRandVal;
constexpr StringData Document::metaFieldSortKey;

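// Same as toBson(), but also appends any attached metadata as $-prefixed fields: the text
// score, the rand value, and, when 'includeSortKey' is true, the sort key.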
BSONObj Document::toBsonWithMetaData(bool includeSortKey) const {
    BSONObjBuilder bb;
    toBson(&bb);
    if (hasTextScore())
        bb.append(metaFieldTextScore, getTextScore());
    if (hasRandMetaField())
        bb.append(metaFieldRandVal, getRandMetaField());
    if (includeSortKey && hasSortKeyMetaField())
        bb.append(metaFieldSortKey, getSortKeyMetaField());
    return bb.obj();
}

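// Counterpart to toBsonWithMetaData(): recognized $-prefixed metadata fields at the top level
// of 'bson' become document metadata instead of ordinary fields; everything else is added
// as a regular field.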
Document Document::fromBsonWithMetaData(const BSONObj& bson) {
    MutableDocument md;

    BSONObjIterator it(bson);
    while (it.more()) {
        BSONElement elem(it.next());
        auto fieldName = elem.fieldNameStringData();
        if (fieldName[0] == '$') {
            if (fieldName == metaFieldTextScore) {
                md.setTextScore(elem.Double());
                continue;
            } else if (fieldName == metaFieldRandVal) {
                md.setRandMetaField(elem.Double());
                continue;
            } else if (fieldName == metaFieldSortKey) {
                md.setSortKeyMetaField(elem.Obj());
                continue;
            }
        }

        // Note: this will not parse out metadata in embedded documents.
        md.addField(fieldName, Value(elem));
    }

    return md.freeze();
}

MutableDocument::MutableDocument(size_t expectedFields)
    : _storageHolder(NULL), _storage(_storageHolder) {
    if (expectedFields) {
        storage().reserveFields(expectedFields);
    }
}

MutableValue MutableDocument::getNestedFieldHelper(const FieldPath& dottedField, size_t level) {
    if (level == dottedField.getPathLength() - 1) {
        return getField(dottedField.getFieldName(level));
    } else {
        MutableDocument nested(getField(dottedField.getFieldName(level)));
        return nested.getNestedFieldHelper(dottedField, level + 1);
    }
}

MutableValue MutableDocument::getNestedField(const FieldPath& dottedField) {
    fassert(16601, dottedField.getPathLength());
    return getNestedFieldHelper(dottedField, 0);
}

MutableValue MutableDocument::getNestedFieldHelper(const vector<Position>& positions,
                                                   size_t level) {
    if (level == positions.size() - 1) {
        return getField(positions[level]);
    } else {
        MutableDocument nested(getField(positions[level]));
        return nested.getNestedFieldHelper(positions, level + 1);
    }
}

MutableValue MutableDocument::getNestedField(const vector<Position>& positions) {
    fassert(16488, !positions.empty());
    return getNestedFieldHelper(positions, 0);
}

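// Recursively resolves 'fieldNames' against 'doc', one path component per level. Returns a
// missing Value if any component is absent or if a non-terminal component is not an Object.
// When 'positions' is non-null, the Position of each traversed field is appended to it.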
static Value getNestedFieldHelper(const Document& doc,
                                  const FieldPath& fieldNames,
                                  vector<Position>* positions,
                                  size_t level) {
    const auto fieldName = fieldNames.getFieldName(level);
    const Position pos = doc.positionOf(fieldName);

    if (!pos.found())
        return Value();

    if (positions)
        positions->push_back(pos);

    if (level == fieldNames.getPathLength() - 1)
        return doc.getField(pos);

    Value val = doc.getField(pos);
    if (val.getType() != Object)
        return Value();

    return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level + 1);
}

const Value Document::getNestedField(const FieldPath& path, vector<Position>* positions) const {
    fassert(16489, path.getPathLength());
    return getNestedFieldHelper(*this, path, positions, 0);
}

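// Estimates the memory footprint of this Document: the DocumentStorage header, the allocated
// buffer, and each Value's own approximate size (minus the Value footprint already counted as
// part of the buffer).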
size_t Document::getApproximateSize() const {
    if (!_storage)
        return 0;  // we've allocated no memory

    size_t size = sizeof(DocumentStorage);
    size += storage().allocatedBytes();

    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
        size += it->val.getApproximateSize();
        size -= sizeof(Value);  // already accounted for above
    }

    return size;
}

void Document::hash_combine(size_t& seed,
                            const StringData::ComparatorInterface* stringComparator) const {
    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
        StringData name = it->nameSD();
        boost::hash_range(seed, name.rawData(), name.rawData() + name.size());
        it->val.hash_combine(seed, stringComparator);
    }
}

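// Lexicographic, field-order-sensitive comparison of two documents. Fields are compared in
// storage order; at each position the canonical BSON type is compared first (for compatibility
// with BSONObj::woCompare()), then the field name, then the value. A document that is a prefix
// of the other compares as less.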
int Document::compare(const Document& rL,
                      const Document& rR,
                      const StringData::ComparatorInterface* stringComparator) {
    DocumentStorageIterator lIt = rL.storage().iterator();
    DocumentStorageIterator rIt = rR.storage().iterator();

    while (true) {
        if (lIt.atEnd()) {
            if (rIt.atEnd())
                return 0;  // documents are the same length

            return -1;  // left document is shorter
        }

        if (rIt.atEnd())
            return 1;  // right document is shorter

        const ValueElement& rField = rIt.get();
        const ValueElement& lField = lIt.get();

        // For compatibility with BSONObj::woCompare() consider the canonical type of values
        // before considering their names.
        if (lField.val.getType() != rField.val.getType()) {
            const int rCType = canonicalizeBSONType(rField.val.getType());
            const int lCType = canonicalizeBSONType(lField.val.getType());
            if (lCType != rCType)
                return lCType < rCType ? -1 : 1;
        }

        const int nameCmp = lField.nameSD().compare(rField.nameSD());
        if (nameCmp)
            return nameCmp;  // field names are unequal

        const int valueCmp = Value::compare(lField.val, rField.val, stringComparator);
        if (valueCmp)
            return valueCmp;  // fields are unequal

        rIt.advance();
        lIt.advance();
    }
}

string Document::toString() const {
    if (empty())
        return "{}";

    StringBuilder out;
    const char* prefix = "{";

    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
        out << prefix << it->nameSD() << ": " << it->val.toString();
        prefix = ", ";
    }
    out << '}';

    return out.str();
}

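// Serialization format shared with deserializeForSorter(): the field count, then each field as
// a NUL-terminated name followed by its serialized Value, then zero or more metadata entries,
// each introduced by a one-byte (MetaType + 1) marker, terminated by a 0 byte.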
void Document::serializeForSorter(BufBuilder& buf) const {
    const int numElems = size();
    buf.appendNum(numElems);

    for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
        buf.appendStr(it->nameSD(), /*NUL byte*/ true);
        it->val.serializeForSorter(buf);
    }

    if (hasTextScore()) {
        buf.appendNum(char(DocumentStorage::MetaType::TEXT_SCORE + 1));
        buf.appendNum(getTextScore());
    }
    if (hasRandMetaField()) {
        buf.appendNum(char(DocumentStorage::MetaType::RAND_VAL + 1));
        buf.appendNum(getRandMetaField());
    }
    if (hasSortKeyMetaField()) {
        buf.appendNum(char(DocumentStorage::MetaType::SORT_KEY + 1));
        getSortKeyMetaField().appendSelfToBufBuilder(buf);
    }
    buf.appendNum(char(0));
}

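// Rebuilds a Document from the representation written by serializeForSorter(): reads the field
// count and the fields, then consumes metadata markers until the terminating 0 byte.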
Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&) {
    const int numElems = buf.read<LittleEndian<int>>();
    MutableDocument doc(numElems);
    for (int i = 0; i < numElems; i++) {
        StringData name = buf.readCStr();
        doc.addField(name, Value::deserializeForSorter(buf, Value::SorterDeserializeSettings()));
    }

    while (char marker = buf.read<char>()) {
        if (marker == char(DocumentStorage::MetaType::TEXT_SCORE) + 1) {
            doc.setTextScore(buf.read<LittleEndian<double>>());
        } else if (marker == char(DocumentStorage::MetaType::RAND_VAL) + 1) {
            doc.setRandMetaField(buf.read<LittleEndian<double>>());
        } else if (marker == char(DocumentStorage::MetaType::SORT_KEY) + 1) {
            doc.setSortKeyMetaField(
                BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings()));
        } else {
            uasserted(28744, "Unrecognized marker, unable to deserialize buffer");
        }
    }

    return doc.freeze();
}
}  // namespace mongo