1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #include "mongo/platform/basic.h"
32
33 #include "mongo/bson/mutable/document.h"
34
35 #include <cstdlib>
36 #include <cstring>
37 #include <limits>
38 #include <type_traits>
39 #include <vector>
40
41 #include "mongo/base/static_assert.h"
42 #include "mongo/bson/inline_decls.h"
43 #include "mongo/bson/mutable/damage_vector.h"
44 #include "mongo/util/debug_util.h"
45
46 namespace mongo {
47 namespace mutablebson {
48
49 /** Mutable BSON Implementation Overview
50 *
51 * If you haven't read it already, please read the 'Mutable BSON Overview' comment in
52 * document.h before reading further.
53 *
54 * In the following discussion, the capitalized terms 'Element' and 'Document' refer to
55 * the classes of the same name. At times, it is also necessary to refer to abstract
56 * 'elements' or 'documents', in the sense of bsonspec.org. These latter uses are
57 * non-capitalized. In the BSON specification, there are two 'classes' of
58 * elements. 'Primitive' or 'leaf' elements are those elements which do not contain other
59 * elements. In practice, all BSON types except 'Array' and 'Object' are primitives. The
60 * CodeWScope type is an exception, but one that we sidestep by considering its BSONObj
61 * payload to be opaque.
62 *
63 * A mutable BSON Document and its component Elements are implemented in terms of four
64 * data structures. These structures are owned by a Document::Impl object. Each Document
65 * owns a unique Document::Impl, which owns the relevant data structures and provides
66 * accessors, mutators, and helper methods related to those data structures. Understanding
67 * these data structures is critical for understanding how the system as a whole operates.
68 *
69 * - The 'Elements Vector': This is a std::vector<ElementRep>, where 'ElementRep' is a
70 * structure type defined below that contains the detailed information about an entity
71 * in the Document (e.g. an Object, or an Array, or a NumberLong, etc.). The 'Element'
72 * and 'ConstElement' objects contain a pointer to a Document (which allows us to reach
73 * the Document::Impl for the Document), and an index into the Elements Vector in the
74 * Document::Impl. These two pieces of information make it possible for us to obtain the
75 * ElementRep associated with a given Element. Note that the Elements Vector is append
76 * only: ElementReps are never removed from it, even if the cooresponding Element is
77 * removed from the Document. By never removing ElementReps, and by using indexes into
78 * the Elements Vector, we can ensure that Elements are never invalidated. Note that
79 * every Document comes with an automatically provided 'root' element of mongo::Object
80 * type. The ElementRep for the root is always in the first slot (index zero) of the
81 * Elements Vector.
82 *
83 * - The 'Leaf Builder': This is a standard BSONObjBuilder. When a request is made to the
84 * Document to add new data to the Document via one of the Document::makeElement[TYPE]
85 * calls, the element is constructed by invoking the appropriate method on the Leaf
86 * Builder, forwarding the arguments provided to the call on Document. This results in a
87 * contiguous region of memory which encodes this element, capturing its field name, its
88 * type, and the bytes that encode its value, in the same way it normally does when
89 * using BSONObjBuilder. We then build an ElementRep that indexes into the BufBuilder
90 * behind the BSONObjBuilder (more on how this happens below, in the section on the
91 * 'Objects Vector'), then insert that new ElementRep into the ElementsVector, and
92 * finally return an Element that dereferences to the new ElementRep. Subsequently,
93 * requests for the type, fieldname or value bytes via the Element are satisfied by
94 * obtaining the contiguous memory region for the element, which may be used to
95 * construct a BSONElement over that memory region.
96 *
97 * - The 'Objects Vector': This is a std::vector<BSONObj>. Any BSONObj object that
98 * provides values for parts of the Document is stored in the Objects Vector. For
99 * instance, in 'Example 2' from document.h, the Document we construct wraps an existing
100 * BSONObj, which is passed in to the Document constructor. That BSONObj would be stored
101 * in the Objects Vector. The data content of the BSONObj is not copied, but the BSONObj
102 * is copied, so the if the BSONObj is counted, we will up its refcount. In any event
103 * the lifetime of the BSONObj must exceed our lifetime by some mechanism. ElementReps
104 * that represent the component elements of the BSONObj store the index of their
105 * supporting BSONObj into the 'objIdx' field of ElementRep. Later, when Elements
106 * referring to those ElementReps are asked for properties like the field name or type
107 * of the Element, the underlying memory region in the appropriate BSONObj may be
108 * examined to provide the relevant data.
109 *
110 * - The 'Field Name Heap': For some elements, particularly those in the Leaf Builder or
111 * those embedded in a BSONObj in the Objects Vector, we can easily obtain the field
112 * name by reading it from the encoded BSON. However, some elements are not so
113 * fortunate. Newly created elements of mongo::Array or mongo::Object type, for
114 * instance, don't have a memory region that provides values. In such cases, the field
115 * name is stored in the field name heap, which is simply std::vector<char>, where the
116 * field names are null-byte-delimited. ElementsReps for such elements store an offset
117 * into the Field Name Heap, and when asked for their field name simply return a pointer
118 * to the string data the offset identifies. This exploits the fact that in BSON, valid
119 * field names are null terinated and do not contain embedded null bytes.
120 *
121 * - The 'root' Element. Each Document contains a well known Element, which always refers
122 * to a pre-constructed ElementRep at offset zero in the Elements Vector. This is an
123 * Object element, and it is considered as the root of the document tree. It is possible
124 * for ElementReps to exist in the Document data structures, but not be in a child
125 * relationship to the root Element. Newly created Elements, for instance, are in this
126 * sort of 'detached' state until they are attched to another element. Only Element's
127 * that are children of the root element are traversed when calling top level
128 * serialization or comparision operations on Document.
129 *
130 * When you construct a Document that obtains its values from an underlying BSONObj, the
131 * entire BSONObj is not 'unpacked' into ElementReps at Document construction
132 * time. Instead, as you ask for Elements with the Element navigation API, the Elements
133 * for children and siblings are created on demand. Subobjects which are never visited
134 * will never have ElementReps constructed for them. Similarly, when writing a Document
135 * back out to a builder, regions of memory that provide values for the Document and which
136 * have not been modified will be block copied, instead of being recursively explored and
137 * written.
138 *
139 * To see how these data structures interoperate, we will walk through an example. You may
140 * want to read the comments for ElementRep before tackling the example, since we will
141 * refer to the internal state of ElementRep here. The example code used here exists as a
142 * unit test in mutable_bson_test.cpp as (Documentation, Example3).
143 *
144 *
145 * Legend:
146 * oi : objIdx
147 * +/- : bitfield state (s: serialized, a: array)
148 * x : invalid/empty rep idx
149 * ? : opaque rep idx
150 * ls/rs: left/right sibling
151 * lc/rc: left/right child
152 * p : parent
153
154 static const char inJson[] =
155 "{"
156 " 'xs': { 'x' : 'x', 'X' : 'X' },"
157 " 'ys': { 'y' : 'y' }"
158 "}";
159 mongo::BSONObj inObj = mongo::fromjson(inJson);
160 mmb::Document doc(inObj);
161
162 * _elements
163 * oi flags offset ls rs lc rc p
164 * +-----------------------------------------------------------------------------+
165 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | ? | ? | x |
166 * +-----------------------------------------------------------------------------+
167 *
168 * _objects
169 * +-----------------------------------------------------------------------------+
170 * | BSONObj for _leafBuilder | BSONObj for inObj | |
171 * +-----------------------------------------------------------------------------+
172 *
173 * _fieldNames
174 * +-----------------------------------------------------------------------------+
175 * | \0 |
176 * +-----------------------------------------------------------------------------+
177 *
178 * _leafBuf
179 * +-----------------------------------------------------------------------------+
180 * | {} |
181 * +-----------------------------------------------------------------------------+
182
183
184 mmb::Element root = doc.root();
185 mmb::Element xs = root.leftChild();
186
187 * _elements
188 * oi flags offset ls rs lc rc p
189 * +-----------------------------------------------------------------------------+
190 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | ? | x | *
191 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | ? | ? | ? | 0 | *
192 * +-----------------------------------------------------------------------------+
193 *
194 * _objects
195 * +-----------------------------------------------------------------------------+
196 * | BSONObj for _leafBuilder | BSONObj for inObj | |
197 * +-----------------------------------------------------------------------------+
198 *
199 * _fieldNames
200 * +-----------------------------------------------------------------------------+
201 * | \0 |
202 * +-----------------------------------------------------------------------------+
203 *
204 * _leafBuf
205 * +-----------------------------------------------------------------------------+
206 * | {} |
207 * +-----------------------------------------------------------------------------+
208
209
210 mmb::Element ys = xs.rightSibling();
211
212 * _elements
213 * oi flags offset ls rs lc rc p
214 * +-----------------------------------------------------------------------------+
215 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | ? | x |
216 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 | *
217 * 2 | 1 | s:+ | ... | off of 'ys' into _objects[1] | 1 | ? | ? | ? | 0 | *
218 * +-----------------------------------------------------------------------------+
219 *
220 * _objects
221 * +-----------------------------------------------------------------------------+
222 * | BSONObj for _leafBuilder | BSONObj for inObj | |
223 * +-----------------------------------------------------------------------------+
224 *
225 * _fieldNames
226 * +-----------------------------------------------------------------------------+
227 * | \0 |
228 * +-----------------------------------------------------------------------------+
229 *
230 * _leafBuf
231 * +-----------------------------------------------------------------------------+
232 * | {} |
233 * +-----------------------------------------------------------------------------+
234
235
236 mmb::Element dne = ys.rightSibling();
237
238 * _elements
239 * oi flags offset ls rs lc rc p
240 * +-----------------------------------------------------------------------------+
241 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x | *
242 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
243 * 2 | 1 | s:+ | ... | off of 'ys' into _objects[1] | 1 | x | ? | ? | 0 | *
244 * +-----------------------------------------------------------------------------+
245 *
246 * _objects
247 * +-----------------------------------------------------------------------------+
248 * | BSONObj for _leafBuilder | BSONObj for inObj | |
249 * +-----------------------------------------------------------------------------+
250 *
251 * _fieldNames
252 * +-----------------------------------------------------------------------------+
253 * | \0 |
254 * +-----------------------------------------------------------------------------+
255 *
256 * _leafBuf
257 * +-----------------------------------------------------------------------------+
258 * | {} |
259 * +-----------------------------------------------------------------------------+
260
261
262 mmb::Element ycaps = doc.makeElementString("Y", "Y");
263
264 * _elements
265 * oi flags offset ls rs lc rc p
266 * +-----------------------------------------------------------------------------+
267 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x |
268 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
269 * 2 | 1 | s:+ | ... | off of 'ys' into _objects[1] | 1 | x | ? | ? | 0 |
270 * 3 | 0 | s:+ | ... | off of 'Y' into _objects[0] | x | x | x | x | x | *
271 * +-----------------------------------------------------------------------------+
272 *
273 * _objects
274 * +-----------------------------------------------------------------------------+
275 * | BSONObj for _leafBuilder | BSONObj for inObj | |
276 * +-----------------------------------------------------------------------------+
277 *
278 * _fieldNames
279 * +-----------------------------------------------------------------------------+
280 * | \0 |
281 * +-----------------------------------------------------------------------------+
282 *
283 * _leafBuf
284 * +-----------------------------------------------------------------------------+
285 * | { "Y" : "Y" } | *
286 * +-----------------------------------------------------------------------------+
287
288
289 ys.pushBack(ycaps);
290
291 * _elements
292 * oi flags offset ls rs lc rc p
293 * +-----------------------------------------------------------------------------+
294 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x |
295 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
296 * 2 | 1 | s:- | ... | off of 'ys' into _objects[1] | 1 | x | 4 | 3 | 0 | *
297 * 3 | 0 | s:+ | ... | off of 'Y' into _objects[0] | 4 | x | x | x | 2 | *
298 * 4 | 1 | s:+ | ... | off of 'ys.y' into _objects[1] | x | 3 | x | x | 2 | *
299 * +-----------------------------------------------------------------------------+
300 *
301 * _objects
302 * +-----------------------------------------------------------------------------+
303 * | BSONObj for _leafBuilder | BSONObj for inObj | |
304 * +-----------------------------------------------------------------------------+
305 *
306 * _fieldNames
307 * +-----------------------------------------------------------------------------+
308 * | \0 |
309 * +-----------------------------------------------------------------------------+
310 *
311 * _leafBuf
312 * +-----------------------------------------------------------------------------+
313 * | { "Y" : "Y" } |
314 * +-----------------------------------------------------------------------------+
315
316
317 mmb::Element pun = doc.makeElementArray("why");
318
319 * _elements
320 * oi flags offset ls rs lc rc p
321 * +-----------------------------------------------------------------------------+
322 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x |
323 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
324 * 2 | 1 | s:- | ... | off of 'ys' into _objects[1] | 1 | x | 4 | 3 | 0 |
325 * 3 | 0 | s:+ | ... | off of 'Y' into _objects[0] | 4 | x | x | x | 2 |
326 * 4 | 1 | s:+ | ... | off of 'ys.y' into _objects[1] | x | 3 | x | x | 2 |
327 * 5 | -1 | s:- | a:+ | ... | off of 'why' into _fieldNames | x | x | x | x | x | *
328 * +-----------------------------------------------------------------------------+
329 *
330 * _objects
331 * +-----------------------------------------------------------------------------+
332 * | BSONObj for _leafBuilder | BSONObj for inObj | |
333 * +-----------------------------------------------------------------------------+
334 *
335 * _fieldNames
336 * +-----------------------------------------------------------------------------+
337 * | \0why\0 | *
338 * +-----------------------------------------------------------------------------+
339 *
340 * _leafBuf
341 * +-----------------------------------------------------------------------------+
342 * | { "Y" : "Y" } |
343 * +-----------------------------------------------------------------------------+
344
345
346 ys.pushBack(pun);
347
348 * _elements
349 * oi flags offset ls rs lc rc p
350 * +-----------------------------------------------------------------------------+
351 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x |
352 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
353 * 2 | 1 | s:- | ... | off of 'ys' into _objects[1] | 1 | x | 4 | 5 | 0 | *
354 * 3 | 0 | s:+ | ... | off of 'Y' into _objects[0] | 4 | 5 | x | x | 2 | *
355 * 4 | 1 | s:+ | ... | off of 'ys.y' into _objects[1] | x | 3 | x | x | 2 |
356 * 5 | -1 | s:- | a:+ | ... | off of 'why' into _fieldNames | 3 | x | x | x | 2 | *
357 * +-----------------------------------------------------------------------------+
358 *
359 * _objects
360 * +-----------------------------------------------------------------------------+
361 * | BSONObj for _leafBuilder | BSONObj for inObj | |
362 * +-----------------------------------------------------------------------------+
363 *
364 * _fieldNames
365 * +-----------------------------------------------------------------------------+
366 * | \0why\0 |
367 * +-----------------------------------------------------------------------------+
368 *
369 * _leafBuf
370 * +-----------------------------------------------------------------------------+
371 * | { "Y" : "Y" } |
372 * +-----------------------------------------------------------------------------+
373
374
375 pun.appendString("na", "not");
376
377 * _elements
378 * oi flags offset ls rs lc rc p
379 * +-----------------------------------------------------------------------------+
380 * 0 | 1 | s:- | ... | off 0 into _fieldNames | x | x | 1 | 2 | x |
381 * 1 | 1 | s:+ | ... | off of 'xs' into _objects[1] | x | 2 | ? | ? | 0 |
382 * 2 | 1 | s:- | ... | off of 'ys' into _objects[1] | 1 | x | 4 | 5 | 0 |
383 * 3 | 0 | s:+ | ... | off of 'Y' into _objects[0] | 4 | 5 | x | x | 2 |
384 * 4 | 1 | s:+ | ... | off of 'ys.y' into _objects[1] | x | 3 | x | x | 2 |
385 * 5 | -1 | s:- | a:+ | ... | off of 'why' into _fieldNames | 3 | x | 6 | 6 | 2 | *
386 * 6 | 0 | s:+ | ... | off of 'na' into _objects[0] | x | x | x | x | 5 | *
387 * +-----------------------------------------------------------------------------+
388 *
389 * _objects
390 * +-----------------------------------------------------------------------------+
391 * | BSONObj for _leafBuilder | BSONObj for inObj | |
392 * +-----------------------------------------------------------------------------+
393 *
394 * _fieldNames
395 * +-----------------------------------------------------------------------------+
396 * | \0why\0 |
397 * +-----------------------------------------------------------------------------+
398 *
399 * _leafBuf
400 * +-----------------------------------------------------------------------------+
401 * | { "Y" : "Y", "na" : "not" } | *
402 * +-----------------------------------------------------------------------------+
403 *
404 */
405
406 // Work around http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29365. Note that the selection of
407 // minor version 4 is somewhat arbitrary. It does appear that the fix for this was backported
408 // to earlier versions. This is a conservative choice that we can revisit later. We need the
409 // __clang__ here because Clang claims to be gcc of some version.
410 #if defined(__clang__) || !defined(__GNUC__) || (__GNUC__ > 4) || \
411 (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
412 namespace {
413 #endif
414
415 // The designated field name for the root element.
416 constexpr auto kRootFieldName = ""_sd;
417
418 // How many reps do we cache before we spill to heap. Use a power of two. For debug
419 // builds we make this very small so it is less likely to mask vector invalidation
420 // logic errors. We don't make it zero so that we do execute the fastRep code paths.
421 const size_t kFastReps = kDebugBuild ? 2 : 128;
422
423 // An ElementRep contains the information necessary to locate the data for an Element,
424 // and the topology information for how the Element is related to other Elements in the
425 // document.
426 #pragma pack(push, 1)
427 struct ElementRep {
428 // Builds an ElementRep in its correct default state. This is used instead of a default
429 // constructor or NSDMIs to ensure that this type stays trivial so that vectors of it are cheap
430 // to grow.
431 static ElementRep makeDefaultRep();
432
433 // The index of the BSONObj that provides the value for this Element. For nodes
434 // where serialized is 'false', this value may be kInvalidObjIdx to indicate that
435 // the Element does not have a supporting BSONObj.
436 typedef uint16_t ObjIdx;
437 ObjIdx objIdx;
438
439 // This bit is true if this ElementRep identifies a completely serialized
440 // BSONElement (i.e. a region of memory with a bson type byte, a fieldname, and an
441 // encoded value). Changes to children of a serialized element will cause it to be
442 // marked as unserialized.
443 uint16_t serialized : 1;
444
445 // For object like Elements where we cannot determine the type of the object by
446 // looking a region of memory, the 'array' bit allows us to determine whether we
447 // are an object or an array.
448 uint16_t array : 1;
449
450 // Reserved for future use.
451 uint16_t reserved : 14;
452
453 // This word either gives the offset into the BSONObj associated with this
454 // ElementRep where this serialized BSON element may be located, or the offset into
455 // the _fieldNames member of the Document where the field name for this BSON
456 // element may be located.
457 uint32_t offset;
458
459 // The indexes of our left and right siblings in the Document.
460 struct {
461 Element::RepIdx left;
462 Element::RepIdx right;
463 } sibling;
464
465 // The indexes of our left and right chidren in the Document.
466 struct {
467 Element::RepIdx left;
468 Element::RepIdx right;
469 } child;
470
471 // The index of our parent in the Document.
472 Element::RepIdx parent;
473
474 // The size of the field name and the total element size are cached to allow quickly
475 // constructing a BSONElement object. These fields are private (even though the rest of this
476 // struct is public) because of the somewhat complex requirements to update and use them
477 // correctly.
setFieldNameSizeAndTotalSizemongo::mutablebson::__anon0bdfad420111::ElementRep478 void setFieldNameSizeAndTotalSize(int fieldNameSize, int totalSize) {
479 _fieldNameSize = fieldNameSize <= std::numeric_limits<int16_t>::max() ? fieldNameSize : -1;
480 _totalSize = totalSize <= std::numeric_limits<int16_t>::max() ? totalSize : -1;
481 }
482
toSerializedElementmongo::mutablebson::__anon0bdfad420111::ElementRep483 BSONElement toSerializedElement(const BSONObj& holder) const {
484 return BSONElement(holder.objdata() + offset, //
485 _fieldNameSize,
486 _totalSize,
487 BSONElement::CachedSizeTag());
488 }
489
490 private:
491 // The cached sizes for this element, or -1 if unknown or too big to fit.
492 // TODO consider putting _fieldNameSize in the reserved bit field above to allow larger total
493 // sizes to be cached. Alternatively, could use an 8/24 split uint32_t. Since BSONObj is limited
494 // to just over 16MB, that will cover all practical element sizes. For now, this is fine since
495 // computing the size is a trivial cost when working with elements larger 32KB.
496 int16_t _fieldNameSize;
497 int16_t _totalSize;
498 };
499 #pragma pack(pop)
500
501 MONGO_STATIC_ASSERT(sizeof(ElementRep) == 32);
502
503 // We want ElementRep to be a POD so Document::Impl can grow the std::vector with
504 // memmove.
505 //
506 MONGO_STATIC_ASSERT(std::is_trivial<ElementRep>::value);
507
508 // The ElementRep for the root element is always zero.
509 const Element::RepIdx kRootRepIdx = Element::RepIdx(0);
510
511 // This is the object index for elements in the leaf heap.
512 const ElementRep::ObjIdx kLeafObjIdx = ElementRep::ObjIdx(0);
513
514 // This is the sentinel value to indicate that we have no supporting BSONObj.
515 const ElementRep::ObjIdx kInvalidObjIdx = ElementRep::ObjIdx(-1);
516
517 // This is the highest valid object index that does not overlap sentinel values.
518 const ElementRep::ObjIdx kMaxObjIdx = ElementRep::ObjIdx(-2);
519
makeDefaultRep()520 ElementRep ElementRep::makeDefaultRep() {
521 ElementRep out;
522 out.objIdx = kInvalidObjIdx;
523 out.serialized = false;
524 out.array = false;
525 out.reserved = 0;
526 out.offset = 0;
527 out.sibling = {Element::kInvalidRepIdx, Element::kInvalidRepIdx};
528 out.child = {Element::kInvalidRepIdx, Element::kInvalidRepIdx};
529 out.parent = Element::kInvalidRepIdx;
530 out._fieldNameSize = -1;
531 out._totalSize = -1;
532 return out;
533 }
534
535 // Returns the offset of 'elt' within 'object' as a uint32_t. The element must be part
536 // of the object or the behavior is undefined.
getElementOffset(const BSONObj & object,const BSONElement & elt)537 uint32_t getElementOffset(const BSONObj& object, const BSONElement& elt) {
538 dassert(!elt.eoo());
539 const char* const objRaw = object.objdata();
540 const char* const eltRaw = elt.rawdata();
541 dassert(objRaw < eltRaw);
542 dassert(eltRaw < objRaw + object.objsize());
543 dassert(eltRaw + elt.size() <= objRaw + object.objsize());
544 const ptrdiff_t offset = eltRaw - objRaw;
545 // BSON documents express their size as an int32_t so we should always be able to
546 // express the offset as a uint32_t.
547 invariant(offset > 0);
548 invariant(offset <= std::numeric_limits<int32_t>::max());
549 return offset;
550 }
551
552 // Returns true if this ElementRep is 'detached' from all other elements and can be
553 // added as a child, which helps ensure that we maintain a tree rather than a graph
554 // when adding new elements to the tree. The root element is never considered to be
555 // attachable.
canAttach(const Element::RepIdx id,const ElementRep & rep)556 bool canAttach(const Element::RepIdx id, const ElementRep& rep) {
557 return (id != kRootRepIdx) && (rep.sibling.left == Element::kInvalidRepIdx) &&
558 (rep.sibling.right == Element::kInvalidRepIdx) && (rep.parent == Element::kInvalidRepIdx);
559 }
560
561 // Returns a Status describing why 'canAttach' returned false. This function should not
562 // be inlined since it just makes the callers larger for no real gain.
563 NOINLINE_DECL Status getAttachmentError(const ElementRep& rep);
getAttachmentError(const ElementRep & rep)564 Status getAttachmentError(const ElementRep& rep) {
565 if (rep.sibling.left != Element::kInvalidRepIdx)
566 return Status(ErrorCodes::IllegalOperation, "dangling left sibling");
567 if (rep.sibling.right != Element::kInvalidRepIdx)
568 return Status(ErrorCodes::IllegalOperation, "dangling right sibling");
569 if (rep.parent != Element::kInvalidRepIdx)
570 return Status(ErrorCodes::IllegalOperation, "dangling parent");
571 return Status(ErrorCodes::IllegalOperation, "cannot add the root as a child");
572 }
573
574
575 // Enable paranoid mode to force a reallocation on mutation of the princple data
576 // structures in Document::Impl. This is really slow, but can be very helpful if you
577 // suspect an invalidation logic error and want to find it with valgrind. Paranoid mode
578 // only works in debug mode; it is ignored in release builds.
579 const bool paranoid = false;
580
581 #if defined(__clang__) || !defined(__GNUC__) || (__GNUC__ > 4) || \
582 (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
583 } // namespace
584 #endif
585
586 /** Document::Impl holds the Document state. Please see the file comment above for details
587 * on the fields of Impl and how they are used to realize the implementation of mutable
588 * BSON. Impl provides various utility methods to insert, lookup, and interrogate the
589 * Elements, BSONObj objects, field names, and builders associated with the Document.
590 *
591 * TODO: At some point, we could remove the firewall and inline the members of Impl into
592 * Document.
593 */
594 class Document::Impl {
595 MONGO_DISALLOW_COPYING(Impl);
596
597 public:
Impl(Document::InPlaceMode inPlaceMode)598 Impl(Document::InPlaceMode inPlaceMode)
599 : _numElements(0),
600 _slowElements(),
601 _objects(),
602 _fieldNames(),
603 _leafBuf(),
604 _leafBuilder(_leafBuf),
605 _fieldNameScratch(),
606 _damages(),
607 _inPlaceMode(inPlaceMode) {
608 // We always have a BSONObj for the leaves, and we often have
609 // one for our base document, so reserve 2.
610 _objects.reserve(2);
611
612 // We always have at least one byte for the root field name, and we would like
613 // to be able to hold a few short field names without reallocation.
614 _fieldNames.reserve(8);
615
616 // We need an object at _objects[0] so that we can access leaf elements we
617 // construct with the leaf builder in the same way we access elements serialized in
618 // other BSONObjs. So we call asTempObj on the builder and store the result in slot
619 // 0.
620 dassert(_objects.size() == kLeafObjIdx);
621 _objects.push_back(_leafBuilder.asTempObj());
622 dassert(_leafBuf.len() != 0);
623 }
624
~Impl()625 ~Impl() {
626 _leafBuilder.abandon();
627 }
628
reset(Document::InPlaceMode inPlaceMode)629 void reset(Document::InPlaceMode inPlaceMode) {
630 // Clear out the state in the vectors.
631 _slowElements.clear();
632 _numElements = 0;
633
634 _objects.clear();
635 _fieldNames.clear();
636
637 // There is no way to reset the state of a BSONObjBuilder, so we need to call its
638 // dtor, reset the underlying buf, and re-invoke the constructor in-place.
639 _leafBuilder.abandon();
640 _leafBuilder.~BSONObjBuilder();
641 _leafBuf.reset();
642 new (&_leafBuilder) BSONObjBuilder(_leafBuf);
643
644 _fieldNameScratch.clear();
645 _damages.clear();
646 _inPlaceMode = inPlaceMode;
647
648 // Ensure that we start in the same state as the ctor would leave us in.
649 _objects.push_back(_leafBuilder.asTempObj());
650 }
651
652 // Obtain the ElementRep for the given rep id.
getElementRep(Element::RepIdx id)653 ElementRep& getElementRep(Element::RepIdx id) {
654 return const_cast<ElementRep&>(const_cast<const Impl*>(this)->getElementRep(id));
655 }
656
657 // Obtain the ElementRep for the given rep id.
getElementRep(Element::RepIdx id) const658 const ElementRep& getElementRep(Element::RepIdx id) const {
659 dassert(id < _numElements);
660 if (id < kFastReps)
661 return _fastElements[id];
662 else
663 return _slowElements[id - kFastReps];
664 }
665
666 // Construct and return a new default initialized ElementRep. The RepIdx identifying
667 // the new rep is returned in the out parameter.
makeNewRep(Element::RepIdx * newIdx)668 ElementRep& makeNewRep(Element::RepIdx* newIdx) {
669 const Element::RepIdx id = *newIdx = _numElements++;
670
671 if (id < kFastReps) {
672 return _fastElements[id] = ElementRep::makeDefaultRep();
673 } else {
674 invariant(id <= Element::kMaxRepIdx);
675
676 if (kDebugBuild && paranoid) {
677 // Force all reps to new addresses to help catch invalid rep usage.
678 std::vector<ElementRep> newSlowElements(_slowElements);
679 _slowElements.swap(newSlowElements);
680 }
681
682 return *_slowElements.insert(_slowElements.end(), ElementRep::makeDefaultRep());
683 }
684 }
685
686 // Insert a new ElementRep for a leaf element at the given offset and return its ID.
insertLeafElement(int offset,int fieldNameSize=-1,int totalSize=-1)687 Element::RepIdx insertLeafElement(int offset, int fieldNameSize = -1, int totalSize = -1) {
688 // BufBuilder hands back sizes in 'int's.
689 Element::RepIdx inserted;
690 ElementRep& rep = makeNewRep(&inserted);
691
692 rep.setFieldNameSizeAndTotalSize(fieldNameSize, totalSize);
693 rep.objIdx = kLeafObjIdx;
694 rep.serialized = true;
695 dassert(offset >= 0);
696 // TODO: Is this a legitimate possibility?
697 dassert(static_cast<unsigned int>(offset) < std::numeric_limits<uint32_t>::max());
698 rep.offset = offset;
699 _objects[kLeafObjIdx] = _leafBuilder.asTempObj();
700 return inserted;
701 }
702
703 // Obtain the object builder for the leaves.
leafBuilder()704 BSONObjBuilder& leafBuilder() {
705 return _leafBuilder;
706 }
707
708 // Obtain the BSONObj for the given object id.
getObject(ElementRep::ObjIdx objIdx)709 BSONObj& getObject(ElementRep::ObjIdx objIdx) {
710 dassert(objIdx < _objects.size());
711 return _objects[objIdx];
712 }
713
714 // Obtain the BSONObj for the given object id.
getObject(ElementRep::ObjIdx objIdx) const715 const BSONObj& getObject(ElementRep::ObjIdx objIdx) const {
716 dassert(objIdx < _objects.size());
717 return _objects[objIdx];
718 }
719
720 // Insert the given BSONObj and return an ID for it.
insertObject(const BSONObj & newObj)721 ElementRep::ObjIdx insertObject(const BSONObj& newObj) {
722 const size_t objIdx = _objects.size();
723 invariant(objIdx <= kMaxObjIdx);
724 _objects.push_back(newObj);
725 if (kDebugBuild && paranoid) {
726 // Force reallocation to catch use after invalidation.
727 std::vector<BSONObj> new_objects(_objects);
728 _objects.swap(new_objects);
729 }
730 return objIdx;
731 }
732
733 // Given a RepIdx, return the BSONElement that it represents.
getSerializedElement(const ElementRep & rep) const734 BSONElement getSerializedElement(const ElementRep& rep) const {
735 return rep.toSerializedElement(getObject(rep.objIdx));
736 }
737
738 // A helper method that either inserts the field name into the field name heap and
739 // updates element.
insertFieldName(ElementRep & rep,StringData fieldName)740 void insertFieldName(ElementRep& rep, StringData fieldName) {
741 dassert(!rep.serialized);
742 rep.offset = insertFieldName(fieldName);
743 }
744
745 // Retrieve the fieldName, given a rep.
getFieldName(const ElementRep & rep) const746 StringData getFieldName(const ElementRep& rep) const {
747 // The root element has no field name.
748 if (&rep == &getElementRep(kRootRepIdx))
749 return StringData();
750
751 if (rep.serialized || (rep.objIdx != kInvalidObjIdx))
752 return getSerializedElement(rep).fieldNameStringData();
753
754 return getFieldName(rep.offset);
755 }
756
getFieldNameForNewElement(const ElementRep & rep)757 StringData getFieldNameForNewElement(const ElementRep& rep) {
758 StringData result = getFieldName(rep);
759 if (rep.objIdx == kLeafObjIdx) {
760 _fieldNameScratch.assign(result.rawData(), result.size());
761 result = StringData(_fieldNameScratch);
762 }
763 return result;
764 }
765
766 // Retrieve the type, given a rep.
getType(const ElementRep & rep) const767 BSONType getType(const ElementRep& rep) const {
768 // The root element is always an Object.
769 if (&rep == &getElementRep(kRootRepIdx))
770 return mongo::Object;
771
772 if (rep.serialized || (rep.objIdx != kInvalidObjIdx))
773 return getSerializedElement(rep).type();
774
775 return rep.array ? mongo::Array : mongo::Object;
776 }
777
isLeafType(BSONType type)778 static bool isLeafType(BSONType type) {
779 return ((type != mongo::Object) && (type != mongo::Array));
780 }
781
782 // Returns true if rep is not an object or array.
isLeaf(const ElementRep & rep) const783 bool isLeaf(const ElementRep& rep) const {
784 return isLeafType(getType(rep));
785 }
786
isLeaf(const BSONElement & elt) const787 bool isLeaf(const BSONElement& elt) const {
788 return isLeafType(elt.type());
789 }
790
791 // Returns true if rep's value can be provided as a BSONElement.
hasValue(const ElementRep & rep) const792 bool hasValue(const ElementRep& rep) const {
793 // The root element may be marked serialized, but it doesn't have a BSONElement
794 // representation.
795 if (&rep == &getElementRep(kRootRepIdx))
796 return false;
797
798 return rep.serialized;
799 }
800
801 // Return the index of the left child of the Element with index 'index', resolving the
802 // left child to a realized Element if it is currently opaque. This may also cause the
803 // parent elements child.right entry to be updated.
resolveLeftChild(Element::RepIdx index)804 Element::RepIdx resolveLeftChild(Element::RepIdx index) {
805 dassert(index != Element::kInvalidRepIdx);
806 dassert(index != Element::kOpaqueRepIdx);
807
808 // If the left child is anything other than opaque, then we are done here.
809 ElementRep* rep = &getElementRep(index);
810 if (rep->child.left != Element::kOpaqueRepIdx)
811 return rep->child.left;
812
813 // It should be impossible to have an opaque left child and be non-serialized,
814 dassert(rep->serialized);
815 BSONElement childElt =
816 (hasValue(*rep) ? getSerializedElement(*rep).embeddedObject() : getObject(rep->objIdx))
817 .firstElement();
818
819 if (!childElt.eoo()) {
820 // Do this now before other writes so compiler can exploit knowing
821 // that we are not eoo.
822 const int32_t fieldNameSize = childElt.fieldNameSize();
823 const int32_t totalSize = childElt.size();
824
825 Element::RepIdx inserted;
826 ElementRep& newRep = makeNewRep(&inserted);
827 // Calling makeNewRep invalidates rep since it may cause a reallocation of
828 // the element vector. After calling insertElement, we reacquire rep.
829 rep = &getElementRep(index);
830
831 newRep.serialized = true;
832 newRep.objIdx = rep->objIdx;
833 newRep.offset = getElementOffset(getObject(rep->objIdx), childElt);
834 newRep.parent = index;
835 newRep.sibling.right = Element::kOpaqueRepIdx;
836 // If this new object has possible substructure, mark its children as opaque.
837 if (!isLeaf(childElt)) {
838 newRep.child.left = Element::kOpaqueRepIdx;
839 newRep.child.right = Element::kOpaqueRepIdx;
840 }
841 newRep.setFieldNameSizeAndTotalSize(fieldNameSize, totalSize);
842 rep->child.left = inserted;
843 } else {
844 rep->child.left = Element::kInvalidRepIdx;
845 rep->child.right = Element::kInvalidRepIdx;
846 }
847
848 dassert(rep->child.left != Element::kOpaqueRepIdx);
849 return rep->child.left;
850 }
851
852 // Return the index of the right child of the Element with index 'index', resolving any
853 // opaque nodes. Note that this may require resolving all of the right siblings of the
854 // left child.
resolveRightChild(Element::RepIdx index)855 Element::RepIdx resolveRightChild(Element::RepIdx index) {
856 dassert(index != Element::kInvalidRepIdx);
857 dassert(index != Element::kOpaqueRepIdx);
858
859 Element::RepIdx current = getElementRep(index).child.right;
860 if (current == Element::kOpaqueRepIdx) {
861 current = resolveLeftChild(index);
862 while (current != Element::kInvalidRepIdx) {
863 Element::RepIdx next = resolveRightSibling(current);
864 if (next == Element::kInvalidRepIdx)
865 break;
866 current = next;
867 }
868
869 // The resolveRightSibling calls should have eventually updated this nodes right
870 // child pointer to point to the node we are about to return.
871 dassert(getElementRep(index).child.right == current);
872 }
873
874 return current;
875 }
876
877 // Return the index of the right sibling of the Element with index 'index', resolving
878 // the right sibling to a realized Element if it is currently opaque.
resolveRightSibling(Element::RepIdx index)879 Element::RepIdx resolveRightSibling(Element::RepIdx index) {
880 dassert(index != Element::kInvalidRepIdx);
881 dassert(index != Element::kOpaqueRepIdx);
882
883 // If the right sibling is anything other than opaque, then we are done here.
884 ElementRep* rep = &getElementRep(index);
885 if (rep->sibling.right != Element::kOpaqueRepIdx)
886 return rep->sibling.right;
887
888 BSONElement elt = getSerializedElement(*rep);
889 BSONElement rightElt(elt.rawdata() + elt.size());
890
891 if (!rightElt.eoo()) {
892 // Do this now before other writes so compiler can exploit knowing
893 // that we are not eoo.
894 const int32_t fieldNameSize = rightElt.fieldNameSize();
895 const int32_t totalSize = rightElt.size();
896
897 Element::RepIdx inserted;
898 ElementRep& newRep = makeNewRep(&inserted);
899 // Calling makeNewRep invalidates rep since it may cause a reallocation of
900 // the element vector. After calling insertElement, we reacquire rep.
901 rep = &getElementRep(index);
902
903 newRep.serialized = true;
904 newRep.objIdx = rep->objIdx;
905 newRep.offset = getElementOffset(getObject(rep->objIdx), rightElt);
906 newRep.parent = rep->parent;
907 newRep.sibling.left = index;
908 newRep.sibling.right = Element::kOpaqueRepIdx;
909 // If this new object has possible substructure, mark its children as opaque.
910 if (!isLeaf(rightElt)) {
911 newRep.child.left = Element::kOpaqueRepIdx;
912 newRep.child.right = Element::kOpaqueRepIdx;
913 }
914 newRep.setFieldNameSizeAndTotalSize(fieldNameSize, totalSize);
915 rep->sibling.right = inserted;
916 } else {
917 rep->sibling.right = Element::kInvalidRepIdx;
918 // If we have found the end of this object, then our (necessarily existing)
919 // parent's necessarily opaque right child is now determined to be us.
920 dassert(rep->parent <= Element::kMaxRepIdx);
921 ElementRep& parentRep = getElementRep(rep->parent);
922 dassert(parentRep.child.right == Element::kOpaqueRepIdx);
923 parentRep.child.right = index;
924 }
925
926 dassert(rep->sibling.right != Element::kOpaqueRepIdx);
927 return rep->sibling.right;
928 }
929
930 // Find the ElementRep at index 'index', and mark it and all of its currently
931 // serialized parents as non-serialized.
deserialize(Element::RepIdx index)932 void deserialize(Element::RepIdx index) {
933 while (index != Element::kInvalidRepIdx) {
934 ElementRep& rep = getElementRep(index);
935 // It does not make sense for leaf Elements to become deserialized, and
936 // requests to do so indicate a bug in the implementation of the library.
937 dassert(!isLeaf(rep));
938 if (!rep.serialized)
939 break;
940 rep.serialized = false;
941 index = rep.parent;
942 }
943 }
944
doesNotAlias(StringData s) const945 inline bool doesNotAlias(StringData s) const {
946 // StringData may come from either the field name heap or the leaf builder.
947 return doesNotAliasLeafBuilder(s) && !inFieldNameHeap(s.rawData());
948 }
949
doesNotAliasLeafBuilder(StringData s) const950 inline bool doesNotAliasLeafBuilder(StringData s) const {
951 return !inLeafBuilder(s.rawData());
952 }
953
doesNotAlias(const BSONElement & e) const954 inline bool doesNotAlias(const BSONElement& e) const {
955 // A BSONElement could alias the leaf builder.
956 return !inLeafBuilder(e.rawdata());
957 }
958
doesNotAlias(const BSONObj & o) const959 inline bool doesNotAlias(const BSONObj& o) const {
960 // A BSONObj could alias the leaf buildr.
961 return !inLeafBuilder(o.objdata());
962 }
963
964 // Returns true if 'data' points within the leaf BufBuilder.
inLeafBuilder(const char * data) const965 inline bool inLeafBuilder(const char* data) const {
966 // TODO: Write up something documenting that the following is technically UB due
967 // to illegality of comparing pointers to different aggregates for ordering. Also,
968 // do we need to do anything to prevent the optimizer from compiling this out on
969 // that basis? I've seen clang do that. We may need to declare these volatile. On
970 // the other hand, these should only be being called under a dassert, so the
971 // optimizer is maybe not in play, and the UB is unlikely to be a problem in
972 // practice.
973 const char* const start = _leafBuf.buf();
974 const char* const end = start + _leafBuf.len();
975 return (data >= start) && (data < end);
976 }
977
978 // Returns true if 'data' points within the field name heap.
inFieldNameHeap(const char * data) const979 inline bool inFieldNameHeap(const char* data) const {
980 if (_fieldNames.empty())
981 return false;
982 const char* const start = &_fieldNames.front();
983 const char* const end = &_fieldNames.back();
984 return (data >= start) && (data < end);
985 }
986
reserveDamageEvents(size_t expectedEvents)987 void reserveDamageEvents(size_t expectedEvents) {
988 _damages.reserve(expectedEvents);
989 }
990
getInPlaceUpdates(DamageVector * damages,const char ** source,size_t * size)991 bool getInPlaceUpdates(DamageVector* damages, const char** source, size_t* size) {
992 // If some operations were not in-place, set source to NULL and return false to
993 // inform upstream that we are not returning in-place result data.
994 if (_inPlaceMode == Document::kInPlaceDisabled) {
995 damages->clear();
996 *source = NULL;
997 if (size)
998 *size = 0;
999 return false;
1000 }
1001
1002 // Set up the source and source size out parameters.
1003 *source = _objects[0].objdata();
1004 if (size)
1005 *size = _objects[0].objsize();
1006
1007 // Swap our damage event queue with upstream, and reset ours to an empty vector. In
1008 // princple, we can do another round of in-place updates.
1009 damages->swap(_damages);
1010 _damages.clear();
1011
1012 return true;
1013 }
1014
disableInPlaceUpdates()1015 void disableInPlaceUpdates() {
1016 _inPlaceMode = Document::kInPlaceDisabled;
1017 }
1018
getCurrentInPlaceMode() const1019 Document::InPlaceMode getCurrentInPlaceMode() const {
1020 return _inPlaceMode;
1021 }
1022
isInPlaceModeEnabled() const1023 bool isInPlaceModeEnabled() const {
1024 return getCurrentInPlaceMode() == Document::kInPlaceEnabled;
1025 }
1026
recordDamageEvent(DamageEvent::OffsetSizeType targetOffset,DamageEvent::OffsetSizeType sourceOffset,size_t size)1027 void recordDamageEvent(DamageEvent::OffsetSizeType targetOffset,
1028 DamageEvent::OffsetSizeType sourceOffset,
1029 size_t size) {
1030 _damages.push_back(DamageEvent());
1031 _damages.back().targetOffset = targetOffset;
1032 _damages.back().sourceOffset = sourceOffset;
1033 _damages.back().size = size;
1034 if (kDebugBuild && paranoid) {
1035 // Force damage events to new addresses to catch invalidation errors.
1036 DamageVector new_damages(_damages);
1037 _damages.swap(new_damages);
1038 }
1039 }
1040
1041 // Check all preconditions on doing an in-place update, except for size match.
canUpdateInPlace(const ElementRep & sourceRep,const ElementRep & targetRep)1042 bool canUpdateInPlace(const ElementRep& sourceRep, const ElementRep& targetRep) {
1043 // NOTE: CodeWScope might arguably be excluded since it has substructure, but
1044 // mutable doesn't permit navigation into its document, so we can handle it.
1045
1046 // We can only do an in-place update to an element that is serialized and is not in
1047 // the leaf heap.
1048 //
1049 // TODO: In the future, we can replace values in the leaf heap if they are of the
1050 // same size as the origin was. For now, we don't support that.
1051 if (!hasValue(targetRep) || (targetRep.objIdx == kLeafObjIdx))
1052 return false;
1053
1054 // sourceRep should be newly created, so it must have a value representation.
1055 dassert(hasValue(sourceRep));
1056
1057 // For a target that has substructure, we only permit in-place updates if there
1058 // cannot be ElementReps that reference data within the target. We don't need to
1059 // worry about ElementReps for source, since it is newly created. The only way
1060 // there can be ElementReps referring into substructure is if the Element has
1061 // non-empty non-opaque child references.
1062 if (!isLeaf(targetRep)) {
1063 if (((targetRep.child.left != Element::kOpaqueRepIdx) &&
1064 (targetRep.child.left != Element::kInvalidRepIdx)) ||
1065 ((targetRep.child.right != Element::kOpaqueRepIdx) &&
1066 (targetRep.child.right != Element::kInvalidRepIdx)))
1067 return false;
1068 }
1069
1070 return true;
1071 }
1072
1073 template <typename Builder>
1074 void writeElement(Element::RepIdx repIdx,
1075 Builder* builder,
1076 const StringData* fieldName = NULL) const;
1077
1078 template <typename Builder>
1079 void writeChildren(Element::RepIdx repIdx, Builder* builder) const;
1080
1081 private:
1082 // Insert the given field name into the field name heap, and return an ID for this
1083 // field name.
insertFieldName(StringData fieldName)1084 int32_t insertFieldName(StringData fieldName) {
1085 const uint32_t id = _fieldNames.size();
1086 if (!fieldName.empty())
1087 _fieldNames.insert(
1088 _fieldNames.end(), fieldName.rawData(), fieldName.rawData() + fieldName.size());
1089 _fieldNames.push_back('\0');
1090 if (kDebugBuild && paranoid) {
1091 // Force names to new addresses to catch invalidation errors.
1092 std::vector<char> new_fieldNames(_fieldNames);
1093 _fieldNames.swap(new_fieldNames);
1094 }
1095 return id;
1096 }
1097
1098 // Retrieve the field name with the given id.
getFieldName(uint32_t fieldNameId) const1099 StringData getFieldName(uint32_t fieldNameId) const {
1100 dassert(fieldNameId < _fieldNames.size());
1101 return &_fieldNames[fieldNameId];
1102 }
1103
1104 size_t _numElements;
1105 ElementRep _fastElements[kFastReps];
1106 std::vector<ElementRep> _slowElements;
1107
1108 std::vector<BSONObj> _objects;
1109 std::vector<char> _fieldNames;
1110
1111 // We own a BufBuilder to avoid BSONObjBuilder's ref-count mechanism which would throw
1112 // off our offset calculations.
1113 BufBuilder _leafBuf;
1114 BSONObjBuilder _leafBuilder;
1115
1116 // Sometimes, we need a temporary storage area for a fieldName, because the source of
1117 // the fieldName is in the same buffer that we want to write to, potentially
1118 // reallocating it. In such cases, we temporarily store the value here, rather than
1119 // creating and destroying a string and its buffer each time.
1120 std::string _fieldNameScratch;
1121
1122 // Queue of damage events and status bit for whether in-place updates are possible.
1123 DamageVector _damages;
1124 Document::InPlaceMode _inPlaceMode;
1125 };
1126
addSiblingLeft(Element e)1127 Status Element::addSiblingLeft(Element e) {
1128 invariant(ok());
1129 invariant(e.ok());
1130 invariant(_doc == e._doc);
1131
1132 Document::Impl& impl = getDocument().getImpl();
1133 ElementRep& newRep = impl.getElementRep(e._repIdx);
1134
1135 // check that new element roots a clean subtree.
1136 if (!canAttach(e._repIdx, newRep))
1137 return getAttachmentError(newRep);
1138
1139 ElementRep& thisRep = impl.getElementRep(_repIdx);
1140
1141 dassert(thisRep.parent != kOpaqueRepIdx);
1142 if (thisRep.parent == kInvalidRepIdx)
1143 return Status(ErrorCodes::IllegalOperation,
1144 "Attempt to add a sibling to an element without a parent");
1145
1146 ElementRep& parentRep = impl.getElementRep(thisRep.parent);
1147 dassert(!impl.isLeaf(parentRep));
1148
1149 impl.disableInPlaceUpdates();
1150
1151 // The new element shares our parent.
1152 newRep.parent = thisRep.parent;
1153
1154 // We are the new element's right sibling.
1155 newRep.sibling.right = _repIdx;
1156
1157 // The new element's left sibling is our left sibling.
1158 newRep.sibling.left = thisRep.sibling.left;
1159
1160 // If the new element has a left sibling after the adjustments above, then that left
1161 // sibling must be updated to have the new element as its right sibling.
1162 if (newRep.sibling.left != kInvalidRepIdx)
1163 impl.getElementRep(thisRep.sibling.left).sibling.right = e._repIdx;
1164
1165 // The new element becomes our left sibling.
1166 thisRep.sibling.left = e._repIdx;
1167
1168 // If we were our parent's left child, then we no longer are. Make the new right
1169 // sibling the right child.
1170 if (parentRep.child.left == _repIdx)
1171 parentRep.child.left = e._repIdx;
1172
1173 impl.deserialize(thisRep.parent);
1174
1175 return Status::OK();
1176 }
1177
addSiblingRight(Element e)1178 Status Element::addSiblingRight(Element e) {
1179 invariant(ok());
1180 invariant(e.ok());
1181 invariant(_doc == e._doc);
1182
1183 Document::Impl& impl = getDocument().getImpl();
1184 ElementRep* newRep = &impl.getElementRep(e._repIdx);
1185
1186 // check that new element roots a clean subtree.
1187 if (!canAttach(e._repIdx, *newRep))
1188 return getAttachmentError(*newRep);
1189
1190 ElementRep* thisRep = &impl.getElementRep(_repIdx);
1191
1192 dassert(thisRep->parent != kOpaqueRepIdx);
1193 if (thisRep->parent == kInvalidRepIdx)
1194 return Status(ErrorCodes::IllegalOperation,
1195 "Attempt to add a sibling to an element without a parent");
1196
1197 ElementRep* parentRep = &impl.getElementRep(thisRep->parent);
1198 dassert(!impl.isLeaf(*parentRep));
1199
1200 impl.disableInPlaceUpdates();
1201
1202 // If our current right sibling is opaque it needs to be resolved. This will invalidate
1203 // our reps so we need to reacquire them.
1204 Element::RepIdx rightSiblingIdx = thisRep->sibling.right;
1205 if (rightSiblingIdx == kOpaqueRepIdx) {
1206 rightSiblingIdx = impl.resolveRightSibling(_repIdx);
1207 dassert(rightSiblingIdx != kOpaqueRepIdx);
1208 newRep = &impl.getElementRep(e._repIdx);
1209 thisRep = &impl.getElementRep(_repIdx);
1210 parentRep = &impl.getElementRep(thisRep->parent);
1211 }
1212
1213 // The new element shares our parent.
1214 newRep->parent = thisRep->parent;
1215
1216 // We are the new element's left sibling.
1217 newRep->sibling.left = _repIdx;
1218
1219 // The new element right sibling is our right sibling.
1220 newRep->sibling.right = rightSiblingIdx;
1221
1222 // The new element becomes our right sibling.
1223 thisRep->sibling.right = e._repIdx;
1224
1225 // If the new element has a right sibling after the adjustments above, then that right
1226 // sibling must be updated to have the new element as its left sibling.
1227 if (newRep->sibling.right != kInvalidRepIdx)
1228 impl.getElementRep(rightSiblingIdx).sibling.left = e._repIdx;
1229
1230 // If we were our parent's right child, then we no longer are. Make the new right
1231 // sibling the right child.
1232 if (parentRep->child.right == _repIdx)
1233 parentRep->child.right = e._repIdx;
1234
1235 impl.deserialize(thisRep->parent);
1236
1237 return Status::OK();
1238 }
1239
remove()1240 Status Element::remove() {
1241 invariant(ok());
1242 Document::Impl& impl = getDocument().getImpl();
1243
1244 // We need to realize any opaque right sibling, because we are going to need to set its
1245 // left sibling. Do this before acquiring thisRep since otherwise we would potentially
1246 // invalidate it.
1247 impl.resolveRightSibling(_repIdx);
1248
1249 ElementRep& thisRep = impl.getElementRep(_repIdx);
1250
1251 if (thisRep.parent == kInvalidRepIdx)
1252 return Status(ErrorCodes::IllegalOperation, "trying to remove a parentless element");
1253 impl.disableInPlaceUpdates();
1254
1255 // If our right sibling is not the end of the object, then set its left sibling to be
1256 // our left sibling.
1257 if (thisRep.sibling.right != kInvalidRepIdx)
1258 impl.getElementRep(thisRep.sibling.right).sibling.left = thisRep.sibling.left;
1259
1260 // Similarly, if our left sibling is not the beginning of the obejct, then set its
1261 // right sibling to be our right sibling.
1262 if (thisRep.sibling.left != kInvalidRepIdx) {
1263 ElementRep& leftRep = impl.getElementRep(thisRep.sibling.left);
1264 leftRep.sibling.right = thisRep.sibling.right;
1265 }
1266
1267 // If this element was our parent's right child, then our left sibling is the new right
1268 // child.
1269 ElementRep& parentRep = impl.getElementRep(thisRep.parent);
1270 if (parentRep.child.right == _repIdx)
1271 parentRep.child.right = thisRep.sibling.left;
1272
1273 // Similarly, if this element was our parent's left child, then our right sibling is
1274 // the new left child.
1275 if (parentRep.child.left == _repIdx)
1276 parentRep.child.left = thisRep.sibling.right;
1277
1278 impl.deserialize(thisRep.parent);
1279
1280 // The Element becomes detached.
1281 thisRep.parent = kInvalidRepIdx;
1282 thisRep.sibling.left = kInvalidRepIdx;
1283 thisRep.sibling.right = kInvalidRepIdx;
1284
1285 return Status::OK();
1286 }
1287
rename(StringData newName)1288 Status Element::rename(StringData newName) {
1289 invariant(ok());
1290 Document::Impl& impl = getDocument().getImpl();
1291
1292 if (_repIdx == kRootRepIdx)
1293 return Status(ErrorCodes::IllegalOperation,
1294 "Invalid attempt to rename the root element of a document");
1295
1296 dassert(impl.doesNotAlias(newName));
1297
1298 // TODO: Some rename operations may be possible to do in-place.
1299 impl.disableInPlaceUpdates();
1300
1301 // Operations below may invalidate thisRep, so we may need to reacquire it.
1302 ElementRep* thisRep = &impl.getElementRep(_repIdx);
1303
1304 // For non-leaf serialized elements, we can realize any opaque relatives and then
1305 // convert ourselves to deserialized.
1306 if (thisRep->objIdx != kInvalidObjIdx && !impl.isLeaf(*thisRep)) {
1307 const bool array = (impl.getType(*thisRep) == mongo::Array);
1308
1309 // Realize any opaque right sibling or left child now, since otherwise we will lose
1310 // the ability to do so.
1311 impl.resolveLeftChild(_repIdx);
1312 impl.resolveRightSibling(_repIdx);
1313
1314 // The resolve calls above may have invalidated thisRep, we need to reacquire it.
1315 thisRep = &impl.getElementRep(_repIdx);
1316
1317 // Set this up as a non-supported deserialized element. We will set the fieldName
1318 // in the else clause in the block below.
1319 impl.deserialize(_repIdx);
1320
1321 thisRep->array = array;
1322
1323 // TODO: If we ever want to be able to add to the left or right of an opaque object
1324 // without expanding, this may need to change.
1325 thisRep->objIdx = kInvalidObjIdx;
1326 }
1327
1328 if (impl.hasValue(*thisRep)) {
1329 // For leaf elements we just create a new Element with the current value and
1330 // replace. Note that the 'setValue' call below will invalidate thisRep.
1331 Element replacement = _doc->makeElementWithNewFieldName(newName, *this);
1332 setValue(replacement._repIdx).transitional_ignore();
1333 } else {
1334 // The easy case: just update what our field name offset refers to.
1335 impl.insertFieldName(*thisRep, newName);
1336 }
1337
1338 return Status::OK();
1339 }
1340
leftChild() const1341 Element Element::leftChild() const {
1342 invariant(ok());
1343
1344 // Capturing Document::Impl by non-const ref exploits the constness loophole
1345 // created by our Impl so that we can let leftChild be lazily evaluated, even for a
1346 // const Element.
1347 Document::Impl& impl = _doc->getImpl();
1348 const Element::RepIdx leftChildIdx = impl.resolveLeftChild(_repIdx);
1349 dassert(leftChildIdx != kOpaqueRepIdx);
1350 return Element(_doc, leftChildIdx);
1351 }
1352
rightChild() const1353 Element Element::rightChild() const {
1354 invariant(ok());
1355
1356 // Capturing Document::Impl by non-const ref exploits the constness loophole
1357 // created by our Impl so that we can let leftChild be lazily evaluated, even for a
1358 // const Element.
1359 Document::Impl& impl = _doc->getImpl();
1360 const Element::RepIdx rightChildIdx = impl.resolveRightChild(_repIdx);
1361 dassert(rightChildIdx != kOpaqueRepIdx);
1362 return Element(_doc, rightChildIdx);
1363 }
1364
hasChildren() const1365 bool Element::hasChildren() const {
1366 invariant(ok());
1367 // Capturing Document::Impl by non-const ref exploits the constness loophole
1368 // created by our Impl so that we can let leftChild be lazily evaluated, even for a
1369 // const Element.
1370 Document::Impl& impl = _doc->getImpl();
1371 return impl.resolveLeftChild(_repIdx) != kInvalidRepIdx;
1372 }
1373
leftSibling(size_t distance) const1374 Element Element::leftSibling(size_t distance) const {
1375 invariant(ok());
1376 const Document::Impl& impl = getDocument().getImpl();
1377 Element::RepIdx current = _repIdx;
1378 while ((current != kInvalidRepIdx) && (distance-- != 0)) {
1379 // We are (currently) never left opaque, so don't need to resolve.
1380 current = impl.getElementRep(current).sibling.left;
1381 }
1382 return Element(_doc, current);
1383 }
1384
rightSibling(size_t distance) const1385 Element Element::rightSibling(size_t distance) const {
1386 invariant(ok());
1387
1388 // Capturing Document::Impl by non-const ref exploits the constness loophole
1389 // created by our Impl so that we can let rightSibling be lazily evaluated, even for a
1390 // const Element.
1391 Document::Impl& impl = _doc->getImpl();
1392 Element::RepIdx current = _repIdx;
1393 while ((current != kInvalidRepIdx) && (distance-- != 0))
1394 current = impl.resolveRightSibling(current);
1395 return Element(_doc, current);
1396 }
1397
parent() const1398 Element Element::parent() const {
1399 invariant(ok());
1400 const Document::Impl& impl = getDocument().getImpl();
1401 const Element::RepIdx parentIdx = impl.getElementRep(_repIdx).parent;
1402 dassert(parentIdx != kOpaqueRepIdx);
1403 return Element(_doc, parentIdx);
1404 }
1405
findNthChild(size_t n) const1406 Element Element::findNthChild(size_t n) const {
1407 invariant(ok());
1408 Document::Impl& impl = _doc->getImpl();
1409 Element::RepIdx current = _repIdx;
1410 current = impl.resolveLeftChild(current);
1411 while ((current != kInvalidRepIdx) && (n-- != 0))
1412 current = impl.resolveRightSibling(current);
1413 return Element(_doc, current);
1414 }
1415
findFirstChildNamed(StringData name) const1416 Element Element::findFirstChildNamed(StringData name) const {
1417 invariant(ok());
1418 Document::Impl& impl = _doc->getImpl();
1419 invariant(getType() != BSONType::Array);
1420 Element::RepIdx current = _repIdx;
1421 current = impl.resolveLeftChild(current);
1422 // TODO: Could DRY this loop with the identical logic in findElementNamed.
1423 while ((current != kInvalidRepIdx) && (impl.getFieldName(impl.getElementRep(current)) != name))
1424 current = impl.resolveRightSibling(current);
1425 return Element(_doc, current);
1426 }
1427
findElementNamed(StringData name) const1428 Element Element::findElementNamed(StringData name) const {
1429 invariant(ok());
1430 Document::Impl& impl = _doc->getImpl();
1431 Element::RepIdx current = _repIdx;
1432 while ((current != kInvalidRepIdx) && (impl.getFieldName(impl.getElementRep(current)) != name))
1433 current = impl.resolveRightSibling(current);
1434 return Element(_doc, current);
1435 }
1436
countSiblingsLeft() const1437 size_t Element::countSiblingsLeft() const {
1438 invariant(ok());
1439 const Document::Impl& impl = getDocument().getImpl();
1440 Element::RepIdx current = _repIdx;
1441 size_t result = 0;
1442 while (true) {
1443 // We are (currently) never left opaque, so don't need to resolve.
1444 current = impl.getElementRep(current).sibling.left;
1445 if (current == kInvalidRepIdx)
1446 break;
1447 ++result;
1448 }
1449 return result;
1450 }
1451
countSiblingsRight() const1452 size_t Element::countSiblingsRight() const {
1453 invariant(ok());
1454 Document::Impl& impl = _doc->getImpl();
1455 Element::RepIdx current = _repIdx;
1456 size_t result = 0;
1457 while (true) {
1458 current = impl.resolveRightSibling(current);
1459 if (current == kInvalidRepIdx)
1460 break;
1461 ++result;
1462 }
1463 return result;
1464 }
1465
countChildren() const1466 size_t Element::countChildren() const {
1467 invariant(ok());
1468 Document::Impl& impl = _doc->getImpl();
1469 Element::RepIdx current = _repIdx;
1470 current = impl.resolveLeftChild(current);
1471 size_t result = 0;
1472 while (current != kInvalidRepIdx) {
1473 ++result;
1474 current = impl.resolveRightSibling(current);
1475 }
1476 return result;
1477 }
1478
hasValue() const1479 bool Element::hasValue() const {
1480 invariant(ok());
1481 const Document::Impl& impl = getDocument().getImpl();
1482 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1483 return impl.hasValue(thisRep);
1484 }
1485
isNumeric() const1486 bool Element::isNumeric() const {
1487 invariant(ok());
1488 const Document::Impl& impl = getDocument().getImpl();
1489 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1490 const BSONType type = impl.getType(thisRep);
1491 return ((type == mongo::NumberLong) || (type == mongo::NumberInt) ||
1492 (type == mongo::NumberDouble) || (type == mongo::NumberDecimal));
1493 }
1494
isIntegral() const1495 bool Element::isIntegral() const {
1496 invariant(ok());
1497 const Document::Impl& impl = getDocument().getImpl();
1498 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1499 const BSONType type = impl.getType(thisRep);
1500 return ((type == mongo::NumberLong) || (type == mongo::NumberInt));
1501 }
1502
getValue() const1503 const BSONElement Element::getValue() const {
1504 invariant(ok());
1505 const Document::Impl& impl = getDocument().getImpl();
1506 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1507 if (impl.hasValue(thisRep))
1508 return impl.getSerializedElement(thisRep);
1509 return BSONElement();
1510 }
1511
getValueSafeNum() const1512 SafeNum Element::getValueSafeNum() const {
1513 switch (getType()) {
1514 case mongo::NumberInt:
1515 return static_cast<int32_t>(getValueInt());
1516 case mongo::NumberLong:
1517 return static_cast<int64_t>(getValueLong());
1518 case mongo::NumberDouble:
1519 return getValueDouble();
1520 case mongo::NumberDecimal:
1521 return getValueDecimal();
1522 default:
1523 return SafeNum();
1524 }
1525 }
1526
compareWithElement(const ConstElement & other,const StringData::ComparatorInterface * comparator,bool considerFieldName) const1527 int Element::compareWithElement(const ConstElement& other,
1528 const StringData::ComparatorInterface* comparator,
1529 bool considerFieldName) const {
1530 invariant(ok());
1531 invariant(other.ok());
1532
1533 // Short circuit a tautological compare.
1534 if ((_repIdx == other.getIdx()) && (_doc == &other.getDocument()))
1535 return 0;
1536
1537 // If either Element can represent its current value as a BSONElement, then we can
1538 // obtain its value and use compareWithBSONElement. If both Elements have a
1539 // representation as a BSONElement, compareWithBSONElement will notice that the first
1540 // argument has a value and delegate to BSONElement::woCompare.
1541
1542 const Document::Impl& impl = getDocument().getImpl();
1543 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1544
1545 // Subtle: we must negate the comparison result here because we are reversing the
1546 // argument order in this call.
1547 //
1548 // TODO: Andy has suggested that this may not be legal since woCompare is not reflexive
1549 // in all cases.
1550 if (impl.hasValue(thisRep))
1551 return -other.compareWithBSONElement(
1552 impl.getSerializedElement(thisRep), comparator, considerFieldName);
1553
1554 const Document::Impl& oimpl = other.getDocument().getImpl();
1555 const ElementRep& otherRep = oimpl.getElementRep(other.getIdx());
1556
1557 if (oimpl.hasValue(otherRep))
1558 return compareWithBSONElement(
1559 oimpl.getSerializedElement(otherRep), comparator, considerFieldName);
1560
1561 // Leaf elements should always have a value, so we should only be dealing with Objects
1562 // or Arrays here.
1563 dassert(!impl.isLeaf(thisRep));
1564 dassert(!oimpl.isLeaf(otherRep));
1565
1566 // Obtain the canonical types for this Element and the BSONElement, if they are
1567 // different use the difference as the result. Please see BSONElement::woCompare for
1568 // details. We know that thisRep is not a number, so we don't need to check that
1569 // particular case.
1570 const int leftCanonType = canonicalizeBSONType(impl.getType(thisRep));
1571 const int rightCanonType = canonicalizeBSONType(oimpl.getType(otherRep));
1572 const int diffCanon = leftCanonType - rightCanonType;
1573 if (diffCanon != 0)
1574 return diffCanon;
1575
1576 // If we are considering field names, and the field names do not compare as equal,
1577 // return the field name ordering as the element ordering.
1578 if (considerFieldName) {
1579 const int fnamesComp = impl.getFieldName(thisRep).compare(oimpl.getFieldName(otherRep));
1580 if (fnamesComp != 0)
1581 return fnamesComp;
1582 }
1583
1584 const bool considerChildFieldNames =
1585 (impl.getType(thisRep) != mongo::Array) && (oimpl.getType(otherRep) != mongo::Array);
1586
1587 // We are dealing with either two objects, or two arrays. We need to consider the child
1588 // elements individually. We walk two iterators forward over the children and compare
1589 // them. Length mismatches are handled by checking early for reaching the end of the
1590 // children.
1591 ConstElement thisIter = leftChild();
1592 ConstElement otherIter = other.leftChild();
1593
1594 while (true) {
1595 if (!thisIter.ok())
1596 return !otherIter.ok() ? 0 : -1;
1597 if (!otherIter.ok())
1598 return 1;
1599
1600 const int result =
1601 thisIter.compareWithElement(otherIter, comparator, considerChildFieldNames);
1602 if (result != 0)
1603 return result;
1604
1605 thisIter = thisIter.rightSibling();
1606 otherIter = otherIter.rightSibling();
1607 }
1608 }
1609
compareWithBSONElement(const BSONElement & other,const StringData::ComparatorInterface * comparator,bool considerFieldName) const1610 int Element::compareWithBSONElement(const BSONElement& other,
1611 const StringData::ComparatorInterface* comparator,
1612 bool considerFieldName) const {
1613 invariant(ok());
1614
1615 const Document::Impl& impl = getDocument().getImpl();
1616 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1617
1618 // If we have a representation as a BSONElement, we can just use BSONElement::woCompare
1619 // to do the entire comparison.
1620 if (impl.hasValue(thisRep))
1621 return impl.getSerializedElement(thisRep).woCompare(other, considerFieldName, comparator);
1622
1623 // Leaf elements should always have a value, so we should only be dealing with Objects
1624 // or Arrays here.
1625 dassert(!impl.isLeaf(thisRep));
1626
1627 // Obtain the canonical types for this Element and the BSONElement, if they are
1628 // different use the difference as the result. Please see BSONElement::woCompare for
1629 // details. We know that thisRep is not a number, so we don't need to check that
1630 // particular case.
1631 const int leftCanonType = canonicalizeBSONType(impl.getType(thisRep));
1632 const int rightCanonType = canonicalizeBSONType(other.type());
1633 const int diffCanon = leftCanonType - rightCanonType;
1634 if (diffCanon != 0)
1635 return diffCanon;
1636
1637 // If we are considering field names, and the field names do not compare as equal,
1638 // return the field name ordering as the element ordering.
1639 if (considerFieldName) {
1640 const int fnamesComp = impl.getFieldName(thisRep).compare(other.fieldNameStringData());
1641 if (fnamesComp != 0)
1642 return fnamesComp;
1643 }
1644
1645 const bool considerChildFieldNames =
1646 (impl.getType(thisRep) != mongo::Array) && (other.type() != mongo::Array);
1647
1648 return compareWithBSONObj(other.Obj(), comparator, considerChildFieldNames);
1649 }
1650
compareWithBSONObj(const BSONObj & other,const StringData::ComparatorInterface * comparator,bool considerFieldName) const1651 int Element::compareWithBSONObj(const BSONObj& other,
1652 const StringData::ComparatorInterface* comparator,
1653 bool considerFieldName) const {
1654 invariant(ok());
1655
1656 const Document::Impl& impl = getDocument().getImpl();
1657 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1658 invariant(!impl.isLeaf(thisRep));
1659
1660 // We are dealing with either two objects, or two arrays. We need to consider the child
1661 // elements individually. We walk two iterators forward over the children and compare
1662 // them. Length mismatches are handled by checking early for reaching the end of the
1663 // children.
1664 ConstElement thisIter = leftChild();
1665 BSONObjIterator otherIter(other);
1666
1667 while (true) {
1668 const BSONElement otherVal = otherIter.next();
1669
1670 if (!thisIter.ok())
1671 return otherVal.eoo() ? 0 : -1;
1672 if (otherVal.eoo())
1673 return 1;
1674
1675 const int result = thisIter.compareWithBSONElement(otherVal, comparator, considerFieldName);
1676 if (result != 0)
1677 return result;
1678
1679 thisIter = thisIter.rightSibling();
1680 }
1681 }
1682
writeTo(BSONObjBuilder * const builder) const1683 void Element::writeTo(BSONObjBuilder* const builder) const {
1684 invariant(ok());
1685 const Document::Impl& impl = getDocument().getImpl();
1686 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1687 invariant(impl.getType(thisRep) == mongo::Object);
1688 if (thisRep.parent == kInvalidRepIdx && _repIdx == kRootRepIdx) {
1689 // If this is the root element, then we need to handle it differently, since it
1690 // doesn't have a field name and should embed directly, rather than as an object.
1691 impl.writeChildren(_repIdx, builder);
1692 } else {
1693 impl.writeElement(_repIdx, builder);
1694 }
1695 }
1696
writeArrayTo(BSONArrayBuilder * const builder) const1697 void Element::writeArrayTo(BSONArrayBuilder* const builder) const {
1698 invariant(ok());
1699 const Document::Impl& impl = getDocument().getImpl();
1700 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1701 invariant(impl.getType(thisRep) == mongo::Array);
1702 return impl.writeChildren(_repIdx, builder);
1703 }
1704
setValueDouble(const double value)1705 Status Element::setValueDouble(const double value) {
1706 invariant(ok());
1707 Document::Impl& impl = getDocument().getImpl();
1708 ElementRep thisRep = impl.getElementRep(_repIdx);
1709 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1710 Element newValue = getDocument().makeElementDouble(fieldName, value);
1711 return setValue(newValue._repIdx);
1712 }
1713
setValueString(StringData value)1714 Status Element::setValueString(StringData value) {
1715 invariant(ok());
1716 Document::Impl& impl = getDocument().getImpl();
1717
1718 dassert(impl.doesNotAlias(value));
1719
1720 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1721 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1722 Element newValue = getDocument().makeElementString(fieldName, value);
1723 return setValue(newValue._repIdx);
1724 }
1725
setValueObject(const BSONObj & value)1726 Status Element::setValueObject(const BSONObj& value) {
1727 invariant(ok());
1728 Document::Impl& impl = getDocument().getImpl();
1729
1730 dassert(impl.doesNotAlias(value));
1731
1732 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1733 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1734 Element newValue = getDocument().makeElementObject(fieldName, value);
1735 return setValue(newValue._repIdx);
1736 }
1737
setValueArray(const BSONObj & value)1738 Status Element::setValueArray(const BSONObj& value) {
1739 invariant(ok());
1740 Document::Impl& impl = getDocument().getImpl();
1741
1742 dassert(impl.doesNotAlias(value));
1743
1744 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1745 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1746 Element newValue = getDocument().makeElementArray(fieldName, value);
1747 return setValue(newValue._repIdx);
1748 }
1749
setValueBinary(const uint32_t len,mongo::BinDataType binType,const void * const data)1750 Status Element::setValueBinary(const uint32_t len,
1751 mongo::BinDataType binType,
1752 const void* const data) {
1753 invariant(ok());
1754 Document::Impl& impl = getDocument().getImpl();
1755
1756 // TODO: Alias check for binary data?
1757
1758 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1759 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1760 Element newValue = getDocument().makeElementBinary(fieldName, len, binType, data);
1761 return setValue(newValue._repIdx);
1762 }
1763
setValueUndefined()1764 Status Element::setValueUndefined() {
1765 invariant(ok());
1766 Document::Impl& impl = getDocument().getImpl();
1767 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1768 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1769 Element newValue = getDocument().makeElementUndefined(fieldName);
1770 return setValue(newValue._repIdx);
1771 }
1772
setValueOID(const OID value)1773 Status Element::setValueOID(const OID value) {
1774 invariant(ok());
1775 Document::Impl& impl = getDocument().getImpl();
1776 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1777 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1778 Element newValue = getDocument().makeElementOID(fieldName, value);
1779 return setValue(newValue._repIdx);
1780 }
1781
setValueBool(const bool value)1782 Status Element::setValueBool(const bool value) {
1783 invariant(ok());
1784 Document::Impl& impl = getDocument().getImpl();
1785 ElementRep thisRep = impl.getElementRep(_repIdx);
1786 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1787 Element newValue = getDocument().makeElementBool(fieldName, value);
1788 return setValue(newValue._repIdx);
1789 }
1790
setValueDate(const Date_t value)1791 Status Element::setValueDate(const Date_t value) {
1792 invariant(ok());
1793 Document::Impl& impl = getDocument().getImpl();
1794 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1795 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1796 Element newValue = getDocument().makeElementDate(fieldName, value);
1797 return setValue(newValue._repIdx);
1798 }
1799
setValueNull()1800 Status Element::setValueNull() {
1801 invariant(ok());
1802 Document::Impl& impl = getDocument().getImpl();
1803 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1804 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1805 Element newValue = getDocument().makeElementNull(fieldName);
1806 return setValue(newValue._repIdx);
1807 }
1808
setValueRegex(StringData re,StringData flags)1809 Status Element::setValueRegex(StringData re, StringData flags) {
1810 invariant(ok());
1811 Document::Impl& impl = getDocument().getImpl();
1812
1813 dassert(impl.doesNotAlias(re));
1814 dassert(impl.doesNotAlias(flags));
1815
1816 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1817 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1818 Element newValue = getDocument().makeElementRegex(fieldName, re, flags);
1819 return setValue(newValue._repIdx);
1820 }
1821
setValueDBRef(StringData ns,const OID oid)1822 Status Element::setValueDBRef(StringData ns, const OID oid) {
1823 invariant(ok());
1824 Document::Impl& impl = getDocument().getImpl();
1825
1826 dassert(impl.doesNotAlias(ns));
1827
1828 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1829 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1830 Element newValue = getDocument().makeElementDBRef(fieldName, ns, oid);
1831 return setValue(newValue._repIdx);
1832 }
1833
setValueCode(StringData value)1834 Status Element::setValueCode(StringData value) {
1835 invariant(ok());
1836 Document::Impl& impl = getDocument().getImpl();
1837
1838 dassert(impl.doesNotAlias(value));
1839
1840 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1841 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1842 Element newValue = getDocument().makeElementCode(fieldName, value);
1843 return setValue(newValue._repIdx);
1844 }
1845
setValueSymbol(StringData value)1846 Status Element::setValueSymbol(StringData value) {
1847 invariant(ok());
1848 Document::Impl& impl = getDocument().getImpl();
1849
1850 dassert(impl.doesNotAlias(value));
1851
1852 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1853 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1854 Element newValue = getDocument().makeElementSymbol(fieldName, value);
1855 return setValue(newValue._repIdx);
1856 }
1857
setValueCodeWithScope(StringData code,const BSONObj & scope)1858 Status Element::setValueCodeWithScope(StringData code, const BSONObj& scope) {
1859 invariant(ok());
1860 Document::Impl& impl = getDocument().getImpl();
1861
1862 dassert(impl.doesNotAlias(code));
1863 dassert(impl.doesNotAlias(scope));
1864
1865 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1866 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1867 Element newValue = getDocument().makeElementCodeWithScope(fieldName, code, scope);
1868 return setValue(newValue._repIdx);
1869 }
1870
setValueInt(const int32_t value)1871 Status Element::setValueInt(const int32_t value) {
1872 invariant(ok());
1873 Document::Impl& impl = getDocument().getImpl();
1874 ElementRep thisRep = impl.getElementRep(_repIdx);
1875 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1876 Element newValue = getDocument().makeElementInt(fieldName, value);
1877 return setValue(newValue._repIdx);
1878 }
1879
setValueTimestamp(const Timestamp value)1880 Status Element::setValueTimestamp(const Timestamp value) {
1881 invariant(ok());
1882 Document::Impl& impl = getDocument().getImpl();
1883 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1884 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1885 Element newValue = getDocument().makeElementTimestamp(fieldName, value);
1886 return setValue(newValue._repIdx);
1887 }
1888
setValueLong(const int64_t value)1889 Status Element::setValueLong(const int64_t value) {
1890 invariant(ok());
1891 Document::Impl& impl = getDocument().getImpl();
1892 ElementRep thisRep = impl.getElementRep(_repIdx);
1893 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1894 Element newValue = getDocument().makeElementLong(fieldName, value);
1895 return setValue(newValue._repIdx);
1896 }
1897
setValueDecimal(const Decimal128 value)1898 Status Element::setValueDecimal(const Decimal128 value) {
1899 invariant(ok());
1900 Document::Impl& impl = getDocument().getImpl();
1901 ElementRep thisRep = impl.getElementRep(_repIdx);
1902 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1903 Element newValue = getDocument().makeElementDecimal(fieldName, value);
1904 return setValue(newValue._repIdx);
1905 }
1906
setValueMinKey()1907 Status Element::setValueMinKey() {
1908 invariant(ok());
1909 Document::Impl& impl = getDocument().getImpl();
1910 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1911 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1912 Element newValue = getDocument().makeElementMinKey(fieldName);
1913 return setValue(newValue._repIdx);
1914 }
1915
setValueMaxKey()1916 Status Element::setValueMaxKey() {
1917 invariant(ok());
1918 Document::Impl& impl = getDocument().getImpl();
1919 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1920 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1921 Element newValue = getDocument().makeElementMaxKey(fieldName);
1922 return setValue(newValue._repIdx);
1923 }
1924
setValueBSONElement(const BSONElement & value)1925 Status Element::setValueBSONElement(const BSONElement& value) {
1926 invariant(ok());
1927
1928 if (value.type() == mongo::EOO)
1929 return Status(ErrorCodes::IllegalOperation, "Can't set Element value to EOO");
1930
1931 Document::Impl& impl = getDocument().getImpl();
1932
1933 dassert(impl.doesNotAlias(value));
1934
1935 ElementRep thisRep = impl.getElementRep(_repIdx);
1936 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1937 Element newValue = getDocument().makeElementWithNewFieldName(fieldName, value);
1938 return setValue(newValue._repIdx);
1939 }
1940
setValueSafeNum(const SafeNum value)1941 Status Element::setValueSafeNum(const SafeNum value) {
1942 invariant(ok());
1943 switch (value.type()) {
1944 case mongo::NumberInt:
1945 return setValueInt(value._value.int32Val);
1946 case mongo::NumberLong:
1947 return setValueLong(value._value.int64Val);
1948 case mongo::NumberDouble:
1949 return setValueDouble(value._value.doubleVal);
1950 case mongo::NumberDecimal:
1951 return setValueDecimal(Decimal128(value._value.decimalVal));
1952 default:
1953 return Status(ErrorCodes::UnsupportedFormat,
1954 "Don't know how to handle unexpected SafeNum type");
1955 }
1956 }
1957
setValueElement(ConstElement setFrom)1958 Status Element::setValueElement(ConstElement setFrom) {
1959 invariant(ok());
1960
1961 // Can't set to your own root element, since this would create a circular document.
1962 if (_doc->root() == setFrom) {
1963 return Status(ErrorCodes::IllegalOperation,
1964 "Attempt to set an element to its own document's root");
1965 }
1966
1967 // Setting to self is a no-op.
1968 //
1969 // Setting the root is always an error so we want to fall through to the error handling in this
1970 // case.
1971 if (*this == setFrom && _repIdx != kRootRepIdx) {
1972 return Status::OK();
1973 }
1974
1975 Document::Impl& impl = getDocument().getImpl();
1976 ElementRep thisRep = impl.getElementRep(_repIdx);
1977 const StringData fieldName = impl.getFieldNameForNewElement(thisRep);
1978 Element newValue = getDocument().makeElementWithNewFieldName(fieldName, setFrom);
1979 return setValue(newValue._repIdx);
1980 }
1981
getType() const1982 BSONType Element::getType() const {
1983 invariant(ok());
1984 const Document::Impl& impl = getDocument().getImpl();
1985 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1986 return impl.getType(thisRep);
1987 }
1988
getFieldName() const1989 StringData Element::getFieldName() const {
1990 invariant(ok());
1991 const Document::Impl& impl = getDocument().getImpl();
1992 const ElementRep& thisRep = impl.getElementRep(_repIdx);
1993 return impl.getFieldName(thisRep);
1994 }
1995
addChild(Element e,bool front)1996 Status Element::addChild(Element e, bool front) {
1997 // No need to invariant(ok()) since we are only called from methods that have done so.
1998 dassert(ok());
1999
2000 invariant(e.ok());
2001 invariant(_doc == e._doc);
2002
2003 Document::Impl& impl = getDocument().getImpl();
2004 ElementRep& newRep = impl.getElementRep(e._repIdx);
2005
2006 // check that new element roots a clean subtree.
2007 if (!canAttach(e._repIdx, newRep))
2008 return getAttachmentError(newRep);
2009
2010 // Check that this element is eligible for children.
2011 ElementRep& thisRep = impl.getElementRep(_repIdx);
2012 if (impl.isLeaf(thisRep))
2013 return Status(ErrorCodes::IllegalOperation,
2014 "Attempt to add a child element to a non-object element");
2015
2016 impl.disableInPlaceUpdates();
2017
2018 // TODO: In both of the following cases, we call two public API methods each. We can
2019 // probably do better by writing this explicitly here and drying it with the public
2020 // addSiblingLeft and addSiblingRight implementations.
2021 if (front) {
2022 // TODO: It is cheap to get the left child. However, it still means creating a rep
2023 // for it. Can we do better?
2024 Element lc = leftChild();
2025 if (lc.ok())
2026 return lc.addSiblingLeft(e);
2027 } else {
2028 // TODO: It is expensive to get the right child, since we have to build reps for
2029 // all of the opaque children. But in principle, we don't really need them. Could
2030 // we potentially add this element as a right child, leaving its left sibling
2031 // opaque? We would at minimum need to update leftSibling, which currently assumes
2032 // that your left sibling is never opaque. But adding new Elements to the end is a
2033 // quite common operation, so it would be nice if we could do this efficiently.
2034 Element rc = rightChild();
2035 if (rc.ok())
2036 return rc.addSiblingRight(e);
2037 }
2038
2039 // It must be the case that we have no children, so the new element becomes both the
2040 // right and left child of this node.
2041 dassert((thisRep.child.left == kInvalidRepIdx) && (thisRep.child.right == kInvalidRepIdx));
2042 thisRep.child.left = thisRep.child.right = e._repIdx;
2043 newRep.parent = _repIdx;
2044 impl.deserialize(_repIdx);
2045 return Status::OK();
2046 }
2047
setValue(const Element::RepIdx newValueIdx)2048 Status Element::setValue(const Element::RepIdx newValueIdx) {
2049 // No need to invariant(ok()) since we are only called from methods that have done so.
2050 dassert(ok());
2051
2052 if (_repIdx == kRootRepIdx)
2053 return Status(ErrorCodes::IllegalOperation, "Cannot call setValue on the root object");
2054
2055 Document::Impl& impl = getDocument().getImpl();
2056
2057 // Establish our right sibling in case it is opaque. Otherwise, we would lose the
2058 // ability to do so after the modifications below. It is important that this occur
2059 // before we acquire thisRep and valueRep since otherwise we would potentially
2060 // invalidate them.
2061 impl.resolveRightSibling(_repIdx);
2062
2063 ElementRep& thisRep = impl.getElementRep(_repIdx);
2064 ElementRep& valueRep = impl.getElementRep(newValueIdx);
2065
2066 if (impl.isInPlaceModeEnabled() && impl.canUpdateInPlace(valueRep, thisRep)) {
2067 // Get the BSONElement representations of the existing and new value, so we can
2068 // check if they are size compatible.
2069 BSONElement thisElt = impl.getSerializedElement(thisRep);
2070 BSONElement valueElt = impl.getSerializedElement(valueRep);
2071
2072 if (thisElt.size() == valueElt.size()) {
2073 // The old and new elements are size compatible. Compute the base offsets
2074 // of each BSONElement in the object in which it resides. We use these to
2075 // calculate the source and target offsets in the damage entries we are
2076 // going to write.
2077
2078 const DamageEvent::OffsetSizeType targetBaseOffset =
2079 getElementOffset(impl.getObject(thisRep.objIdx), thisElt);
2080
2081 const DamageEvent::OffsetSizeType sourceBaseOffset =
2082 getElementOffset(impl.getObject(valueRep.objIdx), valueElt);
2083
2084 // If this is a type change, record a damage event for the new type.
2085 if (thisElt.type() != valueElt.type()) {
2086 impl.recordDamageEvent(targetBaseOffset, sourceBaseOffset, 1);
2087 }
2088
2089 dassert(thisElt.fieldNameSize() == valueElt.fieldNameSize());
2090 dassert(thisElt.valuesize() == valueElt.valuesize());
2091
2092 // Record a damage event for the new value data.
2093 impl.recordDamageEvent(targetBaseOffset + thisElt.fieldNameSize() + 1,
2094 sourceBaseOffset + thisElt.fieldNameSize() + 1,
2095 thisElt.valuesize());
2096 } else {
2097 // We couldn't do it in place, so disable future in-place updates.
2098 impl.disableInPlaceUpdates();
2099 }
2100 }
2101
2102 // If we are not rootish, then wire in the new value among our relations.
2103 if (thisRep.parent != kInvalidRepIdx) {
2104 valueRep.parent = thisRep.parent;
2105 valueRep.sibling.left = thisRep.sibling.left;
2106 valueRep.sibling.right = thisRep.sibling.right;
2107 }
2108
2109 // Copy the rep for value to our slot so that our repIdx is unmodified.
2110 thisRep = valueRep;
2111
2112 // Be nice and clear out the source rep to make debugging easier.
2113 valueRep = ElementRep();
2114
2115 impl.deserialize(thisRep.parent);
2116 return Status::OK();
2117 }
2118
2119
2120 namespace {
2121
2122 // A helper for Element::writeElement below. For cases where we are building inside an
2123 // array, we want to ignore field names. So the specialization for BSONArrayBuilder ignores
2124 // the third parameter.
2125 template <typename Builder>
2126 struct SubBuilder;
2127
2128 template <>
2129 struct SubBuilder<BSONObjBuilder> {
SubBuildermongo::mutablebson::__anon0bdfad420411::SubBuilder2130 SubBuilder(BSONObjBuilder* builder, BSONType type, StringData fieldName)
2131 : buffer((type == mongo::Array) ? builder->subarrayStart(fieldName)
2132 : builder->subobjStart(fieldName)) {}
2133 BufBuilder& buffer;
2134 };
2135
2136 template <>
2137 struct SubBuilder<BSONArrayBuilder> {
SubBuildermongo::mutablebson::__anon0bdfad420411::SubBuilder2138 SubBuilder(BSONArrayBuilder* builder, BSONType type, StringData)
2139 : buffer((type == mongo::Array) ? builder->subarrayStart() : builder->subobjStart()) {}
2140 BufBuilder& buffer;
2141 };
2142
appendElement(BSONObjBuilder * builder,const BSONElement & element,const StringData * fieldName)2143 static void appendElement(BSONObjBuilder* builder,
2144 const BSONElement& element,
2145 const StringData* fieldName) {
2146 if (fieldName)
2147 builder->appendAs(element, *fieldName);
2148 else
2149 builder->append(element);
2150 }
2151
2152 // BSONArrayBuilder should not be appending elements with a fieldName
appendElement(BSONArrayBuilder * builder,const BSONElement & element,const StringData * fieldName)2153 static void appendElement(BSONArrayBuilder* builder,
2154 const BSONElement& element,
2155 const StringData* fieldName) {
2156 invariant(!fieldName);
2157 builder->append(element);
2158 }
2159
2160 } // namespace
2161
2162 template <typename Builder>
writeElement(Element::RepIdx repIdx,Builder * builder,const StringData * fieldName) const2163 void Document::Impl::writeElement(Element::RepIdx repIdx,
2164 Builder* builder,
2165 const StringData* fieldName) const {
2166 const ElementRep& rep = getElementRep(repIdx);
2167
2168 if (hasValue(rep)) {
2169 appendElement(builder, getSerializedElement(rep), fieldName);
2170 } else {
2171 const BSONType type = getType(rep);
2172 const StringData subName = fieldName ? *fieldName : getFieldName(rep);
2173 SubBuilder<Builder> subBuilder(builder, type, subName);
2174
2175 // Otherwise, this is a 'dirty leaf', which is impossible.
2176 dassert((type == mongo::Array) || (type == mongo::Object));
2177
2178 if (type == mongo::Array) {
2179 BSONArrayBuilder child_builder(subBuilder.buffer);
2180 writeChildren(repIdx, &child_builder);
2181 child_builder.doneFast();
2182 } else {
2183 BSONObjBuilder child_builder(subBuilder.buffer);
2184 writeChildren(repIdx, &child_builder);
2185 child_builder.doneFast();
2186 }
2187 }
2188 }
2189
2190 template <typename Builder>
writeChildren(Element::RepIdx repIdx,Builder * builder) const2191 void Document::Impl::writeChildren(Element::RepIdx repIdx, Builder* builder) const {
2192 // TODO: In theory, I think we can walk rightwards building a write region from all
2193 // serialized embedded children that share an obj id and form a contiguous memory
2194 // region. For arrays we would need to know something about how many elements we wrote
2195 // that way so that the indexes would come out right.
2196 //
2197 // However, that involves walking the memory twice: once to build the copy region, and
2198 // another time to actually copy it. It is unclear if this is better than just walking
2199 // it once with the recursive solution.
2200
2201 const ElementRep& rep = getElementRep(repIdx);
2202
2203 // OK, need to resolve left if we haven't done that yet.
2204 Element::RepIdx current = rep.child.left;
2205 if (current == Element::kOpaqueRepIdx)
2206 current = const_cast<Impl*>(this)->resolveLeftChild(repIdx);
2207
2208 // We need to write the element, and then walk rightwards.
2209 while (current != Element::kInvalidRepIdx) {
2210 writeElement(current, builder);
2211
2212 // If we have an opaque region to the right, and we are not in an array, then we
2213 // can bulk copy from the end of the element we just wrote to the end of our
2214 // parent.
2215 const ElementRep& currentRep = getElementRep(current);
2216
2217 if (currentRep.sibling.right == Element::kOpaqueRepIdx) {
2218 // Obtain the current parent, so we can see if we can bulk copy the right
2219 // siblings.
2220 const ElementRep& parentRep = getElementRep(currentRep.parent);
2221
2222 // Bulk copying right only works on objects
2223 if ((getType(parentRep) == mongo::Object) && (currentRep.objIdx != kInvalidObjIdx) &&
2224 (currentRep.objIdx == parentRep.objIdx)) {
2225 BSONElement currentElt = getSerializedElement(currentRep);
2226 const uint32_t currentSize = currentElt.size();
2227
2228 const BSONObj parentObj = (currentRep.parent == kRootRepIdx)
2229 ? getObject(parentRep.objIdx)
2230 : getSerializedElement(parentRep).Obj();
2231 const uint32_t parentSize = parentObj.objsize();
2232
2233 const uint32_t currentEltOffset = getElementOffset(parentObj, currentElt);
2234 const uint32_t nextEltOffset = currentEltOffset + currentSize;
2235
2236 const char* copyBegin = parentObj.objdata() + nextEltOffset;
2237 const uint32_t copyBytes = parentSize - nextEltOffset;
2238
2239 // The -1 is because we don't want to copy in the terminal EOO.
2240 builder->bb().appendBuf(copyBegin, copyBytes - 1);
2241
2242 // We are done with all children.
2243 break;
2244 }
2245
2246 // We couldn't bulk copy, and our right sibling is opaque. We need to
2247 // resolve. Note that the call to resolve may invalidate 'currentRep', so
2248 // rather than falling through and acquiring the index by examining currentRep,
2249 // update it with the return value of resolveRightSibling and restart the loop.
2250 current = const_cast<Impl*>(this)->resolveRightSibling(current);
2251 continue;
2252 }
2253
2254 current = currentRep.sibling.right;
2255 }
2256 }
2257
Document()2258 Document::Document() : _impl(new Impl(Document::kInPlaceDisabled)), _root(makeRootElement()) {
2259 dassert(_root._repIdx == kRootRepIdx);
2260 }
2261
Document(const BSONObj & value,InPlaceMode inPlaceMode)2262 Document::Document(const BSONObj& value, InPlaceMode inPlaceMode)
2263 : _impl(new Impl(inPlaceMode)), _root(makeRootElement(value)) {
2264 dassert(_root._repIdx == kRootRepIdx);
2265 }
2266
reset()2267 void Document::reset() {
2268 _impl->reset(Document::kInPlaceDisabled);
2269 MONGO_COMPILER_VARIABLE_UNUSED const Element newRoot = makeRootElement();
2270 dassert(newRoot._repIdx == _root._repIdx);
2271 dassert(_root._repIdx == kRootRepIdx);
2272 }
2273
reset(const BSONObj & value,InPlaceMode inPlaceMode)2274 void Document::reset(const BSONObj& value, InPlaceMode inPlaceMode) {
2275 _impl->reset(inPlaceMode);
2276 MONGO_COMPILER_VARIABLE_UNUSED const Element newRoot = makeRootElement(value);
2277 dassert(newRoot._repIdx == _root._repIdx);
2278 dassert(_root._repIdx == kRootRepIdx);
2279 }
2280
~Document()2281 Document::~Document() {}
2282
reserveDamageEvents(size_t expectedEvents)2283 void Document::reserveDamageEvents(size_t expectedEvents) {
2284 return getImpl().reserveDamageEvents(expectedEvents);
2285 }
2286
getInPlaceUpdates(DamageVector * damages,const char ** source,size_t * size)2287 bool Document::getInPlaceUpdates(DamageVector* damages, const char** source, size_t* size) {
2288 return getImpl().getInPlaceUpdates(damages, source, size);
2289 }
2290
disableInPlaceUpdates()2291 void Document::disableInPlaceUpdates() {
2292 return getImpl().disableInPlaceUpdates();
2293 }
2294
getCurrentInPlaceMode() const2295 Document::InPlaceMode Document::getCurrentInPlaceMode() const {
2296 return getImpl().getCurrentInPlaceMode();
2297 }
2298
makeElementDouble(StringData fieldName,const double value)2299 Element Document::makeElementDouble(StringData fieldName, const double value) {
2300 Impl& impl = getImpl();
2301 dassert(impl.doesNotAlias(fieldName));
2302
2303 BSONObjBuilder& builder = impl.leafBuilder();
2304 const int leafRef = builder.len();
2305 builder.append(fieldName, value);
2306 return Element(this,
2307 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2308 }
2309
makeElementString(StringData fieldName,StringData value)2310 Element Document::makeElementString(StringData fieldName, StringData value) {
2311 Impl& impl = getImpl();
2312 dassert(impl.doesNotAlias(fieldName));
2313 dassert(impl.doesNotAlias(value));
2314
2315 BSONObjBuilder& builder = impl.leafBuilder();
2316 const int leafRef = builder.len();
2317 builder.append(fieldName, value);
2318 return Element(this,
2319 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2320 }
2321
makeElementObject(StringData fieldName)2322 Element Document::makeElementObject(StringData fieldName) {
2323 Impl& impl = getImpl();
2324 dassert(impl.doesNotAlias(fieldName));
2325
2326 Element::RepIdx newEltIdx;
2327 ElementRep& newElt = impl.makeNewRep(&newEltIdx);
2328 impl.insertFieldName(newElt, fieldName);
2329 return Element(this, newEltIdx);
2330 }
2331
makeElementObject(StringData fieldName,const BSONObj & value)2332 Element Document::makeElementObject(StringData fieldName, const BSONObj& value) {
2333 Impl& impl = getImpl();
2334 dassert(impl.doesNotAliasLeafBuilder(fieldName));
2335 dassert(impl.doesNotAlias(value));
2336
2337 // Copy the provided values into the leaf builder.
2338 BSONObjBuilder& builder = impl.leafBuilder();
2339 const int leafRef = builder.len();
2340 builder.append(fieldName, value);
2341 Element::RepIdx newEltIdx =
2342 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef);
2343 ElementRep& newElt = impl.getElementRep(newEltIdx);
2344
2345 newElt.child.left = Element::kOpaqueRepIdx;
2346 newElt.child.right = Element::kOpaqueRepIdx;
2347
2348 return Element(this, newEltIdx);
2349 }
2350
makeElementArray(StringData fieldName)2351 Element Document::makeElementArray(StringData fieldName) {
2352 Impl& impl = getImpl();
2353 dassert(impl.doesNotAlias(fieldName));
2354
2355 Element::RepIdx newEltIdx;
2356 ElementRep& newElt = impl.makeNewRep(&newEltIdx);
2357 newElt.array = true;
2358 impl.insertFieldName(newElt, fieldName);
2359 return Element(this, newEltIdx);
2360 }
2361
makeElementArray(StringData fieldName,const BSONObj & value)2362 Element Document::makeElementArray(StringData fieldName, const BSONObj& value) {
2363 Impl& impl = getImpl();
2364 dassert(impl.doesNotAliasLeafBuilder(fieldName));
2365 dassert(impl.doesNotAlias(value));
2366
2367 // Copy the provided array values into the leaf builder.
2368 BSONObjBuilder& builder = impl.leafBuilder();
2369 const int leafRef = builder.len();
2370 builder.appendArray(fieldName, value);
2371 Element::RepIdx newEltIdx =
2372 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef);
2373 ElementRep& newElt = impl.getElementRep(newEltIdx);
2374 newElt.child.left = Element::kOpaqueRepIdx;
2375 newElt.child.right = Element::kOpaqueRepIdx;
2376 return Element(this, newEltIdx);
2377 }
2378
makeElementBinary(StringData fieldName,const uint32_t len,const mongo::BinDataType binType,const void * const data)2379 Element Document::makeElementBinary(StringData fieldName,
2380 const uint32_t len,
2381 const mongo::BinDataType binType,
2382 const void* const data) {
2383 Impl& impl = getImpl();
2384 dassert(impl.doesNotAlias(fieldName));
2385 // TODO: Alias check 'data'?
2386
2387 BSONObjBuilder& builder = impl.leafBuilder();
2388 const int leafRef = builder.len();
2389 builder.appendBinData(fieldName, len, binType, data);
2390 return Element(this,
2391 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2392 }
2393
makeElementUndefined(StringData fieldName)2394 Element Document::makeElementUndefined(StringData fieldName) {
2395 Impl& impl = getImpl();
2396 dassert(impl.doesNotAlias(fieldName));
2397
2398 BSONObjBuilder& builder = impl.leafBuilder();
2399 const int leafRef = builder.len();
2400 builder.appendUndefined(fieldName);
2401 return Element(this,
2402 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2403 }
2404
makeElementNewOID(StringData fieldName)2405 Element Document::makeElementNewOID(StringData fieldName) {
2406 OID newOID;
2407 newOID.init();
2408 return makeElementOID(fieldName, newOID);
2409 }
2410
makeElementOID(StringData fieldName,const OID value)2411 Element Document::makeElementOID(StringData fieldName, const OID value) {
2412 Impl& impl = getImpl();
2413 dassert(impl.doesNotAlias(fieldName));
2414
2415 BSONObjBuilder& builder = impl.leafBuilder();
2416 const int leafRef = builder.len();
2417 builder.append(fieldName, value);
2418 return Element(this,
2419 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2420 }
2421
makeElementBool(StringData fieldName,const bool value)2422 Element Document::makeElementBool(StringData fieldName, const bool value) {
2423 Impl& impl = getImpl();
2424 dassert(impl.doesNotAlias(fieldName));
2425
2426 BSONObjBuilder& builder = impl.leafBuilder();
2427 const int leafRef = builder.len();
2428 builder.appendBool(fieldName, value);
2429 return Element(this,
2430 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2431 }
2432
makeElementDate(StringData fieldName,const Date_t value)2433 Element Document::makeElementDate(StringData fieldName, const Date_t value) {
2434 Impl& impl = getImpl();
2435 dassert(impl.doesNotAlias(fieldName));
2436
2437 BSONObjBuilder& builder = impl.leafBuilder();
2438 const int leafRef = builder.len();
2439 builder.appendDate(fieldName, value);
2440 return Element(this,
2441 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2442 }
2443
makeElementNull(StringData fieldName)2444 Element Document::makeElementNull(StringData fieldName) {
2445 Impl& impl = getImpl();
2446 dassert(impl.doesNotAlias(fieldName));
2447
2448 BSONObjBuilder& builder = impl.leafBuilder();
2449 const int leafRef = builder.len();
2450 builder.appendNull(fieldName);
2451 return Element(this,
2452 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2453 }
2454
makeElementRegex(StringData fieldName,StringData re,StringData flags)2455 Element Document::makeElementRegex(StringData fieldName, StringData re, StringData flags) {
2456 Impl& impl = getImpl();
2457 dassert(impl.doesNotAlias(fieldName));
2458 dassert(impl.doesNotAlias(re));
2459 dassert(impl.doesNotAlias(flags));
2460
2461 BSONObjBuilder& builder = impl.leafBuilder();
2462 const int leafRef = builder.len();
2463 builder.appendRegex(fieldName, re, flags);
2464 return Element(this,
2465 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2466 }
2467
makeElementDBRef(StringData fieldName,StringData ns,const OID value)2468 Element Document::makeElementDBRef(StringData fieldName, StringData ns, const OID value) {
2469 Impl& impl = getImpl();
2470 dassert(impl.doesNotAlias(fieldName));
2471 BSONObjBuilder& builder = impl.leafBuilder();
2472 const int leafRef = builder.len();
2473 builder.appendDBRef(fieldName, ns, value);
2474 return Element(this,
2475 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2476 }
2477
makeElementCode(StringData fieldName,StringData value)2478 Element Document::makeElementCode(StringData fieldName, StringData value) {
2479 Impl& impl = getImpl();
2480 dassert(impl.doesNotAlias(fieldName));
2481 dassert(impl.doesNotAlias(value));
2482
2483 BSONObjBuilder& builder = impl.leafBuilder();
2484 const int leafRef = builder.len();
2485 builder.appendCode(fieldName, value);
2486 return Element(this,
2487 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2488 }
2489
makeElementSymbol(StringData fieldName,StringData value)2490 Element Document::makeElementSymbol(StringData fieldName, StringData value) {
2491 Impl& impl = getImpl();
2492 dassert(impl.doesNotAlias(fieldName));
2493 dassert(impl.doesNotAlias(value));
2494
2495 BSONObjBuilder& builder = impl.leafBuilder();
2496 const int leafRef = builder.len();
2497 builder.appendSymbol(fieldName, value);
2498 return Element(this,
2499 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2500 }
2501
makeElementCodeWithScope(StringData fieldName,StringData code,const BSONObj & scope)2502 Element Document::makeElementCodeWithScope(StringData fieldName,
2503 StringData code,
2504 const BSONObj& scope) {
2505 Impl& impl = getImpl();
2506 dassert(impl.doesNotAlias(fieldName));
2507 dassert(impl.doesNotAlias(code));
2508 dassert(impl.doesNotAlias(scope));
2509
2510 BSONObjBuilder& builder = impl.leafBuilder();
2511 const int leafRef = builder.len();
2512 builder.appendCodeWScope(fieldName, code, scope);
2513 return Element(this,
2514 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2515 }
2516
makeElementInt(StringData fieldName,const int32_t value)2517 Element Document::makeElementInt(StringData fieldName, const int32_t value) {
2518 Impl& impl = getImpl();
2519 dassert(impl.doesNotAlias(fieldName));
2520
2521 BSONObjBuilder& builder = impl.leafBuilder();
2522 const int leafRef = builder.len();
2523 builder.append(fieldName, value);
2524 return Element(this,
2525 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2526 }
2527
makeElementTimestamp(StringData fieldName,const Timestamp value)2528 Element Document::makeElementTimestamp(StringData fieldName, const Timestamp value) {
2529 Impl& impl = getImpl();
2530 dassert(impl.doesNotAlias(fieldName));
2531
2532 BSONObjBuilder& builder = impl.leafBuilder();
2533 const int leafRef = builder.len();
2534 builder.append(fieldName, value);
2535 return Element(this,
2536 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2537 }
2538
makeElementLong(StringData fieldName,const int64_t value)2539 Element Document::makeElementLong(StringData fieldName, const int64_t value) {
2540 Impl& impl = getImpl();
2541 dassert(impl.doesNotAlias(fieldName));
2542
2543 BSONObjBuilder& builder = impl.leafBuilder();
2544 const int leafRef = builder.len();
2545 builder.append(fieldName, static_cast<long long int>(value));
2546 return Element(this,
2547 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2548 }
2549
makeElementDecimal(StringData fieldName,const Decimal128 value)2550 Element Document::makeElementDecimal(StringData fieldName, const Decimal128 value) {
2551 Impl& impl = getImpl();
2552 dassert(impl.doesNotAlias(fieldName));
2553
2554 BSONObjBuilder& builder = impl.leafBuilder();
2555 const int leafRef = builder.len();
2556 builder.append(fieldName, value);
2557 return Element(this,
2558 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2559 }
2560
makeElementMinKey(StringData fieldName)2561 Element Document::makeElementMinKey(StringData fieldName) {
2562 Impl& impl = getImpl();
2563 dassert(impl.doesNotAlias(fieldName));
2564
2565 BSONObjBuilder& builder = impl.leafBuilder();
2566 const int leafRef = builder.len();
2567 builder.appendMinKey(fieldName);
2568 return Element(this,
2569 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2570 }
2571
makeElementMaxKey(StringData fieldName)2572 Element Document::makeElementMaxKey(StringData fieldName) {
2573 Impl& impl = getImpl();
2574 dassert(impl.doesNotAlias(fieldName));
2575
2576 BSONObjBuilder& builder = impl.leafBuilder();
2577 const int leafRef = builder.len();
2578 builder.appendMaxKey(fieldName);
2579 return Element(this,
2580 impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2581 }
2582
makeElement(const BSONElement & value)2583 Element Document::makeElement(const BSONElement& value) {
2584 Impl& impl = getImpl();
2585
2586 // Attempts to create an EOO element are translated to returning an invalid
2587 // Element. For array and object nodes, we flow through the custom
2588 // makeElement{Object|Array} methods, since they have special logic to deal with
2589 // opaqueness. Otherwise, we can just insert via appendAs.
2590 if (value.type() == mongo::EOO)
2591 return end();
2592 else if (value.type() == mongo::Object)
2593 return makeElementObject(value.fieldNameStringData(), value.Obj());
2594 else if (value.type() == mongo::Array)
2595 return makeElementArray(value.fieldNameStringData(), value.Obj());
2596 else {
2597 dassert(impl.doesNotAlias(value));
2598 BSONObjBuilder& builder = impl.leafBuilder();
2599 const int leafRef = builder.len();
2600 builder.append(value);
2601 return Element(this, impl.insertLeafElement(leafRef, value.fieldNameSize(), value.size()));
2602 }
2603 }
2604
makeElementWithNewFieldName(StringData fieldName,const BSONElement & value)2605 Element Document::makeElementWithNewFieldName(StringData fieldName, const BSONElement& value) {
2606 Impl& impl = getImpl();
2607
2608 // See the above makeElement for notes on these cases.
2609 if (value.type() == mongo::EOO)
2610 return end();
2611 else if (value.type() == mongo::Object)
2612 return makeElementObject(fieldName, value.Obj());
2613 else if (value.type() == mongo::Array)
2614 return makeElementArray(fieldName, value.Obj());
2615 else {
2616 dassert(getImpl().doesNotAliasLeafBuilder(fieldName));
2617 dassert(getImpl().doesNotAlias(value));
2618 BSONObjBuilder& builder = impl.leafBuilder();
2619 const int leafRef = builder.len();
2620 builder.appendAs(value, fieldName);
2621 return Element(
2622 this, impl.insertLeafElement(leafRef, fieldName.size() + 1, builder.len() - leafRef));
2623 }
2624 }
2625
makeElementSafeNum(StringData fieldName,SafeNum value)2626 Element Document::makeElementSafeNum(StringData fieldName, SafeNum value) {
2627 dassert(getImpl().doesNotAlias(fieldName));
2628
2629 switch (value.type()) {
2630 case mongo::NumberInt:
2631 return makeElementInt(fieldName, value._value.int32Val);
2632 case mongo::NumberLong:
2633 return makeElementLong(fieldName, value._value.int64Val);
2634 case mongo::NumberDouble:
2635 return makeElementDouble(fieldName, value._value.doubleVal);
2636 case mongo::NumberDecimal:
2637 return makeElementDecimal(fieldName, Decimal128(value._value.decimalVal));
2638 default:
2639 // Return an invalid element to indicate that we failed.
2640 return end();
2641 }
2642 }
2643
makeElement(ConstElement element)2644 Element Document::makeElement(ConstElement element) {
2645 return makeElement(element, NULL);
2646 }
2647
makeElementWithNewFieldName(StringData fieldName,ConstElement element)2648 Element Document::makeElementWithNewFieldName(StringData fieldName, ConstElement element) {
2649 return makeElement(element, &fieldName);
2650 }
2651
makeRootElement()2652 Element Document::makeRootElement() {
2653 return makeElementObject(kRootFieldName);
2654 }
2655
makeRootElement(const BSONObj & value)2656 Element Document::makeRootElement(const BSONObj& value) {
2657 Impl& impl = getImpl();
2658 Element::RepIdx newEltIdx = Element::kInvalidRepIdx;
2659 ElementRep* newElt = &impl.makeNewRep(&newEltIdx);
2660
2661 // A BSONObj provided for the root Element is stored in _objects rather than being
2662 // copied like all other BSONObjs.
2663 newElt->objIdx = impl.insertObject(value);
2664 impl.insertFieldName(*newElt, kRootFieldName);
2665
2666 // Strictly, the following is a lie: the root isn't serialized, because it doesn't
2667 // have a contiguous fieldname. However, it is a useful fiction to pretend that it
2668 // is, so we can easily check if we have a 'pristine' document state by checking if
2669 // the root is marked as serialized.
2670 newElt->serialized = true;
2671
2672 // If the provided value is empty, mark it as having no children, otherwise mark the
2673 // children as opaque.
2674 if (value.isEmpty())
2675 newElt->child.left = Element::kInvalidRepIdx;
2676 else
2677 newElt->child.left = Element::kOpaqueRepIdx;
2678 newElt->child.right = newElt->child.left;
2679
2680 return Element(this, newEltIdx);
2681 }
2682
makeElement(ConstElement element,const StringData * fieldName)2683 Element Document::makeElement(ConstElement element, const StringData* fieldName) {
2684 Impl& impl = getImpl();
2685
2686 if (this == &element.getDocument()) {
2687 // If the Element that we want to build from belongs to this Document, then we have
2688 // to first copy it to the side, and then back in, since otherwise we might be
2689 // attempting both read to and write from the underlying BufBuilder simultaneously,
2690 // which will not work.
2691 BSONObjBuilder builder;
2692 impl.writeElement(element.getIdx(), &builder, fieldName);
2693 BSONObj built = builder.done();
2694 BSONElement newElement = built.firstElement();
2695 return makeElement(newElement);
2696
2697 } else {
2698 // If the Element belongs to another document, then we can just stream it into our
2699 // builder. We still do need to dassert that the field name doesn't alias us
2700 // somehow.
2701 if (fieldName) {
2702 dassert(impl.doesNotAlias(*fieldName));
2703 }
2704 BSONObjBuilder& builder = impl.leafBuilder();
2705 const int leafRef = builder.len();
2706
2707 const Impl& oImpl = element.getDocument().getImpl();
2708 oImpl.writeElement(element.getIdx(), &builder, fieldName);
2709 return Element(this,
2710 impl.insertLeafElement(leafRef,
2711 fieldName ? fieldName->size() + 1 : -1,
2712 builder.len() - leafRef));
2713 }
2714 }
2715
getImpl()2716 inline Document::Impl& Document::getImpl() {
2717 // Don't use unique_ptr<Impl>::operator* since it may generate assertions that the
2718 // pointer is non-null, but we already know that to be always and forever true, and
2719 // otherwise the assertion code gets spammed into every method that inlines the call to
2720 // this function. We just dereference the pointer returned from 'get' ourselves.
2721 return *_impl.get();
2722 }
2723
getImpl() const2724 inline const Document::Impl& Document::getImpl() const {
2725 return *_impl.get();
2726 }
2727
2728 } // namespace mutablebson
2729 } // namespace mongo
2730