1 2 /** 3 * Copyright (C) 2018-present MongoDB, Inc. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the Server Side Public License, version 1, 7 * as published by MongoDB, Inc. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * Server Side Public License for more details. 13 * 14 * You should have received a copy of the Server Side Public License 15 * along with this program. If not, see 16 * <http://www.mongodb.com/licensing/server-side-public-license>. 17 * 18 * As a special exception, the copyright holders give permission to link the 19 * code of portions of this program with the OpenSSL library under certain 20 * conditions as described in each individual source file and distribute 21 * linked combinations including the program with the OpenSSL library. You 22 * must comply with the Server Side Public License in all respects for 23 * all of the code used other than as permitted herein. If you modify file(s) 24 * with this exception, you may extend this exception to your version of the 25 * file(s), but you are not obligated to do so. If you do not wish to do so, 26 * delete this exception statement from your version. If you delete this 27 * exception statement from all source files in the program, then also delete 28 * it in the license file. 29 */ 30 31 #pragma once 32 33 #include <third_party/murmurhash3/MurmurHash3.h> 34 35 #include <bitset> 36 #include <boost/intrusive_ptr.hpp> 37 38 #include "mongo/base/static_assert.h" 39 #include "mongo/db/pipeline/value.h" 40 #include "mongo/util/intrusive_counter.h" 41 42 namespace mongo { 43 /** Helper class to make the position in a document abstract 44 * Warning: This is NOT guaranteed to be the ordered position. 45 * eg. the first field may not be at Position(0) 46 */ 47 class Position { 48 public: 49 // This represents "not found" similar to std::string::npos Position()50 Position() : index(static_cast<unsigned>(-1)) {} found()51 bool found() const { 52 return index != Position().index; 53 } 54 55 bool operator==(Position rhs) const { 56 return this->index == rhs.index; 57 } 58 bool operator!=(Position rhs) const { 59 return !(*this == rhs); 60 } 61 62 // For debugging and ASSERT_EQUALS in tests. 63 template <typename OStream> 64 friend OStream& operator<<(OStream& stream, Position p) { 65 return stream << p.index; 66 } 67 68 private: Position(size_t i)69 explicit Position(size_t i) : index(i) {} 70 unsigned index; 71 friend class DocumentStorage; 72 friend class DocumentStorageIterator; 73 }; 74 75 #pragma pack(1) 76 /** This is how values are stored in the DocumentStorage buffer 77 * Internal class. Consumers shouldn't care about this. 78 */ 79 class ValueElement { 80 MONGO_DISALLOW_COPYING(ValueElement); 81 82 public: 83 Value val; 84 Position nextCollision; // Position of next field with same hashBucket 85 const int nameLen; // doesn't include '\0' 86 const char _name[1]; // pointer to start of name (use nameSD instead) 87 next()88 ValueElement* next() { 89 return align(plusBytes(sizeof(ValueElement) + nameLen)); 90 } 91 next()92 const ValueElement* next() const { 93 return align(plusBytes(sizeof(ValueElement) + nameLen)); 94 } 95 nameSD()96 StringData nameSD() const { 97 return StringData(_name, nameLen); 98 } 99 100 101 // helpers for doing pointer arithmetic with this class ptr()102 char* ptr() { 103 return reinterpret_cast<char*>(this); 104 } ptr()105 const char* ptr() const { 106 return reinterpret_cast<const char*>(this); 107 } plusBytes(size_t bytes)108 const ValueElement* plusBytes(size_t bytes) const { 109 return reinterpret_cast<const ValueElement*>(ptr() + bytes); 110 } plusBytes(size_t bytes)111 ValueElement* plusBytes(size_t bytes) { 112 return reinterpret_cast<ValueElement*>(ptr() + bytes); 113 } 114 115 // Round number or pointer up to N-byte boundary. No change if already aligned. 116 template <typename T> align(T size)117 static T align(T size) { 118 const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment) 119 // Can't use c++ cast because of conversion between intmax_t and both ints and pointers 120 return (T)(((intmax_t)(size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1)); 121 } 122 123 private: 124 ValueElement(); // this class should never be constructed 125 ~ValueElement(); // or destructed 126 }; 127 // Real size is sizeof(ValueElement) + nameLen 128 #pragma pack() 129 MONGO_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + sizeof(Position) + sizeof(int) + 1)); 130 131 // This is an internal class for Document. See FieldIterator for the public version. 132 class DocumentStorageIterator { 133 public: 134 // DocumentStorage::iterator() and iteratorAll() are easier to use DocumentStorageIterator(const ValueElement * first,const ValueElement * end,bool includeMissing)135 DocumentStorageIterator(const ValueElement* first, const ValueElement* end, bool includeMissing) 136 : _first(first), _it(first), _end(end), _includeMissing(includeMissing) { 137 if (!_includeMissing) 138 skipMissing(); 139 } 140 atEnd()141 bool atEnd() const { 142 return _it == _end; 143 } 144 get()145 const ValueElement& get() const { 146 return *_it; 147 } 148 position()149 Position position() const { 150 return Position(_it->ptr() - _first->ptr()); 151 } 152 advance()153 void advance() { 154 advanceOne(); 155 if (!_includeMissing) 156 skipMissing(); 157 } 158 159 const ValueElement* operator->() { 160 return _it; 161 } 162 const ValueElement& operator*() { 163 return *_it; 164 } 165 166 private: advanceOne()167 void advanceOne() { 168 _it = _it->next(); 169 } 170 skipMissing()171 void skipMissing() { 172 while (!atEnd() && _it->val.missing()) { 173 advanceOne(); 174 } 175 } 176 177 const ValueElement* _first; 178 const ValueElement* _it; 179 const ValueElement* _end; 180 bool _includeMissing; 181 }; 182 183 /// Storage class used by both Document and MutableDocument 184 class DocumentStorage : public RefCountable { 185 public: DocumentStorage()186 DocumentStorage() 187 : _buffer(NULL), 188 _bufferEnd(NULL), 189 _usedBytes(0), 190 _numFields(0), 191 _hashTabMask(0), 192 _metaFields(), 193 _textScore(0), 194 _randVal(0) {} 195 196 ~DocumentStorage(); 197 198 enum MetaType : char { 199 TEXT_SCORE, 200 RAND_VAL, 201 SORT_KEY, 202 203 NUM_FIELDS 204 }; 205 emptyDoc()206 static const DocumentStorage& emptyDoc() { 207 return kEmptyDoc; 208 } 209 size()210 size_t size() const { 211 // can't use _numFields because it includes removed Fields 212 size_t count = 0; 213 for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) 214 count++; 215 return count; 216 } 217 218 /// Returns the position of the next field to be inserted getNextPosition()219 Position getNextPosition() const { 220 return Position(_usedBytes); 221 } 222 223 /// Returns the position of the named field (may be missing) or Position() 224 Position findField(StringData name) const; 225 226 // Document uses these getField(Position pos)227 const ValueElement& getField(Position pos) const { 228 verify(pos.found()); 229 return *(_firstElement->plusBytes(pos.index)); 230 } getField(StringData name)231 Value getField(StringData name) const { 232 Position pos = findField(name); 233 if (!pos.found()) 234 return Value(); 235 return getField(pos).val; 236 } 237 238 // MutableDocument uses these getField(Position pos)239 ValueElement& getField(Position pos) { 240 verify(pos.found()); 241 return *(_firstElement->plusBytes(pos.index)); 242 } getField(StringData name)243 Value& getField(StringData name) { 244 Position pos = findField(name); 245 if (!pos.found()) 246 return appendField(name); // TODO: find a way to avoid hashing name twice 247 return getField(pos).val; 248 } 249 250 /// Adds a new field with missing Value at the end of the document 251 Value& appendField(StringData name); 252 253 /** Preallocates space for fields. Use this to attempt to prevent buffer growth. 254 * This is only valid to call before anything is added to the document. 255 */ 256 void reserveFields(size_t expectedFields); 257 258 /// This skips missing values iterator()259 DocumentStorageIterator iterator() const { 260 return DocumentStorageIterator(_firstElement, end(), false); 261 } 262 263 /// This includes missing values iteratorAll()264 DocumentStorageIterator iteratorAll() const { 265 return DocumentStorageIterator(_firstElement, end(), true); 266 } 267 268 /// Shallow copy of this. Caller owns memory. 269 boost::intrusive_ptr<DocumentStorage> clone() const; 270 allocatedBytes()271 size_t allocatedBytes() const { 272 return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes()); 273 } 274 275 /** 276 * Copies all metadata from source if it has any. 277 * Note: does not clear metadata from this. 278 */ copyMetaDataFrom(const DocumentStorage & source)279 void copyMetaDataFrom(const DocumentStorage& source) { 280 if (source.hasTextScore()) { 281 setTextScore(source.getTextScore()); 282 } 283 if (source.hasRandMetaField()) { 284 setRandMetaField(source.getRandMetaField()); 285 } 286 if (source.hasSortKeyMetaField()) { 287 setSortKeyMetaField(source.getSortKeyMetaField()); 288 } 289 } 290 hasTextScore()291 bool hasTextScore() const { 292 return _metaFields.test(MetaType::TEXT_SCORE); 293 } getTextScore()294 double getTextScore() const { 295 return _textScore; 296 } setTextScore(double score)297 void setTextScore(double score) { 298 _metaFields.set(MetaType::TEXT_SCORE); 299 _textScore = score; 300 } 301 hasRandMetaField()302 bool hasRandMetaField() const { 303 return _metaFields.test(MetaType::RAND_VAL); 304 } getRandMetaField()305 double getRandMetaField() const { 306 return _randVal; 307 } setRandMetaField(double val)308 void setRandMetaField(double val) { 309 _metaFields.set(MetaType::RAND_VAL); 310 _randVal = val; 311 } 312 hasSortKeyMetaField()313 bool hasSortKeyMetaField() const { 314 return _metaFields.test(MetaType::SORT_KEY); 315 } getSortKeyMetaField()316 BSONObj getSortKeyMetaField() const { 317 return _sortKey; 318 } setSortKeyMetaField(BSONObj sortKey)319 void setSortKeyMetaField(BSONObj sortKey) { 320 _metaFields.set(MetaType::SORT_KEY); 321 _sortKey = sortKey.getOwned(); 322 } 323 324 private: 325 /// Same as lastElement->next() or firstElement() if empty. end()326 const ValueElement* end() const { 327 return _firstElement ? _firstElement->plusBytes(_usedBytes) : nullptr; 328 } 329 330 /// Allocates space in _buffer. Copies existing data if there is any. 331 void alloc(unsigned newSize); 332 333 /// Call after adding field to _buffer and increasing _numFields 334 void addFieldToHashTable(Position pos); 335 336 // assumes _hashTabMask is (power of two) - 1 hashTabBuckets()337 unsigned hashTabBuckets() const { 338 return _hashTabMask + 1; 339 } hashTabBytes()340 unsigned hashTabBytes() const { 341 return hashTabBuckets() * sizeof(Position); 342 } 343 344 /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full) needRehash()345 bool needRehash() const { 346 return _numFields * 2 > hashTabBuckets(); 347 } 348 349 /// Initialize empty hash table hashTabInit()350 void hashTabInit() { 351 memset(_hashTab, -1, hashTabBytes()); 352 } 353 hashKey(StringData name)354 static unsigned hashKey(StringData name) { 355 // TODO consider FNV-1a once we have a better benchmark corpus 356 unsigned out; 357 MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out); 358 return out; 359 } 360 bucketForKey(StringData name)361 unsigned bucketForKey(StringData name) const { 362 return hashKey(name) & _hashTabMask; 363 } 364 365 /// Adds all fields to the hash table rehash()366 void rehash() { 367 hashTabInit(); 368 for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) 369 addFieldToHashTable(it.position()); 370 } 371 372 enum { 373 HASH_TAB_INIT_SIZE = 8, // must be power of 2 374 HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this 375 // set to 1 to always hash 376 }; 377 378 // _buffer layout: 379 // ------------------------------------------------------------------------------- 380 // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table | 381 // ------------------------------------------------------------------------------- 382 // ^ _buffer and _firstElement point here ^ 383 // _bufferEnd and _hashTab point here ^ 384 // 385 // 386 // When the buffer grows, the hash table moves to the new end. 387 union { 388 char* _buffer; 389 ValueElement* _firstElement; 390 }; 391 392 union { 393 // pointer to "end" of _buffer element space and start of hash table (same position) 394 char* _bufferEnd; 395 Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN 396 }; 397 398 unsigned _usedBytes; // position where next field would start 399 unsigned _numFields; // this includes removed fields 400 unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often 401 402 std::bitset<MetaType::NUM_FIELDS> _metaFields; 403 double _textScore; 404 double _randVal; 405 BSONObj _sortKey; 406 // When adding a field, make sure to update clone() method 407 408 // Defined in document.cpp 409 static const DocumentStorage kEmptyDoc; 410 }; 411 } 412