1 /* hasher.cpp
2 *
3 * Defines a simple hash function class
4 */
5
6
7 /**
8 * Copyright (C) 2018-present MongoDB, Inc.
9 *
10 * This program is free software: you can redistribute it and/or modify
11 * it under the terms of the Server Side Public License, version 1,
12 * as published by MongoDB, Inc.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * Server Side Public License for more details.
18 *
19 * You should have received a copy of the Server Side Public License
20 * along with this program. If not, see
21 * <http://www.mongodb.com/licensing/server-side-public-license>.
22 *
23 * As a special exception, the copyright holders give permission to link the
24 * code of portions of this program with the OpenSSL library under certain
25 * conditions as described in each individual source file and distribute
26 * linked combinations including the program with the OpenSSL library. You
27 * must comply with the Server Side Public License in all respects for
28 * all of the code used other than as permitted herein. If you modify file(s)
29 * with this exception, you may extend this exception to your version of the
30 * file(s), but you are not obligated to do so. If you do not wish to do so,
31 * delete this exception statement from your version. If you delete this
32 * exception statement from all source files in the program, then also delete
33 * it in the license file.
34 */
35
36 #include "mongo/db/hasher.h"
37
38
39 #include "mongo/db/jsobj.h"
40 #include "mongo/util/md5.hpp"
41 #include "mongo/util/startup_test.h"
42
43 namespace mongo {
44
45 using std::unique_ptr;
46
47 namespace {
48
49 typedef unsigned char HashDigest[16];
50
51 class Hasher {
52 MONGO_DISALLOW_COPYING(Hasher);
53
54 public:
55 explicit Hasher(HashSeed seed);
~Hasher()56 ~Hasher(){};
57
58 // pointer to next part of input key, length in bytes to read
59 void addData(const void* keyData, size_t numBytes);
60
61 // finish computing the hash, put the result in the digest
62 // only call this once per Hasher
63 void finish(HashDigest out);
64
65 private:
66 md5_state_t _md5State;
67 HashSeed _seed;
68 };
69
Hasher(HashSeed seed)70 Hasher::Hasher(HashSeed seed) : _seed(seed) {
71 md5_init(&_md5State);
72 md5_append(&_md5State, reinterpret_cast<const md5_byte_t*>(&_seed), sizeof(_seed));
73 }
74
addData(const void * keyData,size_t numBytes)75 void Hasher::addData(const void* keyData, size_t numBytes) {
76 md5_append(&_md5State, static_cast<const md5_byte_t*>(keyData), numBytes);
77 }
78
finish(HashDigest out)79 void Hasher::finish(HashDigest out) {
80 md5_finish(&_md5State, out);
81 }
82
recursiveHash(Hasher * h,const BSONElement & e,bool includeFieldName)83 void recursiveHash(Hasher* h, const BSONElement& e, bool includeFieldName) {
84 int canonicalType = endian::nativeToLittle(e.canonicalType());
85 h->addData(&canonicalType, sizeof(canonicalType));
86
87 if (includeFieldName) {
88 h->addData(e.fieldName(), e.fieldNameSize());
89 }
90
91 if (!e.mayEncapsulate()) {
92 // if there are no embedded objects (subobjects or arrays),
93 // compute the hash, squashing numeric types to 64-bit ints
94 if (e.isNumber()) {
95 // Use safeNumberLong, it is well-defined for troublesome doubles.
96 const auto i = endian::nativeToLittle(e.safeNumberLong());
97 h->addData(&i, sizeof(i));
98 } else {
99 h->addData(e.value(), e.valuesize());
100 }
101 } else {
102 // else identify the subobject.
103 // hash any preceding stuff (in the case of codeWscope)
104 // then each sub-element
105 // then finish with the EOO element.
106 BSONObj b;
107 if (e.type() == CodeWScope) {
108 h->addData(e.codeWScopeCode(), e.codeWScopeCodeLen());
109 b = e.codeWScopeObject();
110 } else {
111 b = e.embeddedObject();
112 }
113 BSONObjIterator i(b);
114 while (i.moreWithEOO()) {
115 BSONElement el = i.next();
116 recursiveHash(h, el, true);
117 }
118 }
119 }
120
121 struct HasherUnitTest : public StartupTest {
runmongo::__anon16db7bf90111::HasherUnitTest122 void run() {
123 // Hard-coded check to ensure the hash function is consistent across platforms
124 BSONObj o = BSON("check" << 42);
125 verify(BSONElementHasher::hash64(o.firstElement(), 0) == -944302157085130861LL);
126 }
127 } hasherUnitTest;
128
129 } // namespace
130
hash64(const BSONElement & e,HashSeed seed)131 long long int BSONElementHasher::hash64(const BSONElement& e, HashSeed seed) {
132 Hasher h(seed);
133 recursiveHash(&h, e, false);
134 HashDigest d;
135 h.finish(d);
136 // HashDigest is actually 16 bytes, but we just read 8 bytes
137 ConstDataView digestView(reinterpret_cast<const char*>(d));
138 return digestView.read<LittleEndian<long long int>>();
139 }
140
141 } // namespace mongo
142