1 /* hasher.cpp
2  *
3  * Defines a simple hash function class
4  */
5 
6 
7 /**
8  *    Copyright (C) 2018-present MongoDB, Inc.
9  *
10  *    This program is free software: you can redistribute it and/or modify
11  *    it under the terms of the Server Side Public License, version 1,
12  *    as published by MongoDB, Inc.
13  *
14  *    This program is distributed in the hope that it will be useful,
15  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *    Server Side Public License for more details.
18  *
19  *    You should have received a copy of the Server Side Public License
20  *    along with this program. If not, see
21  *    <http://www.mongodb.com/licensing/server-side-public-license>.
22  *
23  *    As a special exception, the copyright holders give permission to link the
24  *    code of portions of this program with the OpenSSL library under certain
25  *    conditions as described in each individual source file and distribute
26  *    linked combinations including the program with the OpenSSL library. You
27  *    must comply with the Server Side Public License in all respects for
28  *    all of the code used other than as permitted herein. If you modify file(s)
29  *    with this exception, you may extend this exception to your version of the
30  *    file(s), but you are not obligated to do so. If you do not wish to do so,
31  *    delete this exception statement from your version. If you delete this
32  *    exception statement from all source files in the program, then also delete
33  *    it in the license file.
34  */
35 
36 #include "mongo/db/hasher.h"
37 
38 
39 #include "mongo/db/jsobj.h"
40 #include "mongo/util/md5.hpp"
41 #include "mongo/util/startup_test.h"
42 
43 namespace mongo {
44 
45 using std::unique_ptr;
46 
47 namespace {
48 
49 typedef unsigned char HashDigest[16];
50 
51 class Hasher {
52     MONGO_DISALLOW_COPYING(Hasher);
53 
54 public:
55     explicit Hasher(HashSeed seed);
~Hasher()56     ~Hasher(){};
57 
58     // pointer to next part of input key, length in bytes to read
59     void addData(const void* keyData, size_t numBytes);
60 
61     // finish computing the hash, put the result in the digest
62     // only call this once per Hasher
63     void finish(HashDigest out);
64 
65 private:
66     md5_state_t _md5State;
67     HashSeed _seed;
68 };
69 
Hasher(HashSeed seed)70 Hasher::Hasher(HashSeed seed) : _seed(seed) {
71     md5_init(&_md5State);
72     md5_append(&_md5State, reinterpret_cast<const md5_byte_t*>(&_seed), sizeof(_seed));
73 }
74 
addData(const void * keyData,size_t numBytes)75 void Hasher::addData(const void* keyData, size_t numBytes) {
76     md5_append(&_md5State, static_cast<const md5_byte_t*>(keyData), numBytes);
77 }
78 
finish(HashDigest out)79 void Hasher::finish(HashDigest out) {
80     md5_finish(&_md5State, out);
81 }
82 
recursiveHash(Hasher * h,const BSONElement & e,bool includeFieldName)83 void recursiveHash(Hasher* h, const BSONElement& e, bool includeFieldName) {
84     int canonicalType = endian::nativeToLittle(e.canonicalType());
85     h->addData(&canonicalType, sizeof(canonicalType));
86 
87     if (includeFieldName) {
88         h->addData(e.fieldName(), e.fieldNameSize());
89     }
90 
91     if (!e.mayEncapsulate()) {
92         // if there are no embedded objects (subobjects or arrays),
93         // compute the hash, squashing numeric types to 64-bit ints
94         if (e.isNumber()) {
95             // Use safeNumberLong, it is well-defined for troublesome doubles.
96             const auto i = endian::nativeToLittle(e.safeNumberLong());
97             h->addData(&i, sizeof(i));
98         } else {
99             h->addData(e.value(), e.valuesize());
100         }
101     } else {
102         // else identify the subobject.
103         // hash any preceding stuff (in the case of codeWscope)
104         // then each sub-element
105         // then finish with the EOO element.
106         BSONObj b;
107         if (e.type() == CodeWScope) {
108             h->addData(e.codeWScopeCode(), e.codeWScopeCodeLen());
109             b = e.codeWScopeObject();
110         } else {
111             b = e.embeddedObject();
112         }
113         BSONObjIterator i(b);
114         while (i.moreWithEOO()) {
115             BSONElement el = i.next();
116             recursiveHash(h, el, true);
117         }
118     }
119 }
120 
121 struct HasherUnitTest : public StartupTest {
runmongo::__anon16db7bf90111::HasherUnitTest122     void run() {
123         // Hard-coded check to ensure the hash function is consistent across platforms
124         BSONObj o = BSON("check" << 42);
125         verify(BSONElementHasher::hash64(o.firstElement(), 0) == -944302157085130861LL);
126     }
127 } hasherUnitTest;
128 
129 }  // namespace
130 
hash64(const BSONElement & e,HashSeed seed)131 long long int BSONElementHasher::hash64(const BSONElement& e, HashSeed seed) {
132     Hasher h(seed);
133     recursiveHash(&h, e, false);
134     HashDigest d;
135     h.finish(d);
136     // HashDigest is actually 16 bytes, but we just read 8 bytes
137     ConstDataView digestView(reinterpret_cast<const char*>(d));
138     return digestView.read<LittleEndian<long long int>>();
139 }
140 
141 }  // namespace mongo
142