1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 //-----------------------------------------------------------------------------
19 // MurmurHash3 was written by Austin Appleby, and is placed in the public
20 // domain. The author hereby disclaims copyright to this source code.
21
22 // Note - The x86 and x64 versions do _not_ produce the same results, as the
23 // algorithms are optimized for their respective platforms. You can still
24 // compile and run any of them on any platform, but your performance with the
25 // non-native version will be less than optimal.
26
#include "parquet/murmur3.h"

#include <cstring>
28
29 namespace parquet {
30
// Compiler portability shims.
//   FORCE_INLINE    - asks the compiler to always inline the function.
//   ROTL64(x, y)    - rotates the 64-bit value x left by y bits.
//   BIG_CONSTANT(x) - appends the unsigned long long suffix to a literal.
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)

#else  // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

// Rotates x left by r bits. The count is reduced modulo 64 and the
// complementary right shift is masked as well, so a count of 0 (or any
// out-of-range count) never produces a shift by the full width of the type,
// which would be undefined behaviour. Counts in 1..63 give the same result as
// the plain (x << r) | (x >> (64 - r)) formulation.
inline uint64_t rotl64(uint64_t x, int8_t r) {
  const unsigned shift = static_cast<unsigned>(r) & 63u;
  return (x << shift) | (x >> ((64u - shift) & 63u));
}
#define ROTL64(x, y) rotl64(x, y)

#endif  // !defined(_MSC_VER)

#define BIG_CONSTANT(x) (x##LLU)
45
46 //-----------------------------------------------------------------------------
47 // Block read - if your platform needs to do endian-swapping or can only
48 // handle aligned reads, do the conversion here
49
getblock32(const uint32_t * p,int i)50 FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
51
getblock64(const uint64_t * p,int i)52 FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
53
54 //-----------------------------------------------------------------------------
55 // Finalization mix - force all bits of a hash block to avalanche
56
fmix32(uint32_t h)57 FORCE_INLINE uint32_t fmix32(uint32_t h) {
58 h ^= h >> 16;
59 h *= 0x85ebca6b;
60 h ^= h >> 13;
61 h *= 0xc2b2ae35;
62 h ^= h >> 16;
63
64 return h;
65 }
66
67 //----------
68
fmix64(uint64_t k)69 FORCE_INLINE uint64_t fmix64(uint64_t k) {
70 k ^= k >> 33;
71 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
72 k ^= k >> 33;
73 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
74 k ^= k >> 33;
75
76 return k;
77 }
78
79 //-----------------------------------------------------------------------------
80
Hash_x64_128(const void * key,const int len,const uint32_t seed,uint64_t out[2])81 void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
82 const uint8_t* data = (const uint8_t*)key;
83 const int nblocks = len / 16;
84
85 uint64_t h1 = seed;
86 uint64_t h2 = seed;
87
88 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
89 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
90
91 //----------
92 // body
93
94 const uint64_t* blocks = (const uint64_t*)(data);
95
96 for (int i = 0; i < nblocks; i++) {
97 uint64_t k1 = getblock64(blocks, i * 2 + 0);
98 uint64_t k2 = getblock64(blocks, i * 2 + 1);
99
100 k1 *= c1;
101 k1 = ROTL64(k1, 31);
102 k1 *= c2;
103 h1 ^= k1;
104
105 h1 = ROTL64(h1, 27);
106 h1 += h2;
107 h1 = h1 * 5 + 0x52dce729;
108
109 k2 *= c2;
110 k2 = ROTL64(k2, 33);
111 k2 *= c1;
112 h2 ^= k2;
113
114 h2 = ROTL64(h2, 31);
115 h2 += h1;
116 h2 = h2 * 5 + 0x38495ab5;
117 }
118
119 //----------
120 // tail
121
122 const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
123
124 uint64_t k1 = 0;
125 uint64_t k2 = 0;
126
127 switch (len & 15) {
128 case 15:
129 k2 ^= ((uint64_t)tail[14]) << 48; // fall through
130 case 14:
131 k2 ^= ((uint64_t)tail[13]) << 40; // fall through
132 case 13:
133 k2 ^= ((uint64_t)tail[12]) << 32; // fall through
134 case 12:
135 k2 ^= ((uint64_t)tail[11]) << 24; // fall through
136 case 11:
137 k2 ^= ((uint64_t)tail[10]) << 16; // fall through
138 case 10:
139 k2 ^= ((uint64_t)tail[9]) << 8; // fall through
140 case 9:
141 k2 ^= ((uint64_t)tail[8]) << 0;
142 k2 *= c2;
143 k2 = ROTL64(k2, 33);
144 k2 *= c1;
145 h2 ^= k2; // fall through
146
147 case 8:
148 k1 ^= ((uint64_t)tail[7]) << 56; // fall through
149 case 7:
150 k1 ^= ((uint64_t)tail[6]) << 48; // fall through
151 case 6:
152 k1 ^= ((uint64_t)tail[5]) << 40; // fall through
153 case 5:
154 k1 ^= ((uint64_t)tail[4]) << 32; // fall through
155 case 4:
156 k1 ^= ((uint64_t)tail[3]) << 24; // fall through
157 case 3:
158 k1 ^= ((uint64_t)tail[2]) << 16; // fall through
159 case 2:
160 k1 ^= ((uint64_t)tail[1]) << 8; // fall through
161 case 1:
162 k1 ^= ((uint64_t)tail[0]) << 0;
163 k1 *= c1;
164 k1 = ROTL64(k1, 31);
165 k1 *= c2;
166 h1 ^= k1;
167 }
168
169 //----------
170 // finalization
171
172 h1 ^= len;
173 h2 ^= len;
174
175 h1 += h2;
176 h2 += h1;
177
178 h1 = fmix64(h1);
179 h2 = fmix64(h2);
180
181 h1 += h2;
182 h2 += h1;
183
184 reinterpret_cast<uint64_t*>(out)[0] = h1;
185 reinterpret_cast<uint64_t*>(out)[1] = h2;
186 }
187
188 template <typename T>
HashHelper(T value,uint32_t seed)189 uint64_t HashHelper(T value, uint32_t seed) {
190 uint64_t output[2];
191 Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
192 return output[0];
193 }
194
Hash(int32_t value) const195 uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
196
Hash(int64_t value) const197 uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
198
Hash(float value) const199 uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
200
Hash(double value) const201 uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
202
Hash(const FLBA * value,uint32_t len) const203 uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
204 uint64_t out[2];
205 Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
206 return out[0];
207 }
208
Hash(const Int96 * value) const209 uint64_t MurmurHash3::Hash(const Int96* value) const {
210 uint64_t out[2];
211 Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
212 out);
213 return out[0];
214 }
215
Hash(const ByteArray * value) const216 uint64_t MurmurHash3::Hash(const ByteArray* value) const {
217 uint64_t out[2];
218 Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
219 return out[0];
220 }
221
222 } // namespace parquet
223