1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 //-----------------------------------------------------------------------------
19 // MurmurHash3 was written by Austin Appleby, and is placed in the public
20 // domain. The author hereby disclaims copyright to this source code.
21 
22 // Note - The x86 and x64 versions do _not_ produce the same results, as the
23 // algorithms are optimized for their respective platforms. You can still
24 // compile and run any of them on any platform, but your performance with the
25 // non-native version will be less than optimal.
26 
#include "parquet/murmur3.h"

#include <cstring>
28 
29 namespace parquet {
30 
// Compiler-specific helpers: FORCE_INLINE requests unconditional inlining and
// ROTL64(x, y) rotates the 64-bit value x left by y bits.
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)

#else  // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))
// Rotates x left by r bits. Both shift counts are reduced modulo 64 so that
// r == 0 (or any r outside [1, 63]) never produces a shift by 64 or more bits,
// which is undefined behavior for a 64-bit operand. For r in [1, 63] the
// result is the same as (x << r) | (x >> (64 - r)).
inline uint64_t rotl64(uint64_t x, int8_t r) {
  const unsigned int left = static_cast<unsigned int>(r) & 63u;
  const unsigned int right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
#define ROTL64(x, y) rotl64(x, y)

#endif  // !defined(_MSC_VER)

// Appends the unsigned long long suffix to a 64-bit integer literal.
#define BIG_CONSTANT(x) (x##LLU)
45 
46 //-----------------------------------------------------------------------------
47 // Block read - if your platform needs to do endian-swapping or can only
48 // handle aligned reads, do the conversion here
49 
getblock32(const uint32_t * p,int i)50 FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { return p[i]; }
51 
getblock64(const uint64_t * p,int i)52 FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { return p[i]; }
53 
54 //-----------------------------------------------------------------------------
55 // Finalization mix - force all bits of a hash block to avalanche
56 
fmix32(uint32_t h)57 FORCE_INLINE uint32_t fmix32(uint32_t h) {
58   h ^= h >> 16;
59   h *= 0x85ebca6b;
60   h ^= h >> 13;
61   h *= 0xc2b2ae35;
62   h ^= h >> 16;
63 
64   return h;
65 }
66 
67 //----------
68 
fmix64(uint64_t k)69 FORCE_INLINE uint64_t fmix64(uint64_t k) {
70   k ^= k >> 33;
71   k *= BIG_CONSTANT(0xff51afd7ed558ccd);
72   k ^= k >> 33;
73   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
74   k ^= k >> 33;
75 
76   return k;
77 }
78 
79 //-----------------------------------------------------------------------------
80 
Hash_x64_128(const void * key,const int len,const uint32_t seed,uint64_t out[2])81 void Hash_x64_128(const void* key, const int len, const uint32_t seed, uint64_t out[2]) {
82   const uint8_t* data = (const uint8_t*)key;
83   const int nblocks = len / 16;
84 
85   uint64_t h1 = seed;
86   uint64_t h2 = seed;
87 
88   const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
89   const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
90 
91   //----------
92   // body
93 
94   const uint64_t* blocks = (const uint64_t*)(data);
95 
96   for (int i = 0; i < nblocks; i++) {
97     uint64_t k1 = getblock64(blocks, i * 2 + 0);
98     uint64_t k2 = getblock64(blocks, i * 2 + 1);
99 
100     k1 *= c1;
101     k1 = ROTL64(k1, 31);
102     k1 *= c2;
103     h1 ^= k1;
104 
105     h1 = ROTL64(h1, 27);
106     h1 += h2;
107     h1 = h1 * 5 + 0x52dce729;
108 
109     k2 *= c2;
110     k2 = ROTL64(k2, 33);
111     k2 *= c1;
112     h2 ^= k2;
113 
114     h2 = ROTL64(h2, 31);
115     h2 += h1;
116     h2 = h2 * 5 + 0x38495ab5;
117   }
118 
119   //----------
120   // tail
121 
122   const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);
123 
124   uint64_t k1 = 0;
125   uint64_t k2 = 0;
126 
127   switch (len & 15) {
128     case 15:
129       k2 ^= ((uint64_t)tail[14]) << 48;  // fall through
130     case 14:
131       k2 ^= ((uint64_t)tail[13]) << 40;  // fall through
132     case 13:
133       k2 ^= ((uint64_t)tail[12]) << 32;  // fall through
134     case 12:
135       k2 ^= ((uint64_t)tail[11]) << 24;  // fall through
136     case 11:
137       k2 ^= ((uint64_t)tail[10]) << 16;  // fall through
138     case 10:
139       k2 ^= ((uint64_t)tail[9]) << 8;  // fall through
140     case 9:
141       k2 ^= ((uint64_t)tail[8]) << 0;
142       k2 *= c2;
143       k2 = ROTL64(k2, 33);
144       k2 *= c1;
145       h2 ^= k2;  // fall through
146 
147     case 8:
148       k1 ^= ((uint64_t)tail[7]) << 56;  // fall through
149     case 7:
150       k1 ^= ((uint64_t)tail[6]) << 48;  // fall through
151     case 6:
152       k1 ^= ((uint64_t)tail[5]) << 40;  // fall through
153     case 5:
154       k1 ^= ((uint64_t)tail[4]) << 32;  // fall through
155     case 4:
156       k1 ^= ((uint64_t)tail[3]) << 24;  // fall through
157     case 3:
158       k1 ^= ((uint64_t)tail[2]) << 16;  // fall through
159     case 2:
160       k1 ^= ((uint64_t)tail[1]) << 8;  // fall through
161     case 1:
162       k1 ^= ((uint64_t)tail[0]) << 0;
163       k1 *= c1;
164       k1 = ROTL64(k1, 31);
165       k1 *= c2;
166       h1 ^= k1;
167   }
168 
169   //----------
170   // finalization
171 
172   h1 ^= len;
173   h2 ^= len;
174 
175   h1 += h2;
176   h2 += h1;
177 
178   h1 = fmix64(h1);
179   h2 = fmix64(h2);
180 
181   h1 += h2;
182   h2 += h1;
183 
184   reinterpret_cast<uint64_t*>(out)[0] = h1;
185   reinterpret_cast<uint64_t*>(out)[1] = h2;
186 }
187 
188 template <typename T>
HashHelper(T value,uint32_t seed)189 uint64_t HashHelper(T value, uint32_t seed) {
190   uint64_t output[2];
191   Hash_x64_128(reinterpret_cast<void*>(&value), sizeof(T), seed, output);
192   return output[0];
193 }
194 
Hash(int32_t value) const195 uint64_t MurmurHash3::Hash(int32_t value) const { return HashHelper(value, seed_); }
196 
Hash(int64_t value) const197 uint64_t MurmurHash3::Hash(int64_t value) const { return HashHelper(value, seed_); }
198 
Hash(float value) const199 uint64_t MurmurHash3::Hash(float value) const { return HashHelper(value, seed_); }
200 
Hash(double value) const201 uint64_t MurmurHash3::Hash(double value) const { return HashHelper(value, seed_); }
202 
Hash(const FLBA * value,uint32_t len) const203 uint64_t MurmurHash3::Hash(const FLBA* value, uint32_t len) const {
204   uint64_t out[2];
205   Hash_x64_128(reinterpret_cast<const void*>(value->ptr), len, seed_, out);
206   return out[0];
207 }
208 
Hash(const Int96 * value) const209 uint64_t MurmurHash3::Hash(const Int96* value) const {
210   uint64_t out[2];
211   Hash_x64_128(reinterpret_cast<const void*>(value->value), sizeof(value->value), seed_,
212                out);
213   return out[0];
214 }
215 
Hash(const ByteArray * value) const216 uint64_t MurmurHash3::Hash(const ByteArray* value) const {
217   uint64_t out[2];
218   Hash_x64_128(reinterpret_cast<const void*>(value->ptr), value->len, seed_, out);
219   return out[0];
220 }
221 
222 }  // namespace parquet
223