/*
 * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
 * Fast Positive Hash.
 *
 * Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
 * The 1Hippeus project (t1h).
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgement in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/*
 * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
 * by [Positive Technologies](https://www.ptsecurity.ru)
 *
 * Briefly, it is a 64-bit hash function:
 * 1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *    but portable, and it runs without penalty on any 64-bit CPU.
 * 2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *    metro-hash and all other portable hash functions (those that do not
 *    rely on hardware-specific tricks).
 * 3. Not suitable for cryptography.
 *
 * The Future will be Positive. Everything will be fine.
 *
 * ACKNOWLEDGEMENT:
 * t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
 * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
 */

#include "config.h"
#include "t1ha_bits.h"

/* Seeds the first pair of 64-bit state words directly from the seed. */
static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.a = x;
  s->n.b = y;
}

/* Seeds the second pair of 64-bit state words from the same seed material,
 * decorrelated by rotations and complements. */
static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.c = rot64(y, 23) + ~x;
  s->n.d = ~y + rot64(x, 19);
}

/* TODO: C++ template in the next version */
/* Mixes one 32-byte block (four 64-bit words) into the 256-bit state. */
#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v)                           \
  do {                                                                        \
    t1ha_state256_t *const s = state;                                         \
    const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0);              \
    const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1);              \
    const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2);              \
    const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3);              \
                                                                              \
    const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56);                         \
    const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19);                         \
    s->n.c ^= s->n.a + rot64(w0, 57);                                         \
    s->n.d ^= s->n.b + rot64(w1, 38);                                         \
    s->n.b ^= prime_6 * (c13 + w2);                                           \
    s->n.a ^= prime_5 * (d02 + w3);                                           \
  } while (0)

/* Folds the c/d state words into a/b before producing a 64-bit result. */
static __always_inline void squash(t1ha_state256_t *s) {
  s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23));
  s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}

/* TODO: C++ template in the next version */
/* Consumes full 32-byte blocks while more than 31 bytes remain. */
#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len)                     \
  do {                                                                        \
    const void *detent = (const uint8_t *)data + len - 31;                    \
    do {                                                                      \
      const uint64_t *v = (const uint64_t *)data;                             \
      data = (const uint64_t *)data + 4;                                      \
      prefetch(data);                                                         \
      T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v);                            \
    } while (likely(data < detent));                                          \
  } while (0)

/* TODO: C++ template in the next version */
/* Absorbs the final 0..32 bytes and returns the 64-bit result. */
#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len)                  \
  do {                                                                        \
    t1ha_state256_t *const s = state;                                         \
    const uint64_t *v = (const uint64_t *)data;                               \
    switch (len) {                                                            \
    default:                                                                  \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_4);                                                       \
    /* fall through */                                                        \
    case 24:                                                                  \
    case 23:                                                                  \
    case 22:                                                                  \
    case 21:                                                                  \
    case 20:                                                                  \
    case 19:                                                                  \
    case 18:                                                                  \
    case 17:                                                                  \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_3);                                                       \
    /* fall through */                                                        \
    case 16:                                                                  \
    case 15:                                                                  \
    case 14:                                                                  \
    case 13:                                                                  \
    case 12:                                                                  \
    case 11:                                                                  \
    case 10:                                                                  \
    case 9:                                                                   \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_2);                                                       \
    /* fall through */                                                        \
    case 8:                                                                   \
    case 7:                                                                   \
    case 6:                                                                   \
    case 5:                                                                   \
    case 4:                                                                   \
    case 3:                                                                   \
    case 2:                                                                   \
    case 1:                                                                   \
      mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len),      \
              prime_1);                                                       \
    /* fall through */                                                        \
    case 0:                                                                   \
      return final64(s->n.a, s->n.b);                                         \
    }                                                                         \
  } while (0)

/* TODO: C++ template in the next version */
/* Absorbs the final 0..32 bytes and returns the 128-bit result
 * via final128(). */
#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len)                \
  do {                                                                        \
    t1ha_state256_t *const s = state;                                         \
    const uint64_t *v = (const uint64_t *)data;                               \
    switch (len) {                                                            \
    default:                                                                  \
      mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_4);                                                       \
    /* fall through */                                                        \
    case 24:                                                                  \
    case 23:                                                                  \
    case 22:                                                                  \
    case 21:                                                                  \
    case 20:                                                                  \
    case 19:                                                                  \
    case 18:                                                                  \
    case 17:                                                                  \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_3);                                                       \
    /* fall through */                                                        \
    case 16:                                                                  \
    case 15:                                                                  \
    case 14:                                                                  \
    case 13:                                                                  \
    case 12:                                                                  \
    case 11:                                                                  \
    case 10:                                                                  \
    case 9:                                                                   \
      mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),        \
              prime_2);                                                       \
    /* fall through */                                                        \
    case 8:                                                                   \
    case 7:                                                                   \
    case 6:                                                                   \
    case 5:                                                                   \
    case 4:                                                                   \
    case 3:                                                                   \
    case 2:                                                                   \
    case 1:                                                                   \
      mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len),      \
              prime_1);                                                       \
    /* fall through */                                                        \
    case 0:                                                                   \
      return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result);          \
    }                                                                         \
  } while (0)

/* Condenses the four state words into a 128-bit result: one 64-bit half is
 * returned, the other is stored through *h. */
static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
                                         uint64_t d, uint64_t *h) {
  mixup64(&a, &b, rot64(c, 41) ^ d, prime_0);
  mixup64(&b, &c, rot64(d, 23) ^ a, prime_6);
  mixup64(&c, &d, rot64(a, 19) ^ b, prime_5);
  mixup64(&d, &a, rot64(b, 31) ^ c, prime_4);
  *h = c + d;
  return a ^ b;
}

//------------------------------------------------------------------------------

uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);

#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
    init_cd(&state, seed, length);
    T1HA2_LOOP(le, unaligned, &state, data, length);
    squash(&state);
    length &= 31;
  }
  T1HA2_TAIL_AB(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, unaligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, aligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, aligned, &state, data, length);
  }
#endif
}
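
/* Illustrative usage of t1ha2_atonce() (a sketch, compiled out with #if 0;
 * it assumes the public prototype is available via the library's t1ha.h
 * header - everything else matches the definition above): */
#if 0
#include <stdio.h>
#include <string.h>
#include "t1ha.h"

static void example_atonce(void) {
  const char *msg = "hello, t1ha";
  /* Hash the payload bytes (without the terminating NUL) with seed 42. */
  const uint64_t h = t1ha2_atonce(msg, strlen(msg), UINT64_C(42));
  printf("t1ha2_atonce: %016llx\n", (unsigned long long)h);
}
#endif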

uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
                         const void *__restrict data, size_t length,
                         uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);
  init_cd(&state, seed, length);

#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
    T1HA2_LOOP(le, unaligned, &state, data, length);
    length &= 31;
  }
  T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, unaligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, aligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
  }
#endif
}
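
/* Illustrative usage of t1ha2_atonce128() (a sketch, compiled out): the
 * 128-bit result arrives as two 64-bit words, one returned and one written
 * through the extra_result out-parameter. Assumes t1ha.h declares the
 * prototype. */
#if 0
#include <stdio.h>
#include <string.h>
#include "t1ha.h"

static void example_atonce128(void) {
  const char *msg = "hello, t1ha";
  uint64_t extra = 0;
  const uint64_t h = t1ha2_atonce128(&extra, msg, strlen(msg), UINT64_C(42));
  printf("t1ha2_atonce128: %016llx %016llx\n", (unsigned long long)h,
         (unsigned long long)extra);
}
#endif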

//------------------------------------------------------------------------------

void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) {
  init_ab(&ctx->state, seed_x, seed_y);
  init_cd(&ctx->state, seed_x, seed_y);
  ctx->partial = 0;
  ctx->total = 0;
}

void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
                  size_t length) {
  ctx->total += length;

  if (ctx->partial) {
    /* Top up the 32-byte buffer left over from the previous call. */
    const size_t left = 32 - ctx->partial;
    const size_t chunk = (length >= left) ? left : length;
    memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
    ctx->partial += chunk;
    if (ctx->partial < 32) {
      assert(left >= length);
      return;
    }
    ctx->partial = 0;
    data = (const uint8_t *)data + chunk;
    length -= chunk;
    T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
  }

  if (length >= 32) {
#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
    T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
#else
    const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
    if (misaligned) {
      T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
    } else {
      T1HA2_LOOP(le, aligned, &ctx->state, data, length);
    }
#endif
    length &= 31;
  }

  if (length)
    memcpy(ctx->buffer.bytes, data, ctx->partial = length);
}

uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
                     uint64_t *__restrict extra_result) {
  /* Absorb the total input length in bits, with the top bit flipped, to
   * finalize the stream. */
  uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  bits = bswap64(bits);
#endif
  t1ha2_update(ctx, &bits, 8);

  if (likely(!extra_result)) {
    /* T1HA2_TAIL_AB returns the 64-bit result from inside the macro. */
    squash(&ctx->state);
    T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
  }

  T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
}
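
/* Illustrative streaming usage (a sketch, compiled out): data may be fed in
 * arbitrary-sized chunks; the result depends only on the concatenated byte
 * sequence, not on how it was split. Assumes t1ha.h declares the
 * prototypes. */
#if 0
#include <stdio.h>
#include "t1ha.h"

static void example_streaming(void) {
  t1ha_context_t ctx;
  t1ha2_init(&ctx, /* seed_x */ UINT64_C(42), /* seed_y */ 0);
  t1ha2_update(&ctx, "hello, ", 7);
  t1ha2_update(&ctx, "t1ha", 4);
  /* Pass NULL for a 64-bit result, or a uint64_t* to also get the
   * second half of a 128-bit result. */
  const uint64_t h = t1ha2_final(&ctx, NULL);
  printf("t1ha2 streaming: %016llx\n", (unsigned long long)h);
}
#endif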
327