/*
 *  Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com,
 *  Fast Positive Hash.
 *
 *  Portions Copyright (c) 2010-2018 Leonid Yuriev <leo@yuriev.ru>,
 *  The 1Hippeus project (t1h).
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty. In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, subject to the following restrictions:
 *
 *  1. The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgement in the product documentation would be
 *     appreciated but is not required.
 *  2. Altered source versions must be plainly marked as such, and must not be
 *     misrepresented as being the original software.
 *  3. This notice may not be removed or altered from any source distribution.
 */

/*
 * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
 * by [Positive Technologies](https://www.ptsecurity.ru)
 *
 * Briefly, it is a 64-bit hash function:
 *  1. Created for 64-bit little-endian platforms, predominantly for x86_64,
 *     but portable and able to run without penalty on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash,
 *     metro-hash and all other portable hash functions (i.e. those that do
 *     not use specific hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will be Positive. Всё будет хорошо (everything will be fine).
 *
 * ACKNOWLEDGEMENT:
 * t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
 * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
 */

#include "config.h"
#include "t1ha_bits.h"

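/* Set the first pair (a, b) of the 256-bit state. */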
static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.a = x;
  s->n.b = y;
}

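/* Set the second pair (c, d) of the 256-bit state by mixing x and y. */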
static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.c = rot64(y, 23) + ~x;
  s->n.d = ~y + rot64(x, 19);
}

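/* Core update: mix one 32-byte block (four 64-bit words) into the state. */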
/* TODO: C++ template in the next version */
#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v)                            \
  do {                                                                         \
    t1ha_state256_t *const s = state;                                          \
    const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0);               \
    const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1);               \
    const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2);               \
    const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3);               \
                                                                               \
    const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56);                          \
    const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19);                          \
    s->n.c ^= s->n.a + rot64(w0, 57);                                          \
    s->n.d ^= s->n.b + rot64(w1, 38);                                          \
    s->n.b ^= prime_6 * (c13 + w2);                                            \
    s->n.a ^= prime_5 * (d02 + w3);                                            \
  } while (0)

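/* Fold the 256-bit state (a, b, c, d) down to the 128-bit pair (a, b). */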
static __always_inline void squash(t1ha_state256_t *s) {
  s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23));
  s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}

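/* Bulk loop: consume 32-byte blocks while more than 31 bytes remain. */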
/* TODO: C++ template in the next version */
#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len)                      \
  do {                                                                         \
    const void *detent = (const uint8_t *)data + len - 31;                     \
    do {                                                                       \
      const uint64_t *v = (const uint64_t *)data;                              \
      data = (const uint64_t *)data + 4;                                       \
      prefetch(data);                                                          \
      T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v);                             \
    } while (likely(data < detent));                                           \
  } while (0)

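/* Consume the final 0..32 bytes and produce the 64-bit result; note that
 * this expands to a `return` from the enclosing function. */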
/* TODO: C++ template in the next version */
#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len)                   \
  do {                                                                         \
    t1ha_state256_t *const s = state;                                          \
    const uint64_t *v = (const uint64_t *)data;                                \
    switch (len) {                                                             \
    default:                                                                   \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_4);                                                        \
    /* fall through */                                                         \
    case 24:                                                                   \
    case 23:                                                                   \
    case 22:                                                                   \
    case 21:                                                                   \
    case 20:                                                                   \
    case 19:                                                                   \
    case 18:                                                                   \
    case 17:                                                                   \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_3);                                                        \
    /* fall through */                                                         \
    case 16:                                                                   \
    case 15:                                                                   \
    case 14:                                                                   \
    case 13:                                                                   \
    case 12:                                                                   \
    case 11:                                                                   \
    case 10:                                                                   \
    case 9:                                                                    \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_2);                                                        \
    /* fall through */                                                         \
    case 8:                                                                    \
    case 7:                                                                    \
    case 6:                                                                    \
    case 5:                                                                    \
    case 4:                                                                    \
    case 3:                                                                    \
    case 2:                                                                    \
    case 1:                                                                    \
      mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len),       \
              prime_1);                                                        \
    /* fall through */                                                         \
    case 0:                                                                    \
      return final64(s->n.a, s->n.b);                                          \
    }                                                                          \
  } while (0)

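/* Consume the final 0..32 bytes and produce the 128-bit result; expands to
 * a `return` and references `extra_result` from the enclosing scope. */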
/* TODO: C++ template in the next version */
#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len)                 \
  do {                                                                         \
    t1ha_state256_t *const s = state;                                          \
    const uint64_t *v = (const uint64_t *)data;                                \
    switch (len) {                                                             \
    default:                                                                   \
      mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_4);                                                        \
    /* fall through */                                                         \
    case 24:                                                                   \
    case 23:                                                                   \
    case 22:                                                                   \
    case 21:                                                                   \
    case 20:                                                                   \
    case 19:                                                                   \
    case 18:                                                                   \
    case 17:                                                                   \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_3);                                                        \
    /* fall through */                                                         \
    case 16:                                                                   \
    case 15:                                                                   \
    case 14:                                                                   \
    case 13:                                                                   \
    case 12:                                                                   \
    case 11:                                                                   \
    case 10:                                                                   \
    case 9:                                                                    \
      mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
              prime_2);                                                        \
    /* fall through */                                                         \
    case 8:                                                                    \
    case 7:                                                                    \
    case 6:                                                                    \
    case 5:                                                                    \
    case 4:                                                                    \
    case 3:                                                                    \
    case 2:                                                                    \
    case 1:                                                                    \
      mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len),       \
              prime_1);                                                        \
    /* fall through */                                                         \
    case 0:                                                                    \
      return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result);           \
    }                                                                          \
  } while (0)

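/* Final mixing of the four state words: returns the low 64 bits of the
 * 128-bit result and stores the high 64 bits into *h. */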
static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
                                         uint64_t d, uint64_t *h) {
  mixup64(&a, &b, rot64(c, 41) ^ d, prime_0);
  mixup64(&b, &c, rot64(d, 23) ^ a, prime_6);
  mixup64(&c, &d, rot64(a, 19) ^ b, prime_5);
  mixup64(&d, &a, rot64(b, 31) ^ c, prime_4);
  *h = c + d;
  return a ^ b;
}

//------------------------------------------------------------------------------

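/* One-shot 64-bit hash of a buffer. */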
uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);

#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
    init_cd(&state, seed, length);
    T1HA2_LOOP(le, unaligned, &state, data, length);
    squash(&state);
    length &= 31;
  }
  T1HA2_TAIL_AB(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, unaligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
      T1HA2_LOOP(le, aligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, aligned, &state, data, length);
  }
#endif
}

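/* One-shot 128-bit hash: the low 64 bits are returned, the high 64 bits
 * are stored into *extra_result. */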
uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
                         const void *__restrict data, size_t length,
                         uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);
  init_cd(&state, seed, length);

#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
    T1HA2_LOOP(le, unaligned, &state, data, length);
    length &= 31;
  }
  T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, unaligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      T1HA2_LOOP(le, aligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
  }
#endif
}

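/* A minimal usage sketch for the one-shot functions above (illustrative
 * only, excluded from the build; assumes the public declarations from the
 * library's t1ha.h header). */
#if 0
#include <inttypes.h>
#include <stdio.h>
#include "t1ha.h"

int main(void) {
  const char msg[] = "hello, t1ha";
  /* 64-bit one-shot hash with an arbitrary seed. */
  const uint64_t h64 = t1ha2_atonce(msg, sizeof(msg) - 1, 42);
  /* 128-bit one-shot hash: low half returned, high half stored into `hi`. */
  uint64_t hi;
  const uint64_t lo = t1ha2_atonce128(&hi, msg, sizeof(msg) - 1, 42);
  printf("t1ha2_atonce    = %016" PRIx64 "\n", h64);
  printf("t1ha2_atonce128 = %016" PRIx64 "%016" PRIx64 "\n", hi, lo);
  return 0;
}
#endif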
//------------------------------------------------------------------------------

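/* Streaming interface: initialize the context with a pair of seeds. */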
void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) {
  init_ab(&ctx->state, seed_x, seed_y);
  init_cd(&ctx->state, seed_x, seed_y);
  ctx->partial = 0;
  ctx->total = 0;
}

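/* Append data to a streaming hash, buffering partial 32-byte blocks. */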
void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
                  size_t length) {
  ctx->total += length;

  if (ctx->partial) {
    const size_t left = 32 - ctx->partial;
    const size_t chunk = (length >= left) ? left : length;
    memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
    ctx->partial += chunk;
    if (ctx->partial < 32) {
      assert(left >= length);
      return;
    }
    ctx->partial = 0;
    data = (const uint8_t *)data + chunk;
    length -= chunk;
    T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
  }

  if (length >= 32) {
#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
    T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
#else
    const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
    if (misaligned) {
      T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
    } else {
      T1HA2_LOOP(le, aligned, &ctx->state, data, length);
    }
#endif
    length &= 31;
  }

  if (length)
    memcpy(ctx->buffer.bytes, data, ctx->partial = length);
}

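/* Finalize a streaming hash: folds in the total length (in bits), then
 * yields the 64-bit result, or the 128-bit result when extra_result is
 * non-NULL. */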
uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
                     uint64_t *__restrict extra_result) {
  uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  bits = bswap64(bits);
#endif
  t1ha2_update(ctx, &bits, 8);

  if (likely(!extra_result)) {
    squash(&ctx->state);
    T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
  }

  T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
}
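
/* A minimal streaming sketch (illustrative only, excluded from the build):
 * hashing data that arrives in chunks via the incremental API above; the
 * seeds and the chunking are arbitrary. */
#if 0
#include <string.h>
#include "t1ha.h"

static uint64_t hash_two_parts(const char *part1, const char *part2) {
  t1ha_context_t ctx;
  t1ha2_init(&ctx, /* seed_x */ 1, /* seed_y */ 2);
  t1ha2_update(&ctx, part1, strlen(part1));
  t1ha2_update(&ctx, part2, strlen(part2));
  /* Pass NULL for the 64-bit result; pass a uint64_t* to also receive the
   * high 64 bits of the 128-bit result. */
  return t1ha2_final(&ctx, NULL);
}
#endif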