xref: /linux/include/linux/xxhash.h (revision 7f317d34)
15d240522SNick Terrell /*
25d240522SNick Terrell  * xxHash - Extremely Fast Hash algorithm
35d240522SNick Terrell  * Copyright (C) 2012-2016, Yann Collet.
45d240522SNick Terrell  *
55d240522SNick Terrell  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
65d240522SNick Terrell  *
75d240522SNick Terrell  * Redistribution and use in source and binary forms, with or without
85d240522SNick Terrell  * modification, are permitted provided that the following conditions are
95d240522SNick Terrell  * met:
105d240522SNick Terrell  *
115d240522SNick Terrell  *   * Redistributions of source code must retain the above copyright
125d240522SNick Terrell  *     notice, this list of conditions and the following disclaimer.
135d240522SNick Terrell  *   * Redistributions in binary form must reproduce the above
145d240522SNick Terrell  *     copyright notice, this list of conditions and the following disclaimer
155d240522SNick Terrell  *     in the documentation and/or other materials provided with the
165d240522SNick Terrell  *     distribution.
175d240522SNick Terrell  *
185d240522SNick Terrell  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
195d240522SNick Terrell  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
205d240522SNick Terrell  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
215d240522SNick Terrell  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
225d240522SNick Terrell  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
235d240522SNick Terrell  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
245d240522SNick Terrell  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
255d240522SNick Terrell  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
265d240522SNick Terrell  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
275d240522SNick Terrell  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
285d240522SNick Terrell  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
295d240522SNick Terrell  *
305d240522SNick Terrell  * This program is free software; you can redistribute it and/or modify it under
315d240522SNick Terrell  * the terms of the GNU General Public License version 2 as published by the
325d240522SNick Terrell  * Free Software Foundation. This program is dual-licensed; you may select
335d240522SNick Terrell  * either version 2 of the GNU General Public License ("GPL") or BSD license
345d240522SNick Terrell  * ("BSD").
355d240522SNick Terrell  *
365d240522SNick Terrell  * You can contact the author at:
37*7f317d34SAlexander A. Klimov  * - xxHash homepage: https://cyan4973.github.io/xxHash/
385d240522SNick Terrell  * - xxHash source repository: https://github.com/Cyan4973/xxHash
395d240522SNick Terrell  */
405d240522SNick Terrell 
415d240522SNick Terrell /*
425d240522SNick Terrell  * Notice extracted from xxHash homepage:
435d240522SNick Terrell  *
445d240522SNick Terrell  * xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
455d240522SNick Terrell  * It also successfully passes all tests from the SMHasher suite.
465d240522SNick Terrell  *
475d240522SNick Terrell  * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
485d240522SNick Terrell  * Duo @3GHz)
495d240522SNick Terrell  *
505d240522SNick Terrell  * Name            Speed       Q.Score   Author
515d240522SNick Terrell  * xxHash          5.4 GB/s     10
525d240522SNick Terrell  * CrapWow         3.2 GB/s      2       Andrew
535d240522SNick Terrell  * MumurHash 3a    2.7 GB/s     10       Austin Appleby
545d240522SNick Terrell  * SpookyHash      2.0 GB/s     10       Bob Jenkins
555d240522SNick Terrell  * SBox            1.4 GB/s      9       Bret Mulvey
565d240522SNick Terrell  * Lookup3         1.2 GB/s      9       Bob Jenkins
575d240522SNick Terrell  * SuperFastHash   1.2 GB/s      1       Paul Hsieh
585d240522SNick Terrell  * CityHash64      1.05 GB/s    10       Pike & Alakuijala
595d240522SNick Terrell  * FNV             0.55 GB/s     5       Fowler, Noll, Vo
605d240522SNick Terrell  * CRC32           0.43 GB/s     9
615d240522SNick Terrell  * MD5-32          0.33 GB/s    10       Ronald L. Rivest
625d240522SNick Terrell  * SHA1-32         0.28 GB/s    10
635d240522SNick Terrell  *
645d240522SNick Terrell  * Q.Score is a measure of quality of the hash function.
655d240522SNick Terrell  * It depends on successfully passing SMHasher test set.
665d240522SNick Terrell  * 10 is a perfect score.
675d240522SNick Terrell  *
685d240522SNick Terrell  * A 64-bits version, named xxh64 offers much better speed,
695d240522SNick Terrell  * but for 64-bits applications only.
705d240522SNick Terrell  * Name     Speed on 64 bits    Speed on 32 bits
715d240522SNick Terrell  * xxh64       13.8 GB/s            1.9 GB/s
725d240522SNick Terrell  * xxh32        6.8 GB/s            6.0 GB/s
735d240522SNick Terrell  */
745d240522SNick Terrell 
755d240522SNick Terrell #ifndef XXHASH_H
765d240522SNick Terrell #define XXHASH_H
775d240522SNick Terrell 
785d240522SNick Terrell #include <linux/types.h>
795d240522SNick Terrell 
805d240522SNick Terrell /*-****************************
815d240522SNick Terrell  * Simple Hash Functions
825d240522SNick Terrell  *****************************/
835d240522SNick Terrell 
845d240522SNick Terrell /**
855d240522SNick Terrell  * xxh32() - calculate the 32-bit hash of the input with a given seed.
865d240522SNick Terrell  *
875d240522SNick Terrell  * @input:  The data to hash.
885d240522SNick Terrell  * @length: The length of the data to hash.
895d240522SNick Terrell  * @seed:   The seed can be used to alter the result predictably.
905d240522SNick Terrell  *
915d240522SNick Terrell  * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
925d240522SNick Terrell  *
935d240522SNick Terrell  * Return:  The 32-bit hash of the data.
945d240522SNick Terrell  */
955d240522SNick Terrell uint32_t xxh32(const void *input, size_t length, uint32_t seed);
965d240522SNick Terrell 
975d240522SNick Terrell /**
985d240522SNick Terrell  * xxh64() - calculate the 64-bit hash of the input with a given seed.
995d240522SNick Terrell  *
1005d240522SNick Terrell  * @input:  The data to hash.
1015d240522SNick Terrell  * @length: The length of the data to hash.
1025d240522SNick Terrell  * @seed:   The seed can be used to alter the result predictably.
1035d240522SNick Terrell  *
1045d240522SNick Terrell  * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
1055d240522SNick Terrell  *
1065d240522SNick Terrell  * Return:  The 64-bit hash of the data.
1075d240522SNick Terrell  */
1085d240522SNick Terrell uint64_t xxh64(const void *input, size_t length, uint64_t seed);
1095d240522SNick Terrell 
1100b9df58bSTimofey Titovets /**
1110b9df58bSTimofey Titovets  * xxhash() - calculate wordsize hash of the input with a given seed
1120b9df58bSTimofey Titovets  * @input:  The data to hash.
1130b9df58bSTimofey Titovets  * @length: The length of the data to hash.
1140b9df58bSTimofey Titovets  * @seed:   The seed can be used to alter the result predictably.
1150b9df58bSTimofey Titovets  *
1160b9df58bSTimofey Titovets  * If the hash does not need to be comparable between machines with
1170b9df58bSTimofey Titovets  * different word sizes, this function will call whichever of xxh32()
1180b9df58bSTimofey Titovets  * or xxh64() is faster.
1190b9df58bSTimofey Titovets  *
1200b9df58bSTimofey Titovets  * Return:  wordsize hash of the data.
1210b9df58bSTimofey Titovets  */
1220b9df58bSTimofey Titovets 
xxhash(const void * input,size_t length,uint64_t seed)1230b9df58bSTimofey Titovets static inline unsigned long xxhash(const void *input, size_t length,
1240b9df58bSTimofey Titovets 				   uint64_t seed)
1250b9df58bSTimofey Titovets {
1260b9df58bSTimofey Titovets #if BITS_PER_LONG == 64
1270b9df58bSTimofey Titovets        return xxh64(input, length, seed);
1280b9df58bSTimofey Titovets #else
1290b9df58bSTimofey Titovets        return xxh32(input, length, seed);
1300b9df58bSTimofey Titovets #endif
1310b9df58bSTimofey Titovets }
1320b9df58bSTimofey Titovets 
1335d240522SNick Terrell /*-****************************
1345d240522SNick Terrell  * Streaming Hash Functions
1355d240522SNick Terrell  *****************************/
1365d240522SNick Terrell 
1375d240522SNick Terrell /*
1385d240522SNick Terrell  * These definitions are only meant to allow allocation of XXH state
1395d240522SNick Terrell  * statically, on stack, or in a struct for example.
1405d240522SNick Terrell  * Do not use members directly.
1415d240522SNick Terrell  */
1425d240522SNick Terrell 
1435d240522SNick Terrell /**
1445d240522SNick Terrell  * struct xxh32_state - private xxh32 state, do not use members directly
1455d240522SNick Terrell  */
1465d240522SNick Terrell struct xxh32_state {
1475d240522SNick Terrell 	uint32_t total_len_32;
1485d240522SNick Terrell 	uint32_t large_len;
1495d240522SNick Terrell 	uint32_t v1;
1505d240522SNick Terrell 	uint32_t v2;
1515d240522SNick Terrell 	uint32_t v3;
1525d240522SNick Terrell 	uint32_t v4;
1535d240522SNick Terrell 	uint32_t mem32[4];
1545d240522SNick Terrell 	uint32_t memsize;
1555d240522SNick Terrell };
1565d240522SNick Terrell 
1575d240522SNick Terrell /**
1585d240522SNick Terrell  * struct xxh32_state - private xxh64 state, do not use members directly
1595d240522SNick Terrell  */
1605d240522SNick Terrell struct xxh64_state {
1615d240522SNick Terrell 	uint64_t total_len;
1625d240522SNick Terrell 	uint64_t v1;
1635d240522SNick Terrell 	uint64_t v2;
1645d240522SNick Terrell 	uint64_t v3;
1655d240522SNick Terrell 	uint64_t v4;
1665d240522SNick Terrell 	uint64_t mem64[4];
1675d240522SNick Terrell 	uint32_t memsize;
1685d240522SNick Terrell };
1695d240522SNick Terrell 
1705d240522SNick Terrell /**
1715d240522SNick Terrell  * xxh32_reset() - reset the xxh32 state to start a new hashing operation
1725d240522SNick Terrell  *
1735d240522SNick Terrell  * @state: The xxh32 state to reset.
1745d240522SNick Terrell  * @seed:  Initialize the hash state with this seed.
1755d240522SNick Terrell  *
1765d240522SNick Terrell  * Call this function on any xxh32_state to prepare for a new hashing operation.
1775d240522SNick Terrell  */
1785d240522SNick Terrell void xxh32_reset(struct xxh32_state *state, uint32_t seed);
1795d240522SNick Terrell 
1805d240522SNick Terrell /**
1815d240522SNick Terrell  * xxh32_update() - hash the data given and update the xxh32 state
1825d240522SNick Terrell  *
1835d240522SNick Terrell  * @state:  The xxh32 state to update.
1845d240522SNick Terrell  * @input:  The data to hash.
1855d240522SNick Terrell  * @length: The length of the data to hash.
1865d240522SNick Terrell  *
1875d240522SNick Terrell  * After calling xxh32_reset() call xxh32_update() as many times as necessary.
1885d240522SNick Terrell  *
1895d240522SNick Terrell  * Return:  Zero on success, otherwise an error code.
1905d240522SNick Terrell  */
1915d240522SNick Terrell int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
1925d240522SNick Terrell 
1935d240522SNick Terrell /**
1945d240522SNick Terrell  * xxh32_digest() - produce the current xxh32 hash
1955d240522SNick Terrell  *
1965d240522SNick Terrell  * @state: Produce the current xxh32 hash of this state.
1975d240522SNick Terrell  *
1985d240522SNick Terrell  * A hash value can be produced at any time. It is still possible to continue
1995d240522SNick Terrell  * inserting input into the hash state after a call to xxh32_digest(), and
2005d240522SNick Terrell  * generate new hashes later on, by calling xxh32_digest() again.
2015d240522SNick Terrell  *
2025d240522SNick Terrell  * Return: The xxh32 hash stored in the state.
2035d240522SNick Terrell  */
2045d240522SNick Terrell uint32_t xxh32_digest(const struct xxh32_state *state);
2055d240522SNick Terrell 
2065d240522SNick Terrell /**
2075d240522SNick Terrell  * xxh64_reset() - reset the xxh64 state to start a new hashing operation
2085d240522SNick Terrell  *
2095d240522SNick Terrell  * @state: The xxh64 state to reset.
2105d240522SNick Terrell  * @seed:  Initialize the hash state with this seed.
2115d240522SNick Terrell  */
2125d240522SNick Terrell void xxh64_reset(struct xxh64_state *state, uint64_t seed);
2135d240522SNick Terrell 
2145d240522SNick Terrell /**
2155d240522SNick Terrell  * xxh64_update() - hash the data given and update the xxh64 state
2165d240522SNick Terrell  * @state:  The xxh64 state to update.
2175d240522SNick Terrell  * @input:  The data to hash.
2185d240522SNick Terrell  * @length: The length of the data to hash.
2195d240522SNick Terrell  *
2205d240522SNick Terrell  * After calling xxh64_reset() call xxh64_update() as many times as necessary.
2215d240522SNick Terrell  *
2225d240522SNick Terrell  * Return:  Zero on success, otherwise an error code.
2235d240522SNick Terrell  */
2245d240522SNick Terrell int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
2255d240522SNick Terrell 
2265d240522SNick Terrell /**
2275d240522SNick Terrell  * xxh64_digest() - produce the current xxh64 hash
2285d240522SNick Terrell  *
2295d240522SNick Terrell  * @state: Produce the current xxh64 hash of this state.
2305d240522SNick Terrell  *
2315d240522SNick Terrell  * A hash value can be produced at any time. It is still possible to continue
2325d240522SNick Terrell  * inserting input into the hash state after a call to xxh64_digest(), and
2335d240522SNick Terrell  * generate new hashes later on, by calling xxh64_digest() again.
2345d240522SNick Terrell  *
2355d240522SNick Terrell  * Return: The xxh64 hash stored in the state.
2365d240522SNick Terrell  */
2375d240522SNick Terrell uint64_t xxh64_digest(const struct xxh64_state *state);
2385d240522SNick Terrell 
2395d240522SNick Terrell /*-**************************
2405d240522SNick Terrell  * Utils
2415d240522SNick Terrell  ***************************/
2425d240522SNick Terrell 
2435d240522SNick Terrell /**
2445d240522SNick Terrell  * xxh32_copy_state() - copy the source state into the destination state
2455d240522SNick Terrell  *
2465d240522SNick Terrell  * @src: The source xxh32 state.
2475d240522SNick Terrell  * @dst: The destination xxh32 state.
2485d240522SNick Terrell  */
2495d240522SNick Terrell void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
2505d240522SNick Terrell 
2515d240522SNick Terrell /**
2525d240522SNick Terrell  * xxh64_copy_state() - copy the source state into the destination state
2535d240522SNick Terrell  *
2545d240522SNick Terrell  * @src: The source xxh64 state.
2555d240522SNick Terrell  * @dst: The destination xxh64 state.
2565d240522SNick Terrell  */
2575d240522SNick Terrell void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
2585d240522SNick Terrell 
2595d240522SNick Terrell #endif /* XXHASH_H */
260