1 /* This is a small program used in order to understand the collision rate
2  * of CRC64 (ISO version) VS other stronger hashing functions in the context
3  * of hashing keys for the Redis "tracking" feature (client side caching
4  * assisted by the server).
5  *
6  * The program attempts to hash keys with common names in the form of
7  *
8  *  prefix:<counter>
9  *
10  * And counts the resulting collisions generated in the 24 bits of output
11  * needed for the tracking feature invalidation table (16 millions + entries)
12  *
13  * Compile with:
14  *
15  *  cc -O2 ./tracking_collisions.c ../src/crc64.c ../src/sha1.c
16  *  ./a.out
17  *
18  * --------------------------------------------------------------------------
19  *
20  * Copyright (C) 2019 Salvatore Sanfilippo
21  * This code is released under the BSD 2 clause license.
22  */
23 
24 #include <stdlib.h>
25 #include <stdint.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include "../src/crc64.h"
29 #include "../src/sha1.h"
30 
31 #define TABLE_SIZE (1<<24)
32 int Table[TABLE_SIZE];
33 
crc64Hash(char * key,size_t len)34 uint64_t crc64Hash(char *key, size_t len) {
35     return crc64(0,(unsigned char*)key,len);
36 }
37 
sha1Hash(char * key,size_t len)38 uint64_t sha1Hash(char *key, size_t len) {
39     SHA1_CTX ctx;
40     unsigned char hash[20];
41 
42     SHA1Init(&ctx);
43     SHA1Update(&ctx,(unsigned char*)key,len);
44     SHA1Final(hash,&ctx);
45     uint64_t hash64;
46     memcpy(&hash64,hash,sizeof(hash64));
47     return hash64;
48 }
49 
50 /* Test the hashing function provided as callback and return the
51  * number of collisions found. */
testHashingFunction(uint64_t (* hash)(char *,size_t))52 unsigned long testHashingFunction(uint64_t (*hash)(char *, size_t)) {
53     unsigned long collisions = 0;
54     memset(Table,0,sizeof(Table));
55     char *prefixes[] = {"object", "message", "user", NULL};
56     for (int i = 0; prefixes[i] != NULL; i++) {
57         for (int j = 0; j < TABLE_SIZE/2; j++) {
58             char keyname[128];
59             size_t keylen = snprintf(keyname,sizeof(keyname),"%s:%d",
60                                      prefixes[i],j);
61             uint64_t bucket = hash(keyname,keylen) % TABLE_SIZE;
62             if (Table[bucket]) {
63                 collisions++;
64             } else {
65                 Table[bucket] = 1;
66             }
67         }
68     }
69     return collisions;
70 }
71 
main(void)72 int main(void) {
73     printf("SHA1 : %lu\n", testHashingFunction(sha1Hash));
74     printf("CRC64: %lu\n", testHashingFunction(crc64Hash));
75     return 0;
76 }
77