1 #include "KeysetTest.h"
2 
3 #include "Platform.h"
4 #include "Random.h"
5 
6 #include <map>
7 #include <set>
8 
9 //-----------------------------------------------------------------------------
10 // This should hopefully be a thorough and uambiguous test of whether a hash
11 // is correctly implemented on a given platform
12 
VerificationTest(pfHash hash,const int hashbits,uint32_t expected,bool verbose)13 bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
14 {
15   const int hashbytes = hashbits / 8;
16 
17   uint8_t * key    = new uint8_t[256];
18   uint8_t * hashes = new uint8_t[hashbytes * 256];
19   uint8_t * final  = new uint8_t[hashbytes];
20 
21   memset(key,0,256);
22   memset(hashes,0,hashbytes*256);
23   memset(final,0,hashbytes);
24 
25   // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
26   // the seed
27 
28   for(int i = 0; i < 256; i++)
29   {
30     key[i] = (uint8_t)i;
31 
32     hash(key,i,256-i,&hashes[i*hashbytes]);
33   }
34 
35   // Then hash the result array
36 
37   hash(hashes,hashbytes*256,0,final);
38 
39   // The first four bytes of that hash, interpreted as a little-endian integer, is our
40   // verification value
41 
42   uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
43 
44   delete [] key;
45   delete [] hashes;
46   delete [] final;
47 
48   //----------
49 
50   if(expected != verification)
51   {
52     if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
53     return false;
54   }
55   else
56   {
57     if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
58     return true;
59   }
60 }
61 
62 //----------------------------------------------------------------------------
63 // Basic sanity checks -
64 
65 // A hash function should not be reading outside the bounds of the key.
66 
67 // Flipping a bit of a key should, with overwhelmingly high probability,
68 // result in a different hash.
69 
70 // Hashing the same key twice should always produce the same result.
71 
72 // The memory alignment of the key should not affect the hash result.
73 
SanityTest(pfHash hash,const int hashbits)74 bool SanityTest ( pfHash hash, const int hashbits )
75 {
76   printf("Running sanity check 1");
77 
78   Rand r(883741);
79 
80   bool result = true;
81 
82   const int hashbytes = hashbits/8;
83   const int reps = 10;
84   const int keymax = 256;
85   const int pad = 16;
86   const int buflen = keymax + pad*3;
87 
88   uint8_t * buffer1 = new uint8_t[buflen];
89   uint8_t * buffer2 = new uint8_t[buflen];
90 
91   uint8_t * hash1 = new uint8_t[hashbytes];
92   uint8_t * hash2 = new uint8_t[hashbytes];
93 
94   //----------
95 
96   for(int irep = 0; irep < reps; irep++)
97   {
98     if(irep % (reps/10) == 0) printf(".");
99 
100     for(int len = 4; len <= keymax; len++)
101     {
102       for(int offset = pad; offset < pad*2; offset++)
103       {
104         uint8_t * key1 = &buffer1[pad];
105         uint8_t * key2 = &buffer2[pad+offset];
106 
107         r.rand_p(buffer1,buflen);
108         r.rand_p(buffer2,buflen);
109 
110         memcpy(key2,key1,len);
111 
112         hash(key1,len,0,hash1);
113 
114         for(int bit = 0; bit < (len * 8); bit++)
115         {
116           // Flip a bit, hash the key -> we should get a different result.
117 
118           flipbit(key2,len,bit);
119           hash(key2,len,0,hash2);
120 
121           if(memcmp(hash1,hash2,hashbytes) == 0)
122           {
123             result = false;
124           }
125 
126           // Flip it back, hash again -> we should get the original result.
127 
128           flipbit(key2,len,bit);
129           hash(key2,len,0,hash2);
130 
131           if(memcmp(hash1,hash2,hashbytes) != 0)
132           {
133             result = false;
134           }
135         }
136       }
137     }
138   }
139 
140   if(result == false)
141   {
142     printf("*********FAIL*********\n");
143   }
144   else
145   {
146     printf("PASS\n");
147   }
148 
149   delete [] buffer1;
150   delete [] buffer2;
151 
152   delete [] hash1;
153   delete [] hash2;
154 
155   return result;
156 }
157 
158 //----------------------------------------------------------------------------
159 // Appending zero bytes to a key should always cause it to produce a different
160 // hash value
161 
AppendedZeroesTest(pfHash hash,const int hashbits)162 void AppendedZeroesTest ( pfHash hash, const int hashbits )
163 {
164   printf("Running sanity check 2");
165 
166   Rand r(173994);
167 
168   const int hashbytes = hashbits/8;
169 
170   for(int rep = 0; rep < 100; rep++)
171   {
172     if(rep % 10 == 0) printf(".");
173 
174     unsigned char key[256];
175 
176     memset(key,0,sizeof(key));
177 
178     r.rand_p(key,32);
179 
180     uint32_t h1[16];
181     uint32_t h2[16];
182 
183     memset(h1,0,hashbytes);
184     memset(h2,0,hashbytes);
185 
186     for(int i = 0; i < 32; i++)
187     {
188       hash(key,32+i,0,h1);
189 
190       if(memcmp(h1,h2,hashbytes) == 0)
191       {
192         printf("\n*********FAIL*********\n");
193         return;
194       }
195 
196       memcpy(h2,h1,hashbytes);
197     }
198   }
199 
200   printf("PASS\n");
201 }
202 
203 //-----------------------------------------------------------------------------
204 // Generate all keys of up to N bytes containing two non-zero bytes
205 
TwoBytesKeygen(int maxlen,KeyCallback & c)206 void TwoBytesKeygen ( int maxlen, KeyCallback & c )
207 {
208   //----------
209   // Compute # of keys
210 
211   int keycount = 0;
212 
213   for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
214 
215   keycount *= 255*255;
216 
217   for(int i = 2; i <= maxlen; i++) keycount += i*255;
218 
219   printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
220 
221   c.reserve(keycount);
222 
223   //----------
224   // Add all keys with one non-zero byte
225 
226   uint8_t key[256];
227 
228   memset(key,0,256);
229 
230   for(int keylen = 2; keylen <= maxlen; keylen++)
231   for(int byteA = 0; byteA < keylen; byteA++)
232   {
233     for(int valA = 1; valA <= 255; valA++)
234     {
235       key[byteA] = (uint8_t)valA;
236 
237       c(key,keylen);
238     }
239 
240     key[byteA] = 0;
241   }
242 
243   //----------
244   // Add all keys with two non-zero bytes
245 
246   for(int keylen = 2; keylen <= maxlen; keylen++)
247   for(int byteA = 0; byteA < keylen-1; byteA++)
248   for(int byteB = byteA+1; byteB < keylen; byteB++)
249   {
250     for(int valA = 1; valA <= 255; valA++)
251     {
252       key[byteA] = (uint8_t)valA;
253 
254       for(int valB = 1; valB <= 255; valB++)
255       {
256         key[byteB] = (uint8_t)valB;
257         c(key,keylen);
258       }
259 
260       key[byteB] = 0;
261     }
262 
263     key[byteA] = 0;
264   }
265 }
266 
267 //-----------------------------------------------------------------------------
268 
269 template< typename hashtype >
DumpCollisionMap(CollisionMap<hashtype,ByteVec> & cmap)270 void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
271 {
272   typedef CollisionMap<hashtype,ByteVec> cmap_t;
273 
274   for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
275   {
276     const hashtype & hash = (*it).first;
277 
278     printf("Hash - ");
279     printbytes(&hash,sizeof(hashtype));
280     printf("\n");
281 
282     std::vector<ByteVec> & keys = (*it).second;
283 
284     for(int i = 0; i < (int)keys.size(); i++)
285     {
286       ByteVec & key = keys[i];
287 
288       printf("Key  - ");
289       printbytes(&key[0],(int)key.size());
290       printf("\n");
291     }
292     printf("\n");
293   }
294 
295 }
296 
297 // test code
298 
ReportCollisions(pfHash hash)299 void ReportCollisions ( pfHash hash )
300 {
301   printf("Hashing keyset\n");
302 
303   std::vector<uint128_t> hashes;
304 
305   HashCallback<uint128_t> c(hash,hashes);
306 
307   TwoBytesKeygen(20,c);
308 
309   printf("%d hashes\n",(int)hashes.size());
310 
311   printf("Finding collisions\n");
312 
313   HashSet<uint128_t> collisions;
314 
315   FindCollisions(hashes,collisions,1000);
316 
317   printf("%d collisions\n",(int)collisions.size());
318 
319   printf("Mapping collisions\n");
320 
321   CollisionMap<uint128_t,ByteVec> cmap;
322 
323   CollisionCallback<uint128_t> c2(hash,collisions,cmap);
324 
325   TwoBytesKeygen(20,c2);
326 
327   printf("Dumping collisions\n");
328 
329   DumpCollisionMap(cmap);
330 }
331