1 //-----------------------------------------------------------------------------
2 // Keyset tests generate various sorts of difficult-to-hash keysets and compare
3 // the distribution and collision frequency of the hash results against an
4 // ideal random distribution
5 
6 // The sanity checks are also in this cpp/h
7 
8 #pragma once
9 
10 #include "Types.h"
11 #include "Stats.h"
12 #include "Random.h"   // for rand_p
13 
14 #include <algorithm>  // for std::swap
15 #include <assert.h>
16 
17 //-----------------------------------------------------------------------------
18 // Sanity tests
19 
20 bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
21 bool SanityTest         ( pfHash hash, const int hashbits );
22 void AppendedZeroesTest ( pfHash hash, const int hashbits );
23 
24 //-----------------------------------------------------------------------------
25 // Keyset 'Combination' - all possible combinations of input blocks
26 
27 template< typename hashtype >
CombinationKeygenRecurse(uint32_t * key,int len,int maxlen,uint32_t * blocks,int blockcount,pfHash hash,std::vector<hashtype> & hashes)28 void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
29                   uint32_t * blocks, int blockcount,
30                 pfHash hash, std::vector<hashtype> & hashes )
31 {
32   if(len == maxlen) return;
33 
34   for(int i = 0; i < blockcount; i++)
35   {
36     key[len] = blocks[i];
37 
38     //if(len == maxlen-1)
39     {
40       hashtype h;
41       hash(key,(len+1) * sizeof(uint32_t),0,&h);
42       hashes.push_back(h);
43     }
44 
45     //else
46     {
47       CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
48     }
49   }
50 }
51 
52 template< typename hashtype >
CombinationKeyTest(hashfunc<hashtype> hash,int maxlen,uint32_t * blocks,int blockcount,bool testColl,bool testDist,bool drawDiagram)53 bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
54 {
55   printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
56 
57   //----------
58 
59   std::vector<hashtype> hashes;
60 
61   uint32_t * key = new uint32_t[maxlen];
62 
63   CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
64 
65   delete [] key;
66 
67   printf("%d keys\n",(int)hashes.size());
68 
69   //----------
70 
71   bool result = true;
72 
73   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
74 
75   printf("\n");
76 
77   return result;
78 }
79 
80 //----------------------------------------------------------------------------
81 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
82 // consisting of all possible permutations of those blocks
83 
84 template< typename hashtype >
PermutationKeygenRecurse(pfHash hash,uint32_t * blocks,int blockcount,int k,std::vector<hashtype> & hashes)85 void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
86 {
87   if(k == blockcount-1)
88   {
89     hashtype h;
90 
91     hash(blocks,blockcount * sizeof(uint32_t),0,&h);
92 
93     hashes.push_back(h);
94 
95     return;
96   }
97 
98   for(int i = k; i < blockcount; i++)
99   {
100     std::swap(blocks[k],blocks[i]);
101 
102     PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
103 
104     std::swap(blocks[k],blocks[i]);
105   }
106 }
107 
108 template< typename hashtype >
PermutationKeyTest(hashfunc<hashtype> hash,uint32_t * blocks,int blockcount,bool testColl,bool testDist,bool drawDiagram)109 bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
110 {
111   printf("Keyset 'Permutation' - %d blocks - ",blockcount);
112 
113   //----------
114 
115   std::vector<hashtype> hashes;
116 
117   PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
118 
119   printf("%d keys\n",(int)hashes.size());
120 
121   //----------
122 
123   bool result = true;
124 
125   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
126 
127   printf("\n");
128 
129   return result;
130 }
131 
132 //-----------------------------------------------------------------------------
133 // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
134 
135 template < typename keytype, typename hashtype >
SparseKeygenRecurse(pfHash hash,int start,int bitsleft,bool inclusive,keytype & k,std::vector<hashtype> & hashes)136 void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
137 {
138   const int nbytes = sizeof(keytype);
139   const int nbits = nbytes * 8;
140 
141   hashtype h;
142 
143   for(int i = start; i < nbits; i++)
144   {
145     flipbit(&k,nbytes,i);
146 
147     if(inclusive || (bitsleft == 1))
148     {
149       hash(&k,sizeof(keytype),0,&h);
150       hashes.push_back(h);
151     }
152 
153     if(bitsleft > 1)
154     {
155       SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
156     }
157 
158     flipbit(&k,nbytes,i);
159   }
160 }
161 
162 //----------
163 
164 template < int keybits, typename hashtype >
SparseKeyTest(hashfunc<hashtype> hash,const int setbits,bool inclusive,bool testColl,bool testDist,bool drawDiagram)165 bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
166 {
167   printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
168 
169   typedef Blob<keybits> keytype;
170 
171   std::vector<hashtype> hashes;
172 
173   keytype k;
174   memset(&k,0,sizeof(k));
175 
176   if(inclusive)
177   {
178     hashtype h;
179 
180     hash(&k,sizeof(keytype),0,&h);
181 
182     hashes.push_back(h);
183   }
184 
185   SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
186 
187   printf("%d keys\n",(int)hashes.size());
188 
189   bool result = true;
190 
191   result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
192 
193   printf("\n");
194 
195   return result;
196 }
197 
198 //-----------------------------------------------------------------------------
199 // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
200 // all possible keys with bits set in that window
201 
202 template < typename keytype, typename hashtype >
WindowedKeyTest(hashfunc<hashtype> hash,const int windowbits,bool testCollision,bool testDistribution,bool drawDiagram)203 bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
204 {
205   const int keybits = sizeof(keytype) * 8;
206   const int keycount = 1 << windowbits;
207 
208   std::vector<hashtype> hashes;
209   hashes.resize(keycount);
210 
211   bool result = true;
212 
213   int testcount = keybits;
214 
215   printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
216 
217   for(int j = 0; j <= testcount; j++)
218   {
219     int minbit = j;
220 
221     keytype key;
222 
223     for(int i = 0; i < keycount; i++)
224     {
225       key = i;
226       //key = key << minbit;
227 
228       lrot(&key,sizeof(keytype),minbit);
229 
230       hash(&key,sizeof(keytype),0,&hashes[i]);
231     }
232 
233     printf("Window at %3d - ",j);
234 
235     result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
236 
237     //printf("\n");
238   }
239 
240   return result;
241 }
242 
243 //-----------------------------------------------------------------------------
244 // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
245 // bytes.
246 
247 // (This keyset type is designed to make MurmurHash2 fail)
248 
249 template < typename hashtype >
CyclicKeyTest(pfHash hash,int cycleLen,int cycleReps,const int keycount,bool drawDiagram)250 bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
251 {
252   printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
253 
254   Rand r(483723);
255 
256   std::vector<hashtype> hashes;
257   hashes.resize(keycount);
258 
259   int keyLen = cycleLen * cycleReps;
260 
261   uint8_t * cycle = new uint8_t[cycleLen + 16];
262   uint8_t * key = new uint8_t[keyLen];
263 
264   //----------
265 
266   for(int i = 0; i < keycount; i++)
267   {
268     r.rand_p(cycle,cycleLen);
269 
270     *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
271 
272     for(int j = 0; j < keyLen; j++)
273     {
274       key[j] = cycle[j % cycleLen];
275     }
276 
277     hash(key,keyLen,0,&hashes[i]);
278   }
279 
280   //----------
281 
282   bool result = true;
283 
284   result &= TestHashList(hashes,true,true,drawDiagram);
285   printf("\n");
286 
287   delete [] cycle;
288   delete [] key;
289 
290   return result;
291 }
292 
293 //-----------------------------------------------------------------------------
294 // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
295 
296 void TwoBytesKeygen ( int maxlen, KeyCallback & c );
297 
298 template < typename hashtype >
TwoBytesTest2(pfHash hash,int maxlen,bool drawDiagram)299 bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
300 {
301   std::vector<hashtype> hashes;
302 
303   HashCallback<hashtype> c(hash,hashes);
304 
305   TwoBytesKeygen(maxlen,c);
306 
307   bool result = true;
308 
309   result &= TestHashList(hashes,true,true,drawDiagram);
310   printf("\n");
311 
312   return result;
313 }
314 
315 //-----------------------------------------------------------------------------
316 // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
317 // where "core" consists of all possible combinations of the given character
318 // set of length N.
319 
320 template < typename hashtype >
TextKeyTest(hashfunc<hashtype> hash,const char * prefix,const char * coreset,const int corelen,const char * suffix,bool drawDiagram)321 bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
322 {
323   const int prefixlen = (int)strlen(prefix);
324   const int suffixlen = (int)strlen(suffix);
325   const int corecount = (int)strlen(coreset);
326 
327   const int keybytes = prefixlen + corelen + suffixlen;
328   const int keycount = (int)pow(double(corecount),double(corelen));
329 
330   printf("Keyset 'Text' - keys of form \"%s[",prefix);
331   for(int i = 0; i < corelen; i++) printf("X");
332   printf("]%s\" - %d keys\n",suffix,keycount);
333 
334   uint8_t * key = new uint8_t[keybytes+1];
335 
336   key[keybytes] = 0;
337 
338   memcpy(key,prefix,prefixlen);
339   memcpy(key+prefixlen+corelen,suffix,suffixlen);
340 
341   //----------
342 
343   std::vector<hashtype> hashes;
344   hashes.resize(keycount);
345 
346   for(int i = 0; i < keycount; i++)
347   {
348     int t = i;
349 
350     for(int j = 0; j < corelen; j++)
351     {
352       key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
353     }
354 
355     hash(key,keybytes,0,&hashes[i]);
356   }
357 
358   //----------
359 
360   bool result = true;
361 
362   result &= TestHashList(hashes,true,true,drawDiagram);
363 
364   printf("\n");
365 
366   delete [] key;
367 
368   return result;
369 }
370 
371 //-----------------------------------------------------------------------------
372 // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
373 
374 // We reuse one block of empty bytes, otherwise the RAM cost is enormous.
375 
376 template < typename hashtype >
ZeroKeyTest(pfHash hash,bool drawDiagram)377 bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
378 {
379   int keycount = 64*1024;
380 
381   printf("Keyset 'Zeroes' - %d keys\n",keycount);
382 
383   unsigned char * nullblock = new unsigned char[keycount];
384   memset(nullblock,0,keycount);
385 
386   //----------
387 
388   std::vector<hashtype> hashes;
389 
390   hashes.resize(keycount);
391 
392   for(int i = 0; i < keycount; i++)
393   {
394     hash(nullblock,i,0,&hashes[i]);
395   }
396 
397   bool result = true;
398 
399   result &= TestHashList(hashes,true,true,drawDiagram);
400 
401   printf("\n");
402 
403   delete [] nullblock;
404 
405   return result;
406 }
407 
408 //-----------------------------------------------------------------------------
409 // Keyset 'Seed' - hash "the quick brown fox..." using different seeds
410 
411 template < typename hashtype >
SeedTest(pfHash hash,int keycount,bool drawDiagram)412 bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
413 {
414   printf("Keyset 'Seed' - %d keys\n",keycount);
415 
416   const char * text = "The quick brown fox jumps over the lazy dog";
417   const int len = (int)strlen(text);
418 
419   //----------
420 
421   std::vector<hashtype> hashes;
422 
423   hashes.resize(keycount);
424 
425   for(int i = 0; i < keycount; i++)
426   {
427     hash(text,len,i,&hashes[i]);
428   }
429 
430   bool result = true;
431 
432   result &= TestHashList(hashes,true,true,drawDiagram);
433 
434   printf("\n");
435 
436   return result;
437 }
438 
439 //-----------------------------------------------------------------------------
440