1 //-----------------------------------------------------------------------------
2 // Differential collision & distribution tests - generate a bunch of random keys,
3 // see what happens to the hash value when we flip a few bits of the key.
4 
5 #pragma once
6 
7 #include "Types.h"
8 #include "Stats.h"      // for chooseUpToK
9 #include "KeysetTest.h" // for SparseKeygenRecurse
10 #include "Random.h"
11 
12 #include <vector>
13 #include <algorithm>
14 #include <stdio.h>
15 
16 //-----------------------------------------------------------------------------
17 // Sort through the differentials, ignoring collisions that only occured once
18 // (these could be false positives). If we find collisions of 3 or more, the
19 // differential test fails.
20 
21 template < class keytype >
ProcessDifferentials(std::vector<keytype> & diffs,int reps,bool dumpCollisions)22 bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
23 {
24   std::sort(diffs.begin(), diffs.end());
25 
26   int count = 1;
27   int ignore = 0;
28 
29   bool result = true;
30 
31   if(diffs.size())
32   {
33     keytype kp = diffs[0];
34 
35     for(int i = 1; i < (int)diffs.size(); i++)
36     {
37       if(diffs[i] == kp)
38       {
39         count++;
40         continue;
41       }
42       else
43       {
44         if(count > 1)
45         {
46           result = false;
47 
48           double pct = 100 * (double(count) / double(reps));
49 
50           if(dumpCollisions)
51           {
52             printbits((unsigned char*)&kp,sizeof(kp));
53             printf(" - %4.2f%%\n", pct );
54           }
55         }
56         else
57         {
58           ignore++;
59         }
60 
61         kp = diffs[i];
62         count = 1;
63       }
64     }
65 
66     if(count > 1)
67     {
68       double pct = 100 * (double(count) / double(reps));
69 
70       if(dumpCollisions)
71       {
72         printbits((unsigned char*)&kp,sizeof(kp));
73         printf(" - %4.2f%%\n", pct );
74       }
75     }
76     else
77     {
78       ignore++;
79     }
80   }
81 
82   printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
83 
84   if(result == false)
85   {
86     printf(" !!!!! ");
87   }
88 
89   printf("\n");
90   printf("\n");
91 
92   return result;
93 }
94 
95 //-----------------------------------------------------------------------------
96 // Check all possible keybits-choose-N differentials for collisions, report
97 // ones that occur significantly more often than expected.
98 
99 // Random collisions can happen with probability 1 in 2^32 - if we do more than
100 // 2^32 tests, we'll probably see some spurious random collisions, so don't report
101 // them.
102 
103 template < typename keytype, typename hashtype >
DiffTestRecurse(pfHash hash,keytype & k1,keytype & k2,hashtype & h1,hashtype & h2,int start,int bitsleft,std::vector<keytype> & diffs)104 void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
105 {
106   const int bits = sizeof(keytype)*8;
107 
108   for(int i = start; i < bits; i++)
109   {
110     flipbit(&k2,sizeof(k2),i);
111     bitsleft--;
112 
113     hash(&k2,sizeof(k2),0,&h2);
114 
115     if(h1 == h2)
116     {
117       diffs.push_back(k1 ^ k2);
118     }
119 
120     if(bitsleft)
121     {
122       DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
123     }
124 
125     flipbit(&k2,sizeof(k2),i);
126     bitsleft++;
127   }
128 }
129 
130 //----------
131 
132 template < typename keytype, typename hashtype >
DiffTest(pfHash hash,int diffbits,int reps,bool dumpCollisions)133 bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
134 {
135   const int keybits = sizeof(keytype) * 8;
136   const int hashbits = sizeof(hashtype) * 8;
137 
138   double diffcount = chooseUpToK(keybits,diffbits);
139   double testcount = (diffcount * double(reps));
140   double expected  = testcount / pow(2.0,double(hashbits));
141 
142   Rand r(100);
143 
144   std::vector<keytype> diffs;
145 
146   keytype k1,k2;
147   hashtype h1,h2;
148 
149   printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
150   printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
151 
152   for(int i = 0; i < reps; i++)
153   {
154     if(i % (reps/10) == 0) printf(".");
155 
156     r.rand_p(&k1,sizeof(keytype));
157     k2 = k1;
158 
159     hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
160 
161     DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
162   }
163   printf("\n");
164 
165   bool result = true;
166 
167   result &= ProcessDifferentials(diffs,reps,dumpCollisions);
168 
169   return result;
170 }
171 
172 //-----------------------------------------------------------------------------
173 // Differential distribution test - for each N-bit input differential, generate
174 // a large set of differential key pairs, hash them, and test the output
175 // differentials using our distribution test code.
176 
177 // This is a very hard test to pass - even if the hash values are well-distributed,
178 // the differences between hash values may not be. It's also not entirely relevant
179 // for testing hash functions, but it's still interesting.
180 
181 // This test is a _lot_ of work, as it's essentially a full keyset test for
182 // each of a potentially huge number of input differentials. To speed things
183 // along, we do only a few distribution tests per keyset instead of the full
184 // grid.
185 
186 // #TODO - put diagram drawing back on
187 
188 template < typename keytype, typename hashtype >
DiffDistTest(pfHash hash,const int diffbits,int trials,double & worst,double & avg)189 void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
190 {
191   std::vector<keytype>  keys(trials);
192   std::vector<hashtype> A(trials),B(trials);
193 
194   for(int i = 0; i < trials; i++)
195   {
196     rand_p(&keys[i],sizeof(keytype));
197 
198     hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
199   }
200 
201   //----------
202 
203   std::vector<keytype> diffs;
204 
205   keytype temp(0);
206 
207   SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
208 
209   //----------
210 
211   worst = 0;
212   avg = 0;
213 
214   hashtype h2;
215 
216   for(size_t j = 0; j < diffs.size(); j++)
217   {
218     keytype & d = diffs[j];
219 
220     for(int i = 0; i < trials; i++)
221     {
222       keytype k2 = keys[i] ^ d;
223 
224       hash(&k2,sizeof(k2),0,&h2);
225 
226       B[i] = A[i] ^ h2;
227     }
228 
229     double dworst,davg;
230 
231     TestDistributionFast(B,dworst,davg);
232 
233     avg += davg;
234     worst = (dworst > worst) ? dworst : worst;
235   }
236 
237   avg /= double(diffs.size());
238 }
239 
240 //-----------------------------------------------------------------------------
241 // Simpler differential-distribution test - for all 1-bit differentials,
242 // generate random key pairs and run full distribution/collision tests on the
243 // hash differentials
244 
245 template < typename keytype, typename hashtype >
DiffDistTest2(pfHash hash)246 bool DiffDistTest2 ( pfHash hash  )
247 {
248   Rand r(857374);
249 
250   int keybits = sizeof(keytype) * 8;
251   const int keycount = 256*256*32;
252   keytype k;
253 
254   std::vector<hashtype> hashes(keycount);
255   hashtype h1,h2;
256 
257   bool result = true;
258 
259   for(int keybit = 0; keybit < keybits; keybit++)
260   {
261     printf("Testing bit %d\n",keybit);
262 
263     for(int i = 0; i < keycount; i++)
264     {
265       r.rand_p(&k,sizeof(keytype));
266 
267       hash(&k,sizeof(keytype),0,&h1);
268       flipbit(&k,sizeof(keytype),keybit);
269       hash(&k,sizeof(keytype),0,&h2);
270 
271       hashes[i] = h1 ^ h2;
272     }
273 
274     result &= TestHashList<hashtype>(hashes,true,true,true);
275     printf("\n");
276   }
277 
278   return result;
279 }
280 
281 //----------------------------------------------------------------------------
282