1 #ifdef DS_PERF
2 #define _CRT_SECURE_NO_WARNINGS
3 #define _CRT_NONSTDC_NO_DEPRECATE
4 #define _CRT_NON_CONFORMING_SWPRINTFS
5 
6 
7 //#define STBDS_INTERNAL_SMALL_BUCKET    // make 64-bit bucket fit both keys and hash bits
8 //#define STBDS_SIPHASH_2_4     // performance test 1_3 against 2_4
9 //#define STBDS_INTERNAL_BUCKET_START    // don't bother offseting differently within bucket for different hash values
10 //#define STBDS_FLUSH_CACHE  (1u<<20) // do this much memory traffic to flush the cache between some benchmarking measurements
11 
12 #include <stdio.h>
13 
14 #define WIN32_LEAN_AND_MEAN
15 #include <windows.h>
16 #define STB_DEFINE
17 #define STB_NO_REGISTRY
18 #include "../stb.h"
19 #endif
20 
21 #ifdef DS_TEST
22 #define STBDS_UNIT_TESTS
23 #define STBDS_SMALL_BUCKET
24 #endif
25 
26 #ifdef DS_STATS
27 #define STBDS_STATISTICS
28 #endif
29 
30 #ifndef DS_PERF
31 #define STBDS_ASSERT assert
32 #include <assert.h>
33 #endif
34 
35 #define STB_DS_IMPLEMENTATION
36 #include "../stb_ds.h"
37 
38 size_t churn_inserts, churn_deletes;
39 
churn(int a,int b,int count)40 void churn(int a, int b, int count)
41 {
42   struct { int key,value; } *map=NULL;
43   int i,j,n,k;
44   for (i=0; i < a; ++i)
45     hmput(map,i,i+1);
46   for (n=0; n < count; ++n) {
47     for (j=a; j < b; ++j,++i) {
48       hmput(map,i,i+1);
49     }
50     assert(hmlen(map) == b);
51     for (j=a; j < b; ++j) {
52       k=i-j-1;
53       k = hmdel(map,k);
54       assert(k != 0);
55     }
56     assert(hmlen(map) == a);
57   }
58   hmfree(map);
59   churn_inserts = i;
60   churn_deletes = (b-a) * n;
61 }
62 
63 #ifdef DS_TEST
64 #include <stdio.h>
main(int argc,char ** argv)65 int main(int argc, char **argv)
66 {
67   stbds_unit_tests();
68   churn(0,100,1);
69   churn(3,7,50000);
70   churn(3,15,50000);
71   churn(16, 48, 25000);
72   churn(10, 15, 25000);
73   churn(200,500, 5000);
74   churn(2000,5000, 500);
75   churn(20000,50000, 50);
76   printf("Ok!");
77   return 0;
78 }
79 #endif
80 
81 #ifdef DS_STATS
82 #define MAX(a,b) ((a) > (b) ? (a) : (b))
83 size_t max_hit_probes, max_miss_probes, total_put_probes, total_miss_probes, churn_misses;
churn_stats(int a,int b,int count)84 void churn_stats(int a, int b, int count)
85 {
86   struct { int key,value; } *map=NULL;
87   int i,j,n,k;
88   churn_misses = 0;
89   for (i=0; i < a; ++i) {
90     hmput(map,i,i+1);
91     max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
92     total_put_probes += stbds_hash_probes;
93     stbds_hash_probes = 0;
94   }
95 
96   for (n=0; n < count; ++n) {
97     for (j=a; j < b; ++j,++i) {
98       hmput(map,i,i+1);
99       max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
100       total_put_probes += stbds_hash_probes;
101       stbds_hash_probes = 0;
102     }
103     for (j=0; j < (b-a)*10; ++j) {
104       k=i+j;
105       (void) hmgeti(map,k); // miss
106       max_miss_probes = MAX(max_miss_probes, stbds_hash_probes);
107       total_miss_probes += stbds_hash_probes;
108       stbds_hash_probes = 0;
109       ++churn_misses;
110     }
111     assert(hmlen(map) == b);
112     for (j=a; j < b; ++j) {
113       k=i-j-1;
114       k = hmdel(map,k);
115       stbds_hash_probes = 0;
116       assert(k);
117     }
118     assert(hmlen(map) == a);
119   }
120   hmfree(map);
121   churn_inserts = i;
122   churn_deletes = (b-a) * n;
123 }
124 
reset_stats(void)125 void reset_stats(void)
126 {
127   stbds_array_grow=0,
128   stbds_hash_grow=0;
129   stbds_hash_shrink=0;
130   stbds_hash_rebuild=0;
131   stbds_hash_probes=0;
132   stbds_hash_alloc=0;
133   stbds_rehash_probes=0;
134   stbds_rehash_items=0;
135   max_hit_probes = 0;
136   max_miss_probes = 0;
137   total_put_probes = 0;
138   total_miss_probes = 0;
139 }
140 
print_churn_probe_stats(char * str)141 void print_churn_probe_stats(char *str)
142 {
143   printf("Probes: %3d max hit, %3d max miss, %4.2f avg hit, %4.2f avg miss: %s\n",
144     (int) max_hit_probes, (int) max_miss_probes, (float) total_put_probes / churn_inserts, (float) total_miss_probes / churn_misses, str);
145   reset_stats();
146 }
147 
main(int arg,char ** argv)148 int main(int arg, char **argv)
149 {
150   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
151   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
152   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
153   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
154   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
155   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
156   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
157   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
158   return 0;
159 }
160 #endif
161 
162 
163 #ifdef DS_PERF
164 //char *strdup(const char *foo) { return 0; }
165 //int stricmp(const char *a, const char *b) { return 0; }
166 //int strnicmp(const char *a, const char *b, size_t n) { return 0; }
167 
168 unsigned __int64 t0, xsum, mn,mx,count;
begin(void)169 void begin(void)
170 {
171   LARGE_INTEGER m;
172   QueryPerformanceCounter(&m);
173   t0 = m.QuadPart;
174   xsum = 0;
175   count = 0;
176   mx = 0;
177   mn = ~(unsigned __int64) 0;
178 }
179 
measure(void)180 void measure(void)
181 {
182   unsigned __int64 t1, t;
183   LARGE_INTEGER m;
184   QueryPerformanceCounter(&m);
185   t1 = m.QuadPart;
186   t = t1-t0;
187   if (t1 < t0)
188     printf("ALERT: QueryPerformanceCounter was unordered!\n");
189   if (t < mn) mn = t;
190   if (t > mx) mx = t;
191   xsum += t;
192   ++count;
193   t0 = t1;
194 }
195 
dont_measure(void)196 void dont_measure(void)
197 {
198   LARGE_INTEGER m;
199   QueryPerformanceCounter(&m);
200   t0 = m.QuadPart;
201 }
202 
203 double timer;
end(void)204 double end(void)
205 {
206   LARGE_INTEGER m;
207   QueryPerformanceFrequency(&m);
208 
209   if (count > 3) {
210     // discard the highest and lowest
211     xsum -= mn;
212     xsum -= mx;
213     count -= 2;
214   }
215   timer = (double) (xsum) / count / m.QuadPart * 1000;
216   return timer;
217 }
218 
build(int a,int b,int count,int step)219 void build(int a, int b, int count, int step)
220 {
221   struct { int key,value; } *map=NULL;
222   int i,n;
223   for (i=0; i < a; ++i) {
224     n = i*step;
225     hmput(map,n,i+1);
226   }
227   measure();
228   churn_inserts = i;
229   hmfree(map);
230   dont_measure();
231 }
232 
233 #ifdef STB__INCLUDE_STB_H
build_stb(int a,int b,int count,int step)234 void build_stb(int a, int b, int count, int step)
235 {
236   stb_idict *d = stb_idict_new_size(8);
237   int i;
238   for (i=0; i < a; ++i)
239     stb_idict_add(d, i*step, i+1);
240   measure();
241   churn_inserts = i;
242   stb_idict_destroy(d);
243   dont_measure();
244 }
245 
multibuild_stb(int a,int b,int count,int step,int tables)246 void multibuild_stb(int a, int b, int count, int step, int tables)
247 {
248   stb_idict *d[50000];
249   int i,q;
250   for (q=0; q < tables; ++q)
251     d[q] = stb_idict_new_size(8);
252   dont_measure();
253   for (i=0; i < a; ++i)
254     for (q=0; q < tables; ++q)
255       stb_idict_add(d[q], i*step+q*771, i+1);
256   measure();
257   churn_inserts = i;
258   for (q=0; q < tables; ++q)
259     stb_idict_destroy(d[q]);
260   dont_measure();
261 }
262 
multisearch_stb(int a,int start,int end,int step,int tables)263 int multisearch_stb(int a, int start, int end, int step, int tables)
264 {
265   stb_idict *d[50000];
266   int i,q,total=0,v;
267   for (q=0; q < tables; ++q)
268     d[q] = stb_idict_new_size(8);
269   for (q=0; q < tables; ++q)
270     for (i=0; i < a; ++i)
271       stb_idict_add(d[q], i*step+q*771, i+1);
272   dont_measure();
273   for (i=start; i < end; ++i)
274     for (q=0; q < tables; ++q)
275       if (stb_idict_get_flag(d[q], i*step+q*771, &v))
276         total += v;
277   measure();
278   churn_inserts = i;
279   for (q=0; q < tables; ++q)
280     stb_idict_destroy(d[q]);
281   dont_measure();
282   return total;
283 }
284 #endif
285 
multisearch(int a,int start,int end,int step,int tables)286 int multisearch(int a, int start, int end, int step, int tables)
287 {
288   struct { int key,value; } *hash[50000];
289   int i,q,total=0;
290   for (q=0; q < tables; ++q)
291     hash[q] = NULL;
292   for (q=0; q < tables; ++q)
293     for (i=0; i < a; ++i)
294       hmput(hash[q], i*step+q*771, i+1);
295   dont_measure();
296   for (i=start; i < end; ++i)
297     for (q=0; q < tables; ++q)
298       total += hmget(hash[q], i*step+q*771);
299   measure();
300   churn_inserts = i;
301   for (q=0; q < tables; ++q)
302     hmfree(hash[q]);
303   dont_measure();
304   return total;
305 }
306 
churn_skip(unsigned int a,unsigned int b,int count)307 void churn_skip(unsigned int a, unsigned int b, int count)
308 {
309   struct { unsigned int key,value; } *map=NULL;
310   unsigned int i,j,n,k;
311   for (i=0; i < a; ++i)
312     hmput(map,i,i+1);
313   dont_measure();
314   for (n=0; n < count; ++n) {
315     for (j=a; j < b; ++j,++i) {
316       hmput(map,i,i+1);
317     }
318     assert(hmlen(map) == b);
319     for (j=a; j < b; ++j) {
320       k=i-j-1;
321       k = hmdel(map,k);
322       assert(k != 0);
323     }
324     assert(hmlen(map) == a);
325   }
326   measure();
327   churn_inserts = i;
328   churn_deletes = (b-a) * n;
329   hmfree(map);
330   dont_measure();
331 }
332 
333 typedef struct { int n[8]; } str32;
churn32(int a,int b,int count,int include_startup)334 void churn32(int a, int b, int count, int include_startup)
335 {
336   struct { str32 key; int value; } *map=NULL;
337   int i,j,n;
338   str32 key = { 0 };
339   for (i=0; i < a; ++i) {
340     key.n[0] = i;
341     hmput(map,key,i+1);
342   }
343   if (!include_startup)
344     dont_measure();
345   for (n=0; n < count; ++n) {
346     for (j=a; j < b; ++j,++i) {
347       key.n[0] = i;
348       hmput(map,key,i+1);
349     }
350     assert(hmlen(map) == b);
351     for (j=a; j < b; ++j) {
352       key.n[0] = i-j-1;
353       hmdel(map,key);
354     }
355     assert(hmlen(map) == a);
356   }
357   measure();
358   hmfree(map);
359   churn_inserts = i;
360   churn_deletes = (b-a) * n;
361   dont_measure();
362 }
363 
364 typedef struct { int n[32]; } str256;
churn256(int a,int b,int count,int include_startup)365 void churn256(int a, int b, int count, int include_startup)
366 {
367   struct { str256 key; int value; } *map=NULL;
368   int i,j,n;
369   str256 key = { 0 };
370   for (i=0; i < a; ++i) {
371     key.n[0] = i;
372     hmput(map,key,i+1);
373   }
374   if (!include_startup)
375     dont_measure();
376   for (n=0; n < count; ++n) {
377     for (j=a; j < b; ++j,++i) {
378       key.n[0] = i;
379       hmput(map,key,i+1);
380     }
381     assert(hmlen(map) == b);
382     for (j=a; j < b; ++j) {
383       key.n[0] = i-j-1;
384       hmdel(map,key);
385     }
386     assert(hmlen(map) == a);
387   }
388   measure();
389   hmfree(map);
390   churn_inserts = i;
391   churn_deletes = (b-a) * n;
392   dont_measure();
393 }
394 
churn8(int a,int b,int count,int include_startup)395 void churn8(int a, int b, int count, int include_startup)
396 {
397   struct { size_t key,value; } *map=NULL;
398   int i,j,n,k;
399   for (i=0; i < a; ++i)
400     hmput(map,i,i+1);
401   if (!include_startup)
402     dont_measure();
403   for (n=0; n < count; ++n) {
404     for (j=a; j < b; ++j,++i) {
405       hmput(map,i,i+1);
406     }
407     assert(hmlen(map) == b);
408     for (j=a; j < b; ++j) {
409       k=i-j-1;
410       k = hmdel(map,k);
411       assert(k != 0);
412     }
413     assert(hmlen(map) == a);
414   }
415   measure();
416   hmfree(map);
417   churn_inserts = i;
418   churn_deletes = (b-a) * n;
419   dont_measure();
420 }
421 
multichurn4(int a,int b,int count,int include_startup,int tables)422 void multichurn4(int a, int b, int count, int include_startup, int tables)
423 {
424   struct { int key,value; } *map[50000];
425   int i,j,n,k,q;
426   for (q=0; q < tables; ++q)
427     map[q] = NULL;
428   dont_measure();
429 
430   for (i=0; i < a; ++i)
431     for (q=0; q < tables; ++q)
432       hmput(map[q],i,i+1);
433   if (!include_startup)
434     dont_measure();
435   for (n=0; n < count; ++n) {
436     for (j=a; j < b; ++j,++i) {
437       for (q=0; q < tables; ++q)
438         hmput(map[q],i,i+1);
439     }
440     assert(hmlen(map[0]) == b);
441     for (j=a; j < b; ++j) {
442       k=i-j-1;
443       for (q=0; q < tables; ++q)
444         k = hmdel(map[q],k);
445       assert(k != 0);
446     }
447     assert(hmlen(map[0]) == a);
448   }
449   measure();
450   for (q=0; q < tables; ++q)
451     hmfree(map[q]);
452   churn_inserts = i * tables;
453   churn_deletes = (b-a) * n * tables;
454   dont_measure();
455 }
456 
457 struct {
458   unsigned __int64 start;
459   unsigned __int64 end;
460   int    table_size;
461 } mstats[32][4000];
462 
463 const int first_step = 64;
464 const int last_step = 384-48; // 32M
465 
measure_build4(int step_log2)466 void measure_build4(int step_log2)
467 {
468   double length;
469   int i,j,k=0;
470   int step = 1 << step_log2;
471   unsigned __int64 t0,t1;
472   struct { int key,value; } *map=NULL;
473   double rdtsc_scale;
474   begin();
475   t0 = __rdtsc();
476 
477   mstats[0][0].start = __rdtsc();
478   for (i=0; i < 256; ++i) {
479     hmput(map,k,k+1);
480     k += step;
481   }
482   mstats[0][first_step-1].end = __rdtsc();
483   mstats[0][first_step-1].table_size = k >> step_log2;
484   for (j=first_step; j < last_step; ++j) {
485     for (i=0; i < (1<<(j>>4)); ++i) {
486       hmput(map, k,k+1);
487       k += step;
488     }
489     mstats[0][j].end = __rdtsc();
490     mstats[0][j].table_size = k >> step_log2;
491   }
492   t1 = __rdtsc();
493   measure();
494   hmfree(map);
495   length = end();
496   rdtsc_scale = length / (t1-t0) * 1000;
497 
498   for (j=1; j < last_step; ++j)
499     mstats[0][j].start = mstats[0][0].start;
500   for (j=first_step-1; j < last_step; ++j) {
501     printf("%12.4f,%12d,%12d,0,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size, mstats[0][j].table_size);
502   }
503 }
504 
505 #ifdef STBDS_FLUSH_CACHE
506 static int cache_index;
507 char dummy[8][STBDS_FLUSH_CACHE];
508 
flush_cache(void)509 int flush_cache(void)
510 {
511   memmove(dummy[cache_index],dummy[cache_index]+1, sizeof(dummy[cache_index])-1);
512   cache_index = (cache_index+1)%8;
513   return dummy[cache_index][0];
514 }
515 #else
flush_cache(void)516 int flush_cache(void) { return 0; }
517 #endif
518 
measure_average_lookup4(int step_log2)519 int measure_average_lookup4(int step_log2)
520 {
521   int total;
522   double length;
523   int i,j,k=0,q;
524   int step = 1 << step_log2;
525   unsigned __int64 t0,t1;
526   struct { int key,value; } *map=NULL;
527   double rdtsc_scale;
528   begin();
529   t0 = __rdtsc();
530 
531   for (i=0; i < 128; ++i) {
532     hmput(map,k,k+1);
533     k += step;
534   }
535   for (j=first_step; j <= last_step; ++j) {
536     total += flush_cache();
537     mstats[0][j].start = __rdtsc();
538     for (q=i=0; i < 50000; ++i) {
539        total += hmget(map, q); // hit
540        if (++q == k) q = 0;
541     }
542     mstats[0][j].end = __rdtsc();
543     mstats[0][j].table_size = k;
544     total += flush_cache();
545     mstats[1][j].start = __rdtsc();
546     for (i=0; i < 50000; ++i) {
547        total += hmget(map, i+k); // miss
548     }
549     mstats[1][j].end = __rdtsc();
550     mstats[1][j].table_size = k;
551 
552     // expand table
553     for (i=0; i < (1<<(j>>4)); ++i) {
554       hmput(map, k,k+1);
555       k += step;
556     }
557   }
558 
559   t1 = __rdtsc();
560   measure();
561   hmfree(map);
562   length = end();
563   rdtsc_scale = length / (t1-t0) * 1000;
564 
565   for (j=first_step; j <= last_step; ++j) {
566     // time,table_size,numins,numhit,nummiss,numperflush
567     printf("%12.4f,%12d,0,50000,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
568   }
569   for (j=first_step; j <= last_step; ++j) {
570     printf("%12.4f,%12d,0,0,50000,0\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
571   }
572   return total;
573 }
574 
measure_worst_lookup4_a(int step_log2)575 int measure_worst_lookup4_a(int step_log2)
576 {
577   int total;
578   double length;
579   int i,j,k=0,q,worst_q,n,z,attempts;
580   int step = 1 << step_log2;
581   unsigned __int64 t0,t1;
582   unsigned __int64 m0,m1,worst;
583   struct { int key,value; } *map=NULL;
584   double rdtsc_scale;
585   begin();
586   t0 = __rdtsc();
587 
588   memset(mstats, 0, sizeof(mstats));
589   for (j=first_step; j <= last_step; ++j)
590     mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
591 
592   for(attempts=0; attempts < 2; ++attempts) {
593     k = 0;
594     stbds_rand_seed(0); // force us to get the same table every time
595     for (i=0; i < 128; ++i) {
596       hmput(map,k,k+1);
597       k += step;
598     }
599     for (j=first_step; j <= last_step; ++j) {
600       unsigned __int64 times[32];
601 
602       // find the worst hit time
603       for (z=0; z < 2; ++z) { // try the bisectioning measurement 4 times
604         worst = 0;
605         for (n=0; n < 10; ++n) { // test 400 keys total
606           // find the worst time to hit 20 keys
607           q=0;
608           worst_q = 0;
609           total += flush_cache();
610           m0 = __rdtsc();
611           for (i=0; i < 20; ++i) {
612             total += hmget(map, q); // hit
613             if (++q == k) q = 0;
614           }
615           m1 = __rdtsc();
616           // for each n, check if this is the worst lookup we've seen
617           if (m1 - m0 > worst) {
618             worst = m1-m0;
619             worst_q = q - i;
620             if (worst_q < 0) q += k;
621           }
622         }
623         // after 400 keys, take the worst 20 keys, and try each one
624         worst = 0;
625         q = worst_q;
626         for (i=0; i < 20; ++i) {
627           total += flush_cache();
628           m0 = __rdtsc();
629           total += hmget(map, q); // hit
630           m1 = __rdtsc();
631           if (m1 - m0 > worst)
632             worst = m1-m0;
633           if (++q == k) q = 0;
634         }
635         times[z] = worst;
636       }
637       // find the worst time in the bunch
638       worst = 0;
639       for (i=0; i < z; ++i)
640         if (times[i] > worst)
641           worst = times[i];
642       // take the best of 'attempts', to discard outliers
643       if (worst < mstats[0][j].end)
644         mstats[0][j].end = worst;
645       mstats[0][j].start = 0;
646       mstats[0][j].table_size = k >> step_log2;
647 
648       // find the worst miss time
649       for (z=0; z < 8; ++z) { // try the bisectioning measurement 8 times
650         worst = 0;
651         for (n=0; n < 20; ++n) { // test 400 keys total
652           // find the worst time to hit 20 keys
653           q=k;
654           worst_q = 0;
655           total += flush_cache();
656           m0 = __rdtsc();
657           for (i=0; i < 20; ++i) {
658             total += hmget(map, q); // hit
659           }
660           m1 = __rdtsc();
661           // for each n, check if this is the worst lookup we've seen
662           if (m1 - m0 > worst) {
663             worst = m1-m0;
664             worst_q = q - i;
665           }
666         }
667         // after 400 keys, take the worst 20 keys, and try each one
668         worst = 0;
669         q = worst_q;
670         for (i=0; i < 20; ++i) {
671           total += flush_cache();
672           m0 = __rdtsc();
673           total += hmget(map, q); // hit
674           m1 = __rdtsc();
675           if (m1 - m0 > worst)
676             worst = m1-m0;
677         }
678         times[z] = worst;
679       }
680       // find the worst time in the bunch
681       worst = 0;
682       for (i=0; i < z; ++i)
683         if (times[i] > worst)
684           worst = times[i];
685       if (worst < mstats[1][j].end)
686         mstats[1][j].end = worst;
687       mstats[1][j].start = 0;
688       mstats[1][j].table_size = k >> step_log2;
689 
690       // expand table
691       for (i=0; i < (1<<(j>>4)); ++i) {
692         hmput(map, k,k+1);
693         k += step;
694       }
695     }
696     hmfree(map);
697   }
698 
699   t1 = __rdtsc();
700   measure();
701   length = end();
702   rdtsc_scale = length / (t1-t0) * 1000;
703 
704   for (j=first_step; j <= last_step; ++j) {
705     printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
706   }
707   for (j=first_step; j <= last_step; ++j) {
708     printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
709   }
710   return total;
711 }
712 
measure_worst_lookup4_b(int step_log2)713 int measure_worst_lookup4_b(int step_log2)
714 {
715   int total;
716   double length;
717   int i,j,k=0,q,worst_q,n,z,attempts;
718   int step = 1 << step_log2;
719   unsigned __int64 t0,t1;
720   unsigned __int64 m0,m1,worst;
721   struct { int key,value; } *map=NULL;
722   double rdtsc_scale;
723   begin();
724   t0 = __rdtsc();
725 
726   memset(mstats, 0, sizeof(mstats));
727   for (j=first_step; j <= last_step; ++j)
728     mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
729 
730   k = 0;
731   stbds_rand_seed(0); // force us to get the same table every time
732   for (i=0; i < 128; ++i) {
733     hmput(map,k,k+1);
734     k += step;
735   }
736   for (j=first_step; j <= last_step; ++j) {
737     unsigned __int64 times[32];
738 
739     // find the worst hit time
740     for (z=0; z < 8; ++z) { // try this 8 times
741       worst = 0;
742       q=0;
743       for (i=0; i < 5000; ++i) {
744         total += hmget(map, q);
745         m0 = __rdtsc();
746         total += hmget(map, q);
747         m1 = __rdtsc();
748         if (m1 - m0 > worst) {
749           worst = m1-m0;
750           worst_q = q - i;
751         }
752         if (++q == k) q = 0;
753       }
754       // now retry with the worst one, but find the shortest time for it
755       worst = ~(unsigned __int64) 0;
756       for (i=0; i < 4; ++i) {
757         total += flush_cache();
758         m0 = __rdtsc();
759         total += hmget(map,worst_q);
760         m1 = __rdtsc();
761         if (m1-m0 < worst)
762           worst = m1-m0;
763       }
764       times[z] = worst;
765     }
766 
767     // find the worst of those
768     worst = 0;
769     for (i=0; i < z; ++i)
770       if (times[i] > worst)
771         worst = times[i];
772     mstats[0][j].start = 0;
773     mstats[0][j].end = worst;
774     mstats[0][j].table_size = k;
775 
776     // find the worst miss time
777     for (z=0; z < 8; ++z) { // try this 8 times
778       worst = 0;
779       q=k;
780       for (i=0; i < 5000; ++i) {
781         total += hmget(map, q);
782         m0 = __rdtsc();
783         total += hmget(map, q);
784         m1 = __rdtsc();
785         if (m1 - m0 > worst) {
786           worst = m1-m0;
787           worst_q = q - i;
788         }
789         //printf("%6llu ", m1-m0);
790       }
791       // now retry with the worst one, but find the shortest time for it
792       worst = ~(unsigned __int64) 0;
793       for (i=0; i < 4; ++i) {
794         total += flush_cache();
795         m0 = __rdtsc();
796         total += hmget(map,worst_q);
797         m1 = __rdtsc();
798         if (m1-m0 < worst)
799           worst = m1-m0;
800       }
801       times[z] = worst;
802     }
803 
804     // find the worst of those
805     worst = 0;
806     for (i=0; i < z; ++i)
807       if (times[i] > worst)
808         worst = times[i];
809     mstats[1][j].start = 0;
810     mstats[1][j].end = worst;
811     mstats[1][j].table_size = k;
812 
813     // expand table
814     for (i=0; i < (1<<(j>>4)); ++i) {
815       hmput(map, k,k+1);
816       k += step;
817     }
818   }
819   hmfree(map);
820 
821   t1 = __rdtsc();
822   measure();
823   length = end();
824   rdtsc_scale = length / (t1-t0) * 1000;
825 
826   for (j=first_step+1; j <= last_step; ++j) {
827     printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
828   }
829   for (j=first_step+1; j <= last_step; ++j) {
830     printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
831   }
832   return total;
833 }
834 
measure_uncached_lookup4(int step_log2)835 int measure_uncached_lookup4(int step_log2)
836 {
837   int total;
838   double length;
839   int i,j,k=0,q;
840   int step = 1 << step_log2;
841   unsigned __int64 t0,t1;
842   struct { int key,value; } *map=NULL;
843   double rdtsc_scale;
844   begin();
845   t0 = __rdtsc();
846 
847   map = NULL;
848   for (i=0; i < 128; ++i) {
849     hmput(map,k,k+1);
850     k += step;
851   }
852   for (j=first_step; j <= last_step; ++j) {
853     mstats[0][j].start = __rdtsc();
854     mstats[0][j].end = 0;
855     for (q=i=0; i < 512; ++i) {
856       if ((i & 3) == 0) {
857         mstats[0][j].end += __rdtsc();
858         total += flush_cache();
859         mstats[0][j].start += __rdtsc();
860       }
861       total += hmget(map, q); // hit
862       if (++q == k) q = 0;
863     }
864     mstats[0][j].end += __rdtsc();
865     mstats[0][j].table_size = k;
866     total += flush_cache();
867     mstats[1][j].end = 0;
868     mstats[1][j].start = __rdtsc();
869     for (i=0; i < 512; ++i) {
870       if ((i & 3) == 0) {
871         mstats[1][j].end += __rdtsc();
872         total += flush_cache();
873         mstats[1][j].start += __rdtsc();
874       }
875       total += hmget(map, i+k); // miss
876     }
877     mstats[1][j].end += __rdtsc();
878     mstats[1][j].table_size = k;
879 
880     // expand table
881     for (i=0; i < (1<<(j>>4)); ++i) {
882       hmput(map, k,k+1);
883       k += step;
884     }
885   }
886   hmfree(map);
887 
888   t1 = __rdtsc();
889   measure();
890   length = end();
891   rdtsc_scale = length / (t1-t0) * 1000;
892 
893   for (j=first_step; j <= last_step; ++j) {
894     printf("%12.4f,%12d,0,512,0,4\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
895   }
896   for (j=first_step; j <= last_step; ++j) {
897     printf("%12.4f,%12d,0,0,512,4\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
898   }
899   return total;
900 }
901 
902 
903 
904 
main(int arg,char ** argv)905 int main(int arg, char **argv)
906 {
907   int n,s,w;
908   double worst = 0;
909 
910   printf("# size_t=%d,", (int) sizeof(size_t));
911 
912 // number of cache-lines
913 #ifdef STBDS_SMALL_BUCKET
914   printf("cacheline=%d,", 1);
915 #else
916   printf("cacheline=%d,", sizeof(size_t)==8 ? 2 : 1);
917 #endif
918 #ifdef STBDS_FLUSH_CACHE
919   printf("%d,", (int) stbds_log2(STBDS_FLUSH_CACHE));
920 #else
921   printf("0,");
922 #endif
923 #ifdef STBDS_BUCKET_START    // don't bother offseting differently within bucket for different hash values
924   printf("STBDS_BUCKET_START,");
925 #else
926   printf(",");
927 #endif
928 #ifdef STBDS_SIPHASH_2_4
929   printf("STBDS_SIPHASH_2_4,");
930 #else
931   printf(",");
932 #endif
933   printf("\n");
934 
935   measure_worst_lookup4_b(0);
936   //measure_worst_lookup4_a(0);
937   measure_average_lookup4(0);
938   measure_uncached_lookup4(0);
939   measure_build4(0);
940   return 0;
941 
942 #if 0
943   begin(); for (n=0; n < 2000; ++n) { build_stb(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table\n", timer);
944   begin(); for (n=0; n <  500; ++n) { build_stb(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table\n", timer);
945   begin(); for (n=0; n <  100; ++n) { build_stb(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table\n", timer);
946   begin(); for (n=0; n <   10; ++n) { build_stb(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table\n", timer);
947   begin(); for (n=0; n <    5; ++n) { build_stb(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table\n", timer);
948 #endif
949 
950 #if 0
951   begin(); for (n=0; n < 2000; ++n) { churn32(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 32-byte key\n", timer);
952   begin(); for (n=0; n <  500; ++n) { churn32(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 32-byte key\n", timer);
953   begin(); for (n=0; n <  100; ++n) { churn32(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 32-byte key\n", timer);
954   begin(); for (n=0; n <   10; ++n) { churn32(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 32-byte key\n", timer);
955   begin(); for (n=0; n <    5; ++n) { churn32(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 32-byte key\n", timer);
956 
957   begin(); for (n=0; n < 2000; ++n) { churn256(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 256-byte key\n", timer);
958   begin(); for (n=0; n <  500; ++n) { churn256(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 256-byte key\n", timer);
959   begin(); for (n=0; n <  100; ++n) { churn256(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 256-byte key\n", timer);
960   begin(); for (n=0; n <   10; ++n) { churn256(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 256-byte key\n", timer);
961   begin(); for (n=0; n <    5; ++n) { churn256(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 256-byte key\n", timer);
962 #endif
963 
964   begin(); for (n=0; n <   20; ++n) { multisearch_stb(2000,0,2000,1,1000);    } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000   2K table w/ 4-byte key\n", timer);
965   begin(); for (n=0; n <   10; ++n) { multisearch_stb(20000,0,2000,1,1000);   } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000  20K table w/ 4-byte key\n", timer);
966   begin(); for (n=0; n <    6; ++n) { multisearch_stb(200000,0,2000,1,1000);  } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
967   begin(); for (n=0; n <    2; ++n) { multisearch_stb(2000000,0,20000,1,100); } end(); printf("  // %7.2fms : 2,000,000 hits on   100   2M table w/ 4-byte key\n", timer);
968 
969   begin(); for (n=0; n <   20; ++n) { multisearch    (2000,0,2000,1,1000);    } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000   2K table w/ 4-byte key\n", timer);
970   begin(); for (n=0; n <   10; ++n) { multisearch    (20000,0,2000,1,1000);   } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000  20K table w/ 4-byte key\n", timer);
971   begin(); for (n=0; n <    6; ++n) { multisearch    (200000,0,2000,1,1000);  } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
972   begin(); for (n=0; n <    2; ++n) { multisearch    (2000000,0,20000,1,100); } end(); printf("  // %7.2fms : 2,000,000 hits on   100   2M table w/ 4-byte key\n", timer);
973 
974 
975 #if 1
976   begin(); for (n=0; n <    2; ++n) { multibuild_stb(2000,0,0,1,10000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating 10,000   2K table w/ 4-byte key\n", timer);
977   begin(); for (n=0; n <    2; ++n) { multibuild_stb(20000,0,0,1,1000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating  1,000  20K table w/ 4-byte key\n", timer);
978   begin(); for (n=0; n <    2; ++n) { multibuild_stb(200000,0,0,1,100); } end(); printf("  // %7.2fms : 20,000,000 inserts creating    100 200K table w/ 4-byte key\n", timer);
979   begin(); for (n=0; n <    2; ++n) { multibuild_stb(2000000,0,0,1,10); } end(); printf("  // %7.2fms : 20,000,000 inserts creating     10   2M table w/ 4-byte key\n", timer);
980 
981   begin(); for (n=0; n <    2; ++n) { multichurn4(2000,0,0,1,10000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating 10,000   2K table w/ 4-byte key\n", timer);
982   begin(); for (n=0; n <    2; ++n) { multichurn4(20000,0,0,1,1000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating  1,000  20K table w/ 4-byte key\n", timer);
983   begin(); for (n=0; n <    2; ++n) { multichurn4(200000,0,0,1,100); } end(); printf("  // %7.2fms : 20,000,000 inserts creating    100 200K table w/ 4-byte key\n", timer);
984   begin(); for (n=0; n <    2; ++n) { multichurn4(2000000,0,0,1,10); } end(); printf("  // %7.2fms : 20,000,000 inserts creating     10   2M table w/ 4-byte key\n", timer);
985 #endif
986 
987   begin(); for (n=0; n < 2000; ++n) { build(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 4-byte key\n", timer);
988   begin(); for (n=0; n <  500; ++n) { build(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 4-byte key\n", timer);
989   begin(); for (n=0; n <  100; ++n) { build(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 4-byte key\n", timer);
990   begin(); for (n=0; n <   10; ++n) { build(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 4-byte key\n", timer);
991   begin(); for (n=0; n <    5; ++n) { build(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 4-byte key\n", timer);
992 
993   begin(); for (n=0; n < 2000; ++n) { churn8(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 8-byte key\n", timer);
994   begin(); for (n=0; n <  500; ++n) { churn8(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 8-byte key\n", timer);
995   begin(); for (n=0; n <  100; ++n) { churn8(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 8-byte key\n", timer);
996   begin(); for (n=0; n <   10; ++n) { churn8(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 8-byte key\n", timer);
997   begin(); for (n=0; n <    5; ++n) { churn8(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 8-byte key\n", timer);
998 
999   begin(); for (n=0; n <   60; ++n) { churn_skip(2000,2100,5000);            } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
1000   begin(); for (n=0; n <   30; ++n) { churn_skip(20000,21000,500);           } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1001   begin(); for (n=0; n <   15; ++n) { churn_skip(200000,201000,500);         } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 200K table\n", timer);
1002   begin(); for (n=0; n <    8; ++n) { churn_skip(2000000,2001000,500);       } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 2M table\n", timer);
1003   begin(); for (n=0; n <    5; ++n) { churn_skip(20000000,20001000,500);     } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 20M table\n", timer);
1004   begin(); for (n=0; n <    1; ++n) { churn_skip(200000000u,200001000u,500); } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 200M table\n", timer);
1005   // even though the above measures a roughly fixed amount of work, we still have to build the table n times, hence the fewer measurements each time
1006 
1007   begin(); for (n=0; n <   60; ++n) { churn_skip(1000,3000,250);             } end(); printf("  // %7.2fms :    500,000 inserts & deletes in 2K table\n", timer);
1008   begin(); for (n=0; n <   15; ++n) { churn_skip(10000,30000,25);            } end(); printf("  // %7.2fms :    500,000 inserts & deletes in 20K table\n", timer);
1009   begin(); for (n=0; n <    7; ++n) { churn_skip(100000,300000,10);          } end(); printf("  // %7.2fms :  2,000,000 inserts & deletes in 200K table\n", timer);
1010   begin(); for (n=0; n <    2; ++n) { churn_skip(1000000,3000000,10);        } end(); printf("  // %7.2fms : 20,000,000 inserts & deletes in 2M table\n", timer);
1011 
1012   // search for bad intervals.. in practice this just seems to measure execution variance
1013   for (s = 2; s < 64; ++s) {
1014     begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1015     if (timer > worst) {
1016       worst = timer;
1017       w = s;
1018     }
1019   }
1020   for (; s <= 1024; s *= 2) {
1021     begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1022     if (timer > worst) {
1023       worst = timer;
1024       w = s;
1025     }
1026   }
1027   printf("  // %7.2fms(%d)   : Worst time from inserting 200,000 items with spacing %d.\n", worst, w, w);
1028 
1029   return 0;
1030 }
1031 #endif
1032