1 #ifdef DS_PERF
2 #define _CRT_SECURE_NO_WARNINGS
3 #define _CRT_NONSTDC_NO_DEPRECATE
4 #define _CRT_NON_CONFORMING_SWPRINTFS
5 
6 
7 //#define STBDS_INTERNAL_SMALL_BUCKET    // make 64-bit bucket fit both keys and hash bits
8 //#define STBDS_SIPHASH_2_4     // performance test 1_3 against 2_4
9 //#define STBDS_INTERNAL_BUCKET_START    // don't bother offseting differently within bucket for different hash values
10 //#define STBDS_FLUSH_CACHE  (1u<<20) // do this much memory traffic to flush the cache between some benchmarking measurements
11 
12 #define WIN32_LEAN_AND_MEAN
13 #include <windows.h>
14 #define STB_DEFINE
15 #define STB_NO_REGISTRY
16 #include "../stb.h"
17 #include <stdio.h>
18 #endif
19 
20 #ifdef DS_TEST
21 #define STBDS_UNIT_TESTS
22 #define STBDS_SMALL_BUCKET
23 #endif
24 
25 #ifdef DS_STATS
26 #define STBDS_STATISTICS
27 #endif
28 
29 #ifndef DS_PERF
30 #define STBDS_ASSERT assert
31 #include <assert.h>
32 #endif
33 
34 #define STB_DS_IMPLEMENTATION
35 #include "../stb_ds.h"
36 
37 size_t churn_inserts, churn_deletes;
38 
churn(int a,int b,int count)39 void churn(int a, int b, int count)
40 {
41   struct { int key,value; } *map=NULL;
42   int i,j,n,k;
43   for (i=0; i < a; ++i)
44     hmput(map,i,i+1);
45   for (n=0; n < count; ++n) {
46     for (j=a; j < b; ++j,++i) {
47       hmput(map,i,i+1);
48     }
49     assert(hmlen(map) == b);
50     for (j=a; j < b; ++j) {
51       k=i-j-1;
52       k = hmdel(map,k);
53       assert(k != 0);
54     }
55     assert(hmlen(map) == a);
56   }
57   hmfree(map);
58   churn_inserts = i;
59   churn_deletes = (b-a) * n;
60 }
61 
62 #ifdef DS_TEST
63 #include <stdio.h>
main(int argc,char ** argv)64 int main(int argc, char **argv)
65 {
66   stbds_unit_tests();
67   churn(0,100,1);
68   churn(3,7,50000);
69   churn(3,15,50000);
70   churn(16, 48, 25000);
71   churn(10, 15, 25000);
72   churn(200,500, 5000);
73   churn(2000,5000, 500);
74   churn(20000,50000, 50);
75   printf("Ok!");
76   return 0;
77 }
78 #endif
79 
80 #ifdef DS_STATS
81 #define MAX(a,b) ((a) > (b) ? (a) : (b))
82 size_t max_hit_probes, max_miss_probes, total_put_probes, total_miss_probes, churn_misses;
churn_stats(int a,int b,int count)83 void churn_stats(int a, int b, int count)
84 {
85   struct { int key,value; } *map=NULL;
86   int i,j,n,k;
87   churn_misses = 0;
88   for (i=0; i < a; ++i) {
89     hmput(map,i,i+1);
90     max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
91     total_put_probes += stbds_hash_probes;
92     stbds_hash_probes = 0;
93   }
94 
95   for (n=0; n < count; ++n) {
96     for (j=a; j < b; ++j,++i) {
97       hmput(map,i,i+1);
98       max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
99       total_put_probes += stbds_hash_probes;
100       stbds_hash_probes = 0;
101     }
102     for (j=0; j < (b-a)*10; ++j) {
103       k=i+j;
104       (void) hmgeti(map,k); // miss
105       max_miss_probes = MAX(max_miss_probes, stbds_hash_probes);
106       total_miss_probes += stbds_hash_probes;
107       stbds_hash_probes = 0;
108       ++churn_misses;
109     }
110     assert(hmlen(map) == b);
111     for (j=a; j < b; ++j) {
112       k=i-j-1;
113       k = hmdel(map,k);
114       stbds_hash_probes = 0;
115       assert(k);
116     }
117     assert(hmlen(map) == a);
118   }
119   hmfree(map);
120   churn_inserts = i;
121   churn_deletes = (b-a) * n;
122 }
123 
reset_stats(void)124 void reset_stats(void)
125 {
126   stbds_array_grow=0,
127   stbds_hash_grow=0;
128   stbds_hash_shrink=0;
129   stbds_hash_rebuild=0;
130   stbds_hash_probes=0;
131   stbds_hash_alloc=0;
132   stbds_rehash_probes=0;
133   stbds_rehash_items=0;
134   max_hit_probes = 0;
135   max_miss_probes = 0;
136   total_put_probes = 0;
137   total_miss_probes = 0;
138 }
139 
print_churn_probe_stats(char * str)140 void print_churn_probe_stats(char *str)
141 {
142   printf("Probes: %3d max hit, %3d max miss, %4.2f avg hit, %4.2f avg miss: %s\n",
143     (int) max_hit_probes, (int) max_miss_probes, (float) total_put_probes / churn_inserts, (float) total_miss_probes / churn_misses, str);
144   reset_stats();
145 }
146 
main(int arg,char ** argv)147 int main(int arg, char **argv)
148 {
149   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
150   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
151   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
152   churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
153   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
154   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
155   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
156   churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
157   return 0;
158 }
159 #endif
160 
161 
162 #ifdef DS_PERF
163 //char *strdup(const char *foo) { return 0; }
164 //int stricmp(const char *a, const char *b) { return 0; }
165 //int strnicmp(const char *a, const char *b, size_t n) { return 0; }
166 
167 unsigned __int64 t0, xsum, mn,mx,count;
begin(void)168 void begin(void)
169 {
170   LARGE_INTEGER m;
171   QueryPerformanceCounter(&m);
172   t0 = m.QuadPart;
173   xsum = 0;
174   count = 0;
175   mx = 0;
176   mn = ~(unsigned __int64) 0;
177 }
178 
measure(void)179 void measure(void)
180 {
181   unsigned __int64 t1, t;
182   LARGE_INTEGER m;
183   QueryPerformanceCounter(&m);
184   t1 = m.QuadPart;
185   t = t1-t0;
186   if (t1 < t0)
187     printf("ALERT: QueryPerformanceCounter was unordered!\n");
188   if (t < mn) mn = t;
189   if (t > mx) mx = t;
190   xsum += t;
191   ++count;
192   t0 = t1;
193 }
194 
dont_measure(void)195 void dont_measure(void)
196 {
197   LARGE_INTEGER m;
198   QueryPerformanceCounter(&m);
199   t0 = m.QuadPart;
200 }
201 
202 double timer;
end(void)203 double end(void)
204 {
205   LARGE_INTEGER m;
206   QueryPerformanceFrequency(&m);
207 
208   if (count > 3) {
209     // discard the highest and lowest
210     xsum -= mn;
211     xsum -= mx;
212     count -= 2;
213   }
214   timer = (double) (xsum) / count / m.QuadPart * 1000;
215   return timer;
216 }
217 
build(int a,int b,int count,int step)218 void build(int a, int b, int count, int step)
219 {
220   struct { int key,value; } *map=NULL;
221   int i,n;
222   for (i=0; i < a; ++i) {
223     n = i*step;
224     hmput(map,n,i+1);
225   }
226   measure();
227   churn_inserts = i;
228   hmfree(map);
229   dont_measure();
230 }
231 
232 #ifdef STB__INCLUDE_STB_H
build_stb(int a,int b,int count,int step)233 void build_stb(int a, int b, int count, int step)
234 {
235   stb_idict *d = stb_idict_new_size(8);
236   int i;
237   for (i=0; i < a; ++i)
238     stb_idict_add(d, i*step, i+1);
239   measure();
240   churn_inserts = i;
241   stb_idict_destroy(d);
242   dont_measure();
243 }
244 
multibuild_stb(int a,int b,int count,int step,int tables)245 void multibuild_stb(int a, int b, int count, int step, int tables)
246 {
247   stb_idict *d[50000];
248   int i,q;
249   for (q=0; q < tables; ++q)
250     d[q] = stb_idict_new_size(8);
251   dont_measure();
252   for (i=0; i < a; ++i)
253     for (q=0; q < tables; ++q)
254       stb_idict_add(d[q], i*step+q*771, i+1);
255   measure();
256   churn_inserts = i;
257   for (q=0; q < tables; ++q)
258     stb_idict_destroy(d[q]);
259   dont_measure();
260 }
261 
multisearch_stb(int a,int start,int end,int step,int tables)262 int multisearch_stb(int a, int start, int end, int step, int tables)
263 {
264   stb_idict *d[50000];
265   int i,q,total=0,v;
266   for (q=0; q < tables; ++q)
267     d[q] = stb_idict_new_size(8);
268   for (q=0; q < tables; ++q)
269     for (i=0; i < a; ++i)
270       stb_idict_add(d[q], i*step+q*771, i+1);
271   dont_measure();
272   for (i=start; i < end; ++i)
273     for (q=0; q < tables; ++q)
274       if (stb_idict_get_flag(d[q], i*step+q*771, &v))
275         total += v;
276   measure();
277   churn_inserts = i;
278   for (q=0; q < tables; ++q)
279     stb_idict_destroy(d[q]);
280   dont_measure();
281   return total;
282 }
283 #endif
284 
multisearch(int a,int start,int end,int step,int tables)285 int multisearch(int a, int start, int end, int step, int tables)
286 {
287   struct { int key,value; } *hash[50000];
288   int i,q,total=0;
289   for (q=0; q < tables; ++q)
290     hash[q] = NULL;
291   for (q=0; q < tables; ++q)
292     for (i=0; i < a; ++i)
293       hmput(hash[q], i*step+q*771, i+1);
294   dont_measure();
295   for (i=start; i < end; ++i)
296     for (q=0; q < tables; ++q)
297       total += hmget(hash[q], i*step+q*771);
298   measure();
299   churn_inserts = i;
300   for (q=0; q < tables; ++q)
301     hmfree(hash[q]);
302   dont_measure();
303   return total;
304 }
305 
churn_skip(unsigned int a,unsigned int b,int count)306 void churn_skip(unsigned int a, unsigned int b, int count)
307 {
308   struct { unsigned int key,value; } *map=NULL;
309   unsigned int i,j,n,k;
310   for (i=0; i < a; ++i)
311     hmput(map,i,i+1);
312   dont_measure();
313   for (n=0; n < count; ++n) {
314     for (j=a; j < b; ++j,++i) {
315       hmput(map,i,i+1);
316     }
317     assert(hmlen(map) == b);
318     for (j=a; j < b; ++j) {
319       k=i-j-1;
320       k = hmdel(map,k);
321       assert(k != 0);
322     }
323     assert(hmlen(map) == a);
324   }
325   measure();
326   churn_inserts = i;
327   churn_deletes = (b-a) * n;
328   hmfree(map);
329   dont_measure();
330 }
331 
332 typedef struct { int n[8]; } str32;
churn32(int a,int b,int count,int include_startup)333 void churn32(int a, int b, int count, int include_startup)
334 {
335   struct { str32 key; int value; } *map=NULL;
336   int i,j,n;
337   str32 key = { 0 };
338   for (i=0; i < a; ++i) {
339     key.n[0] = i;
340     hmput(map,key,i+1);
341   }
342   if (!include_startup)
343     dont_measure();
344   for (n=0; n < count; ++n) {
345     for (j=a; j < b; ++j,++i) {
346       key.n[0] = i;
347       hmput(map,key,i+1);
348     }
349     assert(hmlen(map) == b);
350     for (j=a; j < b; ++j) {
351       key.n[0] = i-j-1;
352       hmdel(map,key);
353     }
354     assert(hmlen(map) == a);
355   }
356   measure();
357   hmfree(map);
358   churn_inserts = i;
359   churn_deletes = (b-a) * n;
360   dont_measure();
361 }
362 
363 typedef struct { int n[32]; } str256;
churn256(int a,int b,int count,int include_startup)364 void churn256(int a, int b, int count, int include_startup)
365 {
366   struct { str256 key; int value; } *map=NULL;
367   int i,j,n;
368   str256 key = { 0 };
369   for (i=0; i < a; ++i) {
370     key.n[0] = i;
371     hmput(map,key,i+1);
372   }
373   if (!include_startup)
374     dont_measure();
375   for (n=0; n < count; ++n) {
376     for (j=a; j < b; ++j,++i) {
377       key.n[0] = i;
378       hmput(map,key,i+1);
379     }
380     assert(hmlen(map) == b);
381     for (j=a; j < b; ++j) {
382       key.n[0] = i-j-1;
383       hmdel(map,key);
384     }
385     assert(hmlen(map) == a);
386   }
387   measure();
388   hmfree(map);
389   churn_inserts = i;
390   churn_deletes = (b-a) * n;
391   dont_measure();
392 }
393 
churn8(int a,int b,int count,int include_startup)394 void churn8(int a, int b, int count, int include_startup)
395 {
396   struct { size_t key,value; } *map=NULL;
397   int i,j,n,k;
398   for (i=0; i < a; ++i)
399     hmput(map,i,i+1);
400   if (!include_startup)
401     dont_measure();
402   for (n=0; n < count; ++n) {
403     for (j=a; j < b; ++j,++i) {
404       hmput(map,i,i+1);
405     }
406     assert(hmlen(map) == b);
407     for (j=a; j < b; ++j) {
408       k=i-j-1;
409       k = hmdel(map,k);
410       assert(k != 0);
411     }
412     assert(hmlen(map) == a);
413   }
414   measure();
415   hmfree(map);
416   churn_inserts = i;
417   churn_deletes = (b-a) * n;
418   dont_measure();
419 }
420 
multichurn4(int a,int b,int count,int include_startup,int tables)421 void multichurn4(int a, int b, int count, int include_startup, int tables)
422 {
423   struct { int key,value; } *map[50000];
424   int i,j,n,k,q;
425   for (q=0; q < tables; ++q)
426     map[q] = NULL;
427   dont_measure();
428 
429   for (i=0; i < a; ++i)
430     for (q=0; q < tables; ++q)
431       hmput(map[q],i,i+1);
432   if (!include_startup)
433     dont_measure();
434   for (n=0; n < count; ++n) {
435     for (j=a; j < b; ++j,++i) {
436       for (q=0; q < tables; ++q)
437         hmput(map[q],i,i+1);
438     }
439     assert(hmlen(map[0]) == b);
440     for (j=a; j < b; ++j) {
441       k=i-j-1;
442       for (q=0; q < tables; ++q)
443         k = hmdel(map[q],k);
444       assert(k != 0);
445     }
446     assert(hmlen(map[0]) == a);
447   }
448   measure();
449   for (q=0; q < tables; ++q)
450     hmfree(map[q]);
451   churn_inserts = i * tables;
452   churn_deletes = (b-a) * n * tables;
453   dont_measure();
454 }
455 
456 struct {
457   unsigned __int64 start;
458   unsigned __int64 end;
459   int    table_size;
460 } mstats[32][4000];
461 
462 const int first_step = 64;
463 const int last_step = 384-48; // 32M
464 
measure_build4(int step_log2)465 void measure_build4(int step_log2)
466 {
467   double length;
468   int i,j,k=0;
469   int step = 1 << step_log2;
470   unsigned __int64 t0,t1;
471   struct { int key,value; } *map=NULL;
472   double rdtsc_scale;
473   begin();
474   t0 = __rdtsc();
475 
476   mstats[0][0].start = __rdtsc();
477   for (i=0; i < 256; ++i) {
478     hmput(map,k,k+1);
479     k += step;
480   }
481   mstats[0][first_step-1].end = __rdtsc();
482   mstats[0][first_step-1].table_size = k >> step_log2;
483   for (j=first_step; j < last_step; ++j) {
484     for (i=0; i < (1<<(j>>4)); ++i) {
485       hmput(map, k,k+1);
486       k += step;
487     }
488     mstats[0][j].end = __rdtsc();
489     mstats[0][j].table_size = k >> step_log2;
490   }
491   t1 = __rdtsc();
492   measure();
493   hmfree(map);
494   length = end();
495   rdtsc_scale = length / (t1-t0) * 1000;
496 
497   for (j=1; j < last_step; ++j)
498     mstats[0][j].start = mstats[0][0].start;
499   for (j=first_step-1; j < last_step; ++j) {
500     printf("%12.4f,%12d,%12d,0,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size, mstats[0][j].table_size);
501   }
502 }
503 
504 #ifdef STBDS_FLUSH_CACHE
505 static int cache_index;
506 char dummy[8][STBDS_FLUSH_CACHE];
507 
flush_cache(void)508 int flush_cache(void)
509 {
510   memmove(dummy[cache_index],dummy[cache_index]+1, sizeof(dummy[cache_index])-1);
511   cache_index = (cache_index+1)%8;
512   return dummy[cache_index][0];
513 }
514 #else
flush_cache(void)515 int flush_cache(void) { return 0; }
516 #endif
517 
measure_average_lookup4(int step_log2)518 int measure_average_lookup4(int step_log2)
519 {
520   int total;
521   double length;
522   int i,j,k=0,q;
523   int step = 1 << step_log2;
524   unsigned __int64 t0,t1;
525   struct { int key,value; } *map=NULL;
526   double rdtsc_scale;
527   begin();
528   t0 = __rdtsc();
529 
530   for (i=0; i < 128; ++i) {
531     hmput(map,k,k+1);
532     k += step;
533   }
534   for (j=first_step; j <= last_step; ++j) {
535     total += flush_cache();
536     mstats[0][j].start = __rdtsc();
537     for (q=i=0; i < 50000; ++i) {
538        total += hmget(map, q); // hit
539        if (++q == k) q = 0;
540     }
541     mstats[0][j].end = __rdtsc();
542     mstats[0][j].table_size = k;
543     total += flush_cache();
544     mstats[1][j].start = __rdtsc();
545     for (i=0; i < 50000; ++i) {
546        total += hmget(map, i+k); // miss
547     }
548     mstats[1][j].end = __rdtsc();
549     mstats[1][j].table_size = k;
550 
551     // expand table
552     for (i=0; i < (1<<(j>>4)); ++i) {
553       hmput(map, k,k+1);
554       k += step;
555     }
556   }
557 
558   t1 = __rdtsc();
559   measure();
560   hmfree(map);
561   length = end();
562   rdtsc_scale = length / (t1-t0) * 1000;
563 
564   for (j=first_step; j <= last_step; ++j) {
565     // time,table_size,numins,numhit,nummiss,numperflush
566     printf("%12.4f,%12d,0,50000,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
567   }
568   for (j=first_step; j <= last_step; ++j) {
569     printf("%12.4f,%12d,0,0,50000,0\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
570   }
571   return total;
572 }
573 
measure_worst_lookup4_a(int step_log2)574 int measure_worst_lookup4_a(int step_log2)
575 {
576   int total;
577   double length;
578   int i,j,k=0,q,worst_q,n,z,attempts;
579   int step = 1 << step_log2;
580   unsigned __int64 t0,t1;
581   unsigned __int64 m0,m1,worst;
582   struct { int key,value; } *map=NULL;
583   double rdtsc_scale;
584   begin();
585   t0 = __rdtsc();
586 
587   memset(mstats, 0, sizeof(mstats));
588   for (j=first_step; j <= last_step; ++j)
589     mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
590 
591   for(attempts=0; attempts < 2; ++attempts) {
592     k = 0;
593     stbds_rand_seed(0); // force us to get the same table every time
594     for (i=0; i < 128; ++i) {
595       hmput(map,k,k+1);
596       k += step;
597     }
598     for (j=first_step; j <= last_step; ++j) {
599       unsigned __int64 times[32];
600 
601       // find the worst hit time
602       for (z=0; z < 2; ++z) { // try the bisectioning measurement 4 times
603         worst = 0;
604         for (n=0; n < 10; ++n) { // test 400 keys total
605           // find the worst time to hit 20 keys
606           q=0;
607           worst_q = 0;
608           total += flush_cache();
609           m0 = __rdtsc();
610           for (i=0; i < 20; ++i) {
611             total += hmget(map, q); // hit
612             if (++q == k) q = 0;
613           }
614           m1 = __rdtsc();
615           // for each n, check if this is the worst lookup we've seen
616           if (m1 - m0 > worst) {
617             worst = m1-m0;
618             worst_q = q - i;
619             if (worst_q < 0) q += k;
620           }
621         }
622         // after 400 keys, take the worst 20 keys, and try each one
623         worst = 0;
624         q = worst_q;
625         for (i=0; i < 20; ++i) {
626           total += flush_cache();
627           m0 = __rdtsc();
628           total += hmget(map, q); // hit
629           m1 = __rdtsc();
630           if (m1 - m0 > worst)
631             worst = m1-m0;
632           if (++q == k) q = 0;
633         }
634         times[z] = worst;
635       }
636       // find the worst time in the bunch
637       worst = 0;
638       for (i=0; i < z; ++i)
639         if (times[i] > worst)
640           worst = times[i];
641       // take the best of 'attempts', to discard outliers
642       if (worst < mstats[0][j].end)
643         mstats[0][j].end = worst;
644       mstats[0][j].start = 0;
645       mstats[0][j].table_size = k >> step_log2;
646 
647       // find the worst miss time
648       for (z=0; z < 8; ++z) { // try the bisectioning measurement 8 times
649         worst = 0;
650         for (n=0; n < 20; ++n) { // test 400 keys total
651           // find the worst time to hit 20 keys
652           q=k;
653           worst_q = 0;
654           total += flush_cache();
655           m0 = __rdtsc();
656           for (i=0; i < 20; ++i) {
657             total += hmget(map, q); // hit
658           }
659           m1 = __rdtsc();
660           // for each n, check if this is the worst lookup we've seen
661           if (m1 - m0 > worst) {
662             worst = m1-m0;
663             worst_q = q - i;
664           }
665         }
666         // after 400 keys, take the worst 20 keys, and try each one
667         worst = 0;
668         q = worst_q;
669         for (i=0; i < 20; ++i) {
670           total += flush_cache();
671           m0 = __rdtsc();
672           total += hmget(map, q); // hit
673           m1 = __rdtsc();
674           if (m1 - m0 > worst)
675             worst = m1-m0;
676         }
677         times[z] = worst;
678       }
679       // find the worst time in the bunch
680       worst = 0;
681       for (i=0; i < z; ++i)
682         if (times[i] > worst)
683           worst = times[i];
684       if (worst < mstats[1][j].end)
685         mstats[1][j].end = worst;
686       mstats[1][j].start = 0;
687       mstats[1][j].table_size = k >> step_log2;
688 
689       // expand table
690       for (i=0; i < (1<<(j>>4)); ++i) {
691         hmput(map, k,k+1);
692         k += step;
693       }
694     }
695     hmfree(map);
696   }
697 
698   t1 = __rdtsc();
699   measure();
700   length = end();
701   rdtsc_scale = length / (t1-t0) * 1000;
702 
703   for (j=first_step; j <= last_step; ++j) {
704     printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
705   }
706   for (j=first_step; j <= last_step; ++j) {
707     printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
708   }
709   return total;
710 }
711 
measure_worst_lookup4_b(int step_log2)712 int measure_worst_lookup4_b(int step_log2)
713 {
714   int total;
715   double length;
716   int i,j,k=0,q,worst_q,n,z,attempts;
717   int step = 1 << step_log2;
718   unsigned __int64 t0,t1;
719   unsigned __int64 m0,m1,worst;
720   struct { int key,value; } *map=NULL;
721   double rdtsc_scale;
722   begin();
723   t0 = __rdtsc();
724 
725   memset(mstats, 0, sizeof(mstats));
726   for (j=first_step; j <= last_step; ++j)
727     mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
728 
729   k = 0;
730   stbds_rand_seed(0); // force us to get the same table every time
731   for (i=0; i < 128; ++i) {
732     hmput(map,k,k+1);
733     k += step;
734   }
735   for (j=first_step; j <= last_step; ++j) {
736     unsigned __int64 times[32];
737 
738     // find the worst hit time
739     for (z=0; z < 8; ++z) { // try this 8 times
740       worst = 0;
741       q=0;
742       for (i=0; i < 5000; ++i) {
743         total += hmget(map, q);
744         m0 = __rdtsc();
745         total += hmget(map, q);
746         m1 = __rdtsc();
747         if (m1 - m0 > worst) {
748           worst = m1-m0;
749           worst_q = q - i;
750         }
751         if (++q == k) q = 0;
752       }
753       // now retry with the worst one, but find the shortest time for it
754       worst = ~(unsigned __int64) 0;
755       for (i=0; i < 4; ++i) {
756         total += flush_cache();
757         m0 = __rdtsc();
758         total += hmget(map,worst_q);
759         m1 = __rdtsc();
760         if (m1-m0 < worst)
761           worst = m1-m0;
762       }
763       times[z] = worst;
764     }
765 
766     // find the worst of those
767     worst = 0;
768     for (i=0; i < z; ++i)
769       if (times[i] > worst)
770         worst = times[i];
771     mstats[0][j].start = 0;
772     mstats[0][j].end = worst;
773     mstats[0][j].table_size = k;
774 
775     // find the worst miss time
776     for (z=0; z < 8; ++z) { // try this 8 times
777       worst = 0;
778       q=k;
779       for (i=0; i < 5000; ++i) {
780         total += hmget(map, q);
781         m0 = __rdtsc();
782         total += hmget(map, q);
783         m1 = __rdtsc();
784         if (m1 - m0 > worst) {
785           worst = m1-m0;
786           worst_q = q - i;
787         }
788         //printf("%6llu ", m1-m0);
789       }
790       // now retry with the worst one, but find the shortest time for it
791       worst = ~(unsigned __int64) 0;
792       for (i=0; i < 4; ++i) {
793         total += flush_cache();
794         m0 = __rdtsc();
795         total += hmget(map,worst_q);
796         m1 = __rdtsc();
797         if (m1-m0 < worst)
798           worst = m1-m0;
799       }
800       times[z] = worst;
801     }
802 
803     // find the worst of those
804     worst = 0;
805     for (i=0; i < z; ++i)
806       if (times[i] > worst)
807         worst = times[i];
808     mstats[1][j].start = 0;
809     mstats[1][j].end = worst;
810     mstats[1][j].table_size = k;
811 
812     // expand table
813     for (i=0; i < (1<<(j>>4)); ++i) {
814       hmput(map, k,k+1);
815       k += step;
816     }
817   }
818   hmfree(map);
819 
820   t1 = __rdtsc();
821   measure();
822   length = end();
823   rdtsc_scale = length / (t1-t0) * 1000;
824 
825   for (j=first_step+1; j <= last_step; ++j) {
826     printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
827   }
828   for (j=first_step+1; j <= last_step; ++j) {
829     printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
830   }
831   return total;
832 }
833 
measure_uncached_lookup4(int step_log2)834 int measure_uncached_lookup4(int step_log2)
835 {
836   int total;
837   double length;
838   int i,j,k=0,q;
839   int step = 1 << step_log2;
840   unsigned __int64 t0,t1;
841   struct { int key,value; } *map=NULL;
842   double rdtsc_scale;
843   begin();
844   t0 = __rdtsc();
845 
846   map = NULL;
847   for (i=0; i < 128; ++i) {
848     hmput(map,k,k+1);
849     k += step;
850   }
851   for (j=first_step; j <= last_step; ++j) {
852     mstats[0][j].start = __rdtsc();
853     mstats[0][j].end = 0;
854     for (q=i=0; i < 512; ++i) {
855       if ((i & 3) == 0) {
856         mstats[0][j].end += __rdtsc();
857         total += flush_cache();
858         mstats[0][j].start += __rdtsc();
859       }
860       total += hmget(map, q); // hit
861       if (++q == k) q = 0;
862     }
863     mstats[0][j].end += __rdtsc();
864     mstats[0][j].table_size = k;
865     total += flush_cache();
866     mstats[1][j].end = 0;
867     mstats[1][j].start = __rdtsc();
868     for (i=0; i < 512; ++i) {
869       if ((i & 3) == 0) {
870         mstats[1][j].end += __rdtsc();
871         total += flush_cache();
872         mstats[1][j].start += __rdtsc();
873       }
874       total += hmget(map, i+k); // miss
875     }
876     mstats[1][j].end += __rdtsc();
877     mstats[1][j].table_size = k;
878 
879     // expand table
880     for (i=0; i < (1<<(j>>4)); ++i) {
881       hmput(map, k,k+1);
882       k += step;
883     }
884   }
885   hmfree(map);
886 
887   t1 = __rdtsc();
888   measure();
889   length = end();
890   rdtsc_scale = length / (t1-t0) * 1000;
891 
892   for (j=first_step; j <= last_step; ++j) {
893     printf("%12.4f,%12d,0,512,0,4\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
894   }
895   for (j=first_step; j <= last_step; ++j) {
896     printf("%12.4f,%12d,0,0,512,4\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
897   }
898   return total;
899 }
900 
901 
902 
903 
main(int arg,char ** argv)904 int main(int arg, char **argv)
905 {
906   int n,s,w;
907   double worst = 0;
908 
909   printf("# size_t=%d,", (int) sizeof(size_t));
910 
911 // number of cache-lines
912 #ifdef STBDS_SMALL_BUCKET
913   printf("cacheline=%d,", 1);
914 #else
915   printf("cacheline=%d,", sizeof(size_t)==8 ? 2 : 1);
916 #endif
917 #ifdef STBDS_FLUSH_CACHE
918   printf("%d,", (int) stbds_log2(STBDS_FLUSH_CACHE));
919 #else
920   printf("0,");
921 #endif
922 #ifdef STBDS_BUCKET_START    // don't bother offseting differently within bucket for different hash values
923   printf("STBDS_BUCKET_START,");
924 #else
925   printf(",");
926 #endif
927 #ifdef STBDS_SIPHASH_2_4
928   printf("STBDS_SIPHASH_2_4,");
929 #else
930   printf(",");
931 #endif
932   printf("\n");
933 
934   measure_worst_lookup4_b(0);
935   //measure_worst_lookup4_a(0);
936   measure_average_lookup4(0);
937   measure_uncached_lookup4(0);
938   measure_build4(0);
939   return 0;
940 
941 #if 0
942   begin(); for (n=0; n < 2000; ++n) { build_stb(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table\n", timer);
943   begin(); for (n=0; n <  500; ++n) { build_stb(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table\n", timer);
944   begin(); for (n=0; n <  100; ++n) { build_stb(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table\n", timer);
945   begin(); for (n=0; n <   10; ++n) { build_stb(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table\n", timer);
946   begin(); for (n=0; n <    5; ++n) { build_stb(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table\n", timer);
947 #endif
948 
949 #if 0
950   begin(); for (n=0; n < 2000; ++n) { churn32(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 32-byte key\n", timer);
951   begin(); for (n=0; n <  500; ++n) { churn32(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 32-byte key\n", timer);
952   begin(); for (n=0; n <  100; ++n) { churn32(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 32-byte key\n", timer);
953   begin(); for (n=0; n <   10; ++n) { churn32(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 32-byte key\n", timer);
954   begin(); for (n=0; n <    5; ++n) { churn32(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 32-byte key\n", timer);
955 
956   begin(); for (n=0; n < 2000; ++n) { churn256(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 256-byte key\n", timer);
957   begin(); for (n=0; n <  500; ++n) { churn256(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 256-byte key\n", timer);
958   begin(); for (n=0; n <  100; ++n) { churn256(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 256-byte key\n", timer);
959   begin(); for (n=0; n <   10; ++n) { churn256(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 256-byte key\n", timer);
960   begin(); for (n=0; n <    5; ++n) { churn256(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 256-byte key\n", timer);
961 #endif
962 
963   begin(); for (n=0; n <   20; ++n) { multisearch_stb(2000,0,2000,1,1000);    } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000   2K table w/ 4-byte key\n", timer);
964   begin(); for (n=0; n <   10; ++n) { multisearch_stb(20000,0,2000,1,1000);   } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000  20K table w/ 4-byte key\n", timer);
965   begin(); for (n=0; n <    6; ++n) { multisearch_stb(200000,0,2000,1,1000);  } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
966   begin(); for (n=0; n <    2; ++n) { multisearch_stb(2000000,0,20000,1,100); } end(); printf("  // %7.2fms : 2,000,000 hits on   100   2M table w/ 4-byte key\n", timer);
967 
968   begin(); for (n=0; n <   20; ++n) { multisearch    (2000,0,2000,1,1000);    } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000   2K table w/ 4-byte key\n", timer);
969   begin(); for (n=0; n <   10; ++n) { multisearch    (20000,0,2000,1,1000);   } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000  20K table w/ 4-byte key\n", timer);
970   begin(); for (n=0; n <    6; ++n) { multisearch    (200000,0,2000,1,1000);  } end(); printf("  // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
971   begin(); for (n=0; n <    2; ++n) { multisearch    (2000000,0,20000,1,100); } end(); printf("  // %7.2fms : 2,000,000 hits on   100   2M table w/ 4-byte key\n", timer);
972 
973 
974 #if 1
975   begin(); for (n=0; n <    2; ++n) { multibuild_stb(2000,0,0,1,10000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating 10,000   2K table w/ 4-byte key\n", timer);
976   begin(); for (n=0; n <    2; ++n) { multibuild_stb(20000,0,0,1,1000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating  1,000  20K table w/ 4-byte key\n", timer);
977   begin(); for (n=0; n <    2; ++n) { multibuild_stb(200000,0,0,1,100); } end(); printf("  // %7.2fms : 20,000,000 inserts creating    100 200K table w/ 4-byte key\n", timer);
978   begin(); for (n=0; n <    2; ++n) { multibuild_stb(2000000,0,0,1,10); } end(); printf("  // %7.2fms : 20,000,000 inserts creating     10   2M table w/ 4-byte key\n", timer);
979 
980   begin(); for (n=0; n <    2; ++n) { multichurn4(2000,0,0,1,10000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating 10,000   2K table w/ 4-byte key\n", timer);
981   begin(); for (n=0; n <    2; ++n) { multichurn4(20000,0,0,1,1000); } end(); printf("  // %7.2fms : 20,000,000 inserts creating  1,000  20K table w/ 4-byte key\n", timer);
982   begin(); for (n=0; n <    2; ++n) { multichurn4(200000,0,0,1,100); } end(); printf("  // %7.2fms : 20,000,000 inserts creating    100 200K table w/ 4-byte key\n", timer);
983   begin(); for (n=0; n <    2; ++n) { multichurn4(2000000,0,0,1,10); } end(); printf("  // %7.2fms : 20,000,000 inserts creating     10   2M table w/ 4-byte key\n", timer);
984 #endif
985 
986   begin(); for (n=0; n < 2000; ++n) { build(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 4-byte key\n", timer);
987   begin(); for (n=0; n <  500; ++n) { build(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 4-byte key\n", timer);
988   begin(); for (n=0; n <  100; ++n) { build(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 4-byte key\n", timer);
989   begin(); for (n=0; n <   10; ++n) { build(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 4-byte key\n", timer);
990   begin(); for (n=0; n <    5; ++n) { build(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 4-byte key\n", timer);
991 
992   begin(); for (n=0; n < 2000; ++n) { churn8(2000,0,0,1);          } end(); printf("  // %7.2fms :      2,000 inserts creating 2K table w/ 8-byte key\n", timer);
993   begin(); for (n=0; n <  500; ++n) { churn8(20000,0,0,1);         } end(); printf("  // %7.2fms :     20,000 inserts creating 20K table w/ 8-byte key\n", timer);
994   begin(); for (n=0; n <  100; ++n) { churn8(200000,0,0,1);        } end(); printf("  // %7.2fms :    200,000 inserts creating 200K table w/ 8-byte key\n", timer);
995   begin(); for (n=0; n <   10; ++n) { churn8(2000000,0,0,1);       } end(); printf("  // %7.2fms :  2,000,000 inserts creating 2M table w/ 8-byte key\n", timer);
996   begin(); for (n=0; n <    5; ++n) { churn8(20000000,0,0,1);      } end(); printf("  // %7.2fms : 20,000,000 inserts creating 20M table w/ 8-byte key\n", timer);
997 
998   begin(); for (n=0; n <   60; ++n) { churn_skip(2000,2100,5000);            } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
999   begin(); for (n=0; n <   30; ++n) { churn_skip(20000,21000,500);           } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1000   begin(); for (n=0; n <   15; ++n) { churn_skip(200000,201000,500);         } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 200K table\n", timer);
1001   begin(); for (n=0; n <    8; ++n) { churn_skip(2000000,2001000,500);       } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 2M table\n", timer);
1002   begin(); for (n=0; n <    5; ++n) { churn_skip(20000000,20001000,500);     } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 20M table\n", timer);
1003   begin(); for (n=0; n <    1; ++n) { churn_skip(200000000u,200001000u,500); } end(); printf("  // %7.2fms : 500,000 inserts & deletes in 200M table\n", timer);
1004   // even though the above measures a roughly fixed amount of work, we still have to build the table n times, hence the fewer measurements each time
1005 
1006   begin(); for (n=0; n <   60; ++n) { churn_skip(1000,3000,250);             } end(); printf("  // %7.2fms :    500,000 inserts & deletes in 2K table\n", timer);
1007   begin(); for (n=0; n <   15; ++n) { churn_skip(10000,30000,25);            } end(); printf("  // %7.2fms :    500,000 inserts & deletes in 20K table\n", timer);
1008   begin(); for (n=0; n <    7; ++n) { churn_skip(100000,300000,10);          } end(); printf("  // %7.2fms :  2,000,000 inserts & deletes in 200K table\n", timer);
1009   begin(); for (n=0; n <    2; ++n) { churn_skip(1000000,3000000,10);        } end(); printf("  // %7.2fms : 20,000,000 inserts & deletes in 2M table\n", timer);
1010 
1011   // search for bad intervals.. in practice this just seems to measure execution variance
1012   for (s = 2; s < 64; ++s) {
1013     begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1014     if (timer > worst) {
1015       worst = timer;
1016       w = s;
1017     }
1018   }
1019   for (; s <= 1024; s *= 2) {
1020     begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1021     if (timer > worst) {
1022       worst = timer;
1023       w = s;
1024     }
1025   }
1026   printf("  // %7.2fms(%d)   : Worst time from inserting 200,000 items with spacing %d.\n", worst, w, w);
1027 
1028   return 0;
1029 }
1030 #endif
1031