1 #ifdef DS_PERF
2 #define _CRT_SECURE_NO_WARNINGS
3 #define _CRT_NONSTDC_NO_DEPRECATE
4 #define _CRT_NON_CONFORMING_SWPRINTFS
5
6
7 //#define STBDS_INTERNAL_SMALL_BUCKET // make 64-bit bucket fit both keys and hash bits
8 //#define STBDS_SIPHASH_2_4 // performance test 1_3 against 2_4
9 //#define STBDS_INTERNAL_BUCKET_START // don't bother offseting differently within bucket for different hash values
10 //#define STBDS_FLUSH_CACHE (1u<<20) // do this much memory traffic to flush the cache between some benchmarking measurements
11
12 #define WIN32_LEAN_AND_MEAN
13 #include <windows.h>
14 #define STB_DEFINE
15 #define STB_NO_REGISTRY
16 #include "../stb.h"
17 #include <stdio.h>
18 #endif
19
20 #ifdef DS_TEST
21 #define STBDS_UNIT_TESTS
22 #define STBDS_SMALL_BUCKET
23 #endif
24
25 #ifdef DS_STATS
26 #define STBDS_STATISTICS
27 #endif
28
29 #ifndef DS_PERF
30 #define STBDS_ASSERT assert
31 #include <assert.h>
32 #endif
33
34 #define STB_DS_IMPLEMENTATION
35 #include "../stb_ds.h"
36
37 size_t churn_inserts, churn_deletes;
38
churn(int a,int b,int count)39 void churn(int a, int b, int count)
40 {
41 struct { int key,value; } *map=NULL;
42 int i,j,n,k;
43 for (i=0; i < a; ++i)
44 hmput(map,i,i+1);
45 for (n=0; n < count; ++n) {
46 for (j=a; j < b; ++j,++i) {
47 hmput(map,i,i+1);
48 }
49 assert(hmlen(map) == b);
50 for (j=a; j < b; ++j) {
51 k=i-j-1;
52 k = hmdel(map,k);
53 assert(k != 0);
54 }
55 assert(hmlen(map) == a);
56 }
57 hmfree(map);
58 churn_inserts = i;
59 churn_deletes = (b-a) * n;
60 }
61
62 #ifdef DS_TEST
63 #include <stdio.h>
main(int argc,char ** argv)64 int main(int argc, char **argv)
65 {
66 stbds_unit_tests();
67 churn(0,100,1);
68 churn(3,7,50000);
69 churn(3,15,50000);
70 churn(16, 48, 25000);
71 churn(10, 15, 25000);
72 churn(200,500, 5000);
73 churn(2000,5000, 500);
74 churn(20000,50000, 50);
75 printf("Ok!");
76 return 0;
77 }
78 #endif
79
80 #ifdef DS_STATS
81 #define MAX(a,b) ((a) > (b) ? (a) : (b))
82 size_t max_hit_probes, max_miss_probes, total_put_probes, total_miss_probes, churn_misses;
churn_stats(int a,int b,int count)83 void churn_stats(int a, int b, int count)
84 {
85 struct { int key,value; } *map=NULL;
86 int i,j,n,k;
87 churn_misses = 0;
88 for (i=0; i < a; ++i) {
89 hmput(map,i,i+1);
90 max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
91 total_put_probes += stbds_hash_probes;
92 stbds_hash_probes = 0;
93 }
94
95 for (n=0; n < count; ++n) {
96 for (j=a; j < b; ++j,++i) {
97 hmput(map,i,i+1);
98 max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
99 total_put_probes += stbds_hash_probes;
100 stbds_hash_probes = 0;
101 }
102 for (j=0; j < (b-a)*10; ++j) {
103 k=i+j;
104 (void) hmgeti(map,k); // miss
105 max_miss_probes = MAX(max_miss_probes, stbds_hash_probes);
106 total_miss_probes += stbds_hash_probes;
107 stbds_hash_probes = 0;
108 ++churn_misses;
109 }
110 assert(hmlen(map) == b);
111 for (j=a; j < b; ++j) {
112 k=i-j-1;
113 k = hmdel(map,k);
114 stbds_hash_probes = 0;
115 assert(k);
116 }
117 assert(hmlen(map) == a);
118 }
119 hmfree(map);
120 churn_inserts = i;
121 churn_deletes = (b-a) * n;
122 }
123
reset_stats(void)124 void reset_stats(void)
125 {
126 stbds_array_grow=0,
127 stbds_hash_grow=0;
128 stbds_hash_shrink=0;
129 stbds_hash_rebuild=0;
130 stbds_hash_probes=0;
131 stbds_hash_alloc=0;
132 stbds_rehash_probes=0;
133 stbds_rehash_items=0;
134 max_hit_probes = 0;
135 max_miss_probes = 0;
136 total_put_probes = 0;
137 total_miss_probes = 0;
138 }
139
print_churn_probe_stats(char * str)140 void print_churn_probe_stats(char *str)
141 {
142 printf("Probes: %3d max hit, %3d max miss, %4.2f avg hit, %4.2f avg miss: %s\n",
143 (int) max_hit_probes, (int) max_miss_probes, (float) total_put_probes / churn_inserts, (float) total_miss_probes / churn_misses, str);
144 reset_stats();
145 }
146
main(int arg,char ** argv)147 int main(int arg, char **argv)
148 {
149 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
150 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
151 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
152 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
153 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
154 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
155 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
156 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
157 return 0;
158 }
159 #endif
160
161
162 #ifdef DS_PERF
163 //char *strdup(const char *foo) { return 0; }
164 //int stricmp(const char *a, const char *b) { return 0; }
165 //int strnicmp(const char *a, const char *b, size_t n) { return 0; }
166
167 unsigned __int64 t0, xsum, mn,mx,count;
begin(void)168 void begin(void)
169 {
170 LARGE_INTEGER m;
171 QueryPerformanceCounter(&m);
172 t0 = m.QuadPart;
173 xsum = 0;
174 count = 0;
175 mx = 0;
176 mn = ~(unsigned __int64) 0;
177 }
178
measure(void)179 void measure(void)
180 {
181 unsigned __int64 t1, t;
182 LARGE_INTEGER m;
183 QueryPerformanceCounter(&m);
184 t1 = m.QuadPart;
185 t = t1-t0;
186 if (t1 < t0)
187 printf("ALERT: QueryPerformanceCounter was unordered!\n");
188 if (t < mn) mn = t;
189 if (t > mx) mx = t;
190 xsum += t;
191 ++count;
192 t0 = t1;
193 }
194
dont_measure(void)195 void dont_measure(void)
196 {
197 LARGE_INTEGER m;
198 QueryPerformanceCounter(&m);
199 t0 = m.QuadPart;
200 }
201
202 double timer;
end(void)203 double end(void)
204 {
205 LARGE_INTEGER m;
206 QueryPerformanceFrequency(&m);
207
208 if (count > 3) {
209 // discard the highest and lowest
210 xsum -= mn;
211 xsum -= mx;
212 count -= 2;
213 }
214 timer = (double) (xsum) / count / m.QuadPart * 1000;
215 return timer;
216 }
217
build(int a,int b,int count,int step)218 void build(int a, int b, int count, int step)
219 {
220 struct { int key,value; } *map=NULL;
221 int i,n;
222 for (i=0; i < a; ++i) {
223 n = i*step;
224 hmput(map,n,i+1);
225 }
226 measure();
227 churn_inserts = i;
228 hmfree(map);
229 dont_measure();
230 }
231
232 #ifdef STB__INCLUDE_STB_H
build_stb(int a,int b,int count,int step)233 void build_stb(int a, int b, int count, int step)
234 {
235 stb_idict *d = stb_idict_new_size(8);
236 int i;
237 for (i=0; i < a; ++i)
238 stb_idict_add(d, i*step, i+1);
239 measure();
240 churn_inserts = i;
241 stb_idict_destroy(d);
242 dont_measure();
243 }
244
multibuild_stb(int a,int b,int count,int step,int tables)245 void multibuild_stb(int a, int b, int count, int step, int tables)
246 {
247 stb_idict *d[50000];
248 int i,q;
249 for (q=0; q < tables; ++q)
250 d[q] = stb_idict_new_size(8);
251 dont_measure();
252 for (i=0; i < a; ++i)
253 for (q=0; q < tables; ++q)
254 stb_idict_add(d[q], i*step+q*771, i+1);
255 measure();
256 churn_inserts = i;
257 for (q=0; q < tables; ++q)
258 stb_idict_destroy(d[q]);
259 dont_measure();
260 }
261
multisearch_stb(int a,int start,int end,int step,int tables)262 int multisearch_stb(int a, int start, int end, int step, int tables)
263 {
264 stb_idict *d[50000];
265 int i,q,total=0,v;
266 for (q=0; q < tables; ++q)
267 d[q] = stb_idict_new_size(8);
268 for (q=0; q < tables; ++q)
269 for (i=0; i < a; ++i)
270 stb_idict_add(d[q], i*step+q*771, i+1);
271 dont_measure();
272 for (i=start; i < end; ++i)
273 for (q=0; q < tables; ++q)
274 if (stb_idict_get_flag(d[q], i*step+q*771, &v))
275 total += v;
276 measure();
277 churn_inserts = i;
278 for (q=0; q < tables; ++q)
279 stb_idict_destroy(d[q]);
280 dont_measure();
281 return total;
282 }
283 #endif
284
multisearch(int a,int start,int end,int step,int tables)285 int multisearch(int a, int start, int end, int step, int tables)
286 {
287 struct { int key,value; } *hash[50000];
288 int i,q,total=0;
289 for (q=0; q < tables; ++q)
290 hash[q] = NULL;
291 for (q=0; q < tables; ++q)
292 for (i=0; i < a; ++i)
293 hmput(hash[q], i*step+q*771, i+1);
294 dont_measure();
295 for (i=start; i < end; ++i)
296 for (q=0; q < tables; ++q)
297 total += hmget(hash[q], i*step+q*771);
298 measure();
299 churn_inserts = i;
300 for (q=0; q < tables; ++q)
301 hmfree(hash[q]);
302 dont_measure();
303 return total;
304 }
305
churn_skip(unsigned int a,unsigned int b,int count)306 void churn_skip(unsigned int a, unsigned int b, int count)
307 {
308 struct { unsigned int key,value; } *map=NULL;
309 unsigned int i,j,n,k;
310 for (i=0; i < a; ++i)
311 hmput(map,i,i+1);
312 dont_measure();
313 for (n=0; n < count; ++n) {
314 for (j=a; j < b; ++j,++i) {
315 hmput(map,i,i+1);
316 }
317 assert(hmlen(map) == b);
318 for (j=a; j < b; ++j) {
319 k=i-j-1;
320 k = hmdel(map,k);
321 assert(k != 0);
322 }
323 assert(hmlen(map) == a);
324 }
325 measure();
326 churn_inserts = i;
327 churn_deletes = (b-a) * n;
328 hmfree(map);
329 dont_measure();
330 }
331
332 typedef struct { int n[8]; } str32;
churn32(int a,int b,int count,int include_startup)333 void churn32(int a, int b, int count, int include_startup)
334 {
335 struct { str32 key; int value; } *map=NULL;
336 int i,j,n;
337 str32 key = { 0 };
338 for (i=0; i < a; ++i) {
339 key.n[0] = i;
340 hmput(map,key,i+1);
341 }
342 if (!include_startup)
343 dont_measure();
344 for (n=0; n < count; ++n) {
345 for (j=a; j < b; ++j,++i) {
346 key.n[0] = i;
347 hmput(map,key,i+1);
348 }
349 assert(hmlen(map) == b);
350 for (j=a; j < b; ++j) {
351 key.n[0] = i-j-1;
352 hmdel(map,key);
353 }
354 assert(hmlen(map) == a);
355 }
356 measure();
357 hmfree(map);
358 churn_inserts = i;
359 churn_deletes = (b-a) * n;
360 dont_measure();
361 }
362
363 typedef struct { int n[32]; } str256;
churn256(int a,int b,int count,int include_startup)364 void churn256(int a, int b, int count, int include_startup)
365 {
366 struct { str256 key; int value; } *map=NULL;
367 int i,j,n;
368 str256 key = { 0 };
369 for (i=0; i < a; ++i) {
370 key.n[0] = i;
371 hmput(map,key,i+1);
372 }
373 if (!include_startup)
374 dont_measure();
375 for (n=0; n < count; ++n) {
376 for (j=a; j < b; ++j,++i) {
377 key.n[0] = i;
378 hmput(map,key,i+1);
379 }
380 assert(hmlen(map) == b);
381 for (j=a; j < b; ++j) {
382 key.n[0] = i-j-1;
383 hmdel(map,key);
384 }
385 assert(hmlen(map) == a);
386 }
387 measure();
388 hmfree(map);
389 churn_inserts = i;
390 churn_deletes = (b-a) * n;
391 dont_measure();
392 }
393
churn8(int a,int b,int count,int include_startup)394 void churn8(int a, int b, int count, int include_startup)
395 {
396 struct { size_t key,value; } *map=NULL;
397 int i,j,n,k;
398 for (i=0; i < a; ++i)
399 hmput(map,i,i+1);
400 if (!include_startup)
401 dont_measure();
402 for (n=0; n < count; ++n) {
403 for (j=a; j < b; ++j,++i) {
404 hmput(map,i,i+1);
405 }
406 assert(hmlen(map) == b);
407 for (j=a; j < b; ++j) {
408 k=i-j-1;
409 k = hmdel(map,k);
410 assert(k != 0);
411 }
412 assert(hmlen(map) == a);
413 }
414 measure();
415 hmfree(map);
416 churn_inserts = i;
417 churn_deletes = (b-a) * n;
418 dont_measure();
419 }
420
multichurn4(int a,int b,int count,int include_startup,int tables)421 void multichurn4(int a, int b, int count, int include_startup, int tables)
422 {
423 struct { int key,value; } *map[50000];
424 int i,j,n,k,q;
425 for (q=0; q < tables; ++q)
426 map[q] = NULL;
427 dont_measure();
428
429 for (i=0; i < a; ++i)
430 for (q=0; q < tables; ++q)
431 hmput(map[q],i,i+1);
432 if (!include_startup)
433 dont_measure();
434 for (n=0; n < count; ++n) {
435 for (j=a; j < b; ++j,++i) {
436 for (q=0; q < tables; ++q)
437 hmput(map[q],i,i+1);
438 }
439 assert(hmlen(map[0]) == b);
440 for (j=a; j < b; ++j) {
441 k=i-j-1;
442 for (q=0; q < tables; ++q)
443 k = hmdel(map[q],k);
444 assert(k != 0);
445 }
446 assert(hmlen(map[0]) == a);
447 }
448 measure();
449 for (q=0; q < tables; ++q)
450 hmfree(map[q]);
451 churn_inserts = i * tables;
452 churn_deletes = (b-a) * n * tables;
453 dont_measure();
454 }
455
456 struct {
457 unsigned __int64 start;
458 unsigned __int64 end;
459 int table_size;
460 } mstats[32][4000];
461
462 const int first_step = 64;
463 const int last_step = 384-48; // 32M
464
measure_build4(int step_log2)465 void measure_build4(int step_log2)
466 {
467 double length;
468 int i,j,k=0;
469 int step = 1 << step_log2;
470 unsigned __int64 t0,t1;
471 struct { int key,value; } *map=NULL;
472 double rdtsc_scale;
473 begin();
474 t0 = __rdtsc();
475
476 mstats[0][0].start = __rdtsc();
477 for (i=0; i < 256; ++i) {
478 hmput(map,k,k+1);
479 k += step;
480 }
481 mstats[0][first_step-1].end = __rdtsc();
482 mstats[0][first_step-1].table_size = k >> step_log2;
483 for (j=first_step; j < last_step; ++j) {
484 for (i=0; i < (1<<(j>>4)); ++i) {
485 hmput(map, k,k+1);
486 k += step;
487 }
488 mstats[0][j].end = __rdtsc();
489 mstats[0][j].table_size = k >> step_log2;
490 }
491 t1 = __rdtsc();
492 measure();
493 hmfree(map);
494 length = end();
495 rdtsc_scale = length / (t1-t0) * 1000;
496
497 for (j=1; j < last_step; ++j)
498 mstats[0][j].start = mstats[0][0].start;
499 for (j=first_step-1; j < last_step; ++j) {
500 printf("%12.4f,%12d,%12d,0,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size, mstats[0][j].table_size);
501 }
502 }
503
504 #ifdef STBDS_FLUSH_CACHE
505 static int cache_index;
506 char dummy[8][STBDS_FLUSH_CACHE];
507
flush_cache(void)508 int flush_cache(void)
509 {
510 memmove(dummy[cache_index],dummy[cache_index]+1, sizeof(dummy[cache_index])-1);
511 cache_index = (cache_index+1)%8;
512 return dummy[cache_index][0];
513 }
514 #else
flush_cache(void)515 int flush_cache(void) { return 0; }
516 #endif
517
measure_average_lookup4(int step_log2)518 int measure_average_lookup4(int step_log2)
519 {
520 int total;
521 double length;
522 int i,j,k=0,q;
523 int step = 1 << step_log2;
524 unsigned __int64 t0,t1;
525 struct { int key,value; } *map=NULL;
526 double rdtsc_scale;
527 begin();
528 t0 = __rdtsc();
529
530 for (i=0; i < 128; ++i) {
531 hmput(map,k,k+1);
532 k += step;
533 }
534 for (j=first_step; j <= last_step; ++j) {
535 total += flush_cache();
536 mstats[0][j].start = __rdtsc();
537 for (q=i=0; i < 50000; ++i) {
538 total += hmget(map, q); // hit
539 if (++q == k) q = 0;
540 }
541 mstats[0][j].end = __rdtsc();
542 mstats[0][j].table_size = k;
543 total += flush_cache();
544 mstats[1][j].start = __rdtsc();
545 for (i=0; i < 50000; ++i) {
546 total += hmget(map, i+k); // miss
547 }
548 mstats[1][j].end = __rdtsc();
549 mstats[1][j].table_size = k;
550
551 // expand table
552 for (i=0; i < (1<<(j>>4)); ++i) {
553 hmput(map, k,k+1);
554 k += step;
555 }
556 }
557
558 t1 = __rdtsc();
559 measure();
560 hmfree(map);
561 length = end();
562 rdtsc_scale = length / (t1-t0) * 1000;
563
564 for (j=first_step; j <= last_step; ++j) {
565 // time,table_size,numins,numhit,nummiss,numperflush
566 printf("%12.4f,%12d,0,50000,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
567 }
568 for (j=first_step; j <= last_step; ++j) {
569 printf("%12.4f,%12d,0,0,50000,0\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
570 }
571 return total;
572 }
573
measure_worst_lookup4_a(int step_log2)574 int measure_worst_lookup4_a(int step_log2)
575 {
576 int total;
577 double length;
578 int i,j,k=0,q,worst_q,n,z,attempts;
579 int step = 1 << step_log2;
580 unsigned __int64 t0,t1;
581 unsigned __int64 m0,m1,worst;
582 struct { int key,value; } *map=NULL;
583 double rdtsc_scale;
584 begin();
585 t0 = __rdtsc();
586
587 memset(mstats, 0, sizeof(mstats));
588 for (j=first_step; j <= last_step; ++j)
589 mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
590
591 for(attempts=0; attempts < 2; ++attempts) {
592 k = 0;
593 stbds_rand_seed(0); // force us to get the same table every time
594 for (i=0; i < 128; ++i) {
595 hmput(map,k,k+1);
596 k += step;
597 }
598 for (j=first_step; j <= last_step; ++j) {
599 unsigned __int64 times[32];
600
601 // find the worst hit time
602 for (z=0; z < 2; ++z) { // try the bisectioning measurement 4 times
603 worst = 0;
604 for (n=0; n < 10; ++n) { // test 400 keys total
605 // find the worst time to hit 20 keys
606 q=0;
607 worst_q = 0;
608 total += flush_cache();
609 m0 = __rdtsc();
610 for (i=0; i < 20; ++i) {
611 total += hmget(map, q); // hit
612 if (++q == k) q = 0;
613 }
614 m1 = __rdtsc();
615 // for each n, check if this is the worst lookup we've seen
616 if (m1 - m0 > worst) {
617 worst = m1-m0;
618 worst_q = q - i;
619 if (worst_q < 0) q += k;
620 }
621 }
622 // after 400 keys, take the worst 20 keys, and try each one
623 worst = 0;
624 q = worst_q;
625 for (i=0; i < 20; ++i) {
626 total += flush_cache();
627 m0 = __rdtsc();
628 total += hmget(map, q); // hit
629 m1 = __rdtsc();
630 if (m1 - m0 > worst)
631 worst = m1-m0;
632 if (++q == k) q = 0;
633 }
634 times[z] = worst;
635 }
636 // find the worst time in the bunch
637 worst = 0;
638 for (i=0; i < z; ++i)
639 if (times[i] > worst)
640 worst = times[i];
641 // take the best of 'attempts', to discard outliers
642 if (worst < mstats[0][j].end)
643 mstats[0][j].end = worst;
644 mstats[0][j].start = 0;
645 mstats[0][j].table_size = k >> step_log2;
646
647 // find the worst miss time
648 for (z=0; z < 8; ++z) { // try the bisectioning measurement 8 times
649 worst = 0;
650 for (n=0; n < 20; ++n) { // test 400 keys total
651 // find the worst time to hit 20 keys
652 q=k;
653 worst_q = 0;
654 total += flush_cache();
655 m0 = __rdtsc();
656 for (i=0; i < 20; ++i) {
657 total += hmget(map, q); // hit
658 }
659 m1 = __rdtsc();
660 // for each n, check if this is the worst lookup we've seen
661 if (m1 - m0 > worst) {
662 worst = m1-m0;
663 worst_q = q - i;
664 }
665 }
666 // after 400 keys, take the worst 20 keys, and try each one
667 worst = 0;
668 q = worst_q;
669 for (i=0; i < 20; ++i) {
670 total += flush_cache();
671 m0 = __rdtsc();
672 total += hmget(map, q); // hit
673 m1 = __rdtsc();
674 if (m1 - m0 > worst)
675 worst = m1-m0;
676 }
677 times[z] = worst;
678 }
679 // find the worst time in the bunch
680 worst = 0;
681 for (i=0; i < z; ++i)
682 if (times[i] > worst)
683 worst = times[i];
684 if (worst < mstats[1][j].end)
685 mstats[1][j].end = worst;
686 mstats[1][j].start = 0;
687 mstats[1][j].table_size = k >> step_log2;
688
689 // expand table
690 for (i=0; i < (1<<(j>>4)); ++i) {
691 hmput(map, k,k+1);
692 k += step;
693 }
694 }
695 hmfree(map);
696 }
697
698 t1 = __rdtsc();
699 measure();
700 length = end();
701 rdtsc_scale = length / (t1-t0) * 1000;
702
703 for (j=first_step; j <= last_step; ++j) {
704 printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
705 }
706 for (j=first_step; j <= last_step; ++j) {
707 printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
708 }
709 return total;
710 }
711
measure_worst_lookup4_b(int step_log2)712 int measure_worst_lookup4_b(int step_log2)
713 {
714 int total;
715 double length;
716 int i,j,k=0,q,worst_q,n,z,attempts;
717 int step = 1 << step_log2;
718 unsigned __int64 t0,t1;
719 unsigned __int64 m0,m1,worst;
720 struct { int key,value; } *map=NULL;
721 double rdtsc_scale;
722 begin();
723 t0 = __rdtsc();
724
725 memset(mstats, 0, sizeof(mstats));
726 for (j=first_step; j <= last_step; ++j)
727 mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
728
729 k = 0;
730 stbds_rand_seed(0); // force us to get the same table every time
731 for (i=0; i < 128; ++i) {
732 hmput(map,k,k+1);
733 k += step;
734 }
735 for (j=first_step; j <= last_step; ++j) {
736 unsigned __int64 times[32];
737
738 // find the worst hit time
739 for (z=0; z < 8; ++z) { // try this 8 times
740 worst = 0;
741 q=0;
742 for (i=0; i < 5000; ++i) {
743 total += hmget(map, q);
744 m0 = __rdtsc();
745 total += hmget(map, q);
746 m1 = __rdtsc();
747 if (m1 - m0 > worst) {
748 worst = m1-m0;
749 worst_q = q - i;
750 }
751 if (++q == k) q = 0;
752 }
753 // now retry with the worst one, but find the shortest time for it
754 worst = ~(unsigned __int64) 0;
755 for (i=0; i < 4; ++i) {
756 total += flush_cache();
757 m0 = __rdtsc();
758 total += hmget(map,worst_q);
759 m1 = __rdtsc();
760 if (m1-m0 < worst)
761 worst = m1-m0;
762 }
763 times[z] = worst;
764 }
765
766 // find the worst of those
767 worst = 0;
768 for (i=0; i < z; ++i)
769 if (times[i] > worst)
770 worst = times[i];
771 mstats[0][j].start = 0;
772 mstats[0][j].end = worst;
773 mstats[0][j].table_size = k;
774
775 // find the worst miss time
776 for (z=0; z < 8; ++z) { // try this 8 times
777 worst = 0;
778 q=k;
779 for (i=0; i < 5000; ++i) {
780 total += hmget(map, q);
781 m0 = __rdtsc();
782 total += hmget(map, q);
783 m1 = __rdtsc();
784 if (m1 - m0 > worst) {
785 worst = m1-m0;
786 worst_q = q - i;
787 }
788 //printf("%6llu ", m1-m0);
789 }
790 // now retry with the worst one, but find the shortest time for it
791 worst = ~(unsigned __int64) 0;
792 for (i=0; i < 4; ++i) {
793 total += flush_cache();
794 m0 = __rdtsc();
795 total += hmget(map,worst_q);
796 m1 = __rdtsc();
797 if (m1-m0 < worst)
798 worst = m1-m0;
799 }
800 times[z] = worst;
801 }
802
803 // find the worst of those
804 worst = 0;
805 for (i=0; i < z; ++i)
806 if (times[i] > worst)
807 worst = times[i];
808 mstats[1][j].start = 0;
809 mstats[1][j].end = worst;
810 mstats[1][j].table_size = k;
811
812 // expand table
813 for (i=0; i < (1<<(j>>4)); ++i) {
814 hmput(map, k,k+1);
815 k += step;
816 }
817 }
818 hmfree(map);
819
820 t1 = __rdtsc();
821 measure();
822 length = end();
823 rdtsc_scale = length / (t1-t0) * 1000;
824
825 for (j=first_step+1; j <= last_step; ++j) {
826 printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
827 }
828 for (j=first_step+1; j <= last_step; ++j) {
829 printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
830 }
831 return total;
832 }
833
measure_uncached_lookup4(int step_log2)834 int measure_uncached_lookup4(int step_log2)
835 {
836 int total;
837 double length;
838 int i,j,k=0,q;
839 int step = 1 << step_log2;
840 unsigned __int64 t0,t1;
841 struct { int key,value; } *map=NULL;
842 double rdtsc_scale;
843 begin();
844 t0 = __rdtsc();
845
846 map = NULL;
847 for (i=0; i < 128; ++i) {
848 hmput(map,k,k+1);
849 k += step;
850 }
851 for (j=first_step; j <= last_step; ++j) {
852 mstats[0][j].start = __rdtsc();
853 mstats[0][j].end = 0;
854 for (q=i=0; i < 512; ++i) {
855 if ((i & 3) == 0) {
856 mstats[0][j].end += __rdtsc();
857 total += flush_cache();
858 mstats[0][j].start += __rdtsc();
859 }
860 total += hmget(map, q); // hit
861 if (++q == k) q = 0;
862 }
863 mstats[0][j].end += __rdtsc();
864 mstats[0][j].table_size = k;
865 total += flush_cache();
866 mstats[1][j].end = 0;
867 mstats[1][j].start = __rdtsc();
868 for (i=0; i < 512; ++i) {
869 if ((i & 3) == 0) {
870 mstats[1][j].end += __rdtsc();
871 total += flush_cache();
872 mstats[1][j].start += __rdtsc();
873 }
874 total += hmget(map, i+k); // miss
875 }
876 mstats[1][j].end += __rdtsc();
877 mstats[1][j].table_size = k;
878
879 // expand table
880 for (i=0; i < (1<<(j>>4)); ++i) {
881 hmput(map, k,k+1);
882 k += step;
883 }
884 }
885 hmfree(map);
886
887 t1 = __rdtsc();
888 measure();
889 length = end();
890 rdtsc_scale = length / (t1-t0) * 1000;
891
892 for (j=first_step; j <= last_step; ++j) {
893 printf("%12.4f,%12d,0,512,0,4\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
894 }
895 for (j=first_step; j <= last_step; ++j) {
896 printf("%12.4f,%12d,0,0,512,4\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
897 }
898 return total;
899 }
900
901
902
903
main(int arg,char ** argv)904 int main(int arg, char **argv)
905 {
906 int n,s,w;
907 double worst = 0;
908
909 printf("# size_t=%d,", (int) sizeof(size_t));
910
911 // number of cache-lines
912 #ifdef STBDS_SMALL_BUCKET
913 printf("cacheline=%d,", 1);
914 #else
915 printf("cacheline=%d,", sizeof(size_t)==8 ? 2 : 1);
916 #endif
917 #ifdef STBDS_FLUSH_CACHE
918 printf("%d,", (int) stbds_log2(STBDS_FLUSH_CACHE));
919 #else
920 printf("0,");
921 #endif
922 #ifdef STBDS_BUCKET_START // don't bother offseting differently within bucket for different hash values
923 printf("STBDS_BUCKET_START,");
924 #else
925 printf(",");
926 #endif
927 #ifdef STBDS_SIPHASH_2_4
928 printf("STBDS_SIPHASH_2_4,");
929 #else
930 printf(",");
931 #endif
932 printf("\n");
933
934 measure_worst_lookup4_b(0);
935 //measure_worst_lookup4_a(0);
936 measure_average_lookup4(0);
937 measure_uncached_lookup4(0);
938 measure_build4(0);
939 return 0;
940
941 #if 0
942 begin(); for (n=0; n < 2000; ++n) { build_stb(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table\n", timer);
943 begin(); for (n=0; n < 500; ++n) { build_stb(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table\n", timer);
944 begin(); for (n=0; n < 100; ++n) { build_stb(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table\n", timer);
945 begin(); for (n=0; n < 10; ++n) { build_stb(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table\n", timer);
946 begin(); for (n=0; n < 5; ++n) { build_stb(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table\n", timer);
947 #endif
948
949 #if 0
950 begin(); for (n=0; n < 2000; ++n) { churn32(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 32-byte key\n", timer);
951 begin(); for (n=0; n < 500; ++n) { churn32(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 32-byte key\n", timer);
952 begin(); for (n=0; n < 100; ++n) { churn32(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 32-byte key\n", timer);
953 begin(); for (n=0; n < 10; ++n) { churn32(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 32-byte key\n", timer);
954 begin(); for (n=0; n < 5; ++n) { churn32(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 32-byte key\n", timer);
955
956 begin(); for (n=0; n < 2000; ++n) { churn256(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 256-byte key\n", timer);
957 begin(); for (n=0; n < 500; ++n) { churn256(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 256-byte key\n", timer);
958 begin(); for (n=0; n < 100; ++n) { churn256(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 256-byte key\n", timer);
959 begin(); for (n=0; n < 10; ++n) { churn256(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 256-byte key\n", timer);
960 begin(); for (n=0; n < 5; ++n) { churn256(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 256-byte key\n", timer);
961 #endif
962
963 begin(); for (n=0; n < 20; ++n) { multisearch_stb(2000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 2K table w/ 4-byte key\n", timer);
964 begin(); for (n=0; n < 10; ++n) { multisearch_stb(20000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 20K table w/ 4-byte key\n", timer);
965 begin(); for (n=0; n < 6; ++n) { multisearch_stb(200000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
966 begin(); for (n=0; n < 2; ++n) { multisearch_stb(2000000,0,20000,1,100); } end(); printf(" // %7.2fms : 2,000,000 hits on 100 2M table w/ 4-byte key\n", timer);
967
968 begin(); for (n=0; n < 20; ++n) { multisearch (2000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 2K table w/ 4-byte key\n", timer);
969 begin(); for (n=0; n < 10; ++n) { multisearch (20000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 20K table w/ 4-byte key\n", timer);
970 begin(); for (n=0; n < 6; ++n) { multisearch (200000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
971 begin(); for (n=0; n < 2; ++n) { multisearch (2000000,0,20000,1,100); } end(); printf(" // %7.2fms : 2,000,000 hits on 100 2M table w/ 4-byte key\n", timer);
972
973
974 #if 1
975 begin(); for (n=0; n < 2; ++n) { multibuild_stb(2000,0,0,1,10000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10,000 2K table w/ 4-byte key\n", timer);
976 begin(); for (n=0; n < 2; ++n) { multibuild_stb(20000,0,0,1,1000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 1,000 20K table w/ 4-byte key\n", timer);
977 begin(); for (n=0; n < 2; ++n) { multibuild_stb(200000,0,0,1,100); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 100 200K table w/ 4-byte key\n", timer);
978 begin(); for (n=0; n < 2; ++n) { multibuild_stb(2000000,0,0,1,10); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10 2M table w/ 4-byte key\n", timer);
979
980 begin(); for (n=0; n < 2; ++n) { multichurn4(2000,0,0,1,10000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10,000 2K table w/ 4-byte key\n", timer);
981 begin(); for (n=0; n < 2; ++n) { multichurn4(20000,0,0,1,1000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 1,000 20K table w/ 4-byte key\n", timer);
982 begin(); for (n=0; n < 2; ++n) { multichurn4(200000,0,0,1,100); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 100 200K table w/ 4-byte key\n", timer);
983 begin(); for (n=0; n < 2; ++n) { multichurn4(2000000,0,0,1,10); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10 2M table w/ 4-byte key\n", timer);
984 #endif
985
986 begin(); for (n=0; n < 2000; ++n) { build(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 4-byte key\n", timer);
987 begin(); for (n=0; n < 500; ++n) { build(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 4-byte key\n", timer);
988 begin(); for (n=0; n < 100; ++n) { build(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 4-byte key\n", timer);
989 begin(); for (n=0; n < 10; ++n) { build(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 4-byte key\n", timer);
990 begin(); for (n=0; n < 5; ++n) { build(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 4-byte key\n", timer);
991
992 begin(); for (n=0; n < 2000; ++n) { churn8(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 8-byte key\n", timer);
993 begin(); for (n=0; n < 500; ++n) { churn8(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 8-byte key\n", timer);
994 begin(); for (n=0; n < 100; ++n) { churn8(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 8-byte key\n", timer);
995 begin(); for (n=0; n < 10; ++n) { churn8(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 8-byte key\n", timer);
996 begin(); for (n=0; n < 5; ++n) { churn8(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 8-byte key\n", timer);
997
998 begin(); for (n=0; n < 60; ++n) { churn_skip(2000,2100,5000); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
999 begin(); for (n=0; n < 30; ++n) { churn_skip(20000,21000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1000 begin(); for (n=0; n < 15; ++n) { churn_skip(200000,201000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 200K table\n", timer);
1001 begin(); for (n=0; n < 8; ++n) { churn_skip(2000000,2001000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2M table\n", timer);
1002 begin(); for (n=0; n < 5; ++n) { churn_skip(20000000,20001000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20M table\n", timer);
1003 begin(); for (n=0; n < 1; ++n) { churn_skip(200000000u,200001000u,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 200M table\n", timer);
1004 // even though the above measures a roughly fixed amount of work, we still have to build the table n times, hence the fewer measurements each time
1005
1006 begin(); for (n=0; n < 60; ++n) { churn_skip(1000,3000,250); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
1007 begin(); for (n=0; n < 15; ++n) { churn_skip(10000,30000,25); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1008 begin(); for (n=0; n < 7; ++n) { churn_skip(100000,300000,10); } end(); printf(" // %7.2fms : 2,000,000 inserts & deletes in 200K table\n", timer);
1009 begin(); for (n=0; n < 2; ++n) { churn_skip(1000000,3000000,10); } end(); printf(" // %7.2fms : 20,000,000 inserts & deletes in 2M table\n", timer);
1010
1011 // search for bad intervals.. in practice this just seems to measure execution variance
1012 for (s = 2; s < 64; ++s) {
1013 begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1014 if (timer > worst) {
1015 worst = timer;
1016 w = s;
1017 }
1018 }
1019 for (; s <= 1024; s *= 2) {
1020 begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1021 if (timer > worst) {
1022 worst = timer;
1023 w = s;
1024 }
1025 }
1026 printf(" // %7.2fms(%d) : Worst time from inserting 200,000 items with spacing %d.\n", worst, w, w);
1027
1028 return 0;
1029 }
1030 #endif
1031