1 #ifdef DS_PERF
2 #define _CRT_SECURE_NO_WARNINGS
3 #define _CRT_NONSTDC_NO_DEPRECATE
4 #define _CRT_NON_CONFORMING_SWPRINTFS
5
6
7 //#define STBDS_INTERNAL_SMALL_BUCKET // make 64-bit bucket fit both keys and hash bits
8 //#define STBDS_SIPHASH_2_4 // performance test 1_3 against 2_4
9 //#define STBDS_INTERNAL_BUCKET_START // don't bother offseting differently within bucket for different hash values
10 //#define STBDS_FLUSH_CACHE (1u<<20) // do this much memory traffic to flush the cache between some benchmarking measurements
11
12 #include <stdio.h>
13
14 #define WIN32_LEAN_AND_MEAN
15 #include <windows.h>
16 #define STB_DEFINE
17 #define STB_NO_REGISTRY
18 #include "../stb.h"
19 #endif
20
21 #ifdef DS_TEST
22 #define STBDS_UNIT_TESTS
23 #define STBDS_SMALL_BUCKET
24 #endif
25
26 #ifdef DS_STATS
27 #define STBDS_STATISTICS
28 #endif
29
30 #ifndef DS_PERF
31 #define STBDS_ASSERT assert
32 #include <assert.h>
33 #endif
34
35 #define STB_DS_IMPLEMENTATION
36 #include "../stb_ds.h"
37
38 size_t churn_inserts, churn_deletes;
39
churn(int a,int b,int count)40 void churn(int a, int b, int count)
41 {
42 struct { int key,value; } *map=NULL;
43 int i,j,n,k;
44 for (i=0; i < a; ++i)
45 hmput(map,i,i+1);
46 for (n=0; n < count; ++n) {
47 for (j=a; j < b; ++j,++i) {
48 hmput(map,i,i+1);
49 }
50 assert(hmlen(map) == b);
51 for (j=a; j < b; ++j) {
52 k=i-j-1;
53 k = hmdel(map,k);
54 assert(k != 0);
55 }
56 assert(hmlen(map) == a);
57 }
58 hmfree(map);
59 churn_inserts = i;
60 churn_deletes = (b-a) * n;
61 }
62
63 #ifdef DS_TEST
64 #include <stdio.h>
main(int argc,char ** argv)65 int main(int argc, char **argv)
66 {
67 stbds_unit_tests();
68 churn(0,100,1);
69 churn(3,7,50000);
70 churn(3,15,50000);
71 churn(16, 48, 25000);
72 churn(10, 15, 25000);
73 churn(200,500, 5000);
74 churn(2000,5000, 500);
75 churn(20000,50000, 50);
76 printf("Ok!");
77 return 0;
78 }
79 #endif
80
81 #ifdef DS_STATS
82 #define MAX(a,b) ((a) > (b) ? (a) : (b))
83 size_t max_hit_probes, max_miss_probes, total_put_probes, total_miss_probes, churn_misses;
churn_stats(int a,int b,int count)84 void churn_stats(int a, int b, int count)
85 {
86 struct { int key,value; } *map=NULL;
87 int i,j,n,k;
88 churn_misses = 0;
89 for (i=0; i < a; ++i) {
90 hmput(map,i,i+1);
91 max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
92 total_put_probes += stbds_hash_probes;
93 stbds_hash_probes = 0;
94 }
95
96 for (n=0; n < count; ++n) {
97 for (j=a; j < b; ++j,++i) {
98 hmput(map,i,i+1);
99 max_hit_probes = MAX(max_hit_probes, stbds_hash_probes);
100 total_put_probes += stbds_hash_probes;
101 stbds_hash_probes = 0;
102 }
103 for (j=0; j < (b-a)*10; ++j) {
104 k=i+j;
105 (void) hmgeti(map,k); // miss
106 max_miss_probes = MAX(max_miss_probes, stbds_hash_probes);
107 total_miss_probes += stbds_hash_probes;
108 stbds_hash_probes = 0;
109 ++churn_misses;
110 }
111 assert(hmlen(map) == b);
112 for (j=a; j < b; ++j) {
113 k=i-j-1;
114 k = hmdel(map,k);
115 stbds_hash_probes = 0;
116 assert(k);
117 }
118 assert(hmlen(map) == a);
119 }
120 hmfree(map);
121 churn_inserts = i;
122 churn_deletes = (b-a) * n;
123 }
124
reset_stats(void)125 void reset_stats(void)
126 {
127 stbds_array_grow=0,
128 stbds_hash_grow=0;
129 stbds_hash_shrink=0;
130 stbds_hash_rebuild=0;
131 stbds_hash_probes=0;
132 stbds_hash_alloc=0;
133 stbds_rehash_probes=0;
134 stbds_rehash_items=0;
135 max_hit_probes = 0;
136 max_miss_probes = 0;
137 total_put_probes = 0;
138 total_miss_probes = 0;
139 }
140
print_churn_probe_stats(char * str)141 void print_churn_probe_stats(char *str)
142 {
143 printf("Probes: %3d max hit, %3d max miss, %4.2f avg hit, %4.2f avg miss: %s\n",
144 (int) max_hit_probes, (int) max_miss_probes, (float) total_put_probes / churn_inserts, (float) total_miss_probes / churn_misses, str);
145 reset_stats();
146 }
147
main(int arg,char ** argv)148 int main(int arg, char **argv)
149 {
150 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
151 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
152 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
153 churn_stats(0,500000,1); print_churn_probe_stats("Inserting 500000 items");
154 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
155 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
156 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
157 churn_stats(49000,50000,500); print_churn_probe_stats("Deleting/Inserting 500000 items");
158 return 0;
159 }
160 #endif
161
162
163 #ifdef DS_PERF
164 //char *strdup(const char *foo) { return 0; }
165 //int stricmp(const char *a, const char *b) { return 0; }
166 //int strnicmp(const char *a, const char *b, size_t n) { return 0; }
167
168 unsigned __int64 t0, xsum, mn,mx,count;
begin(void)169 void begin(void)
170 {
171 LARGE_INTEGER m;
172 QueryPerformanceCounter(&m);
173 t0 = m.QuadPart;
174 xsum = 0;
175 count = 0;
176 mx = 0;
177 mn = ~(unsigned __int64) 0;
178 }
179
measure(void)180 void measure(void)
181 {
182 unsigned __int64 t1, t;
183 LARGE_INTEGER m;
184 QueryPerformanceCounter(&m);
185 t1 = m.QuadPart;
186 t = t1-t0;
187 if (t1 < t0)
188 printf("ALERT: QueryPerformanceCounter was unordered!\n");
189 if (t < mn) mn = t;
190 if (t > mx) mx = t;
191 xsum += t;
192 ++count;
193 t0 = t1;
194 }
195
dont_measure(void)196 void dont_measure(void)
197 {
198 LARGE_INTEGER m;
199 QueryPerformanceCounter(&m);
200 t0 = m.QuadPart;
201 }
202
203 double timer;
end(void)204 double end(void)
205 {
206 LARGE_INTEGER m;
207 QueryPerformanceFrequency(&m);
208
209 if (count > 3) {
210 // discard the highest and lowest
211 xsum -= mn;
212 xsum -= mx;
213 count -= 2;
214 }
215 timer = (double) (xsum) / count / m.QuadPart * 1000;
216 return timer;
217 }
218
build(int a,int b,int count,int step)219 void build(int a, int b, int count, int step)
220 {
221 struct { int key,value; } *map=NULL;
222 int i,n;
223 for (i=0; i < a; ++i) {
224 n = i*step;
225 hmput(map,n,i+1);
226 }
227 measure();
228 churn_inserts = i;
229 hmfree(map);
230 dont_measure();
231 }
232
233 #ifdef STB__INCLUDE_STB_H
build_stb(int a,int b,int count,int step)234 void build_stb(int a, int b, int count, int step)
235 {
236 stb_idict *d = stb_idict_new_size(8);
237 int i;
238 for (i=0; i < a; ++i)
239 stb_idict_add(d, i*step, i+1);
240 measure();
241 churn_inserts = i;
242 stb_idict_destroy(d);
243 dont_measure();
244 }
245
multibuild_stb(int a,int b,int count,int step,int tables)246 void multibuild_stb(int a, int b, int count, int step, int tables)
247 {
248 stb_idict *d[50000];
249 int i,q;
250 for (q=0; q < tables; ++q)
251 d[q] = stb_idict_new_size(8);
252 dont_measure();
253 for (i=0; i < a; ++i)
254 for (q=0; q < tables; ++q)
255 stb_idict_add(d[q], i*step+q*771, i+1);
256 measure();
257 churn_inserts = i;
258 for (q=0; q < tables; ++q)
259 stb_idict_destroy(d[q]);
260 dont_measure();
261 }
262
multisearch_stb(int a,int start,int end,int step,int tables)263 int multisearch_stb(int a, int start, int end, int step, int tables)
264 {
265 stb_idict *d[50000];
266 int i,q,total=0,v;
267 for (q=0; q < tables; ++q)
268 d[q] = stb_idict_new_size(8);
269 for (q=0; q < tables; ++q)
270 for (i=0; i < a; ++i)
271 stb_idict_add(d[q], i*step+q*771, i+1);
272 dont_measure();
273 for (i=start; i < end; ++i)
274 for (q=0; q < tables; ++q)
275 if (stb_idict_get_flag(d[q], i*step+q*771, &v))
276 total += v;
277 measure();
278 churn_inserts = i;
279 for (q=0; q < tables; ++q)
280 stb_idict_destroy(d[q]);
281 dont_measure();
282 return total;
283 }
284 #endif
285
multisearch(int a,int start,int end,int step,int tables)286 int multisearch(int a, int start, int end, int step, int tables)
287 {
288 struct { int key,value; } *hash[50000];
289 int i,q,total=0;
290 for (q=0; q < tables; ++q)
291 hash[q] = NULL;
292 for (q=0; q < tables; ++q)
293 for (i=0; i < a; ++i)
294 hmput(hash[q], i*step+q*771, i+1);
295 dont_measure();
296 for (i=start; i < end; ++i)
297 for (q=0; q < tables; ++q)
298 total += hmget(hash[q], i*step+q*771);
299 measure();
300 churn_inserts = i;
301 for (q=0; q < tables; ++q)
302 hmfree(hash[q]);
303 dont_measure();
304 return total;
305 }
306
churn_skip(unsigned int a,unsigned int b,int count)307 void churn_skip(unsigned int a, unsigned int b, int count)
308 {
309 struct { unsigned int key,value; } *map=NULL;
310 unsigned int i,j,n,k;
311 for (i=0; i < a; ++i)
312 hmput(map,i,i+1);
313 dont_measure();
314 for (n=0; n < count; ++n) {
315 for (j=a; j < b; ++j,++i) {
316 hmput(map,i,i+1);
317 }
318 assert(hmlen(map) == b);
319 for (j=a; j < b; ++j) {
320 k=i-j-1;
321 k = hmdel(map,k);
322 assert(k != 0);
323 }
324 assert(hmlen(map) == a);
325 }
326 measure();
327 churn_inserts = i;
328 churn_deletes = (b-a) * n;
329 hmfree(map);
330 dont_measure();
331 }
332
333 typedef struct { int n[8]; } str32;
churn32(int a,int b,int count,int include_startup)334 void churn32(int a, int b, int count, int include_startup)
335 {
336 struct { str32 key; int value; } *map=NULL;
337 int i,j,n;
338 str32 key = { 0 };
339 for (i=0; i < a; ++i) {
340 key.n[0] = i;
341 hmput(map,key,i+1);
342 }
343 if (!include_startup)
344 dont_measure();
345 for (n=0; n < count; ++n) {
346 for (j=a; j < b; ++j,++i) {
347 key.n[0] = i;
348 hmput(map,key,i+1);
349 }
350 assert(hmlen(map) == b);
351 for (j=a; j < b; ++j) {
352 key.n[0] = i-j-1;
353 hmdel(map,key);
354 }
355 assert(hmlen(map) == a);
356 }
357 measure();
358 hmfree(map);
359 churn_inserts = i;
360 churn_deletes = (b-a) * n;
361 dont_measure();
362 }
363
364 typedef struct { int n[32]; } str256;
churn256(int a,int b,int count,int include_startup)365 void churn256(int a, int b, int count, int include_startup)
366 {
367 struct { str256 key; int value; } *map=NULL;
368 int i,j,n;
369 str256 key = { 0 };
370 for (i=0; i < a; ++i) {
371 key.n[0] = i;
372 hmput(map,key,i+1);
373 }
374 if (!include_startup)
375 dont_measure();
376 for (n=0; n < count; ++n) {
377 for (j=a; j < b; ++j,++i) {
378 key.n[0] = i;
379 hmput(map,key,i+1);
380 }
381 assert(hmlen(map) == b);
382 for (j=a; j < b; ++j) {
383 key.n[0] = i-j-1;
384 hmdel(map,key);
385 }
386 assert(hmlen(map) == a);
387 }
388 measure();
389 hmfree(map);
390 churn_inserts = i;
391 churn_deletes = (b-a) * n;
392 dont_measure();
393 }
394
churn8(int a,int b,int count,int include_startup)395 void churn8(int a, int b, int count, int include_startup)
396 {
397 struct { size_t key,value; } *map=NULL;
398 int i,j,n,k;
399 for (i=0; i < a; ++i)
400 hmput(map,i,i+1);
401 if (!include_startup)
402 dont_measure();
403 for (n=0; n < count; ++n) {
404 for (j=a; j < b; ++j,++i) {
405 hmput(map,i,i+1);
406 }
407 assert(hmlen(map) == b);
408 for (j=a; j < b; ++j) {
409 k=i-j-1;
410 k = hmdel(map,k);
411 assert(k != 0);
412 }
413 assert(hmlen(map) == a);
414 }
415 measure();
416 hmfree(map);
417 churn_inserts = i;
418 churn_deletes = (b-a) * n;
419 dont_measure();
420 }
421
multichurn4(int a,int b,int count,int include_startup,int tables)422 void multichurn4(int a, int b, int count, int include_startup, int tables)
423 {
424 struct { int key,value; } *map[50000];
425 int i,j,n,k,q;
426 for (q=0; q < tables; ++q)
427 map[q] = NULL;
428 dont_measure();
429
430 for (i=0; i < a; ++i)
431 for (q=0; q < tables; ++q)
432 hmput(map[q],i,i+1);
433 if (!include_startup)
434 dont_measure();
435 for (n=0; n < count; ++n) {
436 for (j=a; j < b; ++j,++i) {
437 for (q=0; q < tables; ++q)
438 hmput(map[q],i,i+1);
439 }
440 assert(hmlen(map[0]) == b);
441 for (j=a; j < b; ++j) {
442 k=i-j-1;
443 for (q=0; q < tables; ++q)
444 k = hmdel(map[q],k);
445 assert(k != 0);
446 }
447 assert(hmlen(map[0]) == a);
448 }
449 measure();
450 for (q=0; q < tables; ++q)
451 hmfree(map[q]);
452 churn_inserts = i * tables;
453 churn_deletes = (b-a) * n * tables;
454 dont_measure();
455 }
456
457 struct {
458 unsigned __int64 start;
459 unsigned __int64 end;
460 int table_size;
461 } mstats[32][4000];
462
463 const int first_step = 64;
464 const int last_step = 384-48; // 32M
465
measure_build4(int step_log2)466 void measure_build4(int step_log2)
467 {
468 double length;
469 int i,j,k=0;
470 int step = 1 << step_log2;
471 unsigned __int64 t0,t1;
472 struct { int key,value; } *map=NULL;
473 double rdtsc_scale;
474 begin();
475 t0 = __rdtsc();
476
477 mstats[0][0].start = __rdtsc();
478 for (i=0; i < 256; ++i) {
479 hmput(map,k,k+1);
480 k += step;
481 }
482 mstats[0][first_step-1].end = __rdtsc();
483 mstats[0][first_step-1].table_size = k >> step_log2;
484 for (j=first_step; j < last_step; ++j) {
485 for (i=0; i < (1<<(j>>4)); ++i) {
486 hmput(map, k,k+1);
487 k += step;
488 }
489 mstats[0][j].end = __rdtsc();
490 mstats[0][j].table_size = k >> step_log2;
491 }
492 t1 = __rdtsc();
493 measure();
494 hmfree(map);
495 length = end();
496 rdtsc_scale = length / (t1-t0) * 1000;
497
498 for (j=1; j < last_step; ++j)
499 mstats[0][j].start = mstats[0][0].start;
500 for (j=first_step-1; j < last_step; ++j) {
501 printf("%12.4f,%12d,%12d,0,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size, mstats[0][j].table_size);
502 }
503 }
504
505 #ifdef STBDS_FLUSH_CACHE
506 static int cache_index;
507 char dummy[8][STBDS_FLUSH_CACHE];
508
flush_cache(void)509 int flush_cache(void)
510 {
511 memmove(dummy[cache_index],dummy[cache_index]+1, sizeof(dummy[cache_index])-1);
512 cache_index = (cache_index+1)%8;
513 return dummy[cache_index][0];
514 }
515 #else
flush_cache(void)516 int flush_cache(void) { return 0; }
517 #endif
518
measure_average_lookup4(int step_log2)519 int measure_average_lookup4(int step_log2)
520 {
521 int total;
522 double length;
523 int i,j,k=0,q;
524 int step = 1 << step_log2;
525 unsigned __int64 t0,t1;
526 struct { int key,value; } *map=NULL;
527 double rdtsc_scale;
528 begin();
529 t0 = __rdtsc();
530
531 for (i=0; i < 128; ++i) {
532 hmput(map,k,k+1);
533 k += step;
534 }
535 for (j=first_step; j <= last_step; ++j) {
536 total += flush_cache();
537 mstats[0][j].start = __rdtsc();
538 for (q=i=0; i < 50000; ++i) {
539 total += hmget(map, q); // hit
540 if (++q == k) q = 0;
541 }
542 mstats[0][j].end = __rdtsc();
543 mstats[0][j].table_size = k;
544 total += flush_cache();
545 mstats[1][j].start = __rdtsc();
546 for (i=0; i < 50000; ++i) {
547 total += hmget(map, i+k); // miss
548 }
549 mstats[1][j].end = __rdtsc();
550 mstats[1][j].table_size = k;
551
552 // expand table
553 for (i=0; i < (1<<(j>>4)); ++i) {
554 hmput(map, k,k+1);
555 k += step;
556 }
557 }
558
559 t1 = __rdtsc();
560 measure();
561 hmfree(map);
562 length = end();
563 rdtsc_scale = length / (t1-t0) * 1000;
564
565 for (j=first_step; j <= last_step; ++j) {
566 // time,table_size,numins,numhit,nummiss,numperflush
567 printf("%12.4f,%12d,0,50000,0,0\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
568 }
569 for (j=first_step; j <= last_step; ++j) {
570 printf("%12.4f,%12d,0,0,50000,0\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
571 }
572 return total;
573 }
574
measure_worst_lookup4_a(int step_log2)575 int measure_worst_lookup4_a(int step_log2)
576 {
577 int total;
578 double length;
579 int i,j,k=0,q,worst_q,n,z,attempts;
580 int step = 1 << step_log2;
581 unsigned __int64 t0,t1;
582 unsigned __int64 m0,m1,worst;
583 struct { int key,value; } *map=NULL;
584 double rdtsc_scale;
585 begin();
586 t0 = __rdtsc();
587
588 memset(mstats, 0, sizeof(mstats));
589 for (j=first_step; j <= last_step; ++j)
590 mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
591
592 for(attempts=0; attempts < 2; ++attempts) {
593 k = 0;
594 stbds_rand_seed(0); // force us to get the same table every time
595 for (i=0; i < 128; ++i) {
596 hmput(map,k,k+1);
597 k += step;
598 }
599 for (j=first_step; j <= last_step; ++j) {
600 unsigned __int64 times[32];
601
602 // find the worst hit time
603 for (z=0; z < 2; ++z) { // try the bisectioning measurement 4 times
604 worst = 0;
605 for (n=0; n < 10; ++n) { // test 400 keys total
606 // find the worst time to hit 20 keys
607 q=0;
608 worst_q = 0;
609 total += flush_cache();
610 m0 = __rdtsc();
611 for (i=0; i < 20; ++i) {
612 total += hmget(map, q); // hit
613 if (++q == k) q = 0;
614 }
615 m1 = __rdtsc();
616 // for each n, check if this is the worst lookup we've seen
617 if (m1 - m0 > worst) {
618 worst = m1-m0;
619 worst_q = q - i;
620 if (worst_q < 0) q += k;
621 }
622 }
623 // after 400 keys, take the worst 20 keys, and try each one
624 worst = 0;
625 q = worst_q;
626 for (i=0; i < 20; ++i) {
627 total += flush_cache();
628 m0 = __rdtsc();
629 total += hmget(map, q); // hit
630 m1 = __rdtsc();
631 if (m1 - m0 > worst)
632 worst = m1-m0;
633 if (++q == k) q = 0;
634 }
635 times[z] = worst;
636 }
637 // find the worst time in the bunch
638 worst = 0;
639 for (i=0; i < z; ++i)
640 if (times[i] > worst)
641 worst = times[i];
642 // take the best of 'attempts', to discard outliers
643 if (worst < mstats[0][j].end)
644 mstats[0][j].end = worst;
645 mstats[0][j].start = 0;
646 mstats[0][j].table_size = k >> step_log2;
647
648 // find the worst miss time
649 for (z=0; z < 8; ++z) { // try the bisectioning measurement 8 times
650 worst = 0;
651 for (n=0; n < 20; ++n) { // test 400 keys total
652 // find the worst time to hit 20 keys
653 q=k;
654 worst_q = 0;
655 total += flush_cache();
656 m0 = __rdtsc();
657 for (i=0; i < 20; ++i) {
658 total += hmget(map, q); // hit
659 }
660 m1 = __rdtsc();
661 // for each n, check if this is the worst lookup we've seen
662 if (m1 - m0 > worst) {
663 worst = m1-m0;
664 worst_q = q - i;
665 }
666 }
667 // after 400 keys, take the worst 20 keys, and try each one
668 worst = 0;
669 q = worst_q;
670 for (i=0; i < 20; ++i) {
671 total += flush_cache();
672 m0 = __rdtsc();
673 total += hmget(map, q); // hit
674 m1 = __rdtsc();
675 if (m1 - m0 > worst)
676 worst = m1-m0;
677 }
678 times[z] = worst;
679 }
680 // find the worst time in the bunch
681 worst = 0;
682 for (i=0; i < z; ++i)
683 if (times[i] > worst)
684 worst = times[i];
685 if (worst < mstats[1][j].end)
686 mstats[1][j].end = worst;
687 mstats[1][j].start = 0;
688 mstats[1][j].table_size = k >> step_log2;
689
690 // expand table
691 for (i=0; i < (1<<(j>>4)); ++i) {
692 hmput(map, k,k+1);
693 k += step;
694 }
695 }
696 hmfree(map);
697 }
698
699 t1 = __rdtsc();
700 measure();
701 length = end();
702 rdtsc_scale = length / (t1-t0) * 1000;
703
704 for (j=first_step; j <= last_step; ++j) {
705 printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
706 }
707 for (j=first_step; j <= last_step; ++j) {
708 printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
709 }
710 return total;
711 }
712
measure_worst_lookup4_b(int step_log2)713 int measure_worst_lookup4_b(int step_log2)
714 {
715 int total;
716 double length;
717 int i,j,k=0,q,worst_q,n,z,attempts;
718 int step = 1 << step_log2;
719 unsigned __int64 t0,t1;
720 unsigned __int64 m0,m1,worst;
721 struct { int key,value; } *map=NULL;
722 double rdtsc_scale;
723 begin();
724 t0 = __rdtsc();
725
726 memset(mstats, 0, sizeof(mstats));
727 for (j=first_step; j <= last_step; ++j)
728 mstats[0][j].end = mstats[1][j].end = ~(unsigned __int64) 0;
729
730 k = 0;
731 stbds_rand_seed(0); // force us to get the same table every time
732 for (i=0; i < 128; ++i) {
733 hmput(map,k,k+1);
734 k += step;
735 }
736 for (j=first_step; j <= last_step; ++j) {
737 unsigned __int64 times[32];
738
739 // find the worst hit time
740 for (z=0; z < 8; ++z) { // try this 8 times
741 worst = 0;
742 q=0;
743 for (i=0; i < 5000; ++i) {
744 total += hmget(map, q);
745 m0 = __rdtsc();
746 total += hmget(map, q);
747 m1 = __rdtsc();
748 if (m1 - m0 > worst) {
749 worst = m1-m0;
750 worst_q = q - i;
751 }
752 if (++q == k) q = 0;
753 }
754 // now retry with the worst one, but find the shortest time for it
755 worst = ~(unsigned __int64) 0;
756 for (i=0; i < 4; ++i) {
757 total += flush_cache();
758 m0 = __rdtsc();
759 total += hmget(map,worst_q);
760 m1 = __rdtsc();
761 if (m1-m0 < worst)
762 worst = m1-m0;
763 }
764 times[z] = worst;
765 }
766
767 // find the worst of those
768 worst = 0;
769 for (i=0; i < z; ++i)
770 if (times[i] > worst)
771 worst = times[i];
772 mstats[0][j].start = 0;
773 mstats[0][j].end = worst;
774 mstats[0][j].table_size = k;
775
776 // find the worst miss time
777 for (z=0; z < 8; ++z) { // try this 8 times
778 worst = 0;
779 q=k;
780 for (i=0; i < 5000; ++i) {
781 total += hmget(map, q);
782 m0 = __rdtsc();
783 total += hmget(map, q);
784 m1 = __rdtsc();
785 if (m1 - m0 > worst) {
786 worst = m1-m0;
787 worst_q = q - i;
788 }
789 //printf("%6llu ", m1-m0);
790 }
791 // now retry with the worst one, but find the shortest time for it
792 worst = ~(unsigned __int64) 0;
793 for (i=0; i < 4; ++i) {
794 total += flush_cache();
795 m0 = __rdtsc();
796 total += hmget(map,worst_q);
797 m1 = __rdtsc();
798 if (m1-m0 < worst)
799 worst = m1-m0;
800 }
801 times[z] = worst;
802 }
803
804 // find the worst of those
805 worst = 0;
806 for (i=0; i < z; ++i)
807 if (times[i] > worst)
808 worst = times[i];
809 mstats[1][j].start = 0;
810 mstats[1][j].end = worst;
811 mstats[1][j].table_size = k;
812
813 // expand table
814 for (i=0; i < (1<<(j>>4)); ++i) {
815 hmput(map, k,k+1);
816 k += step;
817 }
818 }
819 hmfree(map);
820
821 t1 = __rdtsc();
822 measure();
823 length = end();
824 rdtsc_scale = length / (t1-t0) * 1000;
825
826 for (j=first_step+1; j <= last_step; ++j) {
827 printf("%12.4f,%12d,0,1,0,1\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
828 }
829 for (j=first_step+1; j <= last_step; ++j) {
830 printf("%12.4f,%12d,0,0,1,1\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
831 }
832 return total;
833 }
834
measure_uncached_lookup4(int step_log2)835 int measure_uncached_lookup4(int step_log2)
836 {
837 int total;
838 double length;
839 int i,j,k=0,q;
840 int step = 1 << step_log2;
841 unsigned __int64 t0,t1;
842 struct { int key,value; } *map=NULL;
843 double rdtsc_scale;
844 begin();
845 t0 = __rdtsc();
846
847 map = NULL;
848 for (i=0; i < 128; ++i) {
849 hmput(map,k,k+1);
850 k += step;
851 }
852 for (j=first_step; j <= last_step; ++j) {
853 mstats[0][j].start = __rdtsc();
854 mstats[0][j].end = 0;
855 for (q=i=0; i < 512; ++i) {
856 if ((i & 3) == 0) {
857 mstats[0][j].end += __rdtsc();
858 total += flush_cache();
859 mstats[0][j].start += __rdtsc();
860 }
861 total += hmget(map, q); // hit
862 if (++q == k) q = 0;
863 }
864 mstats[0][j].end += __rdtsc();
865 mstats[0][j].table_size = k;
866 total += flush_cache();
867 mstats[1][j].end = 0;
868 mstats[1][j].start = __rdtsc();
869 for (i=0; i < 512; ++i) {
870 if ((i & 3) == 0) {
871 mstats[1][j].end += __rdtsc();
872 total += flush_cache();
873 mstats[1][j].start += __rdtsc();
874 }
875 total += hmget(map, i+k); // miss
876 }
877 mstats[1][j].end += __rdtsc();
878 mstats[1][j].table_size = k;
879
880 // expand table
881 for (i=0; i < (1<<(j>>4)); ++i) {
882 hmput(map, k,k+1);
883 k += step;
884 }
885 }
886 hmfree(map);
887
888 t1 = __rdtsc();
889 measure();
890 length = end();
891 rdtsc_scale = length / (t1-t0) * 1000;
892
893 for (j=first_step; j <= last_step; ++j) {
894 printf("%12.4f,%12d,0,512,0,4\n", (mstats[0][j].end - mstats[0][j].start) * rdtsc_scale, mstats[0][j].table_size);
895 }
896 for (j=first_step; j <= last_step; ++j) {
897 printf("%12.4f,%12d,0,0,512,4\n", (mstats[1][j].end - mstats[1][j].start) * rdtsc_scale, mstats[1][j].table_size);
898 }
899 return total;
900 }
901
902
903
904
main(int arg,char ** argv)905 int main(int arg, char **argv)
906 {
907 int n,s,w;
908 double worst = 0;
909
910 printf("# size_t=%d,", (int) sizeof(size_t));
911
912 // number of cache-lines
913 #ifdef STBDS_SMALL_BUCKET
914 printf("cacheline=%d,", 1);
915 #else
916 printf("cacheline=%d,", sizeof(size_t)==8 ? 2 : 1);
917 #endif
918 #ifdef STBDS_FLUSH_CACHE
919 printf("%d,", (int) stbds_log2(STBDS_FLUSH_CACHE));
920 #else
921 printf("0,");
922 #endif
923 #ifdef STBDS_BUCKET_START // don't bother offseting differently within bucket for different hash values
924 printf("STBDS_BUCKET_START,");
925 #else
926 printf(",");
927 #endif
928 #ifdef STBDS_SIPHASH_2_4
929 printf("STBDS_SIPHASH_2_4,");
930 #else
931 printf(",");
932 #endif
933 printf("\n");
934
935 measure_worst_lookup4_b(0);
936 //measure_worst_lookup4_a(0);
937 measure_average_lookup4(0);
938 measure_uncached_lookup4(0);
939 measure_build4(0);
940 return 0;
941
942 #if 0
943 begin(); for (n=0; n < 2000; ++n) { build_stb(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table\n", timer);
944 begin(); for (n=0; n < 500; ++n) { build_stb(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table\n", timer);
945 begin(); for (n=0; n < 100; ++n) { build_stb(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table\n", timer);
946 begin(); for (n=0; n < 10; ++n) { build_stb(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table\n", timer);
947 begin(); for (n=0; n < 5; ++n) { build_stb(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table\n", timer);
948 #endif
949
950 #if 0
951 begin(); for (n=0; n < 2000; ++n) { churn32(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 32-byte key\n", timer);
952 begin(); for (n=0; n < 500; ++n) { churn32(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 32-byte key\n", timer);
953 begin(); for (n=0; n < 100; ++n) { churn32(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 32-byte key\n", timer);
954 begin(); for (n=0; n < 10; ++n) { churn32(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 32-byte key\n", timer);
955 begin(); for (n=0; n < 5; ++n) { churn32(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 32-byte key\n", timer);
956
957 begin(); for (n=0; n < 2000; ++n) { churn256(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 256-byte key\n", timer);
958 begin(); for (n=0; n < 500; ++n) { churn256(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 256-byte key\n", timer);
959 begin(); for (n=0; n < 100; ++n) { churn256(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 256-byte key\n", timer);
960 begin(); for (n=0; n < 10; ++n) { churn256(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 256-byte key\n", timer);
961 begin(); for (n=0; n < 5; ++n) { churn256(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 256-byte key\n", timer);
962 #endif
963
964 begin(); for (n=0; n < 20; ++n) { multisearch_stb(2000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 2K table w/ 4-byte key\n", timer);
965 begin(); for (n=0; n < 10; ++n) { multisearch_stb(20000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 20K table w/ 4-byte key\n", timer);
966 begin(); for (n=0; n < 6; ++n) { multisearch_stb(200000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
967 begin(); for (n=0; n < 2; ++n) { multisearch_stb(2000000,0,20000,1,100); } end(); printf(" // %7.2fms : 2,000,000 hits on 100 2M table w/ 4-byte key\n", timer);
968
969 begin(); for (n=0; n < 20; ++n) { multisearch (2000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 2K table w/ 4-byte key\n", timer);
970 begin(); for (n=0; n < 10; ++n) { multisearch (20000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 20K table w/ 4-byte key\n", timer);
971 begin(); for (n=0; n < 6; ++n) { multisearch (200000,0,2000,1,1000); } end(); printf(" // %7.2fms : 2,000,000 hits on 1,000 200K table w/ 4-byte key\n", timer);
972 begin(); for (n=0; n < 2; ++n) { multisearch (2000000,0,20000,1,100); } end(); printf(" // %7.2fms : 2,000,000 hits on 100 2M table w/ 4-byte key\n", timer);
973
974
975 #if 1
976 begin(); for (n=0; n < 2; ++n) { multibuild_stb(2000,0,0,1,10000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10,000 2K table w/ 4-byte key\n", timer);
977 begin(); for (n=0; n < 2; ++n) { multibuild_stb(20000,0,0,1,1000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 1,000 20K table w/ 4-byte key\n", timer);
978 begin(); for (n=0; n < 2; ++n) { multibuild_stb(200000,0,0,1,100); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 100 200K table w/ 4-byte key\n", timer);
979 begin(); for (n=0; n < 2; ++n) { multibuild_stb(2000000,0,0,1,10); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10 2M table w/ 4-byte key\n", timer);
980
981 begin(); for (n=0; n < 2; ++n) { multichurn4(2000,0,0,1,10000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10,000 2K table w/ 4-byte key\n", timer);
982 begin(); for (n=0; n < 2; ++n) { multichurn4(20000,0,0,1,1000); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 1,000 20K table w/ 4-byte key\n", timer);
983 begin(); for (n=0; n < 2; ++n) { multichurn4(200000,0,0,1,100); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 100 200K table w/ 4-byte key\n", timer);
984 begin(); for (n=0; n < 2; ++n) { multichurn4(2000000,0,0,1,10); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 10 2M table w/ 4-byte key\n", timer);
985 #endif
986
987 begin(); for (n=0; n < 2000; ++n) { build(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 4-byte key\n", timer);
988 begin(); for (n=0; n < 500; ++n) { build(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 4-byte key\n", timer);
989 begin(); for (n=0; n < 100; ++n) { build(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 4-byte key\n", timer);
990 begin(); for (n=0; n < 10; ++n) { build(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 4-byte key\n", timer);
991 begin(); for (n=0; n < 5; ++n) { build(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 4-byte key\n", timer);
992
993 begin(); for (n=0; n < 2000; ++n) { churn8(2000,0,0,1); } end(); printf(" // %7.2fms : 2,000 inserts creating 2K table w/ 8-byte key\n", timer);
994 begin(); for (n=0; n < 500; ++n) { churn8(20000,0,0,1); } end(); printf(" // %7.2fms : 20,000 inserts creating 20K table w/ 8-byte key\n", timer);
995 begin(); for (n=0; n < 100; ++n) { churn8(200000,0,0,1); } end(); printf(" // %7.2fms : 200,000 inserts creating 200K table w/ 8-byte key\n", timer);
996 begin(); for (n=0; n < 10; ++n) { churn8(2000000,0,0,1); } end(); printf(" // %7.2fms : 2,000,000 inserts creating 2M table w/ 8-byte key\n", timer);
997 begin(); for (n=0; n < 5; ++n) { churn8(20000000,0,0,1); } end(); printf(" // %7.2fms : 20,000,000 inserts creating 20M table w/ 8-byte key\n", timer);
998
999 begin(); for (n=0; n < 60; ++n) { churn_skip(2000,2100,5000); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
1000 begin(); for (n=0; n < 30; ++n) { churn_skip(20000,21000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1001 begin(); for (n=0; n < 15; ++n) { churn_skip(200000,201000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 200K table\n", timer);
1002 begin(); for (n=0; n < 8; ++n) { churn_skip(2000000,2001000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2M table\n", timer);
1003 begin(); for (n=0; n < 5; ++n) { churn_skip(20000000,20001000,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20M table\n", timer);
1004 begin(); for (n=0; n < 1; ++n) { churn_skip(200000000u,200001000u,500); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 200M table\n", timer);
1005 // even though the above measures a roughly fixed amount of work, we still have to build the table n times, hence the fewer measurements each time
1006
1007 begin(); for (n=0; n < 60; ++n) { churn_skip(1000,3000,250); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 2K table\n", timer);
1008 begin(); for (n=0; n < 15; ++n) { churn_skip(10000,30000,25); } end(); printf(" // %7.2fms : 500,000 inserts & deletes in 20K table\n", timer);
1009 begin(); for (n=0; n < 7; ++n) { churn_skip(100000,300000,10); } end(); printf(" // %7.2fms : 2,000,000 inserts & deletes in 200K table\n", timer);
1010 begin(); for (n=0; n < 2; ++n) { churn_skip(1000000,3000000,10); } end(); printf(" // %7.2fms : 20,000,000 inserts & deletes in 2M table\n", timer);
1011
1012 // search for bad intervals.. in practice this just seems to measure execution variance
1013 for (s = 2; s < 64; ++s) {
1014 begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1015 if (timer > worst) {
1016 worst = timer;
1017 w = s;
1018 }
1019 }
1020 for (; s <= 1024; s *= 2) {
1021 begin(); for (n=0; n < 50; ++n) { build(200000,0,0,s); } end();
1022 if (timer > worst) {
1023 worst = timer;
1024 w = s;
1025 }
1026 }
1027 printf(" // %7.2fms(%d) : Worst time from inserting 200,000 items with spacing %d.\n", worst, w, w);
1028
1029 return 0;
1030 }
1031 #endif
1032