1 /*
2  * This is an optimized version of the following C++ program:
3  *
4  *   http://keithlea.com/javabench/src/cpp/hash.cpp
5  *
6  * Keith in his benchmark (http://keithlea.com/javabench/data) showed that the
7  * Java implementation is twice as fast as the C++ version. In fact, this is
8  * only because the C++ implementation is substandard. Most importantly, Keith
9  * is using "sprintf()" to convert an integer to a string, which is known to be
10  * extremely inefficient.
11  */
12 #include <stdio.h>
13 #include "khash.h"
KHASH_MAP_INIT_STR(str,int)14 KHASH_MAP_INIT_STR(str, int)
15 
16 inline void int2str(int c, int base, char *ret)
17 {
18 	const char *tab = "0123456789abcdef";
19 	if (c == 0) ret[0] = '0', ret[1] = 0;
20 	else {
21 		int l, x, y;
22 		char buf[16];
23 		for (l = 0, x = c < 0? -c : c; x > 0; x /= base) buf[l++] = tab[x%base];
24 		if (c < 0) buf[l++] = '-';
25 		for (x = l - 1, y = 0; x >= 0; --x) ret[y++] = buf[x];
26 		ret[y] = 0;
27 	}
28 }
29 
30 #ifndef _USE_STRDUP
31 #define BLOCK_SIZE 0x100000
main(int argc,char * argv[])32 int main(int argc, char *argv[])
33 {
34 	char **mem = 0;
35 	int i, l, n = 1000000, ret, block_end = 0, curr = 0, c = 0;
36 	khash_t(str) *h;
37 	h = kh_init(str);
38 	if (argc > 1) n = atoi(argv[1]);
39 	mem = malloc(sizeof(void*));
40 	mem[0] = malloc(BLOCK_SIZE); // memory buffer to avoid memory fragmentation
41 	curr = block_end = 0;
42 	for (i = 1; i <= n; ++i) {
43 		char buf[16];
44 		int2str(i, 16, buf);
45 		khint_t k = kh_put(str, h, buf, &ret);
46 		l = strlen(buf) + 1;
47 		if (block_end + l > BLOCK_SIZE) {
48 			++curr; block_end = 0;
49 			mem = realloc(mem, (curr + 1) * sizeof(void*));
50 			mem[curr] = malloc(BLOCK_SIZE);
51 		}
52 		memcpy(mem[curr] + block_end, buf, l);
53 		kh_key(h, k) = mem[curr] + block_end;
54 		block_end += l;
55 		kh_val(h, k) = i;
56 	}
57 	for (i = 1; i <= n; ++i) {
58 		char buf[16];
59 		int2str(i, 10, buf);
60 		khint_t k = kh_get(str, h, buf);
61 		if (k != kh_end(h)) ++c;
62 	}
63 	printf("%d\n", c);
64 	for (ret = 0; ret <= curr; ++ret) free(mem[ret]);
65 	free(mem);
66 	kh_destroy(str, h);
67 	return 0;
68 }
69 #else // _USE_STRDUP
main(int argc,char * argv[])70 int main(int argc, char *argv[])
71 {
72 	int i, l, n = 1000000, ret, c = 0;
73 	khash_t(str) *h;
74 	khint_t k;
75 	h = kh_init(str);
76 	if (argc > 1) n = atoi(argv[1]);
77 	for (i = 1; i <= n; ++i) {
78 		char buf[16];
79 		int2str(i, 16, buf);
80 		k = kh_put(str, h, strdup(buf), &ret);
81 		kh_val(h, k) = i;
82 	}
83 	for (i = 1; i <= n; ++i) {
84 		char buf[16];
85 		int2str(i, 10, buf);
86 		k = kh_get(str, h, buf);
87 		if (k != kh_end(h)) ++c;
88 	}
89 	for (k = kh_begin(h); k != kh_end(h); ++k) // explicitly freeing memory takes 10-20% CPU time.
90 		if (kh_exist(h, k)) free((char*)kh_key(h, k));
91 	printf("%d\n", c);
92 	kh_destroy(str, h);
93 	return 0;
94 }
95 #endif
96