1 /* Simple speed tests for a hash of strings. */
2 #include <ccan/htable/htable_type.h>
3 #include <ccan/htable/htable.c>
4 #include <ccan/tal/str/str.h>
5 #include <ccan/tal/grab_file/grab_file.h>
6 #include <ccan/tal/tal.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/time/time.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <unistd.h>
14 #include <sys/time.h>
15 
16 static size_t hashcount;
17 
strkey(const char * str)18 static const char *strkey(const char *str)
19 {
20 	return str;
21 }
22 
hash_str(const char * key)23 static size_t hash_str(const char *key)
24 {
25 	hashcount++;
26 	return hash(key, strlen(key), 0);
27 }
28 
cmp(const char * obj,const char * key)29 static bool cmp(const char *obj, const char *key)
30 {
31 	return strcmp(obj, key) == 0;
32 }
33 
34 HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, htable_str);
35 
36 /* Nanoseconds per operation */
normalize(const struct timeabs * start,const struct timeabs * stop,unsigned int num)37 static size_t normalize(const struct timeabs *start,
38 			const struct timeabs *stop,
39 			unsigned int num)
40 {
41 	return time_to_nsec(time_divide(time_between(*stop, *start), num));
42 }
43 
main(int argc,char * argv[])44 int main(int argc, char *argv[])
45 {
46 	size_t i, j, num;
47 	struct timeabs start, stop;
48 	struct htable_str ht;
49 	char **words, **misswords;
50 
51 	words = tal_strsplit(NULL, grab_file(NULL,
52 					     argv[1] ? argv[1] : "/usr/share/dict/words"), "\n",
53 			     STR_NO_EMPTY);
54 	htable_str_init(&ht);
55 	num = tal_count(words) - 1;
56 	/* Note that on my system, num is just > 98304, where we double! */
57 	printf("%zu words\n", num);
58 
59 	/* Append and prepend last char for miss testing. */
60 	misswords = tal_arr(words, char *, num);
61 	for (i = 0; i < num; i++) {
62 		char lastc;
63 		if (strlen(words[i]))
64 			lastc = words[i][strlen(words[i])-1];
65 		else
66 			lastc = 'z';
67 		misswords[i] = tal_fmt(misswords, "%c%s%c%c",
68 				       lastc, words[i], lastc, lastc);
69 	}
70 
71 	printf("#01: Initial insert: ");
72 	fflush(stdout);
73 	start = time_now();
74 	for (i = 0; i < num; i++)
75 		htable_str_add(&ht, words[i]);
76 	stop = time_now();
77 	printf(" %zu ns\n", normalize(&start, &stop, num));
78 
79 	printf("Bytes allocated: %zu\n",
80 	       sizeof(ht.raw.table[0]) << ht.raw.bits);
81 
82 	printf("#02: Initial lookup (match): ");
83 	fflush(stdout);
84 	start = time_now();
85 	for (i = 0; i < num; i++)
86 		if (htable_str_get(&ht, words[i]) != words[i])
87 			abort();
88 	stop = time_now();
89 	printf(" %zu ns\n", normalize(&start, &stop, num));
90 
91 	printf("#03: Initial lookup (miss): ");
92 	fflush(stdout);
93 	start = time_now();
94 	for (i = 0; i < num; i++) {
95 		if (htable_str_get(&ht, misswords[i]))
96 			abort();
97 	}
98 	stop = time_now();
99 	printf(" %zu ns\n", normalize(&start, &stop, num));
100 
101 	/* Lookups in order are very cache-friendly for judy; try random */
102 	printf("#04: Initial lookup (random): ");
103 	fflush(stdout);
104 	start = time_now();
105 	for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
106 		if (htable_str_get(&ht, words[j]) != words[j])
107 			abort();
108 	stop = time_now();
109 	printf(" %zu ns\n", normalize(&start, &stop, num));
110 
111 	hashcount = 0;
112 	printf("#05: Initial delete all: ");
113 	fflush(stdout);
114 	start = time_now();
115 	for (i = 0; i < num; i++)
116 		if (!htable_str_del(&ht, words[i]))
117 			abort();
118 	stop = time_now();
119 	printf(" %zu ns\n", normalize(&start, &stop, num));
120 
121 	printf("#06: Initial re-inserting: ");
122 	fflush(stdout);
123 	start = time_now();
124 	for (i = 0; i < num; i++)
125 		htable_str_add(&ht, words[i]);
126 	stop = time_now();
127 	printf(" %zu ns\n", normalize(&start, &stop, num));
128 
129 	hashcount = 0;
130 	printf("#07: Deleting first half: ");
131 	fflush(stdout);
132 	start = time_now();
133 	for (i = 0; i < num; i+=2)
134 		if (!htable_str_del(&ht, words[i]))
135 			abort();
136 	stop = time_now();
137 	printf(" %zu ns\n", normalize(&start, &stop, num));
138 
139 	printf("#08: Adding (a different) half: ");
140 	fflush(stdout);
141 
142 	start = time_now();
143 	for (i = 0; i < num; i+=2)
144 		htable_str_add(&ht, misswords[i]);
145 	stop = time_now();
146 	printf(" %zu ns\n", normalize(&start, &stop, num));
147 
148 	printf("#09: Lookup after half-change (match): ");
149 	fflush(stdout);
150 	start = time_now();
151 	for (i = 1; i < num; i+=2)
152 		if (htable_str_get(&ht, words[i]) != words[i])
153 			abort();
154 	for (i = 0; i < num; i+=2) {
155 		if (htable_str_get(&ht, misswords[i]) != misswords[i])
156 			abort();
157 	}
158 	stop = time_now();
159 	printf(" %zu ns\n", normalize(&start, &stop, num));
160 
161 	printf("#10: Lookup after half-change (miss): ");
162 	fflush(stdout);
163 	start = time_now();
164 	for (i = 0; i < num; i+=2)
165 		if (htable_str_get(&ht, words[i]))
166 			abort();
167 	for (i = 1; i < num; i+=2) {
168 		if (htable_str_get(&ht, misswords[i]))
169 			abort();
170 	}
171 	stop = time_now();
172 	printf(" %zu ns\n", normalize(&start, &stop, num));
173 
174 	/* Hashtables with delete markers can fill with markers over time.
175 	 * so do some changes to see how it operates in long-term. */
176 	printf("#11: Churn 1: ");
177 	start = time_now();
178 	for (j = 0; j < num; j+=2) {
179 		if (!htable_str_del(&ht, misswords[j]))
180 			abort();
181 		if (!htable_str_add(&ht, words[j]))
182 			abort();
183 	}
184 	stop = time_now();
185 	printf(" %zu ns\n", normalize(&start, &stop, num));
186 
187 	printf("#12: Churn 2: ");
188 	start = time_now();
189 	for (j = 1; j < num; j+=2) {
190 		if (!htable_str_del(&ht, words[j]))
191 			abort();
192 		if (!htable_str_add(&ht, misswords[j]))
193 			abort();
194 	}
195 	stop = time_now();
196 	printf(" %zu ns\n", normalize(&start, &stop, num));
197 
198 	printf("#13: Churn 3: ");
199 	start = time_now();
200 	for (j = 1; j < num; j+=2) {
201 		if (!htable_str_del(&ht, misswords[j]))
202 			abort();
203 		if (!htable_str_add(&ht, words[j]))
204 			abort();
205 	}
206 	stop = time_now();
207 	printf(" %zu ns\n", normalize(&start, &stop, num));
208 
209 	/* Now it's back to normal... */
210 	printf("#14: Post-Churn lookup (match): ");
211 	fflush(stdout);
212 	start = time_now();
213 	for (i = 0; i < num; i++)
214 		if (htable_str_get(&ht, words[i]) != words[i])
215 			abort();
216 	stop = time_now();
217 	printf(" %zu ns\n", normalize(&start, &stop, num));
218 
219 	printf("#15: Post-Churn lookup (miss): ");
220 	fflush(stdout);
221 	start = time_now();
222 	for (i = 0; i < num; i++) {
223 		if (htable_str_get(&ht, misswords[i]))
224 			abort();
225 	}
226 	stop = time_now();
227 	printf(" %zu ns\n", normalize(&start, &stop, num));
228 
229 	/* Lookups in order are very cache-friendly for judy; try random */
230 	printf("#16: Post-Churn lookup (random): ");
231 	fflush(stdout);
232 	start = time_now();
233 	for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
234 		if (htable_str_get(&ht, words[j]) != words[j])
235 			abort();
236 	stop = time_now();
237 	printf(" %zu ns\n", normalize(&start, &stop, num));
238 
239 	return 0;
240 }
241