1 /*
2   Copyright 2011-2014 David Robillard <http://drobilla.net>
3 
4   Permission to use, copy, modify, and/or distribute this software for any
5   purpose with or without fee is hereby granted, provided that the above
6   copyright notice and this permission notice appear in all copies.
7 
8   THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16 
17 #include "bench.h"
18 
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include <glib.h>
26 
27 #include "zix/ampatree.h"
28 #include "zix/chunk.h"
29 #include "zix/hash.h"
30 #include "zix/patree.h"
31 #include "zix/trie.h"
32 
33 const unsigned seed = 1;
34 
35 static int
test_fail(const char * fmt,...)36 test_fail(const char* fmt, ...)
37 {
38 	va_list args;
39 	va_start(args, fmt);
40 	fprintf(stderr, "error: ");
41 	vfprintf(stderr, fmt, args);
42 	va_end(args);
43 	return 1;
44 }
45 
46 int
main(int argc,char ** argv)47 main(int argc, char** argv)
48 {
49 	if (argc != 2) {
50 		return test_fail("Usage: %s INPUT_FILE\n", argv[0]);
51 	}
52 
53 	const char* file = argv[1];
54 	FILE*       fd   = fopen(file, "r");
55 	if (!fd) {
56 		return test_fail("Failed to open file %s\n", file);
57 	}
58 
59 	size_t max_n_strings = 100000000;
60 
61 	/* Read input strings */
62 	char**  strings      = NULL;
63 	size_t* lengths      = NULL;
64 	size_t  n_strings    = 0;
65 	char*   buf          = calloc(1, 1);
66 	size_t  buf_len      = 1;
67 	size_t  this_str_len = 0;
68 	for (char c; (c = fgetc(fd)) != EOF;) {
69 		if (isspace(c)) {
70 			if (this_str_len == 0) {
71 				continue;
72 			}
73 			strings = realloc(strings, (n_strings + 1) * sizeof(char*));
74 			lengths = realloc(lengths, (n_strings + 1) * sizeof(size_t));
75 			strings[n_strings] = malloc(buf_len);
76 			lengths[n_strings] = this_str_len;
77 			memcpy(strings[n_strings], buf, buf_len);
78 			this_str_len = 0;
79 			if (++n_strings == max_n_strings) {
80 				break;
81 			}
82 		} else {
83 			++this_str_len;
84 			if (buf_len < this_str_len + 1) {
85 				buf_len = this_str_len + 1;
86 				buf = realloc(buf, buf_len);
87 			}
88 			buf[this_str_len - 1] = c;
89 			buf[this_str_len] = '\0';
90 		}
91 	}
92 
93 	fclose(fd);
94 
95 	FILE* insert_dat = fopen("dict_insert.txt", "w");
96 	FILE* search_dat = fopen("dict_search.txt", "w");
97 	fprintf(insert_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixTrie\tZixAMPatree\n");
98 	fprintf(search_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixTrie\tZixAMPatree\n");
99 
100 	for (size_t n = n_strings / 16; n <= n_strings; n *= 2) {
101 		printf("Benchmarking n = %zu\n", n);
102 		ZixPatree*   patree   = zix_patree_new();
103 		ZixAMPatree* ampatree = zix_ampatree_new();
104 		ZixTrie*     trie     = zix_trie_new();
105 		GHashTable*  hash     = g_hash_table_new(g_str_hash, g_str_equal);
106 		ZixHash*     zhash    = zix_hash_new((ZixHashFunc)zix_chunk_hash,
107 		                                     (ZixEqualFunc)zix_chunk_equal,
108 		                                     sizeof(ZixChunk));
109 		fprintf(insert_dat, "%zu", n);
110 		fprintf(search_dat, "%zu", n);
111 
112 		// Benchmark insertion
113 
114 		// GHashTable
115 		struct timespec insert_start = bench_start();
116 		for (size_t i = 0; i < n; ++i) {
117 			g_hash_table_insert(hash, strings[i], strings[i]);
118 		}
119 		fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
120 
121 		// ZixHash
122 		insert_start = bench_start();
123 		for (size_t i = 0; i < n; ++i) {
124 			const ZixChunk chunk = { strings[i], lengths[i] + 1 };
125 			ZixStatus st = zix_hash_insert(zhash, &chunk, NULL);
126 			if (st && st != ZIX_STATUS_EXISTS) {
127 				return test_fail("Failed to insert `%s'\n", strings[i]);
128 			}
129 		}
130 		fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
131 
132 		// ZixPatree
133 		insert_start = bench_start();
134 		for (size_t i = 0; i < n; ++i) {
135 			ZixStatus st = zix_patree_insert(patree, strings[i], lengths[i]);
136 			if (st && st != ZIX_STATUS_EXISTS) {
137 				return test_fail("Failed to insert `%s'\n", strings[i]);
138 			}
139 		}
140 		fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
141 
142 		// ZixTrie
143 		insert_start = bench_start();
144 		for (size_t i = 0; i < n; ++i) {
145 			ZixStatus st = zix_trie_insert(trie, strings[i], lengths[i]);
146 			if (st && st != ZIX_STATUS_EXISTS) {
147 				return test_fail("Failed to insert `%s'\n", strings[i]);
148 			}
149 		}
150 		fprintf(insert_dat, "\t%lf", bench_end(&insert_start));
151 
152 		// ZixAMPatree
153 		insert_start = bench_start();
154 		for (size_t i = 0; i < n; ++i) {
155 			ZixStatus st = zix_ampatree_insert(ampatree, strings[i], lengths[i]);
156 			if (st && st != ZIX_STATUS_EXISTS) {
157 				return test_fail("Failed to insert `%s'\n", strings[i]);
158 			}
159 		}
160 		fprintf(insert_dat, "\t%lf\n", bench_end(&insert_start));
161 
162 		// Benchmark search
163 
164 		// GHashTable
165 		srand(seed);
166 		struct timespec search_start = bench_start();
167 		for (size_t i = 0; i < n; ++i) {
168 			const size_t index = rand() % n;
169 			char* match = g_hash_table_lookup(hash, strings[index]);
170 			if (strcmp(match, strings[index])) {
171 				return test_fail("Bad match for `%s'\n", strings[index]);
172 			}
173 		}
174 		fprintf(search_dat, "\t%lf", bench_end(&search_start));
175 
176 		// ZixHash
177 		srand(seed);
178 		search_start = bench_start();
179 		for (size_t i = 0; i < n; ++i) {
180 			const size_t    index = rand() % n;
181 			const ZixChunk  key   = { strings[index], lengths[index] + 1 };
182 			const ZixChunk* match = NULL;
183 			if (!(match = zix_hash_find(zhash, &key))) {
184 				return test_fail("Hash: Failed to find `%s'\n", strings[index]);
185 			}
186 			if (strcmp(match->buf, strings[index])) {
187 				return test_fail("Hash: Bad match %p for `%s': `%s'\n",
188 				                 (void*)match, strings[index], match->buf);
189 			}
190 		}
191 		fprintf(search_dat, "\t%lf", bench_end(&search_start));
192 
193 		// ZixPatree
194 		srand(seed);
195 		search_start = bench_start();
196 		for (size_t i = 0; i < n; ++i) {
197 			const size_t index = rand() % n;
198 			const char* match = NULL;
199 			if (zix_patree_find(patree, strings[index], &match)) {
200 				return test_fail("Patree: Failed to find `%s'\n", strings[index]);
201 			}
202 			if (strcmp(match, strings[index])) {
203 				return test_fail("Patree: Bad match for `%s'\n", strings[index]);
204 			}
205 		}
206 		fprintf(search_dat, "\t%lf", bench_end(&search_start));
207 
208 		// ZixTrie
209 		srand(seed);
210 		search_start = bench_start();
211 		for (size_t i = 0; i < n; ++i) {
212 			const size_t index = rand() % n;
213 			const char* match = NULL;
214 			if (zix_trie_find(trie, strings[index], &match)) {
215 				return test_fail("Trie: Failed to find `%s'\n", strings[index]);
216 			}
217 			if (strcmp(match, strings[index])) {
218 				return test_fail("Trie: Bad match `%s' for `%s'\n",
219 				                 match, strings[index]);
220 			}
221 		}
222 		fprintf(search_dat, "\t%lf", bench_end(&search_start));
223 
224 		// ZixAMPatree
225 		srand(seed);
226 		search_start = bench_start();
227 		for (size_t i = 0; i < n; ++i) {
228 			const size_t index = rand() % n;
229 			const char* match = NULL;
230 			if (zix_ampatree_find(ampatree, strings[index], &match)) {
231 				return test_fail("AMPatree: Failed to find `%s'\n", strings[index]);
232 			}
233 			if (strcmp(match, strings[index])) {
234 				return test_fail("AMPatree: Bad match `%s' for `%s'\n",
235 				                 match, strings[index]);
236 			}
237 		}
238 		fprintf(search_dat, "\t%lf\n", bench_end(&search_start));
239 
240 		zix_patree_free(patree);
241 		zix_ampatree_free(ampatree);
242 		zix_trie_free(trie);
243 		zix_hash_free(zhash);
244 		g_hash_table_unref(hash);
245 	}
246 
247 	fclose(insert_dat);
248 	fclose(search_dat);
249 
250 	fprintf(stderr, "Wrote dict_insert.txt dict_search.txt\n");
251 
252 	return 0;
253 }
254