1 /* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "array.h"
5 #include "file-lock.h"
6 #include "istream.h"
7 #include "time-util.h"
8 #include "unichar.h"
9 #include "squat-trie.h"
10 #include "squat-uidlist.h"
11
12 #include <stdio.h>
13 #include <unistd.h>
14 #include <fcntl.h>
15 #include <time.h>
16 #include <sys/time.h>
17
/* Print the sequence ranges stored in result to stdout as one line.
   Ranges are separated by commas; a range is written as "seq1-seq2", or as
   just "seq1" when both ends are equal. The line always ends with a
   newline, even when the array is empty. */
static void result_print(ARRAY_TYPE(seq_range) *result)
{
	const struct seq_range *ranges;
	const char *separator = "";
	unsigned int idx, range_count;

	ranges = array_get(result, &range_count);
	for (idx = 0; idx < range_count; idx++) {
		const struct seq_range *cur = &ranges[idx];

		/* empty before the first range, "," before every later one */
		printf("%s", separator);
		separator = ",";

		if (cur->seq1 == cur->seq2)
			printf("%u", cur->seq1);
		else
			printf("%u-%u", cur->seq1, cur->seq2);
	}
	printf("\n");
}
33
main(int argc ATTR_UNUSED,char * argv[])34 int main(int argc ATTR_UNUSED, char *argv[])
35 {
36 const char *trie_path = "/tmp/squat-test-index.search";
37 const char *uidlist_path = "/tmp/squat-test-index.search.uids";
38 struct squat_trie *trie;
39 struct squat_trie_build_context *build_ctx;
40 struct istream *input;
41 struct stat trie_st, uidlist_st;
42 ARRAY_TYPE(seq_range) definite_uids, maybe_uids;
43 char *line, *str, buf[4096];
44 buffer_t *valid;
45 int ret, fd;
46 unsigned int last = 0, seq = 1, node_count, uidlist_count;
47 size_t len;
48 enum squat_index_type index_type;
49 bool data_header = TRUE, first = TRUE, skip_body = FALSE;
50 bool mime_header = TRUE;
51 size_t trie_mem, uidlist_mem;
52 clock_t clock_start, clock_end;
53 struct timeval tv_start, tv_end;
54 double cputime;
55
56 lib_init();
57 i_unlink_if_exists(trie_path);
58 i_unlink_if_exists(uidlist_path);
59 trie = squat_trie_init(trie_path, time(NULL),
60 FILE_LOCK_METHOD_FCNTL, 0, 0600, (gid_t)-1);
61
62 clock_start = clock();
63 i_gettimeofday(&tv_start);
64
65 fd = open(argv[1], O_RDONLY);
66 if (fd == -1)
67 return 1;
68
69 if (squat_trie_build_init(trie, &build_ctx) < 0)
70 return 1;
71
72 valid = buffer_create_dynamic(default_pool, 4096);
73 input = i_stream_create_fd(fd, SIZE_MAX);
74 ret = 0;
75 while (ret == 0 && (line = i_stream_read_next_line(input)) != NULL) {
76 if (last != input->v_offset/(1024*100)) {
77 fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024));
78 fflush(stderr);
79 last = input->v_offset/(1024*100);
80 }
81 if (str_begins(line, "From ")) {
82 if (!first)
83 seq++;
84 data_header = TRUE;
85 skip_body = FALSE;
86 mime_header = TRUE;
87 continue;
88 }
89 first = FALSE;
90
91 if (str_begins(line, "--")) {
92 skip_body = FALSE;
93 mime_header = TRUE;
94 }
95
96 if (mime_header) {
97 if (*line == '\0') {
98 data_header = FALSE;
99 mime_header = FALSE;
100 continue;
101 }
102
103 if (strncasecmp(line, "Content-Type:", 13) == 0 &&
104 strncasecmp(line, "Content-Type: text/", 19) != 0 &&
105 strncasecmp(line, "Content-Type: message/", 22) != 0)
106 skip_body = TRUE;
107 else if (strncasecmp(line, "Content-Transfer-Encoding: base64", 33) == 0)
108 skip_body = TRUE;
109 } else if (skip_body)
110 continue;
111 if (*line == '\0')
112 continue;
113
114 /* we're actually indexing here headers as bodies and bodies
115 as headers. it doesn't really matter in this test, and
116 fixing it would require storing headers temporarily
117 elsewhere and index them only after the body */
118 index_type = !data_header ? SQUAT_INDEX_TYPE_HEADER :
119 SQUAT_INDEX_TYPE_BODY;
120
121 buffer_set_used_size(valid, 0);
122 len = strlen(line);
123 if (uni_utf8_get_valid_data((const unsigned char *)line,
124 len, valid)) {
125 ret = squat_trie_build_more(build_ctx, seq, index_type,
126 (const void *)line, len);
127 } else if (valid->used > 0) {
128 ret = squat_trie_build_more(build_ctx, seq, index_type,
129 valid->data, valid->used);
130 }
131 }
132 buffer_free(&valid);
133 if (squat_trie_build_deinit(&build_ctx, NULL) < 0)
134 ret = -1;
135 if (ret < 0) {
136 printf("build broken\n");
137 return 1;
138 }
139
140 clock_end = clock();
141 i_gettimeofday(&tv_end);
142
143 cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
144 fprintf(stderr, "\n - Index time: %.2f CPU seconds, "
145 "%.2f real seconds (%.02fMB/CPUs)\n", cputime,
146 timeval_diff_msecs(&tv_end, &tv_start)/1000.0,
147 input->v_offset / cputime / (1024*1024));
148
149 if (stat(trie_path, &trie_st) < 0)
150 i_error("stat(%s) failed: %m", trie_path);
151 if (stat(uidlist_path, &uidlist_st) < 0)
152 i_error("stat(%s) failed: %m", uidlist_path);
153
154 trie_mem = squat_trie_mem_used(trie, &node_count);
155 uidlist_mem = squat_uidlist_mem_used(squat_trie_get_uidlist(trie),
156 &uidlist_count);
157 fprintf(stderr, " - memory: %uk for trie, %uk for uidlist\n",
158 (unsigned)(trie_mem/1024), (unsigned)(uidlist_mem/1024));
159 fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u nodes (%.02f%%)\n",
160 trie_st.st_size, node_count,
161 trie_st.st_size / (float)input->v_offset * 100.0);
162 fprintf(stderr, " - %"PRIuUOFF_T" bytes in %u UID lists (%.02f%%)\n",
163 uidlist_st.st_size, uidlist_count,
164 uidlist_st.st_size / (float)input->v_offset * 100.0);
165 fprintf(stderr, " - %"PRIuUOFF_T" bytes total of %"
166 PRIuUOFF_T" (%.02f%%)\n",
167 (trie_st.st_size + uidlist_st.st_size), input->v_offset,
168 (trie_st.st_size + uidlist_st.st_size) /
169 (float)input->v_offset * 100.0);
170
171 i_stream_unref(&input);
172 i_close_fd(&fd);
173
174 i_array_init(&definite_uids, 128);
175 i_array_init(&maybe_uids, 128);
176 while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) {
177 ret = strlen(str)-1;
178 str[ret] = 0;
179
180 i_gettimeofday(&tv_start);
181 ret = squat_trie_lookup(trie, str, SQUAT_INDEX_TYPE_HEADER |
182 SQUAT_INDEX_TYPE_BODY,
183 &definite_uids, &maybe_uids);
184 if (ret < 0)
185 printf("error\n");
186 else {
187 i_gettimeofday(&tv_end);
188 printf(" - Search took %.05f CPU seconds\n",
189 timeval_diff_usecs(&tv_end, &tv_start)/1000000.0);
190 printf(" - definite uids: ");
191 result_print(&definite_uids);
192 printf(" - maybe uids: ");
193 result_print(&maybe_uids);
194 }
195 }
196 return 0;
197 }
198