1 /*
2 mairix - message index builder and finder for maildir folders.
3
4 **********************************************************************
5 * Copyright (C) Richard P. Curnow 2002-2004, 2005
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of version 2 of the GNU General Public License as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 **********************************************************************
21 */
22
23 /* Functions for handling tokens */
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include "mairix.h"
28
init_matches(struct matches * m)29 static void init_matches(struct matches *m) {/*{{{*/
30 m->msginfo = NULL;
31 m->n = 0;
32 m->max = 0;
33 m->highest = 0;
34 }
35 /*}}}*/
new_token(void)36 struct token *new_token(void)/*{{{*/
37 {
38 struct token *result = new(struct token);
39 result->text = NULL;
40 init_matches(&result->match0);
41 return result;
42 }
43 /*}}}*/
new_token2(void)44 struct token2 *new_token2(void)/*{{{*/
45 {
46 struct token2 *result = new(struct token2);
47 result->text = NULL;
48 init_matches(&result->match0);
49 init_matches(&result->match1);
50 return result;
51 }
52 /*}}}*/
free_token(struct token * x)53 void free_token(struct token *x)/*{{{*/
54 {
55 if (x->text) free(x->text);
56 if (x->match0.msginfo) free(x->match0.msginfo);
57 free(x);
58 }
59 /*}}}*/
free_token2(struct token2 * x)60 void free_token2(struct token2 *x)/*{{{*/
61 {
62 if (x->text) free(x->text);
63 if (x->match0.msginfo) free(x->match0.msginfo);
64 if (x->match1.msginfo) free(x->match1.msginfo);
65 free(x);
66 }
67 /*}}}*/
new_toktable(void)68 struct toktable *new_toktable(void)/*{{{*/
69 {
70 struct toktable *result = new(struct toktable);
71 result->tokens = NULL;
72 result->n = 0;
73 result->hwm = 0;
74 result->size = 0;
75 return result;
76 }
77 /*}}}*/
new_toktable2(void)78 struct toktable2 *new_toktable2(void)/*{{{*/
79 {
80 struct toktable2 *result = new(struct toktable2);
81 result->tokens = NULL;
82 result->n = 0;
83 result->hwm = 0;
84 result->size = 0;
85 return result;
86 }
87 /*}}}*/
free_toktable(struct toktable * x)88 void free_toktable(struct toktable *x)/*{{{*/
89 {
90 if (x->tokens) {
91 int i;
92 for (i=0; i<x->size; i++) {
93 if (x->tokens[i]) {
94 free_token(x->tokens[i]);
95 }
96 }
97 free(x->tokens);
98 }
99 free(x);
100 }
101 /*}}}*/
free_toktable2(struct toktable2 * x)102 void free_toktable2(struct toktable2 *x)/*{{{*/
103 {
104 if (x->tokens) {
105 int i;
106 for (i=0; i<x->size; i++) {
107 if (x->tokens[i]) {
108 free_token2(x->tokens[i]);
109 }
110 }
111 free(x->tokens);
112 }
113 free(x);
114 }
115 /*}}}*/
116 /* FIXME : This stuff really needs cleaning up. */
enlarge_toktable(struct toktable * table)117 static void enlarge_toktable(struct toktable *table)/*{{{*/
118 {
119 if (table->size == 0) {
120 int i;
121 /* initial allocation */
122 table->size = 1024;
123 table->mask = table->size - 1;
124 table->tokens = new_array(struct token *, table->size);
125 for (i=0; i<table->size; i++) {
126 table->tokens[i] = NULL;
127 }
128 } else {
129 struct token **old_tokens;
130 int old_size = table->size;
131 int i;
132 /* reallocate */
133 old_tokens = table->tokens;
134 table->size <<= 1;
135 table->mask = table->size - 1;
136 table->tokens = new_array(struct token *, table->size);
137 for (i=0; i<table->size; i++) {
138 table->tokens[i] = NULL;
139 }
140 for (i=0; i<old_size; i++) {
141 unsigned long new_index;
142 if (old_tokens[i]) {
143 new_index = old_tokens[i]->hashval & table->mask;
144 while (table->tokens[new_index]) {
145 new_index++;
146 new_index &= table->mask;
147 }
148 table->tokens[new_index] = old_tokens[i];
149 }
150 }
151 free(old_tokens);
152 }
153 table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
154 }
155 /*}}}*/
enlarge_toktable2(struct toktable2 * table)156 static void enlarge_toktable2(struct toktable2 *table)/*{{{*/
157 {
158 if (table->size == 0) {
159 int i;
160 /* initial allocation */
161 table->size = 1024;
162 table->mask = table->size - 1;
163 table->tokens = new_array(struct token2 *, table->size);
164 for (i=0; i<table->size; i++) {
165 table->tokens[i] = NULL;
166 }
167 } else {
168 struct token2 **old_tokens;
169 int old_size = table->size;
170 int i;
171 /* reallocate */
172 old_tokens = table->tokens;
173 table->size <<= 1;
174 table->mask = table->size - 1;
175 table->tokens = new_array(struct token2 *, table->size);
176 for (i=0; i<table->size; i++) {
177 table->tokens[i] = NULL;
178 }
179 for (i=0; i<old_size; i++) {
180 unsigned long new_index;
181 if (old_tokens[i]) {
182 new_index = old_tokens[i]->hashval & table->mask;
183 while (table->tokens[new_index]) {
184 new_index++;
185 new_index &= table->mask;
186 }
187 table->tokens[new_index] = old_tokens[i];
188 }
189 }
190 free(old_tokens);
191 }
192 table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
193 }
194 /*}}}*/
insert_value(unsigned char * x,int val)195 static int insert_value(unsigned char *x, int val)/*{{{*/
196 {
197 assert(val >= 0);
198 if (val <= 127) {
199 *x = val;
200 return 1;
201 } else if (val <= 16383) {
202 *x++ = (val >> 8) | 0x80;
203 *x = (val & 0xff);
204 return 2;
205 } else {
206 int a = (val >> 24);
207 assert (a <= 63);
208 *x++ = a | 0xc0;
209 *x++ = ((val >> 16) & 0xff);
210 *x++ = ((val >> 8) & 0xff);
211 *x = (val & 0xff);
212 return 4;
213 }
214 }
215 /*}}}*/
check_and_enlarge_encoding(struct matches * m)216 void check_and_enlarge_encoding(struct matches *m)/*{{{*/
217 {
218 if (m->n + 4 >= m->max) {
219 if (m->max == 0) {
220 m->max = 16;
221 } else {
222 m->max += (m->max >> 1);
223 }
224 m->msginfo = grow_array(unsigned char, m->max, m->msginfo);
225 }
226 }
227 /*}}}*/
insert_index_on_encoding(struct matches * m,int idx)228 void insert_index_on_encoding(struct matches *m, int idx)/*{{{*/
229 {
230 if (m->n == 0) {
231 /* Always encode value */
232 m->n += insert_value(m->msginfo + m->n, idx);
233 } else {
234 assert(idx >= m->highest);
235 if (idx > m->highest) {
236 int increment = idx - m->highest;
237 m->n += insert_value(m->msginfo + m->n, increment);
238 } else {
239 /* token has already been seen in this file */
240 }
241 }
242 m->highest = idx;
243 }
244 /*}}}*/
add_token_in_file(int file_index,unsigned int hash_key,char * tok_text,struct toktable * table)245 void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table)/*{{{*/
246 {
247 unsigned long hash;
248 int index;
249 struct token *tok;
250 char *lc_tok_text;
251 char *p;
252
253 lc_tok_text = new_string((char*)tok_text);
254 for (p = lc_tok_text; *p; p++) {
255 *p = tolower(*(unsigned char *) p);
256 }
257 /* 2nd arg is string length */
258 hash = hashfn((unsigned char *) lc_tok_text, p - lc_tok_text, hash_key);
259
260 if (table->n >= table->hwm) {
261 enlarge_toktable(table);
262 }
263
264 index = hash & table->mask;
265 while (table->tokens[index]) {
266 /* strcmp ok as text has been tolower'd earlier */
267 if (!strcmp(lc_tok_text, table->tokens[index]->text))
268 break;
269 index++;
270 index &= table->mask;
271 }
272
273 if (!table->tokens[index]) {
274 /* Allocate new */
275 struct token *new_tok = new_token();
276 /* New token takes ownership of lc_tok_text, no need to free that later. */
277 new_tok->text = (char *) lc_tok_text;
278 new_tok->hashval = hash; /* save full width for later */
279 table->tokens[index] = new_tok;
280 ++table->n;
281 } else {
282 free(lc_tok_text);
283 }
284
285 tok = table->tokens[index];
286
287 check_and_enlarge_encoding(&tok->match0);
288 insert_index_on_encoding(&tok->match0, file_index);
289 }
290 /*}}}*/
add_token2_in_file(int file_index,unsigned int hash_key,char * tok_text,struct toktable2 * table,int add_to_chain1)291 void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1)/*{{{*/
292 {
293 unsigned long hash;
294 int index;
295 struct token2 *tok;
296 char *lc_tok_text;
297 char *p;
298
299 lc_tok_text = new_string(tok_text);
300 for (p = lc_tok_text; *p; p++) {
301 *p = tolower(*(unsigned char *) p);
302 }
303 /* 2nd arg is string length */
304 hash = hashfn((unsigned char *) lc_tok_text, p - lc_tok_text, hash_key);
305
306 if (table->n >= table->hwm) {
307 enlarge_toktable2(table);
308 }
309
310 index = hash & table->mask;
311 while (table->tokens[index]) {
312 /* strcmp ok as text has been tolower'd earlier */
313 if (!strcmp(lc_tok_text, table->tokens[index]->text))
314 break;
315 index++;
316 index &= table->mask;
317 }
318
319 if (!table->tokens[index]) {
320 /* Allocate new */
321 struct token2 *new_tok = new_token2();
322 /* New token takes ownership of lc_tok_text, no need to free that later. */
323 new_tok->text = lc_tok_text;
324 new_tok->hashval = hash; /* save full width for later */
325 table->tokens[index] = new_tok;
326 ++table->n;
327 } else {
328 free(lc_tok_text);
329 }
330
331 tok = table->tokens[index];
332
333 check_and_enlarge_encoding(&tok->match0);
334 insert_index_on_encoding(&tok->match0, file_index);
335 if (add_to_chain1) {
336 check_and_enlarge_encoding(&tok->match1);
337 insert_index_on_encoding(&tok->match1, file_index);
338 }
339 }
340 /*}}}*/
341
342
343
344
345