1 /*
2   mairix - message index builder and finder for maildir folders.
3 
4  **********************************************************************
5  * Copyright (C) Richard P. Curnow  2002-2004, 2005
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of version 2 of the GNU General Public License as
9  * published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  *
20  **********************************************************************
21  */
22 
23 /* Functions for handling tokens */
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include "mairix.h"
28 
init_matches(struct matches * m)29 static void init_matches(struct matches *m) {/*{{{*/
30   m->msginfo = NULL;
31   m->n = 0;
32   m->max = 0;
33   m->highest = 0;
34 }
35 /*}}}*/
new_token(void)36 struct token *new_token(void)/*{{{*/
37 {
38   struct token *result = new(struct token);
39   result->text = NULL;
40   init_matches(&result->match0);
41   return result;
42 }
43 /*}}}*/
new_token2(void)44 struct token2 *new_token2(void)/*{{{*/
45 {
46   struct token2 *result = new(struct token2);
47   result->text = NULL;
48   init_matches(&result->match0);
49   init_matches(&result->match1);
50   return result;
51 }
52 /*}}}*/
free_token(struct token * x)53 void free_token(struct token *x)/*{{{*/
54 {
55   if (x->text) free(x->text);
56   if (x->match0.msginfo) free(x->match0.msginfo);
57   free(x);
58 }
59 /*}}}*/
free_token2(struct token2 * x)60 void free_token2(struct token2 *x)/*{{{*/
61 {
62   if (x->text) free(x->text);
63   if (x->match0.msginfo) free(x->match0.msginfo);
64   if (x->match1.msginfo) free(x->match1.msginfo);
65   free(x);
66 }
67 /*}}}*/
new_toktable(void)68 struct toktable *new_toktable(void)/*{{{*/
69 {
70   struct toktable *result = new(struct toktable);
71   result->tokens = NULL;
72   result->n = 0;
73   result->hwm = 0;
74   result->size = 0;
75   return result;
76 }
77 /*}}}*/
new_toktable2(void)78 struct toktable2 *new_toktable2(void)/*{{{*/
79 {
80   struct toktable2 *result = new(struct toktable2);
81   result->tokens = NULL;
82   result->n = 0;
83   result->hwm = 0;
84   result->size = 0;
85   return result;
86 }
87 /*}}}*/
free_toktable(struct toktable * x)88 void free_toktable(struct toktable *x)/*{{{*/
89 {
90   if (x->tokens) {
91     int i;
92     for (i=0; i<x->size; i++) {
93       if (x->tokens[i]) {
94         free_token(x->tokens[i]);
95       }
96     }
97     free(x->tokens);
98   }
99   free(x);
100 }
101 /*}}}*/
free_toktable2(struct toktable2 * x)102 void free_toktable2(struct toktable2 *x)/*{{{*/
103 {
104   if (x->tokens) {
105     int i;
106     for (i=0; i<x->size; i++) {
107       if (x->tokens[i]) {
108         free_token2(x->tokens[i]);
109       }
110     }
111     free(x->tokens);
112   }
113   free(x);
114 }
115 /*}}}*/
116 /* FIXME : This stuff really needs cleaning up. */
enlarge_toktable(struct toktable * table)117 static void enlarge_toktable(struct toktable *table)/*{{{*/
118 {
119   if (table->size == 0) {
120     int i;
121     /* initial allocation */
122     table->size = 1024;
123     table->mask = table->size - 1;
124     table->tokens = new_array(struct token *, table->size);
125     for (i=0; i<table->size; i++) {
126       table->tokens[i] = NULL;
127     }
128   } else {
129     struct token **old_tokens;
130     int old_size = table->size;
131     int i;
132     /* reallocate */
133     old_tokens = table->tokens;
134     table->size <<= 1;
135     table->mask = table->size - 1;
136     table->tokens = new_array(struct token *, table->size);
137     for (i=0; i<table->size; i++) {
138       table->tokens[i] = NULL;
139     }
140     for (i=0; i<old_size; i++) {
141       unsigned long new_index;
142       if (old_tokens[i]) {
143         new_index = old_tokens[i]->hashval & table->mask;
144         while (table->tokens[new_index]) {
145           new_index++;
146           new_index &= table->mask;
147         }
148         table->tokens[new_index] = old_tokens[i];
149       }
150     }
151     free(old_tokens);
152   }
153   table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
154 }
155 /*}}}*/
enlarge_toktable2(struct toktable2 * table)156 static void enlarge_toktable2(struct toktable2 *table)/*{{{*/
157 {
158   if (table->size == 0) {
159     int i;
160     /* initial allocation */
161     table->size = 1024;
162     table->mask = table->size - 1;
163     table->tokens = new_array(struct token2 *, table->size);
164     for (i=0; i<table->size; i++) {
165       table->tokens[i] = NULL;
166     }
167   } else {
168     struct token2 **old_tokens;
169     int old_size = table->size;
170     int i;
171     /* reallocate */
172     old_tokens = table->tokens;
173     table->size <<= 1;
174     table->mask = table->size - 1;
175     table->tokens = new_array(struct token2 *, table->size);
176     for (i=0; i<table->size; i++) {
177       table->tokens[i] = NULL;
178     }
179     for (i=0; i<old_size; i++) {
180       unsigned long new_index;
181       if (old_tokens[i]) {
182         new_index = old_tokens[i]->hashval & table->mask;
183         while (table->tokens[new_index]) {
184           new_index++;
185           new_index &= table->mask;
186         }
187         table->tokens[new_index] = old_tokens[i];
188       }
189     }
190     free(old_tokens);
191   }
192   table->hwm = (table->size >> 2) + (table->size >> 3); /* allow 3/8 of nodes to be used */
193 }
194 /*}}}*/
insert_value(unsigned char * x,int val)195 static int insert_value(unsigned char *x, int val)/*{{{*/
196 {
197   assert(val >= 0);
198   if (val <= 127) {
199     *x = val;
200     return 1;
201   } else if (val <= 16383) {
202     *x++ = (val >> 8) | 0x80;
203     *x   = (val & 0xff);
204     return 2;
205   } else {
206     int a = (val >> 24);
207     assert (a <= 63);
208     *x++ = a | 0xc0;
209     *x++ = ((val >> 16) & 0xff);
210     *x++ = ((val >>  8) & 0xff);
211     *x   = (val         & 0xff);
212     return 4;
213   }
214 }
215 /*}}}*/
check_and_enlarge_encoding(struct matches * m)216 void check_and_enlarge_encoding(struct matches *m)/*{{{*/
217 {
218   if (m->n + 4 >= m->max) {
219     if (m->max == 0) {
220       m->max = 16;
221     } else {
222       m->max += (m->max >> 1);
223     }
224     m->msginfo = grow_array(unsigned char, m->max, m->msginfo);
225   }
226 }
227 /*}}}*/
insert_index_on_encoding(struct matches * m,int idx)228 void insert_index_on_encoding(struct matches *m, int idx)/*{{{*/
229 {
230   if (m->n == 0) {
231     /* Always encode value */
232     m->n += insert_value(m->msginfo + m->n, idx);
233   } else {
234     assert(idx >= m->highest);
235     if (idx > m->highest) {
236       int increment = idx - m->highest;
237       m->n += insert_value(m->msginfo + m->n, increment);
238     } else {
239       /* token has already been seen in this file */
240     }
241   }
242   m->highest = idx;
243 }
244 /*}}}*/
add_token_in_file(int file_index,unsigned int hash_key,char * tok_text,struct toktable * table)245 void add_token_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable *table)/*{{{*/
246 {
247   unsigned long hash;
248   int index;
249   struct token *tok;
250   char *lc_tok_text;
251   char *p;
252 
253   lc_tok_text = new_string((char*)tok_text);
254   for (p = lc_tok_text; *p; p++) {
255     *p = tolower(*(unsigned char *) p);
256   }
257   /* 2nd arg is string length */
258   hash = hashfn((unsigned char *) lc_tok_text, p - lc_tok_text, hash_key);
259 
260   if (table->n >= table->hwm) {
261     enlarge_toktable(table);
262   }
263 
264   index = hash & table->mask;
265   while (table->tokens[index]) {
266     /* strcmp ok as text has been tolower'd earlier */
267     if (!strcmp(lc_tok_text, table->tokens[index]->text))
268       break;
269     index++;
270     index &= table->mask;
271   }
272 
273   if (!table->tokens[index]) {
274     /* Allocate new */
275     struct token *new_tok = new_token();
276     /* New token takes ownership of lc_tok_text, no need to free that later. */
277     new_tok->text = (char *) lc_tok_text;
278     new_tok->hashval = hash; /* save full width for later */
279     table->tokens[index] = new_tok;
280     ++table->n;
281   } else {
282     free(lc_tok_text);
283   }
284 
285   tok = table->tokens[index];
286 
287   check_and_enlarge_encoding(&tok->match0);
288   insert_index_on_encoding(&tok->match0, file_index);
289 }
290 /*}}}*/
add_token2_in_file(int file_index,unsigned int hash_key,char * tok_text,struct toktable2 * table,int add_to_chain1)291 void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1)/*{{{*/
292 {
293   unsigned long hash;
294   int index;
295   struct token2 *tok;
296   char *lc_tok_text;
297   char *p;
298 
299   lc_tok_text = new_string(tok_text);
300   for (p = lc_tok_text; *p; p++) {
301     *p = tolower(*(unsigned char *) p);
302   }
303   /* 2nd arg is string length */
304   hash = hashfn((unsigned char *) lc_tok_text, p - lc_tok_text, hash_key);
305 
306   if (table->n >= table->hwm) {
307     enlarge_toktable2(table);
308   }
309 
310   index = hash & table->mask;
311   while (table->tokens[index]) {
312     /* strcmp ok as text has been tolower'd earlier */
313     if (!strcmp(lc_tok_text, table->tokens[index]->text))
314       break;
315     index++;
316     index &= table->mask;
317   }
318 
319   if (!table->tokens[index]) {
320     /* Allocate new */
321     struct token2 *new_tok = new_token2();
322     /* New token takes ownership of lc_tok_text, no need to free that later. */
323     new_tok->text = lc_tok_text;
324     new_tok->hashval = hash; /* save full width for later */
325     table->tokens[index] = new_tok;
326     ++table->n;
327   } else {
328     free(lc_tok_text);
329   }
330 
331   tok = table->tokens[index];
332 
333   check_and_enlarge_encoding(&tok->match0);
334   insert_index_on_encoding(&tok->match0, file_index);
335   if (add_to_chain1) {
336     check_and_enlarge_encoding(&tok->match1);
337     insert_index_on_encoding(&tok->match1, file_index);
338   }
339 }
340 /*}}}*/
341 
342 
343 
344 
345