1 /* $Id: diction.c,v 1.16 2011/06/28 00:13:48 sbajic Exp $ */
2 
3 /*
4  DSPAM
5  COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6 
7  This program is free software: you can redistribute it and/or modify
8  it under the terms of the GNU Affero General Public License as
9  published by the Free Software Foundation, either version 3 of the
10  License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  GNU Affero General Public License for more details.
16 
17  You should have received a copy of the GNU Affero General Public License
18  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 */
21 
/*
 *  diction.c - subset of lexical data
 *
 *  DESCRIPTION
 *    A diction is a subset of lexical data from a user's dictionary. In the
 *    context used within DSPAM, a diction is all of the matching lexical
 *    information from the current message being processed. The diction is
 *    loaded/stored by the storage driver and managed primarily by libdspam.
 */
31 
32 #ifdef HAVE_CONFIG_H
33 #include <auto-config.h>
34 #endif
35 
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <string.h>
39 
40 #include "diction.h"
41 
/*
 * Ascending table of primes used as hash-table bucket counts.
 * ds_diction_create() picks the first entry that is not smaller than the
 * requested size. The table is read-only, hence const.
 */
static const unsigned long _ds_prime_list[] = {
  53ul, 97ul, 193ul, 389ul, 769ul,
  1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
  49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
  50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
  1610612741ul, 3221225473ul, 4294967291ul
};
50 
51 ds_diction_t
ds_diction_create(unsigned long size)52 ds_diction_create (unsigned long size)
53 {
54   ds_diction_t diction = (ds_diction_t) calloc(1, sizeof(struct _ds_diction));
55   int i = 0;
56 
57   if (!diction) {
58     perror("ds_diction_create: calloc() failed");
59     return NULL;
60   }
61 
62   while (_ds_prime_list[i] < size)
63     { i++; }
64 
65   diction->size = _ds_prime_list[i];
66   diction->items = 0;
67   diction->tbl =
68     (struct _ds_term **) calloc(diction->size, sizeof (struct _ds_term *));
69   if (!diction->tbl)
70   {
71     perror("ds_diction_create: calloc() failed");
72     free(diction);
73     return NULL;
74   }
75 
76   diction->order = nt_create(NT_INDEX);
77   diction->chained_order = nt_create(NT_INDEX);
78   if (!diction->order || !diction->chained_order) {
79     nt_destroy(diction->order);
80     nt_destroy(diction->chained_order);
81     free(diction->tbl);
82     free(diction);
83     return NULL;
84   }
85 
86   return diction;
87 }
88 
89 void
ds_diction_destroy(ds_diction_t diction)90 ds_diction_destroy (ds_diction_t diction)
91 {
92   ds_term_t term, next;
93   ds_cursor_t cur;
94 
95   if (!diction) return;
96 
97   cur = ds_diction_cursor(diction);
98   if (!cur) {
99     perror("ds_diction_destroy: ds_diction_cursor() failed");
100     return;
101   }
102 
103   term = ds_diction_next(cur);
104   while(term)
105   {
106     next = ds_diction_next(cur);
107     ds_diction_delete(diction, term->key);
108     term = next;
109   }
110   ds_diction_close(cur);
111 
112   nt_destroy(diction->order);
113   nt_destroy(diction->chained_order);
114   free(diction->tbl);
115   free(diction);
116   return;
117 }
118 
119 ds_term_t
ds_diction_term_create(ds_key_t key,const char * name)120 ds_diction_term_create (ds_key_t key, const char *name)
121 {
122   ds_term_t term = (ds_term_t) calloc(1, sizeof(struct _ds_term));
123 
124   if (!term) {
125     perror("ds_diction_term_create: calloc() failed");
126   } else {
127     term->key = key;
128     term->frequency = 1;
129     term->type = 'D';
130     if (name)
131       term->name = strdup(name);
132   }
133   return term;
134 }
135 
136 ds_term_t
ds_diction_find(ds_diction_t diction,ds_key_t key)137 ds_diction_find (ds_diction_t diction, ds_key_t key)
138 {
139   ds_term_t term;
140 
141   term = diction->tbl[key % diction->size];
142   while (term)
143   {
144     if (key == term->key)
145       return term;
146     term = term->next;
147   }
148 
149   return NULL;
150 }
151 
152 ds_term_t
ds_diction_touch(ds_diction_t diction,ds_key_t key,const char * name,int flags)153 ds_diction_touch(
154   ds_diction_t diction,
155   ds_key_t key,
156   const char *name,
157   int flags)
158 {
159   unsigned long bucket = key % diction->size;
160   ds_term_t parent = NULL;
161   ds_term_t insert = NULL;
162   ds_term_t term;
163 
164   term = diction->tbl[bucket];
165   while (term) {
166     if (key == term->key) {
167       insert = term;
168       break;
169     }
170     parent = term;
171     term = term->next;
172   }
173 
174   if (!insert) {
175     insert = ds_diction_term_create(key, name);
176     if (!insert) {
177       perror("ds_diction_touch: ds_diction_term_create() failed");
178       return NULL;
179     }
180     diction->items++;
181     if (parent)
182       parent->next = insert;
183     else
184       diction->tbl[bucket] = insert;
185   } else {
186     if (!insert->name && name)
187       insert->name = strdup(name);
188     insert->frequency++;
189   }
190 
191   if (flags & DSD_CONTEXT) {
192     if (flags & DSD_CHAINED)
193       nt_add(diction->chained_order, insert);
194     else
195       nt_add(diction->order, insert);
196   }
197 
198   return insert;
199 }
200 
201 void
ds_diction_delete(ds_diction_t diction,ds_key_t key)202 ds_diction_delete(ds_diction_t diction, ds_key_t key)
203 {
204   unsigned long bucket = key % diction->size;
205   ds_term_t parent = NULL;
206   ds_term_t delete = NULL;
207   ds_term_t term;
208 
209   term = diction->tbl[bucket];
210 
211   while(term) {
212     if (key == term->key) {
213       delete = term;
214       break;
215     }
216     parent = term;
217     term = term->next;
218   }
219 
220   if (delete) {
221     if (parent)
222       parent->next = delete->next;
223     else
224       diction->tbl[bucket] = delete->next;
225 
226     free(delete->name);
227     free(delete);
228     diction->items--;
229   }
230   return;
231 }
232 
233 ds_cursor_t
ds_diction_cursor(ds_diction_t diction)234 ds_diction_cursor (ds_diction_t diction)
235 {
236   ds_cursor_t cur = (ds_cursor_t) calloc(1, sizeof(struct _ds_diction_c));
237 
238   if (!cur) {
239     perror("ds_diction_cursor: calloc() failed");
240     return NULL;
241   }
242   cur->diction    = diction;
243   cur->iter_index = 0;
244   cur->iter_next  = NULL;
245   return cur;
246 }
247 
248 ds_term_t
ds_diction_next(ds_cursor_t cur)249 ds_diction_next (ds_cursor_t cur)
250 {
251   unsigned long bucket;
252   ds_term_t term;
253   ds_term_t tbl_term;
254 
255   if (!cur)
256     return NULL;
257 
258   term = cur->iter_next;
259   if (term) {
260     cur->iter_next = term->next;
261     return term;
262   }
263 
264   while (cur->iter_index < cur->diction->size) {
265     bucket = cur->iter_index;
266     cur->iter_index++;
267     tbl_term = cur->diction->tbl[bucket];
268     if (tbl_term) {
269       cur->iter_next = tbl_term->next;
270       return (tbl_term);
271     }
272   }
273 
274   return NULL;
275 }
276 
277 void
ds_diction_close(ds_cursor_t cur)278 ds_diction_close (ds_cursor_t cur)
279 {
280   free(cur);
281   return;
282 }
283 
284 int
ds_diction_setstat(ds_diction_t diction,ds_key_t key,ds_spam_stat_t s)285 ds_diction_setstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
286 {
287   ds_term_t term = ds_diction_find(diction, key);
288 
289   if (term) {
290     term->s.probability = s->probability;
291     term->s.spam_hits = s->spam_hits;
292     term->s.innocent_hits = s->innocent_hits;
293     term->s.status = s->status;
294     term->s.offset = s->offset;
295     return 0;
296   }
297   return -1;
298 }
299 
/*
 * ds_diction_addstat - accumulate statistics into a term
 *
 * Adds probability, spam_hits and innocent_hits from s to the term's own
 * values. The term's offset is taken from s only while it is still zero,
 * and the TST_DISK / TST_DIRTY status bits are set on the term when they
 * are set in s (they are never cleared here).
 *
 * Returns 0 on success, -1 if the diction holds no term with that key.
 */
int
ds_diction_addstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
{
  ds_term_t target = ds_diction_find(diction, key);

  if (target == NULL)
    return -1;

  target->s.probability   += s->probability;
  target->s.spam_hits     += s->spam_hits;
  target->s.innocent_hits += s->innocent_hits;

  if (target->s.offset == 0)
    target->s.offset = s->offset;

  if (s->status & TST_DISK)
    target->s.status |= TST_DISK;
  if (s->status & TST_DIRTY)
    target->s.status |= TST_DIRTY;

  return 0;
}
318 
319 int
ds_diction_getstat(ds_diction_t diction,ds_key_t key,ds_spam_stat_t s)320 ds_diction_getstat  (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
321 {
322   ds_term_t term = ds_diction_find(diction, key);
323 
324   if (term) {
325     s->probability = term->s.probability;
326     s->spam_hits = term->s.spam_hits;
327     s->innocent_hits = term->s.innocent_hits;
328     s->status = term->s.status;
329     s->offset = term->s.offset;
330     return 0;
331   }
332   return -1;
333 }
334 
335