1 /* $Id: diction.c,v 1.16 2011/06/28 00:13:48 sbajic Exp $ */
2
3 /*
4 DSPAM
5 COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Affero General Public License as
9 published by the Free Software Foundation, either version 3 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 */
21
22 /*
23 * diction.c - subset of lexical data
24 *
25 * DESCRIPTION
26 * a diction is a subset of lexical data from a user's dictionary. in the
27 * context used within DSPAM, a diction is all of the matching lexical
28 * information from the current message being processed. the diction is
29 * loaded/stored by the storage driver and managed primarily by libdspam.
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include <auto-config.h>
34 #endif
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <string.h>
39
40 #include "diction.h"
41
/*
 * Candidate bucket counts for the hash table, in strictly ascending order.
 * ds_diction_create() selects the first entry that is >= the requested
 * size. Each entry is roughly twice the previous one; the final entry is
 * just below 2^32.
 *
 * The table is only ever read, so it is declared const.
 */
static const unsigned long _ds_prime_list[] = {
  53ul, 97ul, 193ul, 389ul, 769ul,
  1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
  49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
  50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
  1610612741ul, 3221225473ul, 4294967291ul
};
50
51 ds_diction_t
ds_diction_create(unsigned long size)52 ds_diction_create (unsigned long size)
53 {
54 ds_diction_t diction = (ds_diction_t) calloc(1, sizeof(struct _ds_diction));
55 int i = 0;
56
57 if (!diction) {
58 perror("ds_diction_create: calloc() failed");
59 return NULL;
60 }
61
62 while (_ds_prime_list[i] < size)
63 { i++; }
64
65 diction->size = _ds_prime_list[i];
66 diction->items = 0;
67 diction->tbl =
68 (struct _ds_term **) calloc(diction->size, sizeof (struct _ds_term *));
69 if (!diction->tbl)
70 {
71 perror("ds_diction_create: calloc() failed");
72 free(diction);
73 return NULL;
74 }
75
76 diction->order = nt_create(NT_INDEX);
77 diction->chained_order = nt_create(NT_INDEX);
78 if (!diction->order || !diction->chained_order) {
79 nt_destroy(diction->order);
80 nt_destroy(diction->chained_order);
81 free(diction->tbl);
82 free(diction);
83 return NULL;
84 }
85
86 return diction;
87 }
88
89 void
ds_diction_destroy(ds_diction_t diction)90 ds_diction_destroy (ds_diction_t diction)
91 {
92 ds_term_t term, next;
93 ds_cursor_t cur;
94
95 if (!diction) return;
96
97 cur = ds_diction_cursor(diction);
98 if (!cur) {
99 perror("ds_diction_destroy: ds_diction_cursor() failed");
100 return;
101 }
102
103 term = ds_diction_next(cur);
104 while(term)
105 {
106 next = ds_diction_next(cur);
107 ds_diction_delete(diction, term->key);
108 term = next;
109 }
110 ds_diction_close(cur);
111
112 nt_destroy(diction->order);
113 nt_destroy(diction->chained_order);
114 free(diction->tbl);
115 free(diction);
116 return;
117 }
118
119 ds_term_t
ds_diction_term_create(ds_key_t key,const char * name)120 ds_diction_term_create (ds_key_t key, const char *name)
121 {
122 ds_term_t term = (ds_term_t) calloc(1, sizeof(struct _ds_term));
123
124 if (!term) {
125 perror("ds_diction_term_create: calloc() failed");
126 } else {
127 term->key = key;
128 term->frequency = 1;
129 term->type = 'D';
130 if (name)
131 term->name = strdup(name);
132 }
133 return term;
134 }
135
136 ds_term_t
ds_diction_find(ds_diction_t diction,ds_key_t key)137 ds_diction_find (ds_diction_t diction, ds_key_t key)
138 {
139 ds_term_t term;
140
141 term = diction->tbl[key % diction->size];
142 while (term)
143 {
144 if (key == term->key)
145 return term;
146 term = term->next;
147 }
148
149 return NULL;
150 }
151
152 ds_term_t
ds_diction_touch(ds_diction_t diction,ds_key_t key,const char * name,int flags)153 ds_diction_touch(
154 ds_diction_t diction,
155 ds_key_t key,
156 const char *name,
157 int flags)
158 {
159 unsigned long bucket = key % diction->size;
160 ds_term_t parent = NULL;
161 ds_term_t insert = NULL;
162 ds_term_t term;
163
164 term = diction->tbl[bucket];
165 while (term) {
166 if (key == term->key) {
167 insert = term;
168 break;
169 }
170 parent = term;
171 term = term->next;
172 }
173
174 if (!insert) {
175 insert = ds_diction_term_create(key, name);
176 if (!insert) {
177 perror("ds_diction_touch: ds_diction_term_create() failed");
178 return NULL;
179 }
180 diction->items++;
181 if (parent)
182 parent->next = insert;
183 else
184 diction->tbl[bucket] = insert;
185 } else {
186 if (!insert->name && name)
187 insert->name = strdup(name);
188 insert->frequency++;
189 }
190
191 if (flags & DSD_CONTEXT) {
192 if (flags & DSD_CHAINED)
193 nt_add(diction->chained_order, insert);
194 else
195 nt_add(diction->order, insert);
196 }
197
198 return insert;
199 }
200
201 void
ds_diction_delete(ds_diction_t diction,ds_key_t key)202 ds_diction_delete(ds_diction_t diction, ds_key_t key)
203 {
204 unsigned long bucket = key % diction->size;
205 ds_term_t parent = NULL;
206 ds_term_t delete = NULL;
207 ds_term_t term;
208
209 term = diction->tbl[bucket];
210
211 while(term) {
212 if (key == term->key) {
213 delete = term;
214 break;
215 }
216 parent = term;
217 term = term->next;
218 }
219
220 if (delete) {
221 if (parent)
222 parent->next = delete->next;
223 else
224 diction->tbl[bucket] = delete->next;
225
226 free(delete->name);
227 free(delete);
228 diction->items--;
229 }
230 return;
231 }
232
233 ds_cursor_t
ds_diction_cursor(ds_diction_t diction)234 ds_diction_cursor (ds_diction_t diction)
235 {
236 ds_cursor_t cur = (ds_cursor_t) calloc(1, sizeof(struct _ds_diction_c));
237
238 if (!cur) {
239 perror("ds_diction_cursor: calloc() failed");
240 return NULL;
241 }
242 cur->diction = diction;
243 cur->iter_index = 0;
244 cur->iter_next = NULL;
245 return cur;
246 }
247
248 ds_term_t
ds_diction_next(ds_cursor_t cur)249 ds_diction_next (ds_cursor_t cur)
250 {
251 unsigned long bucket;
252 ds_term_t term;
253 ds_term_t tbl_term;
254
255 if (!cur)
256 return NULL;
257
258 term = cur->iter_next;
259 if (term) {
260 cur->iter_next = term->next;
261 return term;
262 }
263
264 while (cur->iter_index < cur->diction->size) {
265 bucket = cur->iter_index;
266 cur->iter_index++;
267 tbl_term = cur->diction->tbl[bucket];
268 if (tbl_term) {
269 cur->iter_next = tbl_term->next;
270 return (tbl_term);
271 }
272 }
273
274 return NULL;
275 }
276
277 void
ds_diction_close(ds_cursor_t cur)278 ds_diction_close (ds_cursor_t cur)
279 {
280 free(cur);
281 return;
282 }
283
284 int
ds_diction_setstat(ds_diction_t diction,ds_key_t key,ds_spam_stat_t s)285 ds_diction_setstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
286 {
287 ds_term_t term = ds_diction_find(diction, key);
288
289 if (term) {
290 term->s.probability = s->probability;
291 term->s.spam_hits = s->spam_hits;
292 term->s.innocent_hits = s->innocent_hits;
293 term->s.status = s->status;
294 term->s.offset = s->offset;
295 return 0;
296 }
297 return -1;
298 }
299
/*
 * ds_diction_addstat - merge statistics into those stored for a key
 *
 * probability, spam_hits and innocent_hits from s are added to the term's
 * values. offset is taken from s only if the term has none yet. Of the
 * status flags, only TST_DISK and TST_DIRTY are carried over, and they
 * are only ever set, never cleared.
 *
 * Returns 0 on success, -1 if the key is not in the diction.
 */
int
ds_diction_addstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
{
  ds_term_t entry = ds_diction_find(diction, key);

  if (entry == NULL)
    return -1;

  entry->s.probability   += s->probability;
  entry->s.spam_hits     += s->spam_hits;
  entry->s.innocent_hits += s->innocent_hits;

  if (!entry->s.offset)
    entry->s.offset = s->offset;

  if (s->status & TST_DISK)
    entry->s.status |= TST_DISK;
  if (s->status & TST_DIRTY)
    entry->s.status |= TST_DIRTY;

  return 0;
}
318
319 int
ds_diction_getstat(ds_diction_t diction,ds_key_t key,ds_spam_stat_t s)320 ds_diction_getstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
321 {
322 ds_term_t term = ds_diction_find(diction, key);
323
324 if (term) {
325 s->probability = term->s.probability;
326 s->spam_hits = term->s.spam_hits;
327 s->innocent_hits = term->s.innocent_hits;
328 s->status = term->s.status;
329 s->offset = term->s.offset;
330 return 0;
331 }
332 return -1;
333 }
334
335