1 /*
2 ** Modular Logfile Analyzer
3 ** Copyright 2000 Jan Kneschke <jan@kneschke.de>
4 **
5 ** Homepage: http://www.modlogan.org
6 **
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version, and provided that the above
12     copyright and permission notice is included with all distributed
13     copies of this or derived software.
14 
15     This program is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18     GNU General Public License for more details.
19 
20     You should have received a copy of the GNU General Public License
21     along with this program; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
23 
24 **
25 ** $Id: mhash.c,v 1.23 2004/08/27 20:07:37 ostborn Exp $
26 */
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <time.h>
31 #include <string.h>
32 #include <assert.h>
33 
34 #include "config.h"
35 #include "mconfig.h"
36 #include "mlist.h"
37 #include "mhash.h"
38 #include "mdatatypes.h"
39 
40 #define M_HASH_RESIZE_CHECK_DEFAULT 8
41 
42 extern size_t mem_mhash_size;
43 extern size_t mem_mhash_count;
44 
45 int mhash_unfold(mhash *h, mlist *l ) {
46 	int i = 0;
47 	for ( i = 0; i < h->size; i++) {
48 		if (h->data[i]->list) {
49 			mlist *hl;
50 
51 			for (hl = h->data[i]->list; hl; hl = hl->next) {
52 				mdata *data;
53 				if (!hl->data) continue;
54 
55 				data = mdata_copy(hl->data);
56 
57 				mlist_insert(l, data);
58 			}
59 		}
60 	}
61 	return 0;
62 }
63 
64 int mhash_unfold_sorted_limited(mhash *h, mlist *l, int count ) {
65 	int i, j, max;
66 	mdata *data = NULL, *ins_data;
67 #if 0
68 	fprintf(stderr, "%s.%d: h: %p, l: %p, count: %d\n", __FILE__, __LINE__, h, l, count);
69 #endif
70 	for ( j = 0; j < count; j++) {
71 		data = NULL;
72 
73 		max = 0;
74 
75 		for ( i = 0; i < h->size; i++) {
76 			if (h->data[i]->list) {
77 				mlist *hl;
78 
79 				hl = h->data[i]->list;
80 				while (hl) {
81 					if (hl->data) {
82 						if (mdata_get_count(hl->data) > max) {
83 							max = mdata_get_count(hl->data);
84 							data = hl->data;
85 						}
86 					}
87 					hl = hl->next;
88 				}
89 			}
90 		}
91 
92 		if (data) {
93 			ins_data = mdata_copy(data);
94 
95 			mlist_insert(l, ins_data);
96 	/* marking element as processed */
97 			mdata_set_count(data, - mdata_get_count(data));
98 		}
99 	}
100 
101 	/* resetting count */
102 
103 	for ( i = 0; i < h->size; i++) {
104 		if (h->data[i]->list) {
105 			mlist *hl;
106 
107 			hl = h->data[i]->list;
108 			while (hl) {
109 				if (hl->data) {
110 					if (mdata_get_count(hl->data) < 0) {
111 						data = hl->data;
112 
113 						mdata_set_count(hl->data, - mdata_get_count(hl->data));
114 					}
115 				}
116 				hl = hl->next;
117 			}
118 		}
119 	}
120 
121 	return 0;
122 }
123 
124 int mhash_unfold_sorted_limited_vcount(mhash *h, mlist *l, int count ) {
125 	int i, j;
126 	double max;
127 	mdata *data = NULL, *ins_data;
128 #if 0
129 	fprintf(stderr, "%s.%d: h: %p, l: %p, count: %d\n", __FILE__, __LINE__, h, l, count);
130 #endif
131 	for ( j = 0; j < count; j++) {
132 		data = NULL;
133 
134 		max = 0.0;
135 
136 		for ( i = 0; i < h->size; i++) {
137 			if (h->data[i]->list) {
138 				mlist *hl;
139 
140 				hl = h->data[i]->list;
141 				while (hl) {
142 					if (hl->data) {
143 						if (mdata_get_count(hl->data) > 0) { /* negative counts mark already processed values */
144 							if (mdata_get_vcount(hl->data) > max) {
145 								max = mdata_get_vcount(hl->data);
146 								data = hl->data;
147 							}
148 						}
149 					}
150 					hl = hl->next;
151 				}
152 			}
153 		}
154 
155 		if (data) {
156 			ins_data = mdata_copy(data);
157 
158 			mlist_insert(l, ins_data);
159 			/* marking element as processed */
160 			/* marking is done using count as well, because vcount is _unsigned_ */
161 			mdata_set_count(data, - mdata_get_count(data));
162 		}
163 	}
164 
165 	/* resetting count */
166 
167 	for ( i = 0; i < h->size; i++) {
168 		if (h->data[i]->list) {
169 			mlist *hl;
170 
171 			hl = h->data[i]->list;
172 			while (hl) {
173 				if (hl->data) {
174 					if (mdata_get_count(hl->data) < 0) {
175 						data = hl->data;
176 
177 						mdata_set_count(hl->data, - mdata_get_count(hl->data));
178 					}
179 				}
180 				hl = hl->next;
181 			}
182 		}
183 	}
184 
185 	return 0;
186 }
187 
188 int mhash_calc (mhash *h, const char *str) {
189 	unsigned long i = 0;
190 
191 	while (*str)
192 		i = *str++ + 31 * i;
193 
194 	return (i % h->size);
195 }
196 
197 mhash *mhash_init (int size) {
198 	int i = 0;
199 
200 	mhash *h;
201 
202 	mem_mhash_count++;
203 
204 	h = malloc(sizeof(mhash));
205 	assert(h);
206 	memset(h, 0, sizeof(mhash));
207 
208 	mem_mhash_size += sizeof(mhash);
209 
210 	h->size = size;
211 	h->data = malloc(sizeof(mhash_data *) * h->size);
212 	h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT;
213 
214 	mem_mhash_size += sizeof(mhash_data *) * h->size;
215 
216 	for (i = 0; i < h->size; i++) {
217 		h->data[i] = malloc(sizeof(mhash_data));
218 		assert(h->data[i]);
219 		h->data[i]->list = mlist_init();
220 		h->data[i]->size = 0;
221 	}
222 
223 	mem_mhash_size += sizeof(mhash_data) * h->size;
224 
225 	return h;
226 }
227 
228 int mhash_free (mhash *h) {
229 	int i = 0;
230 
231 	for (i = 0; i < h->size; i++) {
232 		mlist_free(h->data[i]->list);
233 		free(h->data[i]);
234 	}
235 	free(h->data);
236 	free(h);
237 
238 	return 0;
239 }
240 
241 int mhash_resize(mhash *h, int newsize) {
242 	mhash *nh = mhash_init(newsize);
243 	int i = 0;
244 
245 	for (i = 0; i < h->size; i++) {
246 		mlist *l;
247 
248 		l = h->data[i]->list;
249 
250 		for (l = h->data[i]->list; l && l->data; l = l->next) {
251 			int newndx;
252 			mdata *data = l->data;
253 			char *key = NULL;
254 
255 			key = data->key;
256 
257 			if (key == NULL) return -1;
258 
259 			newndx = mhash_calc(nh, key);
260 
261 			/* insert data into the new hash */
262 			if (mlist_insert_sorted(nh->data[newndx]->list, data) == 1) {
263 				nh->data[newndx]->size++;
264 			} else {
265 				fprintf(stderr, "%s.%d: ??\n", __FILE__, __LINE__);
266 			}
267 
268 			/* remove the pointer in the old list */
269 			l->data = NULL;
270 		}
271 		/* remove the list */
272 		mlist_free(h->data[i]->list);
273 
274 		/* unset the list in the old hash */
275 		h->data[i]->list = NULL;
276 
277 		/* free the list-handle in the old hash */
278 		free(h->data[i]);
279 	}
280 	/* free the data-handle in the old-hash */
281 	free(h->data);
282 
283 	/* move the new data-set and the new size to the old hash-structure */
284 	h->data = nh->data;
285 	h->size = nh->size;
286 	h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT;
287 
288 	free(nh);
289 
290 	return 0;
291 }
292 
293 int mhash_insert_sorted(mhash *h, mdata *data) {
294 	int ndx = 0, i;
295 	char *key = NULL;
296 
297 	/* set a upper limit for the hash size */
298 	if (h->size <= 1024 && h->resize_check <= 0) {
299 		/* resize the hash if neccesary */
300 		int highest = 0;
301 		for ( i = 0; i < h->size; i++) {
302 			if (h->data[i]->size >= 32) {
303 				mhash_resize(h, h->size * 2 + 1);
304 				break;
305 			}
306 			if (h->data[i]->size > highest) {
307 				highest = h->data[i]->size;
308 			}
309 		}
310 
311 		if (i == h->size) {
312 			/* no resize happend */
313 
314 			/* set new resize-check */
315 			h->resize_check = M_HASH_RESIZE_CHECK_DEFAULT;
316 			if ((32 - highest) > h->resize_check) {
317 				/* we have enough space to delay the resize-check even more */
318 				h->resize_check = (32 - highest);
319 			}
320 		} else {
321 			int highest = 0;
322 
323 			for ( i = 0; i < h->size; i++) {
324 				if (h->data[i]->size > highest) {
325 					highest = h->data[i]->size;
326 				}
327 			}
328 
329 			if ((32 - highest) > h->resize_check) {
330 				/* we have enough space to delay the resize-check even more */
331 				h->resize_check = (32 - highest);
332 			}
333 		}
334 	} else {
335 	}
336 
337 	if (data == NULL) return -1;
338 
339 	key = data->key;
340 
341 	if (key == NULL) return -1;
342 
343 	ndx = mhash_calc(h, key);
344 
345 	if (mlist_insert_sorted(h->data[ndx]->list, data) == 1) {
346 		h->data[ndx]->size++;
347 		h->resize_check--;
348 	} else {
349 #if 0
350 		if (mlist_count(h->data[ndx]->list) != h->data[ndx]->size)
351 			fprintf(stderr, "%s.%d: append (%d == %d) ??\n", __FILE__, __LINE__,
352 				mlist_count(h->data[ndx]->list),
353 				h->data[ndx]->size);
354 #endif
355 	}
356 
357 	return 0;
358 }
359 
360 int mhash_insert_replace(mhash *h, mdata *data) {
361 	int ndx = 0;
362 	mlist *l;
363 
364 	if (data == NULL) return -1;
365 
366 	if (data->key == NULL) return -1;
367 
368 	ndx = mhash_calc(h, data->key);
369 	for (l = h->data[ndx]->list; l && l->data; l = l->next) {
370 		if (0 == strcmp(l->data->key, data->key)) {
371 			/* found */
372 			mdata_free(l->data);
373 
374 			l->data = data;
375 			break;
376 		}
377 	}
378 
379 	if (l == NULL || l->data == NULL) {
380 		if (mlist_insert_sorted(h->data[ndx]->list, data) == 1) {
381 			h->data[ndx]->size++;
382 		}
383 	}
384 	return 0;
385 }
386 
387 
388 int mhash_count(mhash *h) {
389 	int i, c = 0;
390 	if (!h) return 0;
391 
392 	for ( i = 0; i < h->size; i++) {
393 #if 0
394 		c += mlist_count(h->data[i]->list);
395 #else
396 		c += h->data[i]->size;
397 #endif
398 	}
399 
400 	return c;
401 }
402 
403 mdata *mhash_get_data(mhash *h, const char *str) {
404 	int ndx;
405 
406 	ndx = mhash_calc(h, str);
407 
408 	return mlist_get_data(h->data[ndx]->list, str);
409 }
410 
411 int mhash_in_hash(mhash *h, const char *str) {
412 	int ndx;
413 
414 	ndx = mhash_calc(h, str);
415 
416 	return mlist_in_list(h->data[ndx]->list, str);
417 }
418 
419 mdata **mhash_sorted_to_marray(const mhash *h, int sortby, int sortdir) {
420 	int j = 0; /* number of data-elements */
421 	int i;
422 	mdata **md, **r_md;
423 	int *a, *b;
424 
425 	/* count the data elements */
426 	for ( i = 0; i < h->size; i++) {
427 		if (h->data[i]->list) {
428 			mlist *hl;
429 
430 			for (hl = h->data[i]->list; hl; hl = hl->next) {
431 				if (hl->data) {
432 					j++;
433 				}
434 			}
435 		}
436 	}
437 
438 	/* unfold the hash */
439 	md = malloc(sizeof(mdata *) * j);
440 	/* add one for the NULL termination */
441 	r_md = malloc(sizeof(mdata *) * (j + 1));
442 	a = malloc(sizeof(int) * j);
443 	b = malloc(sizeof(int) * j);
444 	j = 0;
445 	for ( i = 0; i < h->size; i++) {
446 		if (h->data[i]->list) {
447 			mlist *hl;
448 
449 			for (hl = h->data[i]->list; hl; hl = hl->next) {
450 				if (hl->data) {
451 					a[j] = j;
452 					md[j++] = hl->data;
453 				}
454 			}
455 		}
456 	}
457 	/* sort */
458 	mdata_array_msort(md, a, b, 0, j-1, sortby, sortdir);
459 	/* 'a' contains the sorted indexes */
460 
461 	/* copy the data-element into the returned array */
462 	for (i = 0; i < j; i++)
463 		r_md[i] = md[a[i]];
464 
465 	r_md[i] = NULL;
466 
467 	free(a);
468 	free(b);
469 	free(md);
470 
471 	return r_md;
472 }
473 
474 int mhash_get_value(mhash *h, const char *key) {
475 	int i;
476 	if (!h) return 0;
477 
478 	for ( i = 0; i < h->size; i++) {
479 		mlist *l;
480 
481 		for (l = h->data[i]->list; l && l->data; l = l->next) {
482 			mdata *data = l->data;
483 
484 			if (0 == strcmp(key, data->key)) {
485 				return mdata_get_count(data);
486 			}
487 		}
488 	}
489 
490 	return 0;
491 }
492 
493 long mhash_sumup(mhash *h) {
494 	int i, c = 0;
495 	if (!h) return 0;
496 
497 	for ( i = 0; i < h->size; i++) {
498 		c += mlist_sumup(h->data[i]->list);
499 	}
500 
501 	return c;
502 }
503 
504 double mhash_sumup_vcount(mhash *h) {
505 	int i;
506 	double c = 0;
507 	if (!h) return 0;
508 
509 	for ( i = 0; i < h->size; i++) {
510 		c += mlist_sumup_vcount(h->data[i]->list);
511 	}
512 
513 	return c;
514 }
515 
516