1 /*
2 pblhash.c - hash table implementation
3
4 Copyright (C) 2002 - 2009 Peter Graf
5
6 This file is part of PBL - The Program Base Library.
7 PBL is free software.
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23 For more information on the Program Base Library or Peter Graf,
24 please see: http://www.mission-base.com/.
25
26 $Log: pblhash.c,v $
27 Revision 1.18 2010/05/30 20:06:45 peter
28 Removed warnings found by 'Microsoft Visual C++ 2010'.
29
30 Revision 1.17 2009/10/20 21:08:00 peter
31 Added the pblHtCurrentKey function.
32
33 Revision 1.16 2009/03/15 21:29:29 peter
34 *** empty log message ***
35
36 Revision 1.15 2009/03/11 23:48:44 peter
37 More tests and clean up.
38
39 Revision 1.14 2009/03/08 20:56:50 peter
40 port to gcc (Ubuntu 4.3.2-1ubuntu12) 4.3.2.
41 Exposing the hash set and tree set interfaces.
42
43
44 Revision 1.9 2009/02/03 16:40:14 peter
45 PBL vesion 1.04, optimizations,
46 MAC OS X port, port to Microsoft Visual C++ 2008 Express Edition,
47 exposing the array list and the linked list interface
48
49
50 Revision 1.3 2003/10/24 23:33:13 peter
51 using a faster hash function
52 using LRU on the hash bucket lists
53
54 Revision 1.2 2002/09/12 20:46:30 peter
55 added the isam file handling to the library
56
57 Revision 1.1 2002/09/05 13:45:01 peter
58 Initial revision
59 */
60
61 /*
62 * make sure "strings <exe> | grep Id | sort -u" shows the source file versions
63 */
64 char* pblhash_c_id = "$Id: pblhash.c,v 1.18 2010/05/30 20:06:45 peter Exp $";
65
66 #include <stdio.h>
67 #include <memory.h>
68
69 #ifndef __APPLE__
70 #include <stdlib.h>
71 #endif
72
73 #include "pbl.h"
74
75 /*****************************************************************************/
76 /* #defines */
77 /*****************************************************************************/
78 #define PBL_HASHTABLE_SIZE 1019
79
80 /*****************************************************************************/
81 /* typedefs */
82 /*****************************************************************************/
83
84 typedef struct pbl_hashitem_s
85 {
86 void * key;
87 size_t keylen;
88
89 void * data;
90
91 struct pbl_hashitem_s * next;
92 struct pbl_hashitem_s * prev;
93
94 struct pbl_hashitem_s * bucketnext;
95 struct pbl_hashitem_s * bucketprev;
96
97 } pbl_hashitem_t;
98
99 typedef struct pbl_hashbucket_s
100 {
101 pbl_hashitem_t * head;
102 pbl_hashitem_t * tail;
103
104 } pbl_hashbucket_t;
105
106 struct pbl_hashtable_s
107 {
108 char * magic;
109 int currentdeleted;
110 pbl_hashitem_t * head;
111 pbl_hashitem_t * tail;
112 pbl_hashitem_t * current;
113 pbl_hashbucket_t * buckets;
114
115 };
116 typedef struct pbl_hashtable_s pbl_hashtable_t;
117
118 /*****************************************************************************/
119 /* globals */
120 /*****************************************************************************/
121
122 /*****************************************************************************/
123 /* functions */
124 /*****************************************************************************/
125 #ifndef WIN32
126 /*
127 * The hash function used.
128 *
129 * Taken from the wikipedia page on hash function.
130 * Not used, included just for tests and reference.
131 */
pblHt_jenkins_one_at_a_time_hash(const unsigned char * key,size_t key_len)132 unsigned int pblHt_jenkins_one_at_a_time_hash(const unsigned char * key, size_t key_len)
133 {
134 unsigned int hash = 0;
135 size_t i;
136
137 for (i = 0; i < key_len; i++) {
138 hash += key[i];
139 hash += (hash << 10);
140 hash ^= (hash >> 6);
141 }
142 hash += (hash << 3);
143 hash ^= (hash >> 11);
144 hash += (hash << 15);
145 return hash;
146 }
147
148 #include <stdint.h>
149 #undef get16bits
150 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
151 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
152 #define get16bits(d) (*((const uint16_t *) (d)))
153 #endif
154
155 #if !defined (get16bits)
156 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
157 +(uint32_t)(((const uint8_t *)(d))[0]) )
158 #endif
159
160 /*
161 * An alternative hash function.
162 *
163 * Copyright 2004-2008 by Paul Hsieh
164 * Not used, included just for tests and reference.
165 */
pblHt_SuperFastHash(const unsigned char * data,size_t len)166 uint32_t pblHt_SuperFastHash (const unsigned char * data, size_t len) {
167 uint32_t hash = len, tmp;
168 int rem;
169
170 if (len <= 0 || data == NULL) return 0;
171
172 rem = len & 3;
173 len >>= 2;
174
175 /* Main loop */
176 for (;len > 0; len--) {
177 hash += get16bits (data);
178 tmp = (get16bits (data+2) << 11) ^ hash;
179 hash = (hash << 16) ^ tmp;
180 data += 2*sizeof (uint16_t);
181 hash += hash >> 11;
182 }
183
184 /* Handle end cases */
185 switch (rem) {
186 case 3: hash += get16bits (data);
187 hash ^= hash << 16;
188 hash ^= data[sizeof (uint16_t)] << 18;
189 hash += hash >> 11;
190 break;
191 case 2: hash += get16bits (data);
192 hash ^= hash << 11;
193 hash += hash >> 17;
194 break;
195 case 1: hash += *data;
196 hash ^= hash << 10;
197 hash += hash >> 1;
198 }
199
200 /* Force "avalanching" of final 127 bits */
201 hash ^= hash << 3;
202 hash += hash >> 5;
203 hash ^= hash << 4;
204 hash += hash >> 17;
205 hash ^= hash << 25;
206 hash += hash >> 6;
207
208 return hash;
209 }
210
211 #endif
212
213 /*
214 * An alternative hash function.
215 *
216 * I took the code from the net, it contained the following:
217 *
218 * Author J. Zobel, April 2001.
219 * Permission to use this code is freely granted, provided that this
220 * statement is retained.
221 */
222
pblHt_J_Zobel_Hash(const unsigned char * key,size_t keylen)223 int pblHt_J_Zobel_Hash( const unsigned char * key, size_t keylen )
224 {
225 int ret = 104729;
226
227 for( ; keylen-- > 0; key++ )
228 {
229 ret ^= ( (ret << 5) + *key + (ret >> 2) );
230 }
231
232 return ( ret & 0x7fffffff );
233 }
234
235 /*
236 * Calculates the hash value of a buffer.
237 */
238
pblHtHashValue(const unsigned char * key,size_t keylen)239 int pblHtHashValue( const unsigned char * key, size_t keylen )
240 {
241 return ( pblHt_J_Zobel_Hash( key, keylen ) & 0x7fffffff );
242 }
243
244 /*
245 * Calculates the hash value of a '\o' terminated string
246 */
247
pblHtHashValueOfString(const unsigned char * key)248 int pblHtHashValueOfString( const unsigned char * key )
249 {
250 return ( pblHt_J_Zobel_Hash( key, strlen( (char*)key ) ) & 0x7fffffff );
251 }
252
253 /*
254 * Calculate index into hash table.
255 */
256
pblHtHashIndex(const unsigned char * key,size_t keylen)257 static int pblHtHashIndex( const unsigned char * key, size_t keylen )
258 {
259 return( pblHtHashValue( key, keylen ) % PBL_HASHTABLE_SIZE );
260 }
261
262 /**
263 * Create a new hash table.
264 *
265 * @return pblHashTable_t * retptr != NULL: A pointer to new hash table.
266 * @return pblHashTable_t * retptr == NULL: An error, see pbl_errno:
267 * <BR> PBL_ERROR_OUT_OF_MEMORY: Out of memory.
268 */
pblHtCreate(void)269 pblHashTable_t * pblHtCreate( void )
270 {
271 pbl_hashtable_t * ht;
272
273 ht = (pbl_hashtable_t *)pbl_malloc0( "pblHtCreate hashtable", sizeof( pbl_hashtable_t ) );
274 if( !ht )
275 {
276 return( 0 );
277 }
278
279 ht->buckets = (pbl_hashbucket_t *)pbl_malloc0( "pblHtCreate buckets",
280 sizeof( pbl_hashbucket_t ) * PBL_HASHTABLE_SIZE);
281 if( !ht->buckets )
282 {
283 PBL_FREE( ht );
284 return( 0 );
285 }
286
287 /*
288 * set the magic marker of the hashtable
289 */
290 ht->magic = pblhash_c_id;
291
292 return( ( pblHashTable_t * )ht );
293 }
294
295 /**
296 * Insert a key / data pair into a hash table.
297 *
298 * Only the pointer to the data is stored in the hash table,
299 * no space is malloced for the data!
300 *
301 * @return int ret == 0: Ok.
302 * @return int ret == -1: An error, see pbl_errno:
303 * <BR> PBL_ERROR_EXISTS - An item with the same key already exists.
304 * <BR> PBL_ERROR_OUT_OF_MEMORY - Out of memory.
305 */
306
pblHtInsert(pblHashTable_t * h,void * key,size_t keylen,void * dataptr)307 int pblHtInsert(
308 pblHashTable_t * h, /** Hash table to insert to */
309 void * key, /** Key to insert */
310 size_t keylen, /** Length of that key */
311 void * dataptr /** Dataptr to insert */
312 )
313 {
314 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
315 pbl_hashbucket_t * bucket = 0;
316 pbl_hashitem_t * item = 0;
317
318 int hashval = pblHtHashIndex( key, keylen );
319
320 bucket = ht->buckets + hashval;
321
322 if( keylen < (size_t)1 )
323 {
324 /*
325 * the length of the key can not be smaller than 1
326 */
327 pbl_errno = PBL_ERROR_EXISTS;
328 return( -1 );
329 }
330
331 for( item = bucket->head; item; item = item->bucketnext )
332 {
333 if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
334 {
335 snprintf( pbl_errstr, PBL_ERRSTR_LEN,
336 "insert of duplicate item in hashtable\n" );
337 pbl_errno = PBL_ERROR_EXISTS;
338 return( -1 );
339 }
340 }
341
342 item = (pbl_hashitem_t *)pbl_malloc0( "pblHtInsert hashitem", sizeof( pbl_hashitem_t ) );
343 if( !item )
344 {
345 return( -1 );
346 }
347
348 item->key = pbl_memdup( "pblHtInsert item->key", key, keylen );
349 if( !item->key )
350 {
351 PBL_FREE( item );
352 return( -1 );
353 }
354 item->keylen = keylen;
355 item->data = dataptr;
356
357 /*
358 * link the item
359 */
360 PBL_LIST_PUSH( bucket->head, bucket->tail, item, bucketnext, bucketprev );
361 PBL_LIST_APPEND( ht->head, ht->tail, item, next, prev );
362
363 ht->current = item;
364 return( 0 );
365 }
366
367 /**
368 * Search for a key in a hash table.
369 *
370 * @return void * retptr != NULL: The pointer to data of item found.
371 * @return void * retptr == NULL: An error, see pbl_errno:
372 * <BR>PBL_ERROR_NOT_FOUND - No item found with the given key.
373 */
374
pblHtLookup(pblHashTable_t * h,void * key,size_t keylen)375 void * pblHtLookup(
376 pblHashTable_t * h, /** Hash table to search in */
377 void * key, /** Key to search */
378 size_t keylen /** Length of that key */
379 )
380 {
381 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
382 pbl_hashbucket_t * bucket = 0;
383 pbl_hashitem_t * item = 0;
384
385 int hashval = pblHtHashIndex( key, keylen );
386
387 bucket = ht->buckets + hashval;
388
389 for( item = bucket->head; item; item = item->bucketnext )
390 {
391 if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
392 {
393 ht->current = item;
394 ht->currentdeleted = 0;
395
396 /*
397 * if the item is not the first in the chain
398 */
399 if( item != bucket->head )
400 {
401 /*
402 * make the item the first in the chain
403 */
404 PBL_LIST_UNLINK( bucket->head, bucket->tail, item,
405 bucketnext, bucketprev );
406 PBL_LIST_PUSH( bucket->head, bucket->tail, item,
407 bucketnext, bucketprev );
408 }
409
410 return( item->data );
411 }
412 }
413
414 pbl_errno = PBL_ERROR_NOT_FOUND;
415
416 return( 0 );
417 }
418
419 /**
420 * Get data of first key in hash table.
421 *
422 * This call and \Ref{pblHtNext} can be used in order to loop through all items
423 * stored in a hash table.
424 *
425 * <PRE>
426 Example:
427
428 for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
429 {
430 do something with the data pointer
431 }
432 </PRE>
433
434 * @return void * retptr != NULL: The pointer to data of first item.
435 * @return void * retptr == NULL: An error, see pbl_errno:
436 * <BR>PBL_ERROR_NOT_FOUND - The hash table is empty.
437 */
438
pblHtFirst(pblHashTable_t * h)439 void * pblHtFirst(
440 pblHashTable_t * h /** Hash table to look in */
441 )
442 {
443 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
444 pbl_hashitem_t * item = 0;
445
446 item = ht->head;
447 if( item )
448 {
449 ht->current = item;
450 ht->currentdeleted = 0;
451 return( item->data );
452 }
453
454 pbl_errno = PBL_ERROR_NOT_FOUND;
455 return( 0 );
456 }
457
458 /**
459 * Get data of next key in hash table.
460 *
461 * This call and \Ref{pblHtFirst} can be used in order to loop through all items
462 * stored in a hash table.
463 *
464 * <PRE>
465 Example:
466
467 for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
468 {
469 do something with the data pointer
470 }
471 </PRE>
472
473 * @return void * retptr != NULL: The pointer to data of next item.
474 * @return void * retptr == NULL: An error, see pbl_errno:
475 * <BR>PBL_ERROR_NOT_FOUND - There is no next item in the hash table.
476 */
477
pblHtNext(pblHashTable_t * h)478 void * pblHtNext(
479 pblHashTable_t * h /** Hash table to look in */
480 )
481 {
482 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
483 pbl_hashitem_t * item = 0;
484
485 if( ht->current )
486 {
487 if( ht->currentdeleted )
488 {
489 item = ht->current;
490 }
491 else
492 {
493 item = ht->current->next;
494 }
495 ht->currentdeleted = 0;
496 }
497 if( item )
498 {
499 ht->current = item;
500 return( item->data );
501 }
502
503 pbl_errno = PBL_ERROR_NOT_FOUND;
504 return( 0 );
505 }
506
507 /**
508 * Get key of current item in hash table.
509 *
510 * Parameter keylen is optional, if it is given, it will
511 * be set to the length of the key returned.
512 *
513 * @return void * retptr != NULL: The pointer to the key of the current item.
514 * @return void * retptr == NULL: An error, see pbl_errno:
515 * <BR>PBL_ERROR_NOT_FOUND - There is no current item in the hash table.
516 */
517
pblHtCurrentKey(pblHashTable_t * h,size_t * keylen)518 void * pblHtCurrentKey(
519 pblHashTable_t * h, /** Hash table to look in */
520 size_t * keylen /** OPT: Length of the key on return */
521 )
522 {
523 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
524
525 if( ht->current )
526 {
527 if( keylen )
528 {
529 *keylen = ht->current->keylen;
530 }
531 return( ht->current->key );
532 }
533
534 pbl_errno = PBL_ERROR_NOT_FOUND;
535 return( 0 );
536 }
537
538 /**
539 * Get data of current key in hash table.
540 *
541 * @return void * retptr != NULL: The pointer to data of current item.
542 * @return void * retptr == NULL: An error, see pbl_errno:
543 * <BR>PBL_ERROR_NOT_FOUND - There is no current item in the hash table.
544 */
545
pblHtCurrent(pblHashTable_t * h)546 void * pblHtCurrent(
547 pblHashTable_t * h /** Hash table to look in */
548 )
549 {
550 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
551
552 if( ht->current )
553 {
554 return( ht->current->data );
555 }
556
557 pbl_errno = PBL_ERROR_NOT_FOUND;
558 return( 0 );
559 }
560
561 /**
562 * Remove an item from the hash table.
563 *
564 * Parameters key and keylen are optional, if they are not given
565 * the current record is deleted
566 *
567 * If the current record is removed the pointer to the current record
568 * is moved to the next record.
569 *
570 * <PRE>
571 Example:
572
573 for( data = pblHtFirst( h ); data; data = pblHtRemove( h, 0, 0 ))
574 {
575 this loop removes all items from a hash table
576 }
577 </PRE>
578 *
579 * If the current record is moved by this function the next call to
580 * \Ref{pblHtNext} will return the data of the then current record.
581 * Therefore the following code does what is expected:
582 * It visits all items of the hash table and removes the expired ones.
583 *
584 * <PRE>
585 for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
586 {
587 if( needs to be deleted( data ))
588 {
589 pblHtRemove( h, 0, 0 );
590 }
591 }
592 </PRE>
593
594 * @return int ret == 0: Ok.
595 * @return int ret == -1: An error, see pbl_errno:
596 * <BR>PBL_ERROR_NOT_FOUND - The current item is not positioned
597 * or there is no item with the given key.
598 */
599
pblHtRemove(pblHashTable_t * h,void * key,size_t keylen)600 int pblHtRemove(
601 pblHashTable_t * h, /** Hash table to remove from */
602 void * key, /** OPT: Key to remove */
603 size_t keylen /** OPT: Length of that key */
604 )
605 {
606 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
607 pbl_hashbucket_t * bucket = 0;
608 pbl_hashitem_t * item = 0;
609
610 int hashval = 0;
611
612 if( keylen && key )
613 {
614 hashval = pblHtHashIndex( key, keylen );
615 bucket = ht->buckets + hashval;
616
617 for( item = bucket->head; item; item = item->bucketnext )
618 {
619 if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
620 {
621 break;
622 }
623 }
624 }
625 else
626 {
627 item = ht->current;
628
629 if( item )
630 {
631 hashval = pblHtHashIndex( item->key, item->keylen );
632 bucket = ht->buckets + hashval;
633 }
634 }
635
636 if( item )
637 {
638 if( item == ht->current )
639 {
640 ht->currentdeleted = 1;
641 ht->current = item->next;
642 }
643
644 /*
645 * unlink the item
646 */
647 PBL_LIST_UNLINK( bucket->head, bucket->tail, item,
648 bucketnext, bucketprev );
649 PBL_LIST_UNLINK( ht->head, ht->tail, item, next, prev );
650
651 PBL_FREE( item->key );
652 PBL_FREE( item );
653 return( 0 );
654 }
655
656 pbl_errno = PBL_ERROR_NOT_FOUND;
657 return( -1 );
658 }
659
660 /**
661 * Delete a hash table.
662 *
663 * The hash table has to be empty!
664 *
665 * @return int ret == 0: Ok.
666 * @return int ret == -1: An error, see pbl_errno:
667 * @return PBL_ERROR_EXISTS - The hash table is not empty.
668 */
669
pblHtDelete(pblHashTable_t * h)670 int pblHtDelete(
671 pblHashTable_t * h /** Hash table to delete */
672 )
673 {
674 pbl_hashtable_t * ht = ( pbl_hashtable_t * )h;
675
676 if( ht->head )
677 {
678 pbl_errno = PBL_ERROR_EXISTS;
679 return( -1 );
680 }
681
682 PBL_FREE( ht->buckets );
683 PBL_FREE( ht );
684
685 return( 0 );
686 }
687
688