1 /*
2  pblhash.c - hash table implementation
3 
4  Copyright (C) 2002 - 2009   Peter Graf
5 
6    This file is part of PBL - The Program Base Library.
7    PBL is free software.
8 
9     This library is free software; you can redistribute it and/or
10     modify it under the terms of the GNU Lesser General Public
11     License as published by the Free Software Foundation; either
12     version 2.1 of the License, or (at your option) any later version.
13 
14     This library is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17     Lesser General Public License for more details.
18 
19     You should have received a copy of the GNU Lesser General Public
20     License along with this library; if not, write to the Free Software
21     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 
23    For more information on the Program Base Library or Peter Graf,
24    please see: http://www.mission-base.com/.
25 
26     $Log: pblhash.c,v $
27     Revision 1.18  2010/05/30 20:06:45  peter
28     Removed warnings found by 'Microsoft Visual C++ 2010'.
29 
30     Revision 1.17  2009/10/20 21:08:00  peter
31     Added the pblHtCurrentKey function.
32 
33     Revision 1.16  2009/03/15 21:29:29  peter
34     *** empty log message ***
35 
36     Revision 1.15  2009/03/11 23:48:44  peter
37     More tests and clean up.
38 
39     Revision 1.14  2009/03/08 20:56:50  peter
40     port to gcc (Ubuntu 4.3.2-1ubuntu12) 4.3.2.
41     Exposing the hash set and tree set interfaces.
42 
43 
44     Revision 1.9  2009/02/03 16:40:14  peter
45     PBL vesion 1.04, optimizations,
46     MAC OS X port, port to Microsoft Visual C++ 2008 Express Edition,
47     exposing the array list and the linked list interface
48 
49 
50     Revision 1.3  2003/10/24 23:33:13  peter
51     using a faster hash function
52     using LRU on the hash bucket lists
53 
54     Revision 1.2  2002/09/12 20:46:30  peter
55     added the isam file handling to the library
56 
57     Revision 1.1  2002/09/05 13:45:01  peter
58     Initial revision
59 */
60 
61 /*
62  * make sure "strings <exe> | grep Id | sort -u" shows the source file versions
63  */
64 char* pblhash_c_id = "$Id: pblhash.c,v 1.18 2010/05/30 20:06:45 peter Exp $";
65 
66 #include <stdio.h>
67 #include <memory.h>
68 
69 #ifndef __APPLE__
70 #include <stdlib.h>
71 #endif
72 
73 #include "pbl.h"
74 
75 /*****************************************************************************/
76 /* #defines                                                                  */
77 /*****************************************************************************/
78 #define PBL_HASHTABLE_SIZE      1019
79 
80 /*****************************************************************************/
81 /* typedefs                                                                  */
82 /*****************************************************************************/
83 
84 typedef struct pbl_hashitem_s
85 {
86     void                  * key;
87     size_t                  keylen;
88 
89     void                  * data;
90 
91     struct pbl_hashitem_s * next;
92     struct pbl_hashitem_s * prev;
93 
94     struct pbl_hashitem_s * bucketnext;
95     struct pbl_hashitem_s * bucketprev;
96 
97 } pbl_hashitem_t;
98 
99 typedef struct pbl_hashbucket_s
100 {
101     pbl_hashitem_t * head;
102     pbl_hashitem_t * tail;
103 
104 } pbl_hashbucket_t;
105 
106 struct pbl_hashtable_s
107 {
108     char             * magic;
109     int                currentdeleted;
110     pbl_hashitem_t   * head;
111     pbl_hashitem_t   * tail;
112     pbl_hashitem_t   * current;
113     pbl_hashbucket_t * buckets;
114 
115 };
116 typedef struct pbl_hashtable_s pbl_hashtable_t;
117 
118 /*****************************************************************************/
119 /* globals                                                                   */
120 /*****************************************************************************/
121 
122 /*****************************************************************************/
123 /* functions                                                                 */
124 /*****************************************************************************/
125 #ifndef WIN32
126 /*
127  * The hash function used.
128  *
129  * Taken from the wikipedia page on hash function.
130  * Not used, included just for tests and reference.
131  */
pblHt_jenkins_one_at_a_time_hash(const unsigned char * key,size_t key_len)132 unsigned int pblHt_jenkins_one_at_a_time_hash(const unsigned char * key, size_t key_len)
133 {
134     unsigned int hash = 0;
135     size_t i;
136 
137     for (i = 0; i < key_len; i++) {
138         hash += key[i];
139         hash += (hash << 10);
140         hash ^= (hash >> 6);
141     }
142     hash += (hash << 3);
143     hash ^= (hash >> 11);
144     hash += (hash << 15);
145     return hash;
146 }
147 
148 #include <stdint.h>
149 #undef get16bits
150 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
151   || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
152 #define get16bits(d) (*((const uint16_t *) (d)))
153 #endif
154 
155 #if !defined (get16bits)
156 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
157                        +(uint32_t)(((const uint8_t *)(d))[0]) )
158 #endif
159 
160 /*
161  * An alternative hash function.
162  *
163  * Copyright 2004-2008 by Paul Hsieh
164  * Not used, included just for tests and reference.
165  */
pblHt_SuperFastHash(const unsigned char * data,size_t len)166 uint32_t pblHt_SuperFastHash (const unsigned char * data, size_t len) {
167 uint32_t hash = len, tmp;
168 int rem;
169 
170     if (len <= 0 || data == NULL) return 0;
171 
172     rem = len & 3;
173     len >>= 2;
174 
175     /* Main loop */
176     for (;len > 0; len--) {
177         hash  += get16bits (data);
178         tmp    = (get16bits (data+2) << 11) ^ hash;
179         hash   = (hash << 16) ^ tmp;
180         data  += 2*sizeof (uint16_t);
181         hash  += hash >> 11;
182     }
183 
184     /* Handle end cases */
185     switch (rem) {
186         case 3: hash += get16bits (data);
187                 hash ^= hash << 16;
188                 hash ^= data[sizeof (uint16_t)] << 18;
189                 hash += hash >> 11;
190                 break;
191         case 2: hash += get16bits (data);
192                 hash ^= hash << 11;
193                 hash += hash >> 17;
194                 break;
195         case 1: hash += *data;
196                 hash ^= hash << 10;
197                 hash += hash >> 1;
198     }
199 
200     /* Force "avalanching" of final 127 bits */
201     hash ^= hash << 3;
202     hash += hash >> 5;
203     hash ^= hash << 4;
204     hash += hash >> 17;
205     hash ^= hash << 25;
206     hash += hash >> 6;
207 
208     return hash;
209 }
210 
211 #endif
212 
213 /*
214  * An alternative hash function.
215  *
216  * I took the code from the net, it contained the following:
217  *
218  * Author J. Zobel, April 2001.
219  * Permission to use this code is freely granted, provided that this
220  * statement is retained.
221  */
222 
pblHt_J_Zobel_Hash(const unsigned char * key,size_t keylen)223 int pblHt_J_Zobel_Hash( const unsigned char * key, size_t keylen )
224 {
225     int ret = 104729;
226 
227     for( ; keylen-- > 0; key++ )
228     {
229         ret ^= ( (ret << 5) + *key + (ret >> 2) );
230     }
231 
232     return ( ret & 0x7fffffff );
233 }
234 
235 /*
236  * Calculates the hash value of a buffer.
237  */
238 
pblHtHashValue(const unsigned char * key,size_t keylen)239 int pblHtHashValue( const unsigned char * key, size_t keylen )
240 {
241     return ( pblHt_J_Zobel_Hash( key, keylen ) & 0x7fffffff );
242 }
243 
244 /*
245  * Calculates the hash value of a '\o' terminated string
246  */
247 
pblHtHashValueOfString(const unsigned char * key)248 int pblHtHashValueOfString( const unsigned char * key )
249 {
250     return ( pblHt_J_Zobel_Hash( key, strlen( (char*)key ) ) & 0x7fffffff );
251 }
252 
253 /*
254  * Calculate index into hash table.
255  */
256 
pblHtHashIndex(const unsigned char * key,size_t keylen)257 static int pblHtHashIndex( const unsigned char * key, size_t keylen )
258 {
259     return( pblHtHashValue( key, keylen ) % PBL_HASHTABLE_SIZE );
260 }
261 
262 /**
263  * Create a new hash table.
264  *
265  * @return pblHashTable_t * retptr != NULL: A pointer to new hash table.
266  * @return pblHashTable_t * retptr == NULL: An error, see pbl_errno:
267  * <BR>    PBL_ERROR_OUT_OF_MEMORY: Out of memory.
268  */
pblHtCreate(void)269 pblHashTable_t * pblHtCreate( void )
270 {
271     pbl_hashtable_t * ht;
272 
273     ht = (pbl_hashtable_t *)pbl_malloc0( "pblHtCreate hashtable", sizeof( pbl_hashtable_t ) );
274     if( !ht )
275     {
276         return( 0 );
277     }
278 
279     ht->buckets = (pbl_hashbucket_t *)pbl_malloc0( "pblHtCreate buckets",
280                                sizeof( pbl_hashbucket_t ) * PBL_HASHTABLE_SIZE);
281     if( !ht->buckets )
282     {
283         PBL_FREE( ht );
284         return( 0 );
285     }
286 
287     /*
288      * set the magic marker of the hashtable
289      */
290     ht->magic = pblhash_c_id;
291 
292     return( ( pblHashTable_t * )ht );
293 }
294 
295 /**
296  * Insert a key / data pair into a hash table.
297  *
298  * Only the pointer to the data is stored in the hash table,
299  * no space is malloced for the data!
300  *
301  * @return  int ret == 0: Ok.
302  * @return  int ret == -1: An error, see pbl_errno:
303  * <BR>    PBL_ERROR_EXISTS - An item with the same key already exists.
304  * <BR>    PBL_ERROR_OUT_OF_MEMORY - Out of memory.
305  */
306 
pblHtInsert(pblHashTable_t * h,void * key,size_t keylen,void * dataptr)307 int pblHtInsert(
308 pblHashTable_t          * h,      /** Hash table to insert to             */
309 void                    * key,    /** Key to insert                       */
310 size_t                    keylen, /** Length of that key                  */
311 void                    * dataptr /** Dataptr to insert                   */
312 )
313 {
314     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
315     pbl_hashbucket_t * bucket = 0;
316     pbl_hashitem_t   * item = 0;
317 
318     int                hashval = pblHtHashIndex( key, keylen );
319 
320     bucket = ht->buckets + hashval;
321 
322     if( keylen < (size_t)1 )
323     {
324         /*
325          * the length of the key can not be smaller than 1
326          */
327         pbl_errno = PBL_ERROR_EXISTS;
328         return( -1 );
329     }
330 
331     for( item = bucket->head; item; item = item->bucketnext )
332     {
333         if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
334         {
335             snprintf( pbl_errstr, PBL_ERRSTR_LEN,
336                       "insert of duplicate item in hashtable\n" );
337             pbl_errno = PBL_ERROR_EXISTS;
338             return( -1 );
339         }
340     }
341 
342     item = (pbl_hashitem_t *)pbl_malloc0( "pblHtInsert hashitem", sizeof( pbl_hashitem_t ) );
343     if( !item )
344     {
345         return( -1 );
346     }
347 
348     item->key = pbl_memdup( "pblHtInsert item->key", key, keylen );
349     if( !item->key )
350     {
351         PBL_FREE( item );
352         return( -1 );
353     }
354     item->keylen = keylen;
355     item->data = dataptr;
356 
357     /*
358      * link the item
359      */
360     PBL_LIST_PUSH( bucket->head, bucket->tail, item, bucketnext, bucketprev );
361     PBL_LIST_APPEND( ht->head, ht->tail, item, next, prev );
362 
363     ht->current = item;
364     return( 0 );
365 }
366 
367 /**
368  * Search for a key in a hash table.
369  *
370  * @return void * retptr != NULL: The pointer to data of item found.
371  * @return void * retptr == NULL: An error, see pbl_errno:
372  * <BR>PBL_ERROR_NOT_FOUND - No item found with the given key.
373  */
374 
pblHtLookup(pblHashTable_t * h,void * key,size_t keylen)375 void * pblHtLookup(
376 pblHashTable_t              * h,      /** Hash table to search in          */
377 void                        * key,    /** Key to search                    */
378 size_t                        keylen  /** Length of that key               */
379 )
380 {
381     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
382     pbl_hashbucket_t * bucket = 0;
383     pbl_hashitem_t   * item = 0;
384 
385     int                hashval = pblHtHashIndex( key, keylen );
386 
387     bucket = ht->buckets + hashval;
388 
389     for( item = bucket->head; item; item = item->bucketnext )
390     {
391         if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
392         {
393             ht->current = item;
394             ht->currentdeleted = 0;
395 
396             /*
397              * if the item is not the first in the chain
398              */
399             if( item != bucket->head )
400             {
401                 /*
402                  * make the item the first in the chain
403                  */
404                 PBL_LIST_UNLINK( bucket->head, bucket->tail, item,
405                                  bucketnext, bucketprev );
406                 PBL_LIST_PUSH( bucket->head, bucket->tail, item,
407                                bucketnext, bucketprev );
408             }
409 
410             return( item->data );
411         }
412     }
413 
414     pbl_errno = PBL_ERROR_NOT_FOUND;
415 
416     return( 0 );
417 }
418 
419 /**
420  * Get data of first key in hash table.
421  *
422  * This call and \Ref{pblHtNext} can be used in order to loop through all items
423  * stored in a hash table.
424  *
425  * <PRE>
426    Example:
427 
428    for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
429    {
430        do something with the data pointer
431    }
432    </PRE>
433 
434  * @return void * retptr != NULL: The pointer to data of first item.
435  * @return void * retptr == NULL: An error, see pbl_errno:
436  * <BR>PBL_ERROR_NOT_FOUND - The hash table is empty.
437  */
438 
pblHtFirst(pblHashTable_t * h)439 void * pblHtFirst(
440 pblHashTable_t              * h       /** Hash table to look in            */
441 )
442 {
443     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
444     pbl_hashitem_t   * item = 0;
445 
446     item = ht->head;
447     if( item )
448     {
449         ht->current = item;
450         ht->currentdeleted = 0;
451         return( item->data );
452     }
453 
454     pbl_errno = PBL_ERROR_NOT_FOUND;
455     return( 0 );
456 }
457 
458 /**
459  * Get data of next key in hash table.
460  *
461  * This call and \Ref{pblHtFirst} can be used in order to loop through all items
462  * stored in a hash table.
463  *
464  * <PRE>
465    Example:
466 
467    for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
468    {
469        do something with the data pointer
470    }
471    </PRE>
472 
473  * @return void * retptr != NULL: The pointer to data of next item.
474  * @return void * retptr == NULL: An error, see pbl_errno:
475  * <BR>PBL_ERROR_NOT_FOUND - There is no next item in the hash table.
476  */
477 
pblHtNext(pblHashTable_t * h)478 void * pblHtNext(
479 pblHashTable_t              * h       /** Hash table to look in            */
480 )
481 {
482     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
483     pbl_hashitem_t   * item = 0;
484 
485     if( ht->current )
486     {
487         if( ht->currentdeleted )
488         {
489             item = ht->current;
490         }
491         else
492         {
493             item = ht->current->next;
494         }
495         ht->currentdeleted = 0;
496     }
497     if( item )
498     {
499         ht->current = item;
500         return( item->data );
501     }
502 
503     pbl_errno = PBL_ERROR_NOT_FOUND;
504     return( 0 );
505 }
506 
507 /**
508  * Get key of current item in hash table.
509  *
510  * Parameter keylen is optional, if it is given, it will
511  * be set to the length of the key returned.
512  *
513  * @return void * retptr != NULL: The pointer to the key of the current item.
514  * @return void * retptr == NULL: An error, see pbl_errno:
515  * <BR>PBL_ERROR_NOT_FOUND - There is no current item in the hash table.
516  */
517 
pblHtCurrentKey(pblHashTable_t * h,size_t * keylen)518 void * pblHtCurrentKey(
519 pblHashTable_t          * h,      /** Hash table to look in               */
520 size_t                  * keylen  /** OPT: Length of the key on return    */
521 )
522 {
523     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
524 
525     if( ht->current )
526     {
527         if( keylen )
528         {
529             *keylen = ht->current->keylen;
530         }
531         return( ht->current->key );
532     }
533 
534     pbl_errno = PBL_ERROR_NOT_FOUND;
535     return( 0 );
536 }
537 
538 /**
539  * Get data of current key in hash table.
540  *
541  * @return void * retptr != NULL: The pointer to data of current item.
542  * @return void * retptr == NULL: An error, see pbl_errno:
543  * <BR>PBL_ERROR_NOT_FOUND - There is no current item in the hash table.
544  */
545 
pblHtCurrent(pblHashTable_t * h)546 void * pblHtCurrent(
547 pblHashTable_t              * h       /** Hash table to look in            */
548 )
549 {
550     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
551 
552     if( ht->current )
553     {
554         return( ht->current->data );
555     }
556 
557     pbl_errno = PBL_ERROR_NOT_FOUND;
558     return( 0 );
559 }
560 
561 /**
562  * Remove an item from the hash table.
563  *
564  * Parameters key and keylen are optional, if they are not given
565  * the current record is deleted
566  *
567  * If the current record is removed the pointer to the current record
568  * is moved to the next record.
569  *
570  * <PRE>
571    Example:
572 
573    for( data = pblHtFirst( h ); data; data = pblHtRemove( h, 0, 0 ))
574    {
575        this loop removes all items from a hash table
576    }
577    </PRE>
578  *
579  * If the current record is moved by this function the next call to
580  * \Ref{pblHtNext} will return the data of the then current record.
581  * Therefore the following code does what is expected:
582  * It visits all items of the hash table and removes the expired ones.
583  *
584  * <PRE>
585    for( data = pblHtFirst( h ); data; data = pblHtNext( h ))
586    {
587        if( needs to be deleted( data ))
588        {
589            pblHtRemove( h, 0, 0 );
590        }
591    }
592    </PRE>
593 
594  * @return int ret == 0: Ok.
595  * @return int ret == -1: An error, see pbl_errno:
596  * <BR>PBL_ERROR_NOT_FOUND - The current item is not positioned
597  *                          or there is no item with the given key.
598  */
599 
pblHtRemove(pblHashTable_t * h,void * key,size_t keylen)600 int pblHtRemove(
601 pblHashTable_t            * h,     /** Hash table to remove from           */
602 void                      * key,   /** OPT: Key to remove                  */
603 size_t                      keylen /** OPT: Length of that key             */
604 )
605 {
606     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
607     pbl_hashbucket_t * bucket = 0;
608     pbl_hashitem_t   * item = 0;
609 
610     int                hashval = 0;
611 
612     if( keylen && key )
613     {
614         hashval = pblHtHashIndex( key, keylen );
615         bucket = ht->buckets + hashval;
616 
617         for( item = bucket->head; item; item = item->bucketnext )
618         {
619             if(( item->keylen == keylen ) && !memcmp( item->key, key, keylen ))
620             {
621                 break;
622             }
623         }
624     }
625     else
626     {
627         item = ht->current;
628 
629         if( item )
630         {
631             hashval = pblHtHashIndex( item->key, item->keylen );
632             bucket = ht->buckets + hashval;
633         }
634     }
635 
636     if( item )
637     {
638         if( item == ht->current )
639         {
640             ht->currentdeleted = 1;
641             ht->current = item->next;
642         }
643 
644         /*
645          * unlink the item
646          */
647         PBL_LIST_UNLINK( bucket->head, bucket->tail, item,
648                          bucketnext, bucketprev );
649         PBL_LIST_UNLINK( ht->head, ht->tail, item, next, prev );
650 
651         PBL_FREE( item->key );
652         PBL_FREE( item );
653         return( 0 );
654     }
655 
656     pbl_errno = PBL_ERROR_NOT_FOUND;
657     return( -1 );
658 }
659 
660 /**
661  * Delete a hash table.
662  *
663  * The hash table has to be empty!
664  *
665  * @return int ret == 0: Ok.
666  * @return int ret == -1: An error, see pbl_errno:
667  * @return     PBL_ERROR_EXISTS - The hash table is not empty.
668  */
669 
pblHtDelete(pblHashTable_t * h)670 int pblHtDelete(
671 pblHashTable_t * h        /** Hash table to delete */
672 )
673 {
674     pbl_hashtable_t  * ht = ( pbl_hashtable_t * )h;
675 
676     if( ht->head )
677     {
678         pbl_errno = PBL_ERROR_EXISTS;
679         return( -1 );
680     }
681 
682     PBL_FREE( ht->buckets );
683     PBL_FREE( ht );
684 
685     return( 0 );
686 }
687 
688