1  /************************************************************************/
2  /*                                                                      */
3  /*                Centre for Speech Technology Research                 */
4  /*                     University of Edinburgh, UK                      */
5  /*                       Copyright (c) 1996,1997                        */
6  /*                        All Rights Reserved.                          */
7  /*                                                                      */
8  /*  Permission is hereby granted, free of charge, to use and distribute */
9  /*  this software and its documentation without restriction, including  */
10  /*  without limitation the rights to use, copy, modify, merge, publish, */
11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
12  /*  permit persons to whom this work is furnished to do so, subject to  */
13  /*  the following conditions:                                           */
14  /*   1. The code must retain the above copyright notice, this list of   */
15  /*      conditions and the following disclaimer.                        */
16  /*   2. Any modifications must be clearly marked as such.               */
17  /*   3. Original authors' names are not deleted.                        */
18  /*   4. The authors' names are not used to endorse or promote products  */
19  /*      derived from this software without specific prior written       */
20  /*      permission.                                                     */
21  /*                                                                      */
22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
30  /*  THIS SOFTWARE.                                                      */
31  /*                                                                      */
32  /************************************************************************/
33  /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)            */
34  /************************************************************************/
35 
36 #ifndef __EST_THASH_H__
37 #define __EST_THASH_H__
38 
39 #include <iostream>
40 
41 using namespace std;
42 
43 #include "EST_String.h"
44 #include "EST_system.h"
45 #include "EST_bool.h"
46 #include "EST_TIterator.h"
47 
48 #include "instantiate/EST_THashI.h"
49 #include "instantiate/EST_TStringHashI.h"
50 
51 /**@name Hash Tables
52   *
53   * @author Richard Caley <rjc@cstr.ed.ac.uk>
54   * @version $Id: EST_THash.h,v 1.6 2004/09/29 08:24:17 robert Exp $
55   */
56 //@{
57 
58 /** This is just a convenient place to put some useful hash functions.
59   */
60 class EST_HashFunctions {
61 public:
62   /// A generally useful hash function.
63   static unsigned int DefaultHash(const void *data, size_t size, unsigned int n);
64 
65   /// A hash function for strings.
66   static  unsigned int StringHash(const EST_String &key, unsigned int size);
67 };
68 
69 template<class K, class V>  class EST_THash;
70 
71 /** This class is used in hash tables to hold a key value pair.
72   * Not much to say beyond that.
73   */
74 template<class K, class V>
75 class EST_Hash_Pair {
76 public:
77   /// The key
78   K k;
79   /// The value
80   V v;
81 
82 private:
83   /// Pointer used to chain entries into buckets.
84   EST_Hash_Pair<K,V> *next;
85 
86   /// The hash table must be a friend so it can see the pointer.
87   friend class EST_THash<K, V>;
88 };
89 
90 /** An open hash table. The number of buckets should be set to allow
91   * enough space that there are relatively few entries per bucket on
92   * average.
93   */
94 
95 template<class K, class V>
96 class EST_THash : protected EST_HashFunctions {
97 
98 private:
99   /// Something to return when there is no entry in the table.
100   static V Dummy_Value;
101   static K Dummy_Key;
102 
103   /// Total number of entries.
104   unsigned int p_num_entries;
105 
106   /// Number of buckets.
107   unsigned int p_num_buckets;
108 
109   /// Pointer to an array of <variable>p_num_buckets</variable>buckets.
110   EST_Hash_Pair<K,V> **p_buckets;
111 
112   /// The hash function to use on this table.
113   unsigned int (*p_hash_function)(const K &key, unsigned int size);
114 
115 public:
116   /** Create a table with the given number of buckets. Optionally setting
117     * a custom hash function.
118     */
119   EST_THash(int size,
120 	    unsigned int (*hash_function)(const K &key, unsigned int size)= NULL);
121 
122   /// Create a copy
123   EST_THash(const EST_THash<K,V> &from);
124 
125   /// Destroy the table.
126   ~EST_THash(void);
127 
128   /// Empty the table.
129   void clear(void);
130 
131   /// Return the total number of entries in the table.
num_entries(void)132   unsigned int num_entries(void) const
133     { return p_num_entries; };
134 
135   /// Does the key have an entry?
136   int present(const K &key) const;
137 
138   /** Return the value associated with the key.
139     * <parameter>found</parameter> is set to whether such an entry was found.
140     */
141   V &val(const K &key, int &found) const;
142 
143   /// Return the value associated with the key.
val(const K & key)144   V &val(const K &key) const {int x; return val(key, x); }
145 
146   const K &key(const V &val, int &found) const;
key(const V & val)147   const K &key(const V &val) const {int x; return key(val, x); }
148 
149   /// Copy all entries
150   void copy(const EST_THash<K,V> &from);
151 
152   /// Apply <parameter>func</parameter> to each entry in the table.
153   void map(void (*func)(K&, V&));
154 
155   /// Add an entry to the table.
156   int add_item(const K &key, const V &value, int no_search = 0);
157 
158   /// Remove an entry from the table.
159   int remove_item(const K &rkey, int quiet = 0);
160 
161   /// Assignment is a copy operation
162   EST_THash<K,V> &operator = (const EST_THash<K,V> &from);
163 
164   /// Print the table to <parameter>stream</parameter> in a human readable format.
165   void dump(ostream &stream, int all=0);
166 
167   /**@name Pair Iteration
168     *
169     * This iterator steps through the table returning key-value pairs.
170     */
171   //@{
172 protected:
173   /** A position in the table is given by a bucket number and a
174     * pointer into the bucket.
175     */
176     // struct IPointer{  unsigned int b; EST_Hash_Pair<K, V> *p; };
177     struct IPointer_s{  unsigned int b; EST_Hash_Pair<K, V> *p; };
178 
179     typedef struct IPointer_s IPointer;
180 
181   /// Shift to point at something.
skip_blank(IPointer & ip)182   void skip_blank(IPointer &ip) const
183     {
184       while (ip.p==NULL && ip.b<p_num_buckets)
185 	{ip.b++; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0; }
186     }
187 
188   /// Go to start of the table.
point_to_first(IPointer & ip)189   void point_to_first(IPointer &ip) const
190     { ip.b=0; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0;
191     skip_blank(ip);}
192 
193   /// Move pointer forwards, at the end of the bucket, move down.
move_pointer_forwards(IPointer & ip)194   void move_pointer_forwards(IPointer &ip) const
195     {
196       ip.p = ip.p->next;
197       skip_blank(ip);
198     }
199 
200   /// We are at the end if the pointer ever becomes NULL
points_to_something(const IPointer & ip)201   bool points_to_something(const IPointer &ip) const { return ip.b<p_num_buckets; }
202 
203   /// Return the contents of this entry.
points_at(const IPointer & ip)204   EST_Hash_Pair<K, V> &points_at(const IPointer &ip) { return *(ip.p); }
205 
206   /// The iterator must be a friend to access this private interface.
207   friend class EST_TStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
208   friend class EST_TRwStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
209   friend class EST_TIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
210   friend class EST_TRwIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
211 
212 public:
213   /// An entry returned by the iterator is a key value pair.
214   typedef EST_Hash_Pair<K, V> Entry;
215 
216   /// Give the iterator a sensible name.
217   typedef EST_TStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> > Entries;
218   typedef EST_TRwStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> > RwEntries;
219   //@}
220 
221   /**@name Key Iteration
222     *
223     * This iterator steps through the table returning just keys.
224     */
225   //@{
226 protected:
227   /** A position in the table is given by a bucket number and a
228     * pointer into the bucket.
229     */
230   struct IPointer_k_s {  unsigned int b; EST_Hash_Pair<K, V> *p; };
231 
232   typedef struct IPointer_k_s IPointer_k;
233 
234   /// Shift to point at something.
skip_blank(IPointer_k & ip)235   void skip_blank(IPointer_k &ip) const
236     {
237       while (ip.p==NULL && ip.b<p_num_buckets)
238 	{ip.b++; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0; }
239     }
240 
241   /// Go to start of the table.
point_to_first(IPointer_k & ip)242   void point_to_first(IPointer_k &ip) const
243     { ip.b=0; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0;
244     skip_blank(ip);}
245 
246   /// Move pointer forwards, at the end of the bucket, move down.
move_pointer_forwards(IPointer_k & ip)247   void move_pointer_forwards(IPointer_k &ip) const
248     {
249       ip.p = ip.p->next;
250       skip_blank(ip);
251     }
252 
253    /// We are at the end if the pointer ever becomes NULL
points_to_something(const IPointer_k & ip)254   bool points_to_something(const IPointer_k &ip) const { return ip.b<p_num_buckets; }
255 
256   /// Return the key of this entry.
points_at(const IPointer_k & ip)257   K &points_at(const IPointer_k &ip) { return (ip.p)->k; }
258 
259   /// The iterator must be a friend to access this private interface.
260   friend class EST_TIterator< EST_THash<K, V>, IPointer_k, K >;
261   friend class EST_TRwIterator< EST_THash<K, V>, IPointer_k, K >;
262 
263 public:
264   /// An entry returned by this iterator is just a key.
265   typedef K KeyEntry;
266 
267   /// Give the iterator a sensible name.
268   typedef EST_TIterator< EST_THash<K, V>, IPointer_k, K > KeyEntries;
269   typedef EST_TRwIterator< EST_THash<K, V>, IPointer_k, K > KeyRwEntries;
270   //@}
271 
272 };
273 
274 /** A specialised hash table for when the key is an EST_String.
275   *
276   * This is just a version of <classname>EST_THash</classname> which
277   * has a different default hash function.
278   */
279 
280 template<class V>
281 class EST_TStringHash : public EST_THash<EST_String, V> {
282 public:
283 
284   /// Create a string hash table of <parameter>size</parameter> buckets.
EST_TStringHash(int size)285   EST_TStringHash(int size) : EST_THash<EST_String, V>(size, EST_HashFunctions::StringHash) {};
286 
287   /// An entry returned by the iterator is a key value pair.
288   typedef EST_Hash_Pair<EST_String, V> Entry;
289 
290 /*    struct IPointer_s{  unsigned int b; Entry *p; };
291       typedef struct IPointer_s IPointer; */
292 
293   // Convince GCC that the IPointer we're going to use is a typename
294   typedef typename EST_THash<EST_String, V>::IPointer TN_IPointer;
295 
296   /// Give the iterator a sensible name.
297    typedef EST_TStructIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer,
298  				    EST_Hash_Pair<EST_String, V> > Entries;
299 
300    typedef EST_TRwStructIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer,
301  				    EST_Hash_Pair<EST_String, V> > RwEntries;
302   //@}
303 
304   typedef EST_String KeyEntry;
305 
306 /*  struct IPointer_k_s {  unsigned int b; EST_Hash_Pair<EST_String, V> *p; };
307     typedef struct IPointer_k_s IPointer_k; */
308 
309   /// Give the iterator a sensible name.
310 
311   // Convince GCC that the IPointer_k we're going to use is a typename
312   typedef typename EST_THash<EST_String, V>::IPointer_k TN_IPointer_k;
313 
314   typedef EST_TIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer_k,
315  			    EST_String > KeyEntries;
316   typedef EST_TRwIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer_k,
317  			    EST_String > KeyRwEntries;
318 };
319 
320 
321 /** The default hash function used by <classname>EST_THash</classname>
322   */
DefaultHashFunction(const void * data,size_t size,unsigned int n)323 inline static unsigned int DefaultHashFunction(const void *data, size_t size, unsigned int n)
324 {
325   unsigned int x=0;
326   const char *p = (const char *)data;
327   for(; size>0 ; p++, size--)
328       x = ((x+*p)*33) % n;
329   return x;
330 }
331 
332 //@}
333 #endif
334